Try to fix lint workflow

milesgranger committed Feb 29, 2024 · 1 parent f7c9950 · commit 17fb95f

Showing 16 changed files with 22 additions and 31 deletions.

.github/workflows/lint.yaml (5 additions & 3 deletions)

@@ -5,6 +5,8 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
-      - uses: pre-commit/[email protected]
+      - uses: actions/[email protected]
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+      - uses: pre-commit/[email protected]

.pre-commit-config.yaml (7 additions & 5 deletions)

@@ -1,17 +1,19 @@
 repos:
-  - repo: https://github.com/python/black
-    rev: 22.3.0
+  - repo: https://github.com/psf/black
+    rev: 23.12.1
     hooks:
       - id: black
         language_version: python3
+        args:
+          - --target-version=py39
   - repo: https://github.com/pycqa/flake8
-    rev: 3.7.9
+    rev: 7.0.0
     hooks:
       - id: flake8
         language_version: python3
         args: ["--ignore=E501,W503,E203,E741,E731"]
-  - repo: https://github.com/timothycrosley/isort
-    rev: 4.3.21
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
     hooks:
       - id: isort
         language_version: python3

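The new --target-version=py39 argument is what drives the trailing-comma churn in the Python hunks below: Black only adds its magic trailing comma after *args/**kwargs parameters when every targeted Python version accepts that syntax (3.6 and newer), so declaring the target lets Black 23.12.1 finish off multi-line signatures like those in dask_ml/_partial.py and dask_ml/metrics/pairwise.py. A minimal before/after sketch with a hypothetical function, not code from the repo:

    # Before: with no explicit target, Black leaves the bare **kwargs alone.
    def fit(
        model,
        shuffle_blocks=True,
        **kwargs
    ):
        ...

    # After --target-version=py39: the signature gains a trailing comma,
    # which is legal syntax on Python 3.6+.
    def fit(
        model,
        shuffle_blocks=True,
        **kwargs,
    ):
        ...
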
dask_ml/_partial.py (1 addition & 1 deletion)

@@ -29,7 +29,7 @@ def fit(
     shuffle_blocks=True,
     random_state=None,
     assume_equal_chunks=False,
-    **kwargs
+    **kwargs,
 ):
     """Fit scikit learn model against dask arrays

dask_ml/metrics/pairwise.py (3 additions & 3 deletions)

@@ -51,7 +51,7 @@ def pairwise_distances(
     Y: ArrayLike,
     metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean",
     n_jobs: Optional[int] = None,
-    **kwargs: Any
+    **kwargs: Any,
 ):
     if isinstance(Y, da.Array):
         raise TypeError("`Y` must be a numpy array")

@@ -62,7 +62,7 @@
         dtype=float,
         chunks=chunks,
         metric=metric,
-        **kwargs
+        **kwargs,
     )

@@ -203,7 +203,7 @@ def pairwise_kernels(
     metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "linear",
     filter_params: bool = False,
     n_jobs: Optional[int] = 1,
-    **kwds
+    **kwds,
 ):
     from sklearn.gaussian_process.kernels import Kernel as GPKernel

dask_ml/metrics/regression.py (0 additions & 1 deletion)

@@ -187,7 +187,6 @@ def mean_squared_log_error(
     multioutput: Optional[str] = "uniform_average",
     compute: bool = True,
 ) -> ArrayLike:
-
     result = mean_squared_error(
         np.log1p(y_true),
         np.log1p(y_pred),

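This deletion, and the identical ones in _hyperband.py, _incremental.py, _search.py, preprocessing/data.py, and several tests below, all drop a blank line sitting immediately after a def or class statement. That is most plausibly the Black 22.3.0 -> 23.12.1 upgrade at work: Black's 2023 stable style removes empty lines directly after a function signature or class line, so these hunks are mechanical reformatting rather than behavior changes. A hypothetical before/after:

    # Accepted by Black 22.x:
    def mean_squared_log_error(y_true, y_pred):

        return ...

    # Black 23.x pulls the body up to the signature:
    def mean_squared_log_error(y_true, y_pred):
        return ...
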
dask_ml/model_selection/_hyperband.py (0 additions & 1 deletion)

@@ -530,7 +530,6 @@ def _get_meta(
     SHAs: Dict[int, SuccessiveHalvingSearchCV],
     key: Callable[[int, int], str],
 ) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]:
-
     meta_ = []
     history_ = {}
     for bracket in brackets:

dask_ml/model_selection/_incremental.py (0 additions & 1 deletion)

@@ -1009,7 +1009,6 @@ def __init__(
     predict_proba_meta=None,
     transform_meta=None,
 ):
-
     self.n_initial_parameters = n_initial_parameters
     self.decay_rate = decay_rate
     self.fits_per_score = fits_per_score

dask_ml/model_selection/_search.py (2 additions & 3 deletions)

@@ -80,7 +80,6 @@
 _check_param_grid = None

 if SK_VERSION <= packaging.version.parse("0.21.dev0"):
-
     _RETURN_TRAIN_SCORE_DEFAULT = "warn"

     def handle_deprecated_train_score(results, return_train_score):

@@ -414,7 +413,7 @@ def do_fit_and_score(
     xtest = X_test + (n,)
     ytest = y_test + (n,)

-    for (name, m) in fit_ests:
+    for name, m in fit_ests:
         dsk[(score_name, m, n)] = (
             score,
             (name, m, n),

@@ -879,7 +878,7 @@ def _do_featureunion(
     fit_steps = []
     tr_Xs = []
-    for (step_name, step) in est.transformer_list:
+    for step_name, step in est.transformer_list:
         fits, out_Xs = _do_fit_step(
             dsk,
             next_token,

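The two for-loop hunks drop redundant parentheses around tuple-unpacking targets; `for (name, m) in ...` and `for name, m in ...` are semantically identical, and the bare form is the idiomatic one (presumably the updated Black normalizes it). A quick hypothetical illustration:

    pairs = [("a", 1), ("b", 2)]

    # Parenthesized and bare targets unpack exactly the same way:
    for (key, value) in pairs:
        print(key, value)

    for key, value in pairs:
        print(key, value)
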
dask_ml/preprocessing/_block_transformer.py (1 addition & 1 deletion)

@@ -69,7 +69,7 @@ def __init__(
     func: Callable[..., Union[ArrayLike, DataFrameType]],
     *,
     validate: bool = False,
-    **kw_args: Any
+    **kw_args: Any,
 ):
     self.func: Callable[..., Union[ArrayLike, DataFrameType]] = func
     self.validate = validate

dask_ml/preprocessing/data.py (0 additions & 4 deletions)

@@ -50,7 +50,6 @@ def _handle_zeros_in_scale(scale: NDArrayOrScalar, copy=True):


 class StandardScaler(DaskMLBaseMixin, sklearn.preprocessing.StandardScaler):
-
     __doc__ = sklearn.preprocessing.StandardScaler.__doc__

     def fit(

@@ -120,7 +119,6 @@ def inverse_transform(


 class MinMaxScaler(sklearn.preprocessing.MinMaxScaler):
-
     __doc__ = sklearn.preprocessing.MinMaxScaler.__doc__

     def fit(

@@ -202,7 +200,6 @@ def inverse_transform(


 class RobustScaler(sklearn.preprocessing.RobustScaler):
-
     __doc__ = sklearn.preprocessing.RobustScaler.__doc__

     def _check_array(

@@ -407,7 +404,6 @@ def _transform_col(
         X_col[lower_bounds_idx] = lower_bound_y

         if not inverse:
-
             if output_distribution == "normal":
                 X_col = X_col.map_blocks(stats.norm.ppf)
                 # find the value to clip the data to avoid mapping to

docs/dimensions.py (1 addition & 2 deletions)

@@ -1,6 +1,5 @@
-import numpy as np
-
 import matplotlib.pyplot as plt
+import numpy as np


 def draw_brace(ax, xspan, text):

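With isort rehomed to pycqa/isort and bumped to 5.13.2, both third-party imports now sit in a single alphabetized block (matplotlib before numpy) instead of numpy being split out on its own, presumably just isort 5's default sectioning; there is no runtime effect.
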
tests/feature_extraction/test_text.py (1 addition & 1 deletion)

@@ -33,7 +33,7 @@
 )
 def test_basic(vect, container):
     b = db.from_sequence(JUNK_FOOD_DOCS, npartitions=2)
-    if type(vect) == dask_ml.feature_extraction.text.FeatureHasher:
+    if type(vect) is dask_ml.feature_extraction.text.FeatureHasher:
         b = b.str.split()
     elif container == "series":
         b = b.to_dataframe(columns=["text"])["text"]

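Here, and in test_hyperband.py below, `==` becomes `is` when comparing types. The flake8 7.0.0 bump brings a stricter pycodestyle E721 check ("do not compare types, for exact checks use `is`"): classes are singleton objects, so identity is the precise way to demand an exact type, while isinstance stays the right tool when subclasses should also pass. A small sketch:

    class Base: ...
    class Child(Base): ...

    obj = Child()

    print(type(obj) is Child)      # True: exact type via identity
    print(type(obj) is Base)       # False: identity ignores inheritance
    print(isinstance(obj, Base))   # True: isinstance allows subclasses
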
tests/model_selection/test_hyperband.py (1 addition & 1 deletion)

@@ -85,7 +85,7 @@ async def test_basic(c, s, a, b, array_type, library, max_iter):
     # validation/test set.
     assert abs(score - search.best_score_) < 0.1

-    assert type(search.best_estimator_) == type(model)
+    assert type(search.best_estimator_) is type(model)
     assert isinstance(search.best_params_, dict)

     num_fit_models = len(set(search.cv_results_["model_id"]))

tests/model_selection/test_pytorch.py (0 additions & 1 deletion)

@@ -27,7 +27,6 @@ def forward(self, x):

 @gen_cluster(client=True)
 async def test_pytorch(c, s, a, b):
-
     n_features = 10
     defaults = {
         "callbacks": False,

tests/preprocessing/test_label.py (0 additions & 1 deletion)

@@ -107,7 +107,6 @@ def test_fit_transform_categorical(self):
     )
     @pytest.mark.parametrize("array", [y, s])
     def test_inverse_transform(self, array):
-
         a = dpp.LabelEncoder()
         assert_eq_ar(a.inverse_transform(a.fit_transform(array)), da.asarray(array))

tests/test_pca.py (0 additions & 2 deletions)

@@ -373,12 +373,10 @@ def test_pca_validation():
     # But dask-ml needs tall and skinny
     for data in [X]:
         for n_components in [-1, 3]:
-
             with pytest.raises(ValueError, match="n_components"):
                 dd.PCA(n_components, svd_solver=solver).fit(data)

         if solver == "arpack":
-
             n_components = smallest_d
             with pytest.raises(ValueError, match="n_components"):
                 dd.PCA(n_components, svd_solver=solver).fit(data)

