From b93d6dcade77261e52df5ca15f9a889370be2e49 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Nov 2024 07:18:05 -0600 Subject: [PATCH 1/3] Avoid persisting Futures Closes https://github.com/dask/dask-ml/issues/1003 --- dask_ml/model_selection/_incremental.py | 33 +++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/dask_ml/model_selection/_incremental.py b/dask_ml/model_selection/_incremental.py index 2d1d7255e..3c8abce19 100644 --- a/dask_ml/model_selection/_incremental.py +++ b/dask_ml/model_selection/_incremental.py @@ -248,7 +248,19 @@ def get_futures(partial_fit_calls): _specs[ident] = spec if DISTRIBUTED_2021_02_0: - _models, _scores, _specs = dask.persist(_models, _scores, _specs) + # https://github.com/dask/dask-ml/issues/1003 + # We only want to persist dask collections, not Futures. + # So we build a collection without futures and bring them back later. + to_persist = { + "models": {k: v for k, v in _models.items() if not isinstance(v, Future)}, + "scores": {k: v for k, v in _scores.items() if not isinstance(v, Future)}, + "specs": {k: v for k, v in _specs.items() if not isinstance(v, Future)}, + } + models_p, scores_p, specs_p = dask.persist(*list(to_persist.values())) + # Update with keys not present, which should just be futures + _models = {**_models, **models_p} + _scores = {**_scores, **scores_p} + _specs = {**_specs, **specs_p} else: _models, _scores, _specs = dask.persist( _models, _scores, _specs, priority={tuple(_specs.values()): -1} @@ -315,7 +327,24 @@ def get_futures(partial_fit_calls): _specs[ident] = spec if DISTRIBUTED_2021_02_0: - _models2, _scores2, _specs2 = dask.persist(_models, _scores, _specs) + # https://github.com/dask/dask-ml/issues/1003 + # We only want to persist dask collections, not Futures. + # So we build a collection without futures and bring them back later. + to_persist = { + "models": { + k: v for k, v in _models.items() if not isinstance(v, Future) + }, + "scores": { + k: v for k, v in _scores.items() if not isinstance(v, Future) + }, + "specs": {k: v for k, v in _specs.items() if not isinstance(v, Future)}, + } + models2_p, scores2_p, specs2_p = dask.persist(*list(to_persist.values())) + # Update with keys not present, which should just be futures + _models2 = {**_models, **models2_p} + _scores2 = {**_scores, **scores2_p} + _specs2 = {**_specs, **specs2_p} + else: _models2, _scores2, _specs2 = dask.persist( _models, _scores, _specs, priority={tuple(_specs.values()): -1} From 731fbd3d053c54e71919fe9bc28fe7b9bd308fe3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Nov 2024 07:39:35 -0600 Subject: [PATCH 2/3] ci - 1 --- .github/workflows/tests.yaml | 31 +++++++++++++------------------ ci/install.sh | 15 +++++---------- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 03ba10e36..af4a42d7e 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -10,14 +10,14 @@ jobs: matrix: # os: ["windows-latest", "ubuntu-latest", "macos-latest"] os: ["ubuntu-latest"] - python-version: ["3.9", "3.10", "3.11"] - query-planning: [true, false] + python-version: ["3.10", "3.11", "3.12", "3.13"] + # query-planning: [true, false] env: PYTHON_VERSION: ${{ matrix.python-version }} PARALLEL: "true" COVERAGE: "true" - DASK_DATAFRAME__QUERY_PLANNING: ${{ matrix.query-planning }} + # DASK_DATAFRAME__QUERY_PLANNING: ${{ matrix.query-planning }} steps: - name: Checkout source @@ -25,22 +25,17 @@ jobs: with: fetch-depth: 0 # Needed by codecov.io - - name: Setup Conda Environment - uses: conda-incubator/setup-miniconda@v2 + - name: Install the latest version of uv + uses: astral-sh/setup-uv@v3 with: - miniforge-variant: Mambaforge - miniforge-version: latest - use-mamba: true - channel-priority: strict - python-version: ${{ matrix.python-version }} - environment-file: ci/environment-${{ matrix.python-version }}.yaml - activate-environment: test-environment - auto-activate-base: false - - - name: Install - shell: bash -l {0} - run: source ci/install.sh + enable-cache: true + + - name: Install Python + run: uv python install ${{ matrix.python-version }} + + - name: Install dask-ml + run: uv pip install -e .[dev] - name: Run tests shell: bash -l {0} - run: pytest -v + run: uv run pytest -v diff --git a/ci/install.sh b/ci/install.sh index 788fb2042..c6cdb0170 100644 --- a/ci/install.sh +++ b/ci/install.sh @@ -1,22 +1,17 @@ +uv pip install -e .[dev] # Optionally, install development versions of dependenies if [[ ${UPSTREAM_DEV} ]]; then - # FIXME https://github.com/mamba-org/mamba/issues/412 - # mamba uninstall --force dask distributed scikit-learn - conda uninstall --force dask distributed scikit-learn - - python -m pip install --no-deps --pre \ + uv pip install --no-deps --pre \ -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \ scikit-learn - python -m pip install \ + uv pip install \ --upgrade \ git+https://github.com/dask/dask \ git+https://github.com/dask/distributed fi # Install dask-ml -python -m pip install --quiet --no-deps -e . - -echo mamba list -mamba list \ No newline at end of file +uv pip install --no-deps -e . +uv pip tree From 41584527da498f1b87c4aa654600a3af885f640d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Nov 2024 07:42:03 -0600 Subject: [PATCH 3/3] ci - 2 --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index af4a42d7e..db451d2b9 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -28,7 +28,7 @@ jobs: - name: Install the latest version of uv uses: astral-sh/setup-uv@v3 with: - enable-cache: true + enable-cache: false - name: Install Python run: uv python install ${{ matrix.python-version }}