From b5eb324eaf3be0f842195f30a7f47ab0e7221f67 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 20 Jan 2024 20:08:23 +0000 Subject: [PATCH 1/8] Remove ibis version cap --- fugue_ibis/execution_engine.py | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fugue_ibis/execution_engine.py b/fugue_ibis/execution_engine.py index 1c8b3c4c..39685be6 100644 --- a/fugue_ibis/execution_engine.py +++ b/fugue_ibis/execution_engine.py @@ -96,7 +96,7 @@ def join( suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX)) else: # pragma: no cover # breaking change in ibis 6.0 - suffixes = dict(lname="", rname=_JOIN_RIGHT_SUFFIX) + suffixes = dict(lname="", rname="{name}" + _JOIN_RIGHT_SUFFIX) if how.lower() == "cross": tb = _df1.native.cross_join(_df2.native, **suffixes) elif how.lower() == "right_outer": diff --git a/setup.py b/setup.py index 8b2e2dff..31358669 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ def get_version() -> str: "numpy", ], "polars": ["polars"], - "ibis": SQL_DEPENDENCIES + ["ibis-framework>=3.2.0,<6"], + "ibis": SQL_DEPENDENCIES + ["ibis-framework"], "notebook": ["notebook", "jupyterlab", "ipython>=7.10.0"], "all": SQL_DEPENDENCIES + [ @@ -71,7 +71,7 @@ def get_version() -> str: "duckdb>=0.5.0", "pyarrow>=6.0.1", "pandas>=2.0.2", - "ibis-framework>=3.2.0,<6", + "ibis-framework", "polars", ], }, From 90818438811b79a2b9119f53824f9d85e3e1353c Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 20 Jan 2024 20:22:19 +0000 Subject: [PATCH 2/8] fix ray --- setup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 31358669..100b12a9 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,12 @@ def get_version() -> str: "pyarrow>=7.0.0", "pandas>=2.0.2", ], - "ray": ["ray[data]>=2.4.0", "duckdb>=0.5.0", "pyarrow>=6.0.1"], + "ray": [ + "ray[data]>=2.4.0", + "duckdb>=0.5.0", + "pyarrow>=6.0.1", + "pandas<2.2", + ], "duckdb": SQL_DEPENDENCIES + [ "duckdb>=0.5.0", @@ -70,7 +75,7 @@ def get_version() -> str: "ipython>=7.10.0", "duckdb>=0.5.0", "pyarrow>=6.0.1", - "pandas>=2.0.2", + "pandas>=2.0.2,<2.2", # because of Ray "ibis-framework", "polars", ], From ba009cb4e1279d8088173085a5510d7c8eed14bb Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sun, 21 Jan 2024 05:43:30 +0000 Subject: [PATCH 3/8] update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 100b12a9..baf03d8f 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def get_version() -> str: keywords="distributed spark dask ray sql dsl domain specific language", url="http://github.com/fugue-project/fugue", install_requires=[ - "triad>=0.9.3", + "triad>=0.9.4", "adagio>=0.2.4", ], extras_require={ From 47d196092681652879b78d6d6d354a080baac778 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sun, 21 Jan 2024 05:47:03 +0000 Subject: [PATCH 4/8] update --- .readthedocs.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..61dc1129 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,17 @@ +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.10" + jobs: + pre_install: + - pip install -U pip + +sphinx: + configuration: docs/conf.py + +python: + install: + - requirements: requirements.txt From 6e2bdccf5934f20c384ab49bc679f56c93e0d176 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sun, 21 Jan 2024 06:58:14 +0000 Subject: [PATCH 5/8] Fix duckdb tests --- tests/fugue_duckdb/test_execution_engine.py | 66 +++++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/tests/fugue_duckdb/test_execution_engine.py b/tests/fugue_duckdb/test_execution_engine.py index 9b3f69ca..b41416ab 100644 --- a/tests/fugue_duckdb/test_execution_engine.py +++ b/tests/fugue_duckdb/test_execution_engine.py @@ -3,7 +3,6 @@ import duckdb import pandas as pd import pyarrow as pa -import pytest from pytest import raises import fugue.api as fa @@ -11,7 +10,6 @@ from fugue import ArrowDataFrame, DataFrame, FugueWorkflow, fsql from fugue.api import engine_context from fugue.plugins import infer_execution_engine -from fugue_duckdb import DuckExecutionEngine from fugue_duckdb.dataframe import DuckDataFrame from fugue_test.builtin_suite import BuiltInTests from fugue_test.execution_suite import ExecutionEngineTests @@ -109,39 +107,55 @@ def test_builtin_connection(): def test_configs(): - dag = FugueWorkflow() - df = dag.df([[None], [1]], "a:double") - df = dag.select("SELECT * FROM ", df, "ORDER BY a LIMIT 1") - df.assert_eq(dag.df([[None]], "a:double")) - - dag.run( - "duckdb", - { - "fugue.duckdb.pragma.threads": 2, - "fugue.duckdb.pragma.default_null_order": "NULLS FIRST", - }, + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads": 1}, + ) ) + assert df.value.iloc[0] == "1" - dag = FugueWorkflow() - df = dag.df([[None], [1]], "a:double") - df = dag.select("SELECT * FROM ", df, "ORDER BY a LIMIT 1") - df.assert_eq(dag.df([[1]], "a:double")) - - dag.run( - "duckdb", - { - "fugue.duckdb.pragma.threads": 2, - "fugue.duckdb.pragma.default_null_order": "NULLS LAST", - }, + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads": 3}, + ) ) + assert df.value.iloc[0] == "3" with raises(ValueError): # invalid config format - dag.run("duckdb", {"fugue.duckdb.pragma.threads;xx": 2}) + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads;xx": 3}, + ) + ) with raises(Exception): # non-existent config - dag.run("duckdb", {"fugue.duckdb.pragma.threads_xx": 2}) + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads_xx": 3}, + ) + ) def test_annotations(): From bcea439656d2107a079d02fbee066ce1920bae51 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sun, 21 Jan 2024 06:59:31 +0000 Subject: [PATCH 6/8] update --- fugue_ibis/execution_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fugue_ibis/execution_engine.py b/fugue_ibis/execution_engine.py index 39685be6..1f2d0846 100644 --- a/fugue_ibis/execution_engine.py +++ b/fugue_ibis/execution_engine.py @@ -92,9 +92,9 @@ def join( _df2 = self.to_df(df2) key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on) on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema] - if ibis.__version__ < "6": + if ibis.__version__ < "6": # pragma: no cover suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX)) - else: # pragma: no cover + else: # breaking change in ibis 6.0 suffixes = dict(lname="", rname="{name}" + _JOIN_RIGHT_SUFFIX) if how.lower() == "cross": From 6fa68d26cb6f4ff898f0e6d29acd9ba545520400 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sun, 21 Jan 2024 07:25:20 +0000 Subject: [PATCH 7/8] update --- .github/workflows/test_win.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_win.yml b/.github/workflows/test_win.yml index d12d0e12..bbe4739f 100644 --- a/.github/workflows/test_win.yml +++ b/.github/workflows/test_win.yml @@ -33,7 +33,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: pip install -r requirements.txt - - name: Install pyarrow - run: pip install pyarrow==7.0.0 + # - name: Install pyarrow + # run: pip install pyarrow==7.0.0 - name: Test run: python -m pytest --reruns 2 --only-rerun 'Overflow in cast' tests/fugue tests/fugue_dask tests/fugue_ibis tests/fugue_duckdb From 69688f903a4a0fa5946c034e4d5c4f65b8275583 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sun, 21 Jan 2024 07:39:04 +0000 Subject: [PATCH 8/8] update --- .github/workflows/test_win.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_win.yml b/.github/workflows/test_win.yml index bbe4739f..7e5217f4 100644 --- a/.github/workflows/test_win.yml +++ b/.github/workflows/test_win.yml @@ -33,7 +33,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: pip install -r requirements.txt - # - name: Install pyarrow - # run: pip install pyarrow==7.0.0 + - name: Install pyarrow + run: pip install pyarrow==8.0.0 - name: Test run: python -m pytest --reruns 2 --only-rerun 'Overflow in cast' tests/fugue tests/fugue_dask tests/fugue_ibis tests/fugue_duckdb