From c7708406ab8d2c2e8e0b253e99c07053dc6b8aac Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 19 Dec 2023 10:59:31 +0100
Subject: [PATCH] Fixup concat dtypes for empty dataframes (#602)

---
 dask_expr/_concat.py           |  7 ++++++-
 dask_expr/tests/test_concat.py | 11 +++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/dask_expr/_concat.py b/dask_expr/_concat.py
index 1301434b..d0ae12f9 100644
--- a/dask_expr/_concat.py
+++ b/dask_expr/_concat.py
@@ -46,9 +46,14 @@ def _frames(self):
 
     @functools.cached_property
     def _meta(self):
+        # ignore DataFrame without columns to avoid dtype upcasting
         meta = make_meta(
             methods.concat(
-                [meta_nonempty(df._meta) for df in self._frames],
+                [
+                    meta_nonempty(df._meta)
+                    for df in self._frames
+                    if df.ndim < 2 or len(df._meta.columns) > 0
+                ],
                 join=self.join,
                 filter_warning=False,
                 axis=self.axis,
diff --git a/dask_expr/tests/test_concat.py b/dask_expr/tests/test_concat.py
index c1cd54f7..505c5363 100644
--- a/dask_expr/tests/test_concat.py
+++ b/dask_expr/tests/test_concat.py
@@ -140,6 +140,17 @@ def test_concat_index(df, pdf):
     assert query._name == expected._name
 
 
+def test_concat_dataframe_empty():
+    df = lib.DataFrame({"a": [100, 200, 300]}, dtype="int64")
+    empty_df = lib.DataFrame([], dtype="int64")
+    df_concat = lib.concat([df, empty_df])
+
+    ddf = from_pandas(df, npartitions=1)
+    empty_ddf = from_pandas(empty_df, npartitions=1)
+    ddf_concat = concat([ddf, empty_ddf])
+    assert_eq(df_concat, ddf_concat)
+
+
 def test_concat_after_merge():
     pdf1 = lib.DataFrame(
         {"x": range(10), "y": [1, 2, 3, 4, 5] * 2, "z": ["cat", "dog"] * 5}