Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement to_datetime #467

Merged
merged 1 commit into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion dask_expr/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from dask_expr._categorical import CategoricalAccessor
from dask_expr._concat import Concat
from dask_expr._datetime import DatetimeAccessor
from dask_expr._expr import Eval, Query, Shift, ToNumeric, no_default
from dask_expr._expr import Eval, Query, Shift, ToDatetime, ToNumeric, no_default
from dask_expr._merge import JoinRecursive, Merge
from dask_expr._quantiles import RepartitionQuantiles
from dask_expr._reductions import (
Expand Down Expand Up @@ -1521,3 +1521,9 @@ def to_numeric(arg, errors="raise", downcast=None):
if not isinstance(arg, Series):
raise TypeError("arg must be a Series")
return new_collection(ToNumeric(frame=arg.expr, errors=errors, downcast=downcast))


def to_datetime(arg, **kwargs):
    """Build a lazy ``ToDatetime`` collection from a dask-expr collection.

    Parameters
    ----------
    arg : FrameBase
        The collection to convert; its ``expr`` becomes the ``frame``
        operand of the ``ToDatetime`` expression.
    **kwargs
        Collected into a dict and stored as the ``kwargs`` operand of the
        ``ToDatetime`` expression.

    Returns
    -------
    The result of ``new_collection`` applied to the ``ToDatetime`` expression.

    Raises
    ------
    TypeError
        If ``arg`` is not a ``FrameBase`` instance.
    """
    if isinstance(arg, FrameBase):
        datetime_expr = ToDatetime(frame=arg.expr, kwargs=kwargs)
        return new_collection(datetime_expr)
    # Anything that is not a collection (scalars, lists, ...) is rejected.
    raise TypeError("arg must be a Series or a DataFrame")
22 changes: 12 additions & 10 deletions dask_expr/_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1721,16 +1721,18 @@ class ToNumeric(Elemwise):
_defaults = {"errors": "raise", "downcast": None}
operation = staticmethod(pd.to_numeric)

def _divisions(self):
    """Return the frame's divisions passed through ``pd.to_numeric``.

    The conversion uses this expression's ``errors`` and ``downcast``
    values; the converted values are wrapped in a ``pd.Index`` and
    returned as a tuple.
    """
    numeric_divisions = pd.to_numeric(
        self.frame.divisions,
        errors=self.errors,
        downcast=self.downcast,
    )
    return tuple(pd.Index(numeric_divisions))

class ToDatetime(Elemwise):
    """``Elemwise`` expression whose operation is ``pd.to_datetime``.

    Operands are ``frame`` and an optional ``kwargs`` value (default
    ``None``), with ``kwargs`` declared keyword-only.
    """

    _parameters = ["frame", "kwargs"]
    _defaults = {"kwargs": None}
    _keyword_only = ["kwargs"]
    operation = staticmethod(pd.to_datetime)

    @functools.cached_property
    def _kwargs(self):
        """Return the ``kwargs`` operand, or an empty dict when it is ``None``."""
        # NOTE(review): presumably the Elemwise machinery unpacks this dict
        # into the call to ``operation`` -- confirm against the base class.
        stored = self.operand("kwargs")
        return {} if stored is None else stored


class AsType(Elemwise):
Expand Down
19 changes: 18 additions & 1 deletion dask_expr/tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dask.dataframe.utils import UNKNOWN_CATEGORIES
from dask.utils import M

from dask_expr import expr, from_pandas, is_scalar, optimize, to_numeric
from dask_expr import expr, from_pandas, is_scalar, optimize, to_datetime, to_numeric
from dask_expr._expr import are_co_aligned
from dask_expr._reductions import Len
from dask_expr._shuffle import Shuffle
Expand Down Expand Up @@ -370,6 +370,23 @@ def test_blockwise(func, pdf, df):
assert_eq(func(pdf), func(df))


def test_to_datetime():
    """Check ``to_datetime`` against the pandas-like ``lib`` implementation.

    Covers a DataFrame of year/month/day columns, a Series of timestamp
    strings, and the ``TypeError`` raised for a non-collection argument.
    """
    pandas_inputs = (
        lib.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}),
        lib.Series(["2018-10-26 12:00:00", "2018-10-26 13:00:15"]),
    )
    for pandas_obj in pandas_inputs:
        collection = from_pandas(pandas_obj, npartitions=2)
        expected = lib.to_datetime(pandas_obj)
        result = to_datetime(collection)
        assert_eq(result, expected)

    # A bare integer is not a collection and must be rejected.
    with pytest.raises(TypeError, match="arg must be a Series or a DataFrame"):
        to_datetime(1490195805)


def test_to_numeric(pdf, df):
pdf.x = pdf.x.astype("str")
expected = lib.to_numeric(pdf.x)
Expand Down
Loading