Skip to content

Commit

Permalink
0.5.1 (#59)
Browse files Browse the repository at this point in the history
* 📝 Add documentation for the "blind" environments (#45, #54, #55)

* 🩹 Fix trimws not importable from datar.all/datar.base

* ✨ Make as_date() return pd datetime types; Add as_pd_date() as an alias of pd.to_datetime() (#56)

* 🔖 0.5.1

* 🚨 Fix linting

* 👷 Deploy the docs on dev branch as well

* 💚 Fix docs deploy in CI
  • Loading branch information
pwwang authored Sep 16, 2021
1 parent a73bddd commit ce8f2f3
Show file tree
Hide file tree
Showing 12 changed files with 159 additions and 13 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ jobs:
- name: Deploy docs
run: |
mkdocs gh-deploy --clean --force
if: success() && github.ref == 'refs/heads/master'
# if: success() && github.ref == 'refs/heads/master'

fix-index:
needs: docs
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/master'
# if: github.ref == 'refs/heads/master'
strategy:
matrix:
python-version: [3.8]
python-version: [3.9]
steps:
- uses: actions/checkout@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ repos:
files: ^tests/.+$|^datar/.+$
- id: notebooks
name: Executing all notebooks
entry: jupyter nbconvert --to notebook --execute
entry: jupyter nbconvert --output-dir /tmp --to notebook --execute
language: system
pass_filenames: true
files: ^docs/.+\.ipynb$
Expand Down
2 changes: 1 addition & 1 deletion datar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .core.defaults import f

__all__ = ('f', 'get_versions')
__version__ = "0.5.0"
__version__ = "0.5.1"

def get_versions(
prnt: bool = True
Expand Down
3 changes: 2 additions & 1 deletion datar/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from .complex import arg, as_complex, conj, im, is_complex, mod, re as re_
from .constants import LETTERS, letters, month_abb, month_name, pi
from .cum import cummax, cummin, cumprod, cumsum
from .date import as_date
from .date import as_date, as_pd_date
from .factor import (
as_categorical,
as_factor,
Expand Down Expand Up @@ -125,6 +125,7 @@
startswith,
endswith,
strtoi,
trimws,
chartr,
tolower,
toupper,
Expand Down
42 changes: 37 additions & 5 deletions datar/base/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,21 @@

import datetime
import functools
from typing import Any, Union, List, Iterable
from typing import TYPE_CHECKING, Any, Iterable, List, Union

import numpy
from pandas import Series, DataFrame
import pandas
from pandas import DataFrame, Series
from pipda import register_func

from ..core.types import IntType, is_scalar_int, is_scalar
from ..core.contexts import Context
from ..core.types import IntType, is_scalar, is_scalar_int
from .na import NA

if TYPE_CHECKING: # pragma: no cover
# pylint: disable=ungrouped-imports
from pandas import DatetimeIndex, Timestamp

# pylint: disable=invalid-name
# pylint: disable=redefined-builtin

Expand Down Expand Up @@ -144,7 +149,7 @@ def as_date(
optional: bool = False,
tz: Union[IntType, datetime.timedelta] = 0,
origin: Any = None,
) -> Iterable[datetime.date]:
) -> Union[Series, "Timestamp", "DatetimeIndex"]:
"""Convert an object to a datetime.date object
See: https://rdrr.io/r/base/as.Date.html
Expand All @@ -155,6 +160,11 @@ def as_date(
the first non-NA element, and give an error if none works.
Otherwise, the processing is via strptime
try_formats: vector of format strings to try if format is not specified.
Default formats to try:
"%Y-%m-%d"
"%Y/%m/%d"
"%Y-%m-%d %H:%M:%S"
"%Y/%m/%d %H:%M:%S"
optional: indicating to return NA (instead of signalling an error)
if the format guessing does not succeed.
origin: a datetime.date/datetime object, or something which can be
Expand All @@ -168,11 +178,33 @@ def as_date(
if not isinstance(x, Series):
x = Series([x]) if is_scalar(x) else Series(x)

return x.transform(
out = x.transform(
_as_date_dummy,
format=format,
try_formats=try_formats,
optional=optional,
tz=tz,
origin=origin,
)
return pandas.to_datetime(out)

@register_func(None, context=Context.EVAL)
def as_pd_date(
    arg: Union[int, str, float, datetime.datetime, Iterable],
    *args: Any,
    **kwargs: Any,
) -> Union[Series, "Timestamp", "DatetimeIndex"]:
    """Convert `arg` to pandas datetime by delegating to `pandas.to_datetime()`

    This is a thin alias of `pandas.to_datetime()`, registered as a function
    so that it can be used in verbs.

    See https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html

    Args:
        arg: The value(s) to be converted to datetime
        *args: Additional positional arguments forwarded unchanged to
            `pandas.to_datetime()`
        **kwargs: Additional keyword arguments forwarded unchanged to
            `pandas.to_datetime()`

    Returns:
        Whatever `pandas.to_datetime()` returns for the given arguments
    """
    converted = pandas.to_datetime(arg, *args, **kwargs)
    return converted
6 changes: 6 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.5.1
- Add documentation about "blind" environment (#45, #54, #55)
- Change `base.as_date()` to return pandas datetime types instead of python datetime types (#56)
- Add `base.as_pd_date()` to be an alias of `pandas.to_datetime()` (#56)
- Expose `trimws` to `datar.all` (#58)

## 0.5.0

Added:
Expand Down
89 changes: 89 additions & 0 deletions docs/caveats/blind.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

Related issues: [GH#45][1] [GH#54][2] [GH#55][3]

## Why?
To make `datar` verbs work with both regular calling and piping calling:

```python
# regular calling
num_rows = nrow(df)

# piping calling
num_rows = df >> nrow()
```

we need the source code available to detect the AST node, especially the piping sign (`ast.BinOp(op=ast.RShift)`), so we can preserve the slot of the first argument for the data to pipe in.

However, the source code is not always available at runtime (e.g. raw python REPL, `exec()`), or there could be some environments that modify the AST tree (e.g. `assert` from `pytest`). We call those environments "blind".

A quick example to simulate this situation:

```python
>>> from datar.all import *
>>> df = tibble(a="a")
>>> df >> mutate(A=f.a.str.upper())
a A
<object> <object>
0 a A
>>> source = "df >> mutate(A=f.a.str.upper())"
>>> exec(source)
/path/to/site-packages/pipda/utils.py:161: UserWarning: Failed to fet
ch the node calling the function, call it with the original function.
warnings.warn(
Traceback (most recent call last):
...
```

## Solutions

- Try switching to a REPL that maintains the source code (`ipython` instead of raw python REPL, for example)
- Save the code into a file, and run that script with python interpreter
- Stick with the regular calling:

```python
>>> source = "df2 = mutate(df, A=f.a.str.upper())"
>>> exec(source) # you still get a warning, but the code works
/home/pwwang/miniconda3/lib/python3.9/site-packages/pipda/utils.py:161: UserWarning: Failed to fet
ch the node calling the function, call it with the original function.
warnings.warn(
>>> df2
a A
<object> <object>
0 a A
```

- Stick with the piping calling:

```python
>>> from pipda import options
>>> options.assume_all_piping = True
>>> source = "df2 = df >> mutate(A=f.a.str.upper())"
>>> exec(source) # no warnings, we know we don't need the AST node anymore
>>> df2
a A
<object> <object>
0 a A
```

!!! Note

Whichever calling mode you are sticking with, you have to stick with it for all verbs, even for those simple ones (e.g. `dim()`, `nrow()`, etc.)

!!! Tip

If you wonder whether a python function is registered as a verb or a plain function:

```python
>>> mutate.__pipda__
'Verb'
>>> nrow.__pipda__
'Verb'
>>> as_integer.__pipda__
'PlainFunction'
```



[1]: https://github.com/pwwang/datar/issues/45
[2]: https://github.com/pwwang/datar/issues/54
[3]: https://github.com/pwwang/datar/issues/55
4 changes: 4 additions & 0 deletions docs/reference-maps/base.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
|API|Description|Notebook example|
|---|---|---:|
|[`as_date`][41]|Cast data to date|[:material-notebook:][4]|
|[**`as_pd_date`**][150]|Alias of `pandas.to_datetime()`||

### Factor data

Expand Down Expand Up @@ -220,6 +221,7 @@
|[`chartr`][133]|Replace characters in strings||
|[`tolower`][134]|Transform strings to lower case||
|[`toupper`][135]|Transform strings to upper case||
|[`trimws`][149]|Remove leading and/or trailing whitespace from character strings.||

### Table

Expand Down Expand Up @@ -430,3 +432,5 @@
[146]: ../../api/datar.base.table/#datar.base.table.tabulate
[147]: ../../api/datar.base.verbs/#datar.base.verbs.append
[148]: ../../api/datar.base.verbs/#datar.base.verbs.proportions
[149]: ../../api/datar.base.string/#datar.base.string.trimws
[150]: ../../api/datar.base.date/#datar.base.date.as_pd_date
3 changes: 3 additions & 0 deletions docs/style.css
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@

.md-main__inner.md-grid {
max-width: 80%;
}

.md-typeset .admonition, .md-typeset details {
font-size: .7rem !important;
Expand Down
6 changes: 6 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ nav:
'Grouped/rowwise data frame': 'caveats/grouped.md'
'NAs': 'caveats/NAs.md'
'in vs %in%': 'caveats/in.md'
'"blind" environment': 'caveats/blind.md'
- 'Datasets': 'datasets.md'
- 'Advanced usage': 'advanced.md'
- 'Examples':
Expand Down Expand Up @@ -85,6 +86,11 @@ nav:
'fill': 'notebooks/fill.ipynb'
'filter': 'notebooks/filter.ipynb'
'filter-joins': 'notebooks/filter-joins.ipynb'
'forcats_fct_multi': 'notebooks/forcats_fct_multi.ipynb'
'forcats_lvl_addrm': 'notebooks/forcats_lvl_addrm.ipynb'
'forcats_lvl_order': 'notebooks/forcats_lvl_order.ipynb'
'forcats_lvl_value': 'notebooks/forcats_lvl_value.ipynb'
'forcats_misc': 'notebooks/forcats_misc.ipynb'
'full_seq': 'notebooks/full_seq.ipynb'
'datar': 'notebooks/datar.ipynb'
'group_by': 'notebooks/group_by.ipynb'
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "datar"
version = "0.5.0"
version = "0.5.1"
description = "Port of dplyr and other related R packages in python, using pipda."
authors = ["pwwang <[email protected]>"]
readme = "README.md"
Expand Down
7 changes: 6 additions & 1 deletion tests/test_base_date.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

import pandas
from datar.base.date import *
from .conftest import assert_iterable_equal

Expand Down Expand Up @@ -47,7 +48,7 @@ def test_as_date(x, format, try_formats, optional, tz, origin, expected):
optional,
tz,
origin
), expected)
), pandas.to_datetime(expected))

def test_as_date_error():
with pytest.raises(ValueError):
Expand All @@ -57,3 +58,7 @@ def test_as_date_error():
as_date("1990-1-1", "%Y")

assert as_date("1990-1-1", "Y", optional=True).isna().all()

def test_as_pd_date():
    """as_pd_date() turns a human-readable date string into a Timestamp."""
    parsed = as_pd_date("Sep 16, 2021")
    expected = pandas.Timestamp('2021-09-16 00:00:00')
    assert parsed == expected

0 comments on commit ce8f2f3

Please sign in to comment.