BUG: Test failures with 2.2.3 on Fedora Rawhide #60589

opoplawski · 2024-12-19T04:30:31Z

Pandas version checks

I have checked that this issue has not already been reported.
I have confirmed this bug exists on the latest version of pandas.
I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

============================= test session starts ==============================
platform linux -- Python 3.13.1, pytest-8.3.4, pluggy-1.5.0 -- /usr/bin/python3
cachedir: /builddir/build/BUILD/python-pandas-2.2.3-build/pandas-2.2.3/_empty/pytest-cache
hypothesis profile 'ci' -> deadline=None, suppress_health_check=[HealthCheck.too_slow, HealthCheck.differing_executors], database=DirectoryBasedExampleDatabase(PosixPath('/builddir/build/BUILD/python-pandas-2.2.3-build/pandas-2.2.3/_empty/.hypothesis/examples'))
rootdir: /builddir/build/BUILD/python-pandas-2.2.3-build/BUILDROOT/usr/lib64/python3.13/site-packages/pandas
configfile: pyproject.toml
plugins: asyncio-0.24.0, xdist-3.6.1, hypothesis-6.104.2
asyncio: mode=Mode.STRICT, default_loop_scope=None
created: 1/1 worker

=================================== FAILURES ===================================
____________________ test_array_inference[data7-expected7] _____________________
[gw0] linux -- Python 3.13.1 /usr/bin/python3
data = [datetime.datetime(2000, 1, 1, 0, 0, tzinfo=<DstTzInfo 'CET' LMT+0:18:00 STD>), datetime.datetime(2001, 1, 1, 0, 0, tzinfo=<DstTzInfo 'CET' LMT+0:18:00 STD>)]
expected = <DatetimeArray>
['2000-01-01 00:00:00+01:00', '2001-01-01 00:00:00+01:00']
Length: 2, dtype: datetime64[ns, CET]
    @pytest.mark.parametrize(
        "data, expected",
        [
            # period
            (
                [pd.Period("2000", "D"), pd.Period("2001", "D")],
                period_array(["2000", "2001"], freq="D"),
            ),
            # interval
            ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
            # datetime
            (
                [pd.Timestamp("2000"), pd.Timestamp("2001")],
                DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
            ),
            (
                [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
                DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
            ),
            (
                np.array([1, 2], dtype="M8[ns]"),
                DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
            ),
            (
                np.array([1, 2], dtype="M8[us]"),
                DatetimeArray._simple_new(
                    np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]")
                ),
            ),
            # datetimetz
            (
                [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
                DatetimeArray._from_sequence(
                    ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns")
                ),
            ),
            (
                [
                    datetime.datetime(2000, 1, 1, tzinfo=cet),
                    datetime.datetime(2001, 1, 1, tzinfo=cet),
                ],
                DatetimeArray._from_sequence(
                    ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns")
                ),
            ),
            # timedelta
            (
                [pd.Timedelta("1h"), pd.Timedelta("2h")],
                TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
            ),
            (
                np.array([1, 2], dtype="m8[ns]"),
                TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
            ),
            (
                np.array([1, 2], dtype="m8[us]"),
                TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
            ),
            # integer
            ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
            ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
            ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
            ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
            # float
            ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
            ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
            ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
            ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
            # integer-like float
            ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
            ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
            ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
            ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
            # mixed-integer-float
            ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
            (
                [1, np.nan, 2.0],
                FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
            ),
            # string
            (
                ["a", "b"],
                pd.StringDtype()
                .construct_array_type()
                ._from_sequence(["a", "b"], dtype=pd.StringDtype()),
            ),
            (
                ["a", None],
                pd.StringDtype()
                .construct_array_type()
                ._from_sequence(["a", None], dtype=pd.StringDtype()),
            ),
            # Boolean
            ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
            ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
        ],
    )
    def test_array_inference(data, expected):
        result = pd.array(data)
>       tm.assert_equal(result, expected)
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/tests/arrays/test_array.py:377: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
left = array(['1999-12-31T23:42:00.000000000', '2000-12-31T23:42:00.000000000'],
      dtype='datetime64[ns]')
right = array(['1999-12-31T23:00:00.000000000', '2000-12-31T23:00:00.000000000'],
      dtype='datetime64[ns]')
err_msg = None
    def _raise(left, right, err_msg) -> NoReturn:
        if err_msg is None:
            if left.shape != right.shape:
                raise_assert_detail(
                    obj, f"{obj} shapes are different", left.shape, right.shape
                )
    
            diff = 0
            for left_arr, right_arr in zip(left, right):
                # count up differences
                if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
                    diff += 1
    
            diff = diff * 100.0 / left.size
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
>           raise_assert_detail(obj, msg, left, right, index_values=index_values)
E           AssertionError: DatetimeArray._ndarray are different
E           
E           DatetimeArray._ndarray values are different (100.0 %)
E           [left]:  [1999-12-31T23:42:00.000000000, 2000-12-31T23:42:00.000000000]
E           [right]: [1999-12-31T23:00:00.000000000, 2000-12-31T23:00:00.000000000]
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/_testing/asserters.py:684: AssertionError

Full build log with more test failures is here: https://kojipkgs.fedoraproject.org//work/tasks/9043/126999043/build.log



### Issue Description

We are updating Fedora to pandas 2.2.3 and numpy 2.2.0 but are getting test failures.

### Expected Behavior

No test failures

### Installed Versions

<details>

2.2.3

</details>

The text was updated successfully, but these errors were encountered:

opoplawski · 2024-12-19T04:33:08Z

Also:

___________ TestDataFrameToXArray.test_to_xarray_index_types[string] ___________
[gw0] linux -- Python 3.13.1 /usr/bin/python3
self = <pandas.tests.generic.test_to_xarray.TestDataFrameToXArray object at 0x3ff2b881f90>
index_flat = Index(['pandas_0', 'pandas_1', 'pandas_2', 'pandas_3', 'pandas_4', 'pandas_5',
       'pandas_6', 'pandas_7', 'pandas_...pandas_93', 'pandas_94', 'pandas_95',
       'pandas_96', 'pandas_97', 'pandas_98', 'pandas_99'],
      dtype='object')
df = bar       a  b  c    d      e  f          g                         h
foo                                             ....0   True  c 2013-01-03 2013-01-03 00:00:00-05:00
pandas_3  d  4  6  7.0  False  d 2013-01-04 2013-01-04 00:00:00-05:00
using_infer_string = False
    def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
        index = index_flat
        # MultiIndex is tested in test_to_xarray_with_multiindex
        if len(index) == 0:
            pytest.skip("Test doesn't make sense for empty index")
    
        from xarray import Dataset
    
        df.index = index[:4]
        df.index.name = "foo"
        df.columns.name = "bar"
        result = df.to_xarray()
        assert result.sizes["foo"] == 4
        assert len(result.coords) == 1
        assert len(result.data_vars) == 8
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, Dataset)
    
        # idempotency
        # datetimes w/tz are preserved
        # column names are lost
        expected = df.copy()
        expected["f"] = expected["f"].astype(
            object if not using_infer_string else "string[pyarrow_numpy]"
        )
        expected.columns.name = None
>       tm.assert_frame_equal(result.to_dataframe(), expected)
E       AssertionError: Attributes of DataFrame.iloc[:, 5] (column name="f") are different
E       
E       Attribute "dtype" are different
E       [left]:  CategoricalDtype(categories=['a', 'b', 'c', 'd'], ordered=False, categories_dtype=object)
E       [right]: object
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/tests/generic/test_to_xarray.py:58: AssertionError

rhshadrach · 2024-12-20T13:40:15Z

Thanks for the report. Can you post the output of pd.show_versions()?

opoplawski · 2024-12-20T17:41:01Z

INSTALLED VERSIONS
------------------
commit                : 0691c5cf90477d3503834d983f69350f250a6ff7
python                : 3.13.1
python-bits           : 64
OS                    : Linux
OS-release            : 6.13.0-0.rc3.20241217gitf44d154d6e3d.30.fc42.x86_64
Version               : #1 SMP PREEMPT_DYNAMIC Tue Dec 17 22:37:32 UTC 2024
machine               : x86_64
processor             :
byteorder             : little
LC_ALL                : None
LANG                  : C.UTF-8
LOCALE                : C.UTF-8

pandas                : 2.2.3
numpy                 : 2.2.0
pytz                  : 2024.2
dateutil              : 2.8.2
pip                   : 24.3.1
Cython                : 3.0.11
sphinx                : 7.3.7
IPython               : 8.30.0
adbc-driver-postgresql: None
adbc-driver-sqlite    : None
bs4                   : 4.12.3
blosc                 : None
bottleneck            : 1.4.2
dataframe-api-compat  : None
fastparquet           : None
fsspec                : 2024.12.0
html5lib              : 1.1
hypothesis            : 6.104.2
gcsfs                 : 2024.9.0
jinja2                : 3.1.4
lxml.etree            : 5.3.0
matplotlib            : 3.9.4
numba                 : None
numexpr               : 2.10.2
odfpy                 : None
openpyxl              : 3.1.2
pandas_gbq            : None
psycopg2              : 2.9.9
pymysql               : 1.4.6
pyarrow               : 18.0.0
pyreadstat            : None
pytest                : 8.3.4
python-calamine       : None
pyxlsb                : None
s3fs                  : None
scipy                 : 1.14.1
sqlalchemy            : 2.0.36
tables                : 3.10.1
tabulate              : 0.9.0
xarray                : 2024.10.0
xlrd                  : 2.0.1
xlsxwriter            : 3.2.0
zstandard             : 0.23.0
tzdata                : None
qtpy                  : 2.4.2
pyqt5                 : None

opoplawski added Bug Needs Triage Issue that has not been reviewed by a pandas team member labels Dec 19, 2024

rhshadrach added Needs Info Clarification about behavior needed to assess issue and removed Needs Triage Issue that has not been reviewed by a pandas team member labels Dec 20, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

BUG: Test failures with 2.2.3 on Fedora Rawhide #60589

BUG: Test failures with 2.2.3 on Fedora Rawhide #60589

opoplawski commented Dec 19, 2024

opoplawski commented Dec 19, 2024

rhshadrach commented Dec 20, 2024

opoplawski commented Dec 20, 2024

BUG: Test failures with 2.2.3 on Fedora Rawhide #60589

BUG: Test failures with 2.2.3 on Fedora Rawhide #60589

Comments

opoplawski commented Dec 19, 2024

Pandas version checks

Reproducible Example

opoplawski commented Dec 19, 2024

rhshadrach commented Dec 20, 2024

opoplawski commented Dec 20, 2024