455 lines
15 KiB
Python
455 lines
15 KiB
Python
|
"""
|
||
|
This file contains a minimal set of tests for compliance with the extension
|
||
|
array interface test suite, and should contain no other tests.
|
||
|
The test suite for the full functionality of the array is located in
|
||
|
`pandas/tests/arrays/`.
|
||
|
|
||
|
The tests in this file are inherited from the BaseExtensionTests, and only
|
||
|
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||
|
parent method).
|
||
|
|
||
|
Additional tests should either be added to one of the BaseExtensionTests
|
||
|
classes (if they are relevant for the extension interface for all dtypes), or
|
||
|
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||
|
|
||
|
Note: we do not bother with base.BaseIndexTests because PandasArray
|
||
|
will never be held in an Index.
|
||
|
"""
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas.core.dtypes.cast import can_hold_element
|
||
|
from pandas.core.dtypes.dtypes import (
|
||
|
ExtensionDtype,
|
||
|
PandasDtype,
|
||
|
)
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas._testing as tm
|
||
|
from pandas.core.arrays.numpy_ import PandasArray
|
||
|
from pandas.core.internals import blocks
|
||
|
from pandas.tests.extension import base
|
||
|
|
||
|
|
||
|
def _can_hold_element_patched(obj, element) -> bool:
    """
    Variant of ``can_hold_element`` that accepts a PandasArray as ``element``.

    A PandasArray is first converted with ``to_numpy()``; any other element
    is forwarded untouched.
    """
    unwrapped = element.to_numpy() if isinstance(element, PandasArray) else element
    return can_hold_element(obj, unwrapped)
|
||
|
|
||
|
|
||
|
# Keep a handle on the unpatched asserter so the wrapper defined in this
# module can still delegate to it.
orig_assert_attr_equal = tm.assert_attr_equal
|
||
|
|
||
|
|
||
|
def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
    """
    Replacement for tm.assert_attr_equal in which PandasDtype("object") is
    close enough to np.dtype("object").

    When ``attr`` is "dtype" and exactly one of the two operands carries a
    PandasDtype, that operand is cast to the wrapped numpy dtype before the
    original asserter is called.
    """
    if attr == "dtype":
        left_dtype = getattr(left, "dtype", None)
        right_dtype = getattr(right, "dtype", None)
        left_is_wrapped = isinstance(left_dtype, PandasDtype)
        right_is_wrapped = isinstance(right_dtype, PandasDtype)

        if left_is_wrapped and not right_is_wrapped:
            left = left.astype(left_dtype.numpy_dtype)
        elif right_is_wrapped and not left_is_wrapped:
            right = right.astype(right_dtype.numpy_dtype)

    orig_assert_attr_equal(attr, left, right, obj)
|
||
|
|
||
|
|
||
|
@pytest.fixture(params=["float", "object"])
def dtype(request):
    """PandasDtype wrapping the numpy dtype named by the current param."""
    numpy_dtype = np.dtype(request.param)
    return PandasDtype(numpy_dtype)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def allow_in_pandas(monkeypatch):
    """
    A monkeypatch that tells pandas to let us in.

    By default, passing a PandasArray to an index / series / frame
    constructor will unbox that PandasArray to an ndarray, and treat
    it as a non-EA column. We don't want people using EAs without
    reason.

    The mechanism for this is a check against ABCPandasArray
    in each constructor.

    For testing, however, PandasArray has to be allowed inside pandas, so
    the ``_typ`` of PandasArray is patched to evade the ABCPandasArray check.
    The same context also swaps in this module's ``can_hold_element`` and
    ``assert_attr_equal`` replacements.
    """
    with monkeypatch.context() as patcher:
        # Evade the ABCPandasArray check in the constructors.
        patcher.setattr(PandasArray, "_typ", "extension")
        # Use the PandasArray-aware helpers defined in this module.
        patcher.setattr(blocks, "can_hold_element", _can_hold_element_patched)
        patcher.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
        yield
|
||
|
|
||
|
|
||
|
@pytest.fixture
def data(allow_in_pandas, dtype):
    """Array of 100 values: 1..100 for float, 1-tuples (0,)..(99,) for object."""
    if dtype.numpy_dtype != "object":
        return PandasArray(np.arange(1, 101, dtype=dtype._dtype))
    # Nested data reaches PandasArray through ``Series.array``.
    nested = pd.Series([(i,) for i in range(100)])
    return nested.array
|
||
|
|
||
|
|
||
|
@pytest.fixture
def data_missing(allow_in_pandas, dtype):
    """Length-2 array holding NaN followed by one valid value."""
    if dtype.numpy_dtype == "object":
        values = np.array([np.nan, (1,)], dtype=object)
    else:
        values = np.array([np.nan, 1.0])
    return PandasArray(values)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def na_value():
    """The missing-value scalar used by these tests: ``np.nan``."""
    return np.nan
|
||
|
|
||
|
|
||
|
@pytest.fixture
def na_cmp():
    """Return a binary predicate that is truthy only if both arguments are NaN."""

    def both_nan(a, b):
        # ``and`` short-circuits: ``b`` is only inspected when ``a`` is NaN.
        return np.isnan(a) and np.isnan(b)

    return both_nan
|
||
|
|
||
|
|
||
|
@pytest.fixture
def data_for_sorting(allow_in_pandas, dtype):
    """
    Length-3 array with a known sort order.

    The three items are laid out as [B, C, A] with A < B < C.
    """
    if dtype.numpy_dtype == "object":
        # Lead with an empty tuple and slice it off afterwards so that
        # np.array does not infer a 2-D shape from the equal-length tuples.
        padded = np.array([(), (2,), (3,), (1,)], dtype=object)
        return PandasArray(padded[1:])
    return PandasArray(np.array([1, 2, 0]))
|
||
|
|
||
|
|
||
|
@pytest.fixture
def data_missing_for_sorting(allow_in_pandas, dtype):
    """
    Length-3 array with a known sort order and one missing value.

    The three items are laid out as [B, NA, A] with A < B and NA missing.
    """
    if dtype.numpy_dtype == "object":
        values = np.array([(1,), np.nan, (0,)], dtype=object)
    else:
        values = np.array([1, np.nan, 0])
    return PandasArray(values)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def data_for_grouping(allow_in_pandas, dtype):
    """
    Data for factorization, grouping, and unique tests.

    Laid out as [B, B, NA, NA, A, A, B, C], where A < B < C and NA is missing.
    """
    numpy_dtype = dtype.numpy_dtype
    if numpy_dtype == "object":
        a, b, c = (1,), (2,), (3,)
    else:
        a, b, c = np.arange(3)
    layout = [b, b, np.nan, np.nan, a, a, b, c]
    return PandasArray(np.array(layout, dtype=numpy_dtype))
|
||
|
|
||
|
|
||
|
@pytest.fixture
def skip_numpy_object(dtype, request):
    """
    Mark the requesting test as xfail when the dtype is object.

    Tests for PandasArray with nested data. Users typically won't create
    these objects via `pd.array`, but they can show up through `.array`
    on a Series with nested data. Many of the base tests fail, as they aren't
    appropriate for nested data.

    Apply this fixture through a ``usefixtures`` marker on either an
    individual test or a test class.
    """
    if dtype == "object":
        request.node.add_marker(pytest.mark.xfail(reason="Fails for object dtype"))
|
||
|
|
||
|
|
||
|
# Decorator shorthand: requests the ``skip_numpy_object`` fixture for the
# decorated test or test class.
skip_nested = pytest.mark.usefixtures("skip_numpy_object")
|
||
|
|
||
|
|
||
|
class BaseNumPyTests:
    """Mixin supplying a PandasDtype-aware ``assert_series_equal``."""

    @classmethod
    def assert_series_equal(cls, left, right, *args, **kwargs):
        """
        Compare two Series, first casting a numpy-dtype ``right`` to PandasDtype.

        The base class tests hard-code expected values with numpy dtypes,
        whereas here the corresponding PandasDtype is generally wanted.
        """
        needs_cast = (
            isinstance(right, pd.Series)
            and not isinstance(right.dtype, ExtensionDtype)
            and isinstance(left.dtype, PandasDtype)
        )
        if needs_cast:
            right = right.astype(PandasDtype(right.dtype))
        return tm.assert_series_equal(left, right, *args, **kwargs)
|
||
|
|
||
|
|
||
|
class TestCasting(BaseNumPyTests, base.BaseCastingTests):
    """Casting tests inherited from the extension base suite."""

    @skip_nested
    def test_astype_str(self, data):
        """Object dtype: ValueError: setting an array element with a sequence."""
        super().test_astype_str(data)
|
||
|
|
||
|
|
||
|
class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests):
    """Constructor tests inherited from the extension base suite."""

    @pytest.mark.skip(reason="We don't register our dtype")
    def test_from_dtype(self, data):
        """
        Skipped: we don't want to register the dtype.

        This test should probably be split in two.
        """

    @skip_nested
    def test_series_constructor_scalar_with_index(self, data, dtype):
        """Object dtype: ValueError: Length of passed values is 1, index implies 3."""
        super().test_series_constructor_scalar_with_index(data, dtype)
|
||
|
|
||
|
|
||
|
class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
    """Dtype tests inherited from the extension base suite."""

    def test_check_dtype(self, data, request):
        """Run the base check, marked xfail when the numpy dtype is object."""
        numpy_dtype = data.dtype.numpy_dtype
        if numpy_dtype == "object":
            reason = (
                "PandasArray expectedly clashes with a "
                f"NumPy name: {numpy_dtype}"
            )
            request.node.add_marker(pytest.mark.xfail(reason=reason))
        super().test_check_dtype(data)
|
||
|
|
||
|
|
||
|
class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
    """Getitem tests inherited from the extension base suite."""

    @skip_nested
    def test_getitem_scalar(self, data):
        """Object dtype: AssertionError."""
        super().test_getitem_scalar(data)
|
||
|
|
||
|
|
||
|
class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
    """Groupby tests inherited from the extension base suite."""

    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
        """Delegate unchanged to the base implementation."""
        super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
|
||
|
|
||
|
|
||
|
class TestInterface(BaseNumPyTests, base.BaseInterfaceTests):
    """Interface tests inherited from the extension base suite."""

    @skip_nested
    def test_array_interface(self, data):
        """Object dtype: NumPy array shape inference."""
        super().test_array_interface(data)
|
||
|
|
||
|
|
||
|
class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
    """Method tests inherited from the extension base suite."""

    @skip_nested
    def test_shift_fill_value(self, data):
        """Object dtype: np.array shape inference. Shift implementation fails."""
        super().test_shift_fill_value(data)

    @skip_nested
    def test_fillna_copy_frame(self, data_missing):
        """Object dtype: the "scalar" for this array isn't a scalar."""
        super().test_fillna_copy_frame(data_missing)

    @skip_nested
    def test_fillna_copy_series(self, data_missing):
        """Object dtype: the "scalar" for this array isn't a scalar."""
        super().test_fillna_copy_series(data_missing)

    @skip_nested
    def test_searchsorted(self, data_for_sorting, as_series):
        """Object dtype: test setup fails."""
        super().test_searchsorted(data_for_sorting, as_series)

    @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype")
    def test_diff(self, data, periods):
        """Run the base diff test; marked xfail for every dtype."""
        return super().test_diff(data, periods)

    def test_insert(self, data, request):
        """Run the base insert test, marked xfail when the numpy dtype is object."""
        if data.dtype.numpy_dtype == object:
            request.node.add_marker(
                pytest.mark.xfail(reason="Dimension mismatch in np.concatenate")
            )

        super().test_insert(data)

    @skip_nested
    def test_insert_invalid(self, data, invalid_scalar):
        """PandasArray[object] can hold anything, hence the object-dtype marker."""
        super().test_insert_invalid(data, invalid_scalar)
|
||
|
|
||
|
|
||
|
class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests):
    """Arithmetic tests inherited from the extension base suite."""

    # Every expected-exception hook of the base class is set to None here.
    divmod_exc = None
    series_scalar_exc = None
    frame_scalar_exc = None
    series_array_exc = None

    @skip_nested
    def test_divmod(self, data):
        """Delegate to the base test; carries the object-dtype marker."""
        super().test_divmod(data)

    @skip_nested
    def test_divmod_series_array(self, data):
        """Check ``divmod`` of a Series built from ``data`` against ``data``."""
        boxed = pd.Series(data)
        self._check_divmod_op(boxed, divmod, data, exc=None)

    @skip_nested
    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Delegate to the base test; carries the object-dtype marker."""
        super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
        """
        Delegate to the base test.

        For object dtype every operator other than ``__add__`` / ``__radd__``
        is marked xfail.
        """
        is_object = data.dtype.numpy_dtype == object
        if is_object and all_arithmetic_operators not in ["__add__", "__radd__"]:
            request.node.add_marker(
                pytest.mark.xfail(reason="Fails for object dtype")
            )
        super().test_arith_series_with_array(data, all_arithmetic_operators)

    @skip_nested
    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        """Delegate to the base test; carries the object-dtype marker."""
        super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
|
||
|
|
||
|
|
||
|
class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
    """Printing tests inherited unchanged from the extension base suite."""
|
||
|
|
||
|
|
||
|
class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
    """Numeric reduction tests inherited from the extension base suite."""

    def check_reduce(self, s, op_name, skipna):
        """Compare reduction ``op_name`` on ``s`` with the same reduction on its numpy-dtype cast."""
        reducer = getattr(s, op_name)
        result = reducer(skipna=skipna)
        # avoid coercing int -> float. Just cast to the actual numpy type.
        as_numpy_dtype = s.astype(s.dtype._dtype)
        expected = getattr(as_numpy_dtype, op_name)(skipna=skipna)
        tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series(self, data, all_boolean_reductions, skipna):
        """Delegate to the base test for both values of ``skipna``."""
        super().test_reduce_series(data, all_boolean_reductions, skipna)
|
||
|
|
||
|
|
||
|
@skip_nested
class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
    """Boolean reduction tests inherited unchanged; the whole class carries the object-dtype marker."""
|
||
|
|
||
|
|
||
|
class TestMissing(BaseNumPyTests, base.BaseMissingTests):
    """Missing-data tests inherited from the extension base suite."""

    @skip_nested
    def test_fillna_series(self, data_missing):
        """Object dtype: non-scalar "scalar" values."""
        super().test_fillna_series(data_missing)

    @skip_nested
    def test_fillna_frame(self, data_missing):
        """Object dtype: non-scalar "scalar" values."""
        super().test_fillna_frame(data_missing)
|
||
|
|
||
|
|
||
|
class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
    """Reshaping tests inherited from the extension base suite."""

    @pytest.mark.parametrize(
        "in_frame",
        [
            True,
            # The not-in-frame case is marked xfail.
            pytest.param(
                False,
                marks=pytest.mark.xfail(reason="PandasArray inconsistently extracted"),
            ),
        ],
    )
    def test_concat(self, data, in_frame):
        """Delegate to the base concat test for both ``in_frame`` values."""
        super().test_concat(data, in_frame)
|
||
|
|
||
|
|
||
|
class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
    """Setitem tests inherited from the extension base suite."""

    @skip_nested
    def test_setitem_invalid(self, data, invalid_scalar):
        """Object dtype can hold anything, so doesn't raise."""
        super().test_setitem_invalid(data, invalid_scalar)

    @skip_nested
    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """
        Object dtype: ValueError: cannot set using a list-like indexer with a
        different length than the value.
        """
        super().test_setitem_sequence_broadcasts(data, box_in_series)

    @skip_nested
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        """
        Object dtype: ValueError: cannot set using a list-like indexer with a
        different length than the value.
        """
        super().test_setitem_mask_broadcast(data, setter)

    @skip_nested
    def test_setitem_scalar_key_sequence_raise(self, data):
        """Object dtype: Failed: DID NOT RAISE <class 'ValueError'>."""
        super().test_setitem_scalar_key_sequence_raise(data)

    # TODO: there is some issue with PandasArray, therefore,
    #   skip the setitem test for now, and fix it later (GH 31446)

    @skip_nested
    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        """Delegate to the base test with a numpy and a nullable boolean mask."""
        super().test_setitem_mask(data, mask, box_in_series)

    def test_setitem_mask_raises(self, data, box_in_series):
        """Delegate unchanged to the base implementation."""
        super().test_setitem_mask_raises(data, box_in_series)

    @skip_nested
    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        """Delegate to the base test with list, Int64 and ndarray indexers."""
        super().test_setitem_integer_array(data, idx, box_in_series)

    # NOTE(review): the last two parameter sets below are identical (both pass
    # box_in_series=False) even though the ids label the final one
    # "integer-array-True" -- presumably a copy/paste slip. Left unchanged so
    # the set of executed cases stays the same; confirm before flipping to True.
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        """Delegate to the base test with indexers that contain ``pd.NA``."""
        super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)

    @skip_nested
    def test_setitem_slice(self, data, box_in_series):
        """Delegate to the base test; carries the object-dtype marker."""
        super().test_setitem_slice(data, box_in_series)

    @skip_nested
    def test_setitem_loc_iloc_slice(self, data):
        """Delegate to the base test; carries the object-dtype marker."""
        super().test_setitem_loc_iloc_slice(data)

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        """
        Set a new column on an empty-columned frame through ``.loc``.

        See https://github.com/pandas-dev/pandas/issues/32395
        """
        df = pd.DataFrame({"data": pd.Series(data)})
        expected = df
        result = pd.DataFrame(index=df.index)

        # because result has object dtype, the attempt to do setting inplace
        # is successful, and object dtype is retained
        key = full_indexer(df)
        result.loc[key, "data"] = df["data"]

        # base class method has expected = df; PandasArray behaves oddly because
        # we patch _typ for these tests.
        is_full_slice = isinstance(key, slice) and key == slice(None)
        if data.dtype.numpy_dtype != object and not is_full_slice:
            expected = pd.DataFrame({"data": data.to_numpy()})
        self.assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
@skip_nested
class TestParsing(BaseNumPyTests, base.BaseParsingTests):
    """Parsing tests inherited unchanged; the whole class carries the object-dtype marker."""
|
||
|
|
||
|
|
||
|
class Test2DCompat(BaseNumPyTests, base.NDArrayBacked2DTests):
    """2D-compatibility tests inherited unchanged from the extension base suite."""
|