416 lines
13 KiB
Python
416 lines
13 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas.util._test_decorators as td
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
Index,
|
||
|
Interval,
|
||
|
IntervalIndex,
|
||
|
Timedelta,
|
||
|
Timestamp,
|
||
|
date_range,
|
||
|
timedelta_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.core.arrays import IntervalArray
|
||
|
|
||
|
|
||
|
@pytest.fixture(
|
||
|
params=[
|
||
|
(Index([0, 2, 4]), Index([1, 3, 5])),
|
||
|
(Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])),
|
||
|
(timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)),
|
||
|
(date_range("20170101", periods=3), date_range("20170102", periods=3)),
|
||
|
(
|
||
|
date_range("20170101", periods=3, tz="US/Eastern"),
|
||
|
date_range("20170102", periods=3, tz="US/Eastern"),
|
||
|
),
|
||
|
],
|
||
|
ids=lambda x: str(x[0].dtype),
|
||
|
)
|
||
|
def left_right_dtypes(request):
|
||
|
"""
|
||
|
Fixture for building an IntervalArray from various dtypes
|
||
|
"""
|
||
|
return request.param
|
||
|
|
||
|
|
||
|
class TestAttributes:
|
||
|
@pytest.mark.parametrize(
|
||
|
"left, right",
|
||
|
[
|
||
|
(0, 1),
|
||
|
(Timedelta("0 days"), Timedelta("1 day")),
|
||
|
(Timestamp("2018-01-01"), Timestamp("2018-01-02")),
|
||
|
(
|
||
|
Timestamp("2018-01-01", tz="US/Eastern"),
|
||
|
Timestamp("2018-01-02", tz="US/Eastern"),
|
||
|
),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex])
|
||
|
def test_is_empty(self, constructor, left, right, closed):
|
||
|
# GH27219
|
||
|
tuples = [(left, left), (left, right), np.nan]
|
||
|
expected = np.array([closed != "both", False, False])
|
||
|
result = constructor.from_tuples(tuples, closed=closed).is_empty
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestMethods:
|
||
|
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
|
||
|
def test_set_closed(self, closed, new_closed):
|
||
|
# GH 21670
|
||
|
array = IntervalArray.from_breaks(range(10), closed=closed)
|
||
|
result = array.set_closed(new_closed)
|
||
|
expected = IntervalArray.from_breaks(range(10), closed=new_closed)
|
||
|
tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"other",
|
||
|
[
|
||
|
Interval(0, 1, closed="right"),
|
||
|
IntervalArray.from_breaks([1, 2, 3, 4], closed="right"),
|
||
|
],
|
||
|
)
|
||
|
def test_where_raises(self, other):
|
||
|
# GH#45768 The IntervalArray methods raises; the Series method coerces
|
||
|
ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left"))
|
||
|
mask = np.array([True, False, True])
|
||
|
match = "'value.closed' is 'right', expected 'left'."
|
||
|
with pytest.raises(ValueError, match=match):
|
||
|
ser.array._where(mask, other)
|
||
|
|
||
|
res = ser.where(mask, other=other)
|
||
|
expected = ser.astype(object).where(mask, other)
|
||
|
tm.assert_series_equal(res, expected)
|
||
|
|
||
|
def test_shift(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
|
||
|
a = IntervalArray.from_breaks([1, 2, 3])
|
||
|
result = a.shift()
|
||
|
# int -> float
|
||
|
expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)])
|
||
|
tm.assert_interval_array_equal(result, expected)
|
||
|
|
||
|
def test_shift_datetime(self):
|
||
|
# GH#31502, GH#31504
|
||
|
a = IntervalArray.from_breaks(date_range("2000", periods=4))
|
||
|
result = a.shift(2)
|
||
|
expected = a.take([-1, -1, 0], allow_fill=True)
|
||
|
tm.assert_interval_array_equal(result, expected)
|
||
|
|
||
|
result = a.shift(-1)
|
||
|
expected = a.take([1, 2, -1], allow_fill=True)
|
||
|
tm.assert_interval_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestSetitem:
|
||
|
def test_set_na(self, left_right_dtypes):
|
||
|
left, right = left_right_dtypes
|
||
|
left = left.copy(deep=True)
|
||
|
right = right.copy(deep=True)
|
||
|
result = IntervalArray.from_arrays(left, right)
|
||
|
|
||
|
if result.dtype.subtype.kind not in ["m", "M"]:
|
||
|
msg = "'value' should be an interval type, got <.*NaTType'> instead."
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
result[0] = pd.NaT
|
||
|
if result.dtype.subtype.kind in ["i", "u"]:
|
||
|
msg = "Cannot set float NaN to integer-backed IntervalArray"
|
||
|
# GH#45484 TypeError, not ValueError, matches what we get with
|
||
|
# non-NA un-holdable value.
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
result[0] = np.NaN
|
||
|
return
|
||
|
|
||
|
result[0] = np.nan
|
||
|
|
||
|
expected_left = Index([left._na_value] + list(left[1:]))
|
||
|
expected_right = Index([right._na_value] + list(right[1:]))
|
||
|
expected = IntervalArray.from_arrays(expected_left, expected_right)
|
||
|
|
||
|
tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
def test_setitem_mismatched_closed(self):
|
||
|
arr = IntervalArray.from_breaks(range(4))
|
||
|
orig = arr.copy()
|
||
|
other = arr.set_closed("both")
|
||
|
|
||
|
msg = "'value.closed' is 'both', expected 'right'"
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[0] = other[0]
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[:1] = other[:1]
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[:0] = other[:0]
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[:] = other[::-1]
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[:] = list(other[::-1])
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[:] = other[::-1].astype(object)
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr[:] = other[::-1].astype("category")
|
||
|
|
||
|
# empty list should be no-op
|
||
|
arr[:0] = []
|
||
|
tm.assert_interval_array_equal(arr, orig)
|
||
|
|
||
|
|
||
|
def test_repr():
|
||
|
# GH 25022
|
||
|
arr = IntervalArray.from_tuples([(0, 1), (1, 2)])
|
||
|
result = repr(arr)
|
||
|
expected = (
|
||
|
"<IntervalArray>\n"
|
||
|
"[(0, 1], (1, 2]]\n"
|
||
|
"Length: 2, dtype: interval[int64, right]"
|
||
|
)
|
||
|
assert result == expected
|
||
|
|
||
|
|
||
|
class TestReductions:
|
||
|
def test_min_max_invalid_axis(self, left_right_dtypes):
|
||
|
left, right = left_right_dtypes
|
||
|
left = left.copy(deep=True)
|
||
|
right = right.copy(deep=True)
|
||
|
arr = IntervalArray.from_arrays(left, right)
|
||
|
|
||
|
msg = "`axis` must be fewer than the number of dimensions"
|
||
|
for axis in [-2, 1]:
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr.min(axis=axis)
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
arr.max(axis=axis)
|
||
|
|
||
|
msg = "'>=' not supported between"
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
arr.min(axis="foo")
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
arr.max(axis="foo")
|
||
|
|
||
|
def test_min_max(self, left_right_dtypes, index_or_series_or_array):
|
||
|
# GH#44746
|
||
|
left, right = left_right_dtypes
|
||
|
left = left.copy(deep=True)
|
||
|
right = right.copy(deep=True)
|
||
|
arr = IntervalArray.from_arrays(left, right)
|
||
|
|
||
|
# The expected results below are only valid if monotonic
|
||
|
assert left.is_monotonic_increasing
|
||
|
assert Index(arr).is_monotonic_increasing
|
||
|
|
||
|
MIN = arr[0]
|
||
|
MAX = arr[-1]
|
||
|
|
||
|
indexer = np.arange(len(arr))
|
||
|
np.random.shuffle(indexer)
|
||
|
arr = arr.take(indexer)
|
||
|
|
||
|
arr_na = arr.insert(2, np.nan)
|
||
|
|
||
|
arr = index_or_series_or_array(arr)
|
||
|
arr_na = index_or_series_or_array(arr_na)
|
||
|
|
||
|
for skipna in [True, False]:
|
||
|
res = arr.min(skipna=skipna)
|
||
|
assert res == MIN
|
||
|
assert type(res) == type(MIN)
|
||
|
|
||
|
res = arr.max(skipna=skipna)
|
||
|
assert res == MAX
|
||
|
assert type(res) == type(MAX)
|
||
|
|
||
|
res = arr_na.min(skipna=False)
|
||
|
assert np.isnan(res)
|
||
|
res = arr_na.max(skipna=False)
|
||
|
assert np.isnan(res)
|
||
|
|
||
|
res = arr_na.min(skipna=True)
|
||
|
assert res == MIN
|
||
|
assert type(res) == type(MIN)
|
||
|
res = arr_na.max(skipna=True)
|
||
|
assert res == MAX
|
||
|
assert type(res) == type(MAX)
|
||
|
|
||
|
|
||
|
# ----------------------------------------------------------------------------
|
||
|
# Arrow interaction
|
||
|
|
||
|
|
||
|
pyarrow_skip = td.skip_if_no("pyarrow")
|
||
|
|
||
|
|
||
|
@pyarrow_skip
|
||
|
def test_arrow_extension_type():
|
||
|
import pyarrow as pa
|
||
|
|
||
|
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
|
||
|
|
||
|
p1 = ArrowIntervalType(pa.int64(), "left")
|
||
|
p2 = ArrowIntervalType(pa.int64(), "left")
|
||
|
p3 = ArrowIntervalType(pa.int64(), "right")
|
||
|
|
||
|
assert p1.closed == "left"
|
||
|
assert p1 == p2
|
||
|
assert not p1 == p3
|
||
|
assert hash(p1) == hash(p2)
|
||
|
assert not hash(p1) == hash(p3)
|
||
|
|
||
|
|
||
|
@pyarrow_skip
|
||
|
def test_arrow_array():
|
||
|
import pyarrow as pa
|
||
|
|
||
|
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
|
||
|
|
||
|
intervals = pd.interval_range(1, 5, freq=1).array
|
||
|
|
||
|
result = pa.array(intervals)
|
||
|
assert isinstance(result.type, ArrowIntervalType)
|
||
|
assert result.type.closed == intervals.closed
|
||
|
assert result.type.subtype == pa.int64()
|
||
|
assert result.storage.field("left").equals(pa.array([1, 2, 3, 4], type="int64"))
|
||
|
assert result.storage.field("right").equals(pa.array([2, 3, 4, 5], type="int64"))
|
||
|
|
||
|
expected = pa.array([{"left": i, "right": i + 1} for i in range(1, 5)])
|
||
|
assert result.storage.equals(expected)
|
||
|
|
||
|
# convert to its storage type
|
||
|
result = pa.array(intervals, type=expected.type)
|
||
|
assert result.equals(expected)
|
||
|
|
||
|
# unsupported conversions
|
||
|
with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
|
||
|
pa.array(intervals, type="float64")
|
||
|
|
||
|
with pytest.raises(TypeError, match="different 'subtype'"):
|
||
|
pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))
|
||
|
|
||
|
|
||
|
@pyarrow_skip
|
||
|
def test_arrow_array_missing():
|
||
|
import pyarrow as pa
|
||
|
|
||
|
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
|
||
|
|
||
|
arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
|
||
|
arr[1] = None
|
||
|
|
||
|
result = pa.array(arr)
|
||
|
assert isinstance(result.type, ArrowIntervalType)
|
||
|
assert result.type.closed == arr.closed
|
||
|
assert result.type.subtype == pa.float64()
|
||
|
|
||
|
# fields have missing values (not NaN)
|
||
|
left = pa.array([0.0, None, 2.0], type="float64")
|
||
|
right = pa.array([1.0, None, 3.0], type="float64")
|
||
|
assert result.storage.field("left").equals(left)
|
||
|
assert result.storage.field("right").equals(right)
|
||
|
|
||
|
# structarray itself also has missing values on the array level
|
||
|
vals = [
|
||
|
{"left": 0.0, "right": 1.0},
|
||
|
{"left": None, "right": None},
|
||
|
{"left": 2.0, "right": 3.0},
|
||
|
]
|
||
|
expected = pa.StructArray.from_pandas(vals, mask=np.array([False, True, False]))
|
||
|
assert result.storage.equals(expected)
|
||
|
|
||
|
|
||
|
@pyarrow_skip
|
||
|
@pytest.mark.parametrize(
|
||
|
"breaks",
|
||
|
[[0.0, 1.0, 2.0, 3.0], date_range("2017", periods=4, freq="D")],
|
||
|
ids=["float", "datetime64[ns]"],
|
||
|
)
|
||
|
def test_arrow_table_roundtrip(breaks):
|
||
|
import pyarrow as pa
|
||
|
|
||
|
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
|
||
|
|
||
|
arr = IntervalArray.from_breaks(breaks)
|
||
|
arr[1] = None
|
||
|
df = pd.DataFrame({"a": arr})
|
||
|
|
||
|
table = pa.table(df)
|
||
|
assert isinstance(table.field("a").type, ArrowIntervalType)
|
||
|
result = table.to_pandas()
|
||
|
assert isinstance(result["a"].dtype, pd.IntervalDtype)
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
table2 = pa.concat_tables([table, table])
|
||
|
result = table2.to_pandas()
|
||
|
expected = pd.concat([df, df], ignore_index=True)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# GH-41040
|
||
|
table = pa.table(
|
||
|
[pa.chunked_array([], type=table.column(0).type)], schema=table.schema
|
||
|
)
|
||
|
result = table.to_pandas()
|
||
|
tm.assert_frame_equal(result, expected[0:0])
|
||
|
|
||
|
|
||
|
@pyarrow_skip
|
||
|
@pytest.mark.parametrize(
|
||
|
"breaks",
|
||
|
[[0.0, 1.0, 2.0, 3.0], date_range("2017", periods=4, freq="D")],
|
||
|
ids=["float", "datetime64[ns]"],
|
||
|
)
|
||
|
def test_arrow_table_roundtrip_without_metadata(breaks):
|
||
|
import pyarrow as pa
|
||
|
|
||
|
arr = IntervalArray.from_breaks(breaks)
|
||
|
arr[1] = None
|
||
|
df = pd.DataFrame({"a": arr})
|
||
|
|
||
|
table = pa.table(df)
|
||
|
# remove the metadata
|
||
|
table = table.replace_schema_metadata()
|
||
|
assert table.schema.metadata is None
|
||
|
|
||
|
result = table.to_pandas()
|
||
|
assert isinstance(result["a"].dtype, pd.IntervalDtype)
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
|
||
|
@pyarrow_skip
|
||
|
def test_from_arrow_from_raw_struct_array():
|
||
|
# in case pyarrow lost the Interval extension type (eg on parquet roundtrip
|
||
|
# with datetime64[ns] subtype, see GH-45881), still allow conversion
|
||
|
# from arrow to IntervalArray
|
||
|
import pyarrow as pa
|
||
|
|
||
|
arr = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
|
||
|
dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")
|
||
|
|
||
|
result = dtype.__from_arrow__(arr)
|
||
|
expected = IntervalArray.from_breaks(
|
||
|
np.array([0, 1, 2], dtype="int64"), closed="neither"
|
||
|
)
|
||
|
tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
result = dtype.__from_arrow__(pa.chunked_array([arr]))
|
||
|
tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"])
|
||
|
def test_interval_index_subtype(timezone, inclusive_endpoints_fixture):
|
||
|
# GH 46999
|
||
|
dates = date_range("2022", periods=3, tz=timezone)
|
||
|
dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]"
|
||
|
result = IntervalIndex.from_arrays(
|
||
|
["2022-01-01", "2022-01-02"],
|
||
|
["2022-01-02", "2022-01-03"],
|
||
|
closed=inclusive_endpoints_fixture,
|
||
|
dtype=dtype,
|
||
|
)
|
||
|
expected = IntervalIndex.from_arrays(
|
||
|
dates[:-1], dates[1:], closed=inclusive_endpoints_fixture
|
||
|
)
|
||
|
tm.assert_index_equal(result, expected)
|