# Source path: aoc-2022/venv/Lib/site-packages/pandas/tests/indexes/test_indexing.py
# (374 lines, 12 KiB, Python)

"""
test_indexing tests the following Index methods:
__getitem__
get_loc
get_value
__contains__
take
where
get_indexer
get_indexer_for
slice_locs
asof_locs
The corresponding tests.indexes.[index_type].test_indexing files
contain tests for the corresponding methods specific to those Index subclasses.
"""
import numpy as np
import pytest
from pandas.errors import InvalidIndexError
from pandas import (
NA,
DatetimeIndex,
Index,
IntervalIndex,
MultiIndex,
NaT,
PeriodIndex,
RangeIndex,
Series,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.api import (
Float64Index,
Int64Index,
UInt64Index,
)
class TestTake:
    """Tests for ``Index.take``.

    Methods that accept ``index`` receive it from a pytest fixture that is
    not defined in this file.
    """

    def test_take_invalid_kwargs(self, index):
        """Unknown keywords raise TypeError; ``out``/``mode`` raise ValueError."""
        indices = [1, 2]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            index.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            index.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            index.take(indices, mode="clip")

    def test_take(self, index):
        """``take`` with a positional indexer matches ``index[indexer]``."""
        indexer = [4, 3, 0, 2]
        if len(index) < 5:
            # The indexer reaches position 4, so shorter indexes cannot be
            # exercised.  Skip explicitly so the case is reported as skipped
            # rather than silently counted as a pass.
            pytest.skip("Test doesn't make sense since not enough elements")

        result = index.take(indexer)
        expected = index[indexer]
        assert result.equals(expected)

        if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
            # GH 10791
            msg = r"'(.*Index)' object has no attribute 'freq'"
            with pytest.raises(AttributeError, match=msg):
                # Attribute access alone is expected to raise.
                index.freq

    def test_take_indexer_type(self):
        """A scalar passed as ``indices`` raises TypeError."""
        # GH#42875
        integer_index = Index([0, 1, 2, 3])
        scalar_index = 1
        msg = "Expected indices to be array-like"
        with pytest.raises(TypeError, match=msg):
            integer_index.take(scalar_index)

    def test_take_minus1_without_fill(self, index):
        """``-1`` selects the last element when ``allow_fill`` is not passed."""
        # -1 does not get treated as NA unless allow_fill=True is passed
        if len(index) == 0:
            # Position 0 does not exist; report a skip instead of a pass.
            pytest.skip("Test doesn't make sense for empty index")

        result = index.take([0, 0, -1])
        expected = index.take([0, 0, len(index) - 1])
        tm.assert_index_equal(result, expected)
class TestContains:
    """Membership checks (``in`` / ``not in``) on Index objects."""

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), 2),
            (Index([0, 1, "2"]), "2"),
            (Index([0, 1, 2, np.inf, 4]), 4),
            (Index([0, 1, 2, np.nan, 4]), 4),
            (Index([0, 1, 2, np.inf]), np.inf),
            (Index([0, 1, 2, np.nan]), np.nan),
        ],
    )
    def test_index_contains(self, index, val):
        """Each value is reported as a member of its paired index."""
        assert val in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), "2"),
            (Index([0, 1, "2"]), 2),
            (Index([0, 1, 2, np.inf]), 4),
            (Index([0, 1, 2, np.nan]), 4),
            (Index([0, 1, 2, np.inf]), np.nan),
            (Index([0, 1, 2, np.nan]), np.inf),
            # Checking if np.inf in Int64Index should not cause an OverflowError
            # Related to GH 16957
            (Int64Index([0, 1, 2]), np.inf),
            (Int64Index([0, 1, 2]), np.nan),
            (UInt64Index([0, 1, 2]), np.inf),
            (UInt64Index([0, 1, 2]), np.nan),
        ],
    )
    def test_index_not_contains(self, index, val):
        """Each value is reported as absent from its paired index."""
        assert val not in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, "2"]), 0),
            (Index([0, 1, "2"]), "2"),
        ],
    )
    def test_mixed_index_contains(self, index, val):
        """GH#19860: present values are found in a mixed int/str index."""
        assert val in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, "2"]), "1"),
            (Index([0, 1, "2"]), 2),
        ],
    )
    def test_mixed_index_not_contains(self, index, val):
        """GH#19860: absent values are not found in a mixed int/str index."""
        assert val not in index

    def test_contains_with_float_index(self):
        """GH#22085: float/int membership on integer and float indexes."""
        signed = Int64Index([0, 1, 2, 3])
        unsigned = UInt64Index([0, 1, 2, 3])
        floats = Float64Index([0.1, 1.1, 2.2, 3.3])

        for int_idx in (signed, unsigned):
            assert 1.1 not in int_idx
            assert 1.0 in int_idx
            assert 1 in int_idx

        assert 1.1 in floats
        assert 1.0 not in floats
        assert 1 not in floats

    def test_contains_requires_hashable_raises(self, index):
        """Unhashable keys raise TypeError on membership checks."""
        if isinstance(index, MultiIndex):
            return  # TODO: do we want this to raise?

        with pytest.raises(TypeError, match="unhashable type: 'list'"):
            [] in index

        # Any one of these messages is accepted for the dict lookup.
        dict_messages = [
            r"unhashable type: 'dict'",
            r"must be real number, not dict",
            r"an integer is required",
            r"\{\}",
            r"pandas\._libs\.interval\.IntervalTree' is not iterable",
        ]
        with pytest.raises(TypeError, match="|".join(dict_messages)):
            {} in index._engine
class TestGetValue:
    """Tests for ``Index.get_value``."""

    @pytest.mark.parametrize(
        "index",
        ["string", "int", "datetime", "timedelta"],
        indirect=True,
    )
    def test_get_value(self, index):
        """``get_value`` warns, rejects an ndarray and works with a Series."""
        # TODO(2.0): can remove once get_value deprecation is enforced GH#19728
        data = np.random.randn(100)
        key = index[67]

        # Index.get_value requires a Series, not an ndarray
        with pytest.raises(
            AttributeError, match="has no attribute '_values'"
        ), tm.assert_produces_warning(FutureWarning):
            index.get_value(data, key)

        with tm.assert_produces_warning(FutureWarning):
            looked_up = index.get_value(Series(data, index=data), key)
        tm.assert_almost_equal(looked_up, data[67])
class TestGetLoc:
    """Tests for ``Index.get_loc``."""

    def test_get_loc_non_hashable(self, index):
        """A slice key is rejected by ``get_loc``."""
        # MultiIndex and Index raise TypeError, others InvalidIndexError
        accepted_errors = (TypeError, InvalidIndexError)
        with pytest.raises(accepted_errors, match="slice"):
            index.get_loc(slice(0, 1))

    def test_get_loc_generator(self, index):
        """A generator key is rejected; the exception type depends on the index."""
        # TODO: make these more consistent?
        raises_invalid_index = (
            DatetimeIndex,
            TimedeltaIndex,
            PeriodIndex,
            RangeIndex,
            IntervalIndex,
            MultiIndex,
        )
        if isinstance(index, raises_invalid_index):
            expected_exc = InvalidIndexError
        else:
            expected_exc = KeyError

        with pytest.raises(expected_exc, match="generator object"):
            # MultiIndex specifically checks for generator; others for scalar
            index.get_loc(x for x in range(5))

    def test_get_loc_masked_duplicated_na(self):
        """GH#48411: ``get_loc(NA)`` on duplicated NA gives a boolean mask."""
        masked = Index([1, 2, NA, NA], dtype="Int64")
        locs = masked.get_loc(NA)
        tm.assert_numpy_array_equal(locs, np.array([False, False, True, True]))
class TestGetIndexer:
    """Tests for ``get_indexer``, ``get_indexer_non_unique`` and ``get_indexer_for``."""

    def test_get_indexer_base(self, index):
        """Self-lookup gives ``arange`` when unique, else InvalidIndexError."""
        if not index._index_as_unique:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                index.get_indexer(index)
        else:
            positions = np.arange(index.size, dtype=np.intp)
            tm.assert_numpy_array_equal(positions, index.get_indexer(index))

        with pytest.raises(ValueError, match="Invalid fill method"):
            index.get_indexer(index, method="invalid")

    def test_get_indexer_consistency(self, index):
        """See GH#16819: returned indexers are intp ndarrays."""
        head = slice(0, 2)

        if not index._index_as_unique:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                index.get_indexer(index[head])
        else:
            unique_indexer = index.get_indexer(index[head])
            assert isinstance(unique_indexer, np.ndarray)
            assert unique_indexer.dtype == np.intp

        non_unique_indexer, _ = index.get_indexer_non_unique(index[head])
        assert isinstance(non_unique_indexer, np.ndarray)
        assert non_unique_indexer.dtype == np.intp

    def test_get_indexer_masked_duplicated_na(self):
        """GH#48411: ``get_indexer_for`` returns every position of a duplicated NA."""
        masked = Index([1, 2, NA, NA], dtype="Int64")
        target = Index([1, NA], dtype="Int64")

        found = masked.get_indexer_for(target)

        tm.assert_numpy_array_equal(found, np.array([0, 2, 3], dtype=found.dtype))
class TestConvertSliceIndexer:
    """Tests for the private ``Index._convert_slice_indexer``."""

    def test_convert_almost_null_slice(self, index):
        """A slice with only a non-numeric step is rejected."""
        # slice with None at both ends, but not step
        key = slice(None, None, "foo")

        # Choose the expected failure first, then make the call once.
        if isinstance(index, IntervalIndex):
            expected_exc = ValueError
            msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        else:
            expected_exc = TypeError
            msg = "'>=' not supported between instances of 'str' and 'int'"

        with pytest.raises(expected_exc, match=msg):
            index._convert_slice_indexer(key, "loc")
class TestPutmask:
    """Tests for ``Index.putmask``."""

    def test_putmask_with_wrong_mask(self, index):
        """A mask whose size differs from the index raises ValueError."""
        # GH#18368
        if not len(index):
            # ``index[0]`` is needed as the fill value; report a skip rather
            # than letting the empty case silently count as a pass.
            pytest.skip("Test doesn't make sense for empty index")

        fill = index[0]

        msg = "putmask: mask and data must be the same size"
        # Mask one element too long.
        with pytest.raises(ValueError, match=msg):
            index.putmask(np.ones(len(index) + 1, np.bool_), fill)

        # Mask one element too short.
        with pytest.raises(ValueError, match=msg):
            index.putmask(np.ones(len(index) - 1, np.bool_), fill)

        # A string passed where a mask is expected.
        with pytest.raises(ValueError, match=msg):
            index.putmask("foo", fill)
@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2, 3]),
        Index([0.1, 0.2, 0.3]),
        Index(["a", "b", "c"]),
    ],
)
def test_getitem_deprecated_float(idx):
    """Indexing with ``1.0`` emits a FutureWarning and matches ``idx[1]``."""
    # https://github.com/pandas-dev/pandas/issues/34191
    with tm.assert_produces_warning(FutureWarning):
        via_float = idx[1.0]

    via_int = idx[1]
    assert via_float == via_int
def test_maybe_cast_slice_bound_kind_deprecated(index):
    """Passing ``kind`` to ``_maybe_cast_slice_bound`` emits a FutureWarning.

    The argument is checked both as a keyword and as the third positional
    argument.
    """
    if not len(index):
        # ``index[0]`` is needed as the label; report a skip rather than
        # letting the empty case silently count as a pass.
        pytest.skip("Test doesn't make sense for empty index")

    with tm.assert_produces_warning(FutureWarning):
        # passed as keyword
        index._maybe_cast_slice_bound(index[0], "left", kind="loc")

    with tm.assert_produces_warning(FutureWarning):
        # pass as positional
        index._maybe_cast_slice_bound(index[0], "left", "loc")
@pytest.mark.parametrize(
    "idx,target,expected",
    [
        (
            [np.nan, "var1", np.nan],
            [np.nan],
            np.array([0, 2], dtype=np.intp),
        ),
        (
            [np.nan, "var1", np.nan],
            [np.nan, "var1"],
            np.array([0, 2, 1], dtype=np.intp),
        ),
        (
            np.array([np.nan, "var1", np.nan], dtype=object),
            [np.nan],
            np.array([0, 2], dtype=np.intp),
        ),
        (
            DatetimeIndex(["2020-08-05", NaT, NaT]),
            [NaT],
            np.array([1, 2], dtype=np.intp),
        ),
        (
            ["a", "b", "a", np.nan],
            [np.nan],
            np.array([3], dtype=np.intp),
        ),
        (
            np.array(["b", np.nan, float("NaN"), "b"], dtype=object),
            Index([np.nan], dtype=object),
            np.array([1, 2], dtype=np.intp),
        ),
    ],
)
def test_get_indexer_non_unique_multiple_nans(idx, target, expected):
    """GH 35392: ``get_indexer_for`` returns the expected positions for each case."""
    positions = Index(idx).get_indexer_for(target)
    tm.assert_numpy_array_equal(positions, expected)
def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture):
    """A null in an object-dtype target is reported missing from a float index."""
    float_index = Index([1.0, 2.0])
    object_target = Index([1, nulls_fixture], dtype="object")

    indexer, missing = float_index.get_indexer_non_unique(object_target)

    # Target position 0 matches at position 0; target position 1 (the null)
    # is not found.
    expected_indexer = np.array([0, -1], dtype=np.intp)
    expected_missing = np.array([1], dtype=np.intp)
    tm.assert_numpy_array_equal(indexer, expected_indexer)
    tm.assert_numpy_array_equal(missing, expected_missing)