import numpy as np import pytest from pandas.errors import InvalidIndexError from pandas import ( Index, RangeIndex, Series, Timestamp, ) import pandas._testing as tm from pandas.core.indexes.api import ( Float64Index, Int64Index, UInt64Index, ) @pytest.fixture def index_large(): # large values used in UInt64Index tests where no compat needed with Int64/Float64 large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] return UInt64Index(large) class TestGetLoc: @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) def test_get_loc(self, method): index = Index([0, 1, 2]) warn = None if method is None else FutureWarning with tm.assert_produces_warning(warn, match="deprecated"): assert index.get_loc(1, method=method) == 1 if method: with tm.assert_produces_warning(warn, match="deprecated"): assert index.get_loc(1, method=method, tolerance=0) == 1 @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc_raises_bad_label(self, method): index = Index([0, 1, 2]) if method: msg = "not supported between" err = TypeError else: msg = r"\[1, 2\]" err = InvalidIndexError with pytest.raises(err, match=msg): index.get_loc([1, 2], method=method) @pytest.mark.parametrize( "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] ) @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc_tolerance(self, method, loc): index = Index([0, 1, 2]) assert index.get_loc(1.1, method) == loc assert index.get_loc(1.1, method, tolerance=1) == loc @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) def test_get_loc_outside_tolerance_raises(self, method): index = Index([0, 1, 2]) with pytest.raises(KeyError, match="1.1"): with tm.assert_produces_warning(FutureWarning, match="deprecated"): index.get_loc(1.1, method, tolerance=0.05) def test_get_loc_bad_tolerance_raises(self): index = Index([0, 1, 2]) with pytest.raises(ValueError, match="must be numeric"): with tm.assert_produces_warning(FutureWarning, match="deprecated"): index.get_loc(1.1, "nearest", tolerance="invalid") def test_get_loc_tolerance_no_method_raises(self): index = Index([0, 1, 2]) with pytest.raises(ValueError, match="tolerance .* valid if"): index.get_loc(1.1, tolerance=1) def test_get_loc_raises_missized_tolerance(self): index = Index([0, 1, 2]) with pytest.raises(ValueError, match="tolerance size must match"): with tm.assert_produces_warning(FutureWarning, match="deprecated"): index.get_loc(1.1, "nearest", tolerance=[1, 1]) @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc_float64(self): idx = Float64Index([0.0, 1.0, 2.0]) for method in [None, "pad", "backfill", "nearest"]: assert idx.get_loc(1, method) == 1 if method is not None: assert idx.get_loc(1, method, tolerance=0) == 1 for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: assert idx.get_loc(1.1, method) == loc assert idx.get_loc(1.1, method, tolerance=0.9) == loc with pytest.raises(KeyError, match="^'foo'$"): idx.get_loc("foo") with pytest.raises(KeyError, match=r"^1\.5$"): idx.get_loc(1.5) with pytest.raises(KeyError, match=r"^1\.5$"): idx.get_loc(1.5, method="pad", tolerance=0.1) with pytest.raises(KeyError, match="^True$"): idx.get_loc(True) with pytest.raises(KeyError, match="^False$"): idx.get_loc(False) with pytest.raises(ValueError, match="must be numeric"): idx.get_loc(1.4, method="nearest", tolerance="foo") with pytest.raises(ValueError, match="must contain numeric elements"): idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"])) with pytest.raises( ValueError, match="tolerance size must match target index size" ): idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2])) def test_get_loc_na(self): idx = Float64Index([np.nan, 1, 2]) assert idx.get_loc(1) == 1 assert idx.get_loc(np.nan) == 0 idx = Float64Index([np.nan, 1, np.nan]) assert idx.get_loc(1) == 1 # representable by slice [0:2:2] msg = "'Cannot get left slice bound for non-unique label: nan'" with pytest.raises(KeyError, match=msg): idx.slice_locs(np.nan) # not representable by slice idx = Float64Index([np.nan, 1, np.nan, np.nan]) assert idx.get_loc(1) == 1 msg = "'Cannot get left slice bound for non-unique label: nan" with pytest.raises(KeyError, match=msg): idx.slice_locs(np.nan) def test_get_loc_missing_nan(self): # GH#8569 idx = Float64Index([1, 2]) assert idx.get_loc(1) == 0 with pytest.raises(KeyError, match=r"^3$"): idx.get_loc(3) with pytest.raises(KeyError, match="^nan$"): idx.get_loc(np.nan) with pytest.raises(InvalidIndexError, match=r"\[nan\]"): # listlike/non-hashable raises TypeError idx.get_loc([np.nan]) @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) @pytest.mark.parametrize("method", ["nearest", "pad", "backfill"]) def test_get_loc_float_index_nan_with_method(self, vals, method): # GH#39382 idx = Index(vals) with pytest.raises(KeyError, match="nan"): with tm.assert_produces_warning(FutureWarning, match="deprecated"): idx.get_loc(np.nan, method=method) @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) def test_get_loc_numericindex_none_raises(self, dtype): # case that goes through searchsorted and key is non-comparable to values arr = np.arange(10**7, dtype=dtype) idx = Index(arr) with pytest.raises(KeyError, match="None"): idx.get_loc(None) def test_get_loc_overflows(self): # unique but non-monotonic goes through IndexEngine.mapping.get_item idx = Index([0, 2, 1]) val = np.iinfo(np.int64).max + 1 with pytest.raises(KeyError, match=str(val)): idx.get_loc(val) with pytest.raises(KeyError, match=str(val)): idx._engine.get_loc(val) class TestGetIndexer: def test_get_indexer(self): index1 = Index([1, 2, 3, 4, 5]) index2 = Index([2, 4, 6]) r1 = index1.get_indexer(index2) e1 = np.array([1, 3, -1], dtype=np.intp) tm.assert_almost_equal(r1, e1) @pytest.mark.parametrize("reverse", [True, False]) @pytest.mark.parametrize( "expected,method", [ (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), ], ) def test_get_indexer_methods(self, reverse, expected, method): index1 = Index([1, 2, 3, 4, 5]) index2 = Index([2, 4, 6]) if reverse: index1 = index1[::-1] expected = expected[::-1] result = index2.get_indexer(index1, method=method) tm.assert_almost_equal(result, expected) def test_get_indexer_invalid(self): # GH10411 index = Index(np.arange(10)) with pytest.raises(ValueError, match="tolerance argument"): index.get_indexer([1, 0], tolerance=1) with pytest.raises(ValueError, match="limit argument"): index.get_indexer([1, 0], limit=1) @pytest.mark.parametrize( "method, tolerance, indexer, expected", [ ("pad", None, [0, 5, 9], [0, 5, 9]), ("backfill", None, [0, 5, 9], [0, 5, 9]), ("nearest", None, [0, 5, 9], [0, 5, 9]), ("pad", 0, [0, 5, 9], [0, 5, 9]), ("backfill", 0, [0, 5, 9], [0, 5, 9]), ("nearest", 0, [0, 5, 9], [0, 5, 9]), ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]), ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]), ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]), ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]), ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]), ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]), ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]), ], ) def test_get_indexer_nearest(self, method, tolerance, indexer, expected): index = Index(np.arange(10)) actual = index.get_indexer(indexer, method=method, tolerance=tolerance) tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) @pytest.mark.parametrize( "tolerance, expected", list( zip( [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]], [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], ) ), ) def test_get_indexer_nearest_listlike_tolerance( self, tolerance, expected, listtype ): index = Index(np.arange(10)) actual = index.get_indexer( [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance) ) tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) def test_get_indexer_nearest_error(self): index = Index(np.arange(10)) with pytest.raises(ValueError, match="limit argument"): index.get_indexer([1, 0], method="nearest", limit=1) with pytest.raises(ValueError, match="tolerance size must match"): index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3]) @pytest.mark.parametrize( "method,expected", [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])], ) def test_get_indexer_nearest_decreasing(self, method, expected): index = Index(np.arange(10))[::-1] actual = index.get_indexer([0, 5, 9], method=method) tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) actual = index.get_indexer([0.2, 1.8, 8.5], method=method) tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) @pytest.mark.parametrize( "idx_class", [Int64Index, RangeIndex, Float64Index, UInt64Index] ) @pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"]) def test_get_indexer_numeric_index_boolean_target(self, method, idx_class): # GH 16877 numeric_index = idx_class(RangeIndex(4)) other = Index([True, False, True]) result = getattr(numeric_index, method)(other) expected = np.array([-1, -1, -1], dtype=np.intp) if method == "get_indexer": tm.assert_numpy_array_equal(result, expected) else: missing = np.arange(3, dtype=np.intp) tm.assert_numpy_array_equal(result[0], expected) tm.assert_numpy_array_equal(result[1], missing) @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) def test_get_indexer_with_method_numeric_vs_bool(self, method): left = Index([1, 2, 3]) right = Index([True, False]) with pytest.raises(TypeError, match="Cannot compare"): left.get_indexer(right, method=method) with pytest.raises(TypeError, match="Cannot compare"): right.get_indexer(left, method=method) def test_get_indexer_numeric_vs_bool(self): left = Index([1, 2, 3]) right = Index([True, False]) res = left.get_indexer(right) expected = -1 * np.ones(len(right), dtype=np.intp) tm.assert_numpy_array_equal(res, expected) res = right.get_indexer(left) expected = -1 * np.ones(len(left), dtype=np.intp) tm.assert_numpy_array_equal(res, expected) res = left.get_indexer_non_unique(right)[0] expected = -1 * np.ones(len(right), dtype=np.intp) tm.assert_numpy_array_equal(res, expected) res = right.get_indexer_non_unique(left)[0] expected = -1 * np.ones(len(left), dtype=np.intp) tm.assert_numpy_array_equal(res, expected) def test_get_indexer_float64(self): idx = Float64Index([0.0, 1.0, 2.0]) tm.assert_numpy_array_equal( idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) ) target = [-0.1, 0.5, 1.1] tm.assert_numpy_array_equal( idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) ) tm.assert_numpy_array_equal( idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) ) tm.assert_numpy_array_equal( idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) ) def test_get_indexer_nan(self): # GH#7820 result = Float64Index([1, 2, np.nan]).get_indexer([np.nan]) expected = np.array([2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) def test_get_indexer_int64(self): index = Int64Index(range(0, 20, 2)) target = Int64Index(np.arange(10)) indexer = index.get_indexer(target) expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) target = Int64Index(np.arange(10)) indexer = index.get_indexer(target, method="pad") expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) target = Int64Index(np.arange(10)) indexer = index.get_indexer(target, method="backfill") expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) def test_get_indexer_uint64(self, index_large): target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63) indexer = index_large.get_indexer(target) expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63) indexer = index_large.get_indexer(target, method="pad") expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63) indexer = index_large.get_indexer(target, method="backfill") expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) class TestWhere: @pytest.mark.parametrize( "index", [ Float64Index(np.arange(5, dtype="float64")), Int64Index(range(0, 20, 2)), UInt64Index(np.arange(5, dtype="uint64")), ], ) def test_where(self, listlike_box, index): cond = [True] * len(index) expected = index result = index.where(listlike_box(cond)) cond = [False] + [True] * (len(index) - 1) expected = Float64Index([index._na_value] + index[1:].tolist()) result = index.where(listlike_box(cond)) tm.assert_index_equal(result, expected) def test_where_uint64(self): idx = UInt64Index([0, 6, 2]) mask = np.array([False, True, False]) other = np.array([1], dtype=np.int64) expected = UInt64Index([1, 6, 1]) result = idx.where(mask, other) tm.assert_index_equal(result, expected) result = idx.putmask(~mask, other) tm.assert_index_equal(result, expected) def test_where_infers_type_instead_of_trying_to_convert_string_to_float(self): # GH 32413 index = Index([1, np.nan]) cond = index.notna() other = Index(["a", "b"], dtype="string") expected = Index([1.0, "b"]) result = index.where(cond, other) tm.assert_index_equal(result, expected) class TestTake: @pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index]) def test_take_preserve_name(self, klass): index = klass([1, 2, 3, 4], name="foo") taken = index.take([3, 0, 1]) assert index.name == taken.name def test_take_fill_value_float64(self): # GH 12631 idx = Float64Index([1.0, 2.0, 3.0], name="xxx") result = idx.take(np.array([1, 0, -1])) expected = Float64Index([2.0, 1.0, 3.0], name="xxx") tm.assert_index_equal(result, expected) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = Float64Index([2.0, 1.0, np.nan], name="xxx") tm.assert_index_equal(result, expected) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = Float64Index([2.0, 1.0, 3.0], name="xxx") tm.assert_index_equal(result, expected) msg = ( "When allow_fill=True and fill_value is not None, " "all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) msg = "index -5 is out of bounds for (axis 0 with )?size 3" with pytest.raises(IndexError, match=msg): idx.take(np.array([1, -5])) @pytest.mark.parametrize("klass", [Int64Index, UInt64Index]) def test_take_fill_value_ints(self, klass): # see gh-12631 idx = klass([1, 2, 3], name="xxx") result = idx.take(np.array([1, 0, -1])) expected = klass([2, 1, 3], name="xxx") tm.assert_index_equal(result, expected) name = klass.__name__ msg = f"Unable to fill values because {name} cannot contain NA" # fill_value=True with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = klass([2, 1, 3], name="xxx") tm.assert_index_equal(result, expected) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) msg = "index -5 is out of bounds for (axis 0 with )?size 3" with pytest.raises(IndexError, match=msg): idx.take(np.array([1, -5])) class TestContains: @pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index]) def test_contains_none(self, klass): # GH#35788 should return False, not raise TypeError index = klass([0, 1, 2, 3, 4]) assert None not in index def test_contains_float64_nans(self): index = Float64Index([1.0, 2.0, np.nan]) assert np.nan in index def test_contains_float64_not_nans(self): index = Float64Index([1.0, 2.0, np.nan]) assert 1.0 in index class TestSliceLocs: @pytest.mark.parametrize("dtype", [int, float]) def test_slice_locs(self, dtype): index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) n = len(index) assert index.slice_locs(start=2) == (2, n) assert index.slice_locs(start=3) == (3, n) assert index.slice_locs(3, 8) == (3, 6) assert index.slice_locs(5, 10) == (3, n) assert index.slice_locs(end=8) == (0, 6) assert index.slice_locs(end=9) == (0, 7) # reversed index2 = index[::-1] assert index2.slice_locs(8, 2) == (2, 6) assert index2.slice_locs(7, 3) == (2, 5) @pytest.mark.parametrize("dtype", [int, float]) def test_slice_locs_float_locs(self, dtype): index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) n = len(index) assert index.slice_locs(5.0, 10.0) == (3, n) assert index.slice_locs(4.5, 10.5) == (3, 8) index2 = index[::-1] assert index2.slice_locs(8.5, 1.5) == (2, 6) assert index2.slice_locs(10.5, -1) == (0, n) @pytest.mark.parametrize("dtype", [int, float]) def test_slice_locs_dup_numeric(self, dtype): index = Index(np.array([10, 12, 12, 14], dtype=dtype)) assert index.slice_locs(12, 12) == (1, 3) assert index.slice_locs(11, 13) == (1, 3) index2 = index[::-1] assert index2.slice_locs(12, 12) == (1, 3) assert index2.slice_locs(13, 11) == (1, 3) def test_slice_locs_na(self): index = Index([np.nan, 1, 2]) assert index.slice_locs(1) == (1, 3) assert index.slice_locs(np.nan) == (0, 3) index = Index([0, np.nan, np.nan, 1, 2]) assert index.slice_locs(np.nan) == (1, 5) def test_slice_locs_na_raises(self): index = Index([np.nan, 1, 2]) with pytest.raises(KeyError, match=""): index.slice_locs(start=1.5) with pytest.raises(KeyError, match=""): index.slice_locs(end=1.5) class TestGetSliceBounds: @pytest.mark.parametrize("kind", ["getitem", "loc", None]) @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) def test_get_slice_bounds_within(self, kind, side, expected): index = Index(range(6)) with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): result = index.get_slice_bound(4, kind=kind, side=side) assert result == expected @pytest.mark.parametrize("kind", ["getitem", "loc", None]) @pytest.mark.parametrize("side", ["left", "right"]) @pytest.mark.parametrize("bound, expected", [(-1, 0), (10, 6)]) def test_get_slice_bounds_outside(self, kind, side, expected, bound): index = Index(range(6)) with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): result = index.get_slice_bound(bound, kind=kind, side=side) assert result == expected