from datetime import timedelta import numpy as np import pytest from pandas._libs import iNaT import pandas as pd from pandas import ( Categorical, Index, NaT, Series, isna, ) import pandas._testing as tm class TestSeriesMissingData: def test_categorical_nan_handling(self): # NaNs are represented as -1 in labels s = Series(Categorical(["a", "b", np.nan, "a"])) tm.assert_index_equal(s.cat.categories, Index(["a", "b"])) tm.assert_numpy_array_equal( s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8) ) def test_isna_for_inf(self): s = Series(["a", np.inf, np.nan, pd.NA, 1.0]) with pd.option_context("mode.use_inf_as_na", True): r = s.isna() dr = s.dropna() e = Series([False, True, True, True, False]) de = Series(["a", 1.0], index=[0, 4]) tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) @pytest.mark.parametrize( "method, expected", [ ["isna", Series([False, True, True, False])], ["dropna", Series(["a", 1.0], index=[0, 3])], ], ) def test_isnull_for_inf_deprecated(self, method, expected): # gh-17115 s = Series(["a", np.inf, np.nan, 1.0]) with pd.option_context("mode.use_inf_as_null", True): result = getattr(s, method)() tm.assert_series_equal(result, expected) def test_timedelta64_nan(self): td = Series([timedelta(days=i) for i in range(10)]) # nan ops on timedeltas td1 = td.copy() td1[0] = np.nan assert isna(td1[0]) assert td1[0].value == iNaT td1[0] = td[0] assert not isna(td1[0]) # GH#16674 iNaT is treated as an integer when given by the user td1[1] = iNaT assert not isna(td1[1]) assert td1.dtype == np.object_ assert td1[1] == iNaT td1[1] = td[1] assert not isna(td1[1]) td1[2] = NaT assert isna(td1[2]) assert td1[2].value == iNaT td1[2] = td[2] assert not isna(td1[2]) # boolean setting # GH#2899 boolean setting td3 = np.timedelta64(timedelta(days=3)) td7 = np.timedelta64(timedelta(days=7)) td[(td > td3) & (td < td7)] = np.nan assert isna(td).sum() == 3 @pytest.mark.xfail( reason="Chained inequality raises when trying to define 'selector'" ) def test_logical_range_select(self, datetime_series): # NumPy limitation =( # https://github.com/pandas-dev/pandas/commit/9030dc021f07c76809848925cb34828f6c8484f3 np.random.seed(12345) selector = -0.5 <= datetime_series <= 0.5 expected = (datetime_series >= -0.5) & (datetime_series <= 0.5) tm.assert_series_equal(selector, expected) def test_valid(self, datetime_series): ts = datetime_series.copy() ts.index = ts.index._with_freq(None) ts[::2] = np.NaN result = ts.dropna() assert len(result) == ts.count() tm.assert_series_equal(result, ts[1::2]) tm.assert_series_equal(result, ts[pd.notna(ts)]) def test_hasnans_uncached_for_series(): # GH#19700 idx = Index([0, 1]) assert idx.hasnans is False assert "hasnans" in idx._cache ser = idx.to_series() assert ser.hasnans is False assert not hasattr(ser, "_cache") ser.iloc[-1] = np.nan assert ser.hasnans is True assert Series.hasnans.__doc__ == Index.hasnans.__doc__