aoc-2022/venv/Lib/site-packages/pandas/tests/frame/test_reductions.py

from datetime import timedelta
from decimal import Decimal
import inspect
import re

from dateutil.tz import tzlocal
import numpy as np
import pytest

from pandas._libs import lib
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_categorical_dtype

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    Index,
    MultiIndex,
    Series,
    Timestamp,
    date_range,
    isna,
    notna,
    to_datetime,
    to_timedelta,
)
import pandas._testing as tm
import pandas.core.algorithms as algorithms
import pandas.core.nanops as nanops


def assert_stat_op_calc(
    opname,
    alternative,
    frame,
    has_skipna=True,
    check_dtype=True,
    check_dates=False,
    rtol=1e-5,
    atol=1e-8,
    skipna_alternative=None,
):
    """
    Check that operator opname works as advertised on frame

    Parameters
    ----------
    opname : str
        Name of the operator to test on frame
    alternative : function
        Function that opname is tested against; i.e. "frame.opname()" should
        equal "alternative(frame)".
    frame : DataFrame
        The object that the tests are executed on
    has_skipna : bool, default True
        Whether the method "opname" has the kwarg "skip_na"
    check_dtype : bool, default True
        Whether the dtypes of the result of "frame.opname()" and
        "alternative(frame)" should be checked.
    check_dates : bool, default false
        Whether opname should be tested on a Datetime Series
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.
    skipna_alternative : function, default None
        NaN-safe version of alternative
    """
    warn = FutureWarning if opname == "mad" else None
    f = getattr(frame, opname)

    if check_dates:
        expected_warning = FutureWarning if opname in ["mean", "median"] else None
        df = DataFrame({"b": date_range("1/1/2001", periods=2)})
        with tm.assert_produces_warning(expected_warning):
            result = getattr(df, opname)()
        assert isinstance(result, Series)

        df["a"] = range(len(df))
        with tm.assert_produces_warning(expected_warning):
            result = getattr(df, opname)()
        assert isinstance(result, Series)
        assert len(result)

    if has_skipna:

        def wrapper(x):
            return alternative(x.values)

        skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative)
        with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"):
            result0 = f(axis=0, skipna=False)
            result1 = f(axis=1, skipna=False)
        tm.assert_series_equal(
            result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol
        )
        tm.assert_series_equal(
            result1,
            frame.apply(wrapper, axis=1),
            rtol=rtol,
            atol=atol,
        )
    else:
        skipna_wrapper = alternative

    with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"):
        result0 = f(axis=0)
        result1 = f(axis=1)
    tm.assert_series_equal(
        result0,
        frame.apply(skipna_wrapper),
        check_dtype=check_dtype,
        rtol=rtol,
        atol=atol,
    )

    if opname in ["sum", "prod"]:
        expected = frame.apply(skipna_wrapper, axis=1)
        tm.assert_series_equal(
            result1, expected, check_dtype=False, rtol=rtol, atol=atol
        )

    # check dtypes
    if check_dtype:
        lcd_dtype = frame.values.dtype
        assert lcd_dtype == result0.dtype
        assert lcd_dtype == result1.dtype

    # bad axis
    with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"):
        with pytest.raises(ValueError, match="No axis named 2"):
            f(axis=2)

    # all NA case
    if has_skipna:
        all_na = frame * np.NaN
        with tm.assert_produces_warning(
            warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False
        ):
            r0 = getattr(all_na, opname)(axis=0)
            r1 = getattr(all_na, opname)(axis=1)
        if opname in ["sum", "prod"]:
            unit = 1 if opname == "prod" else 0  # result for empty sum/prod
            expected = Series(unit, index=r0.index, dtype=r0.dtype)
            tm.assert_series_equal(r0, expected)
            expected = Series(unit, index=r1.index, dtype=r1.dtype)
            tm.assert_series_equal(r1, expected)


class TestDataFrameAnalytics:

    # ---------------------------------------------------------------------
    # Reductions
    @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning")
    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize(
        "opname",
        [
            "count",
            "sum",
            "mean",
            "product",
            "median",
            "min",
            "max",
            "nunique",
            "mad",
            "var",
            "std",
            "sem",
            pytest.param("skew", marks=td.skip_if_no_scipy),
            pytest.param("kurt", marks=td.skip_if_no_scipy),
        ],
    )
    def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname):
        warn = FutureWarning if opname == "mad" else None
        with tm.assert_produces_warning(
            warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False
        ):
            getattr(float_string_frame, opname)(axis=axis)
            if opname not in ("nunique", "mad"):
                getattr(float_string_frame, opname)(axis=axis, numeric_only=True)

    @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning")
    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize(
        "opname",
        [
            "count",
            "sum",
            "mean",
            "product",
            "median",
            "min",
            "max",
            "var",
            "std",
            "sem",
            pytest.param("skew", marks=td.skip_if_no_scipy),
            pytest.param("kurt", marks=td.skip_if_no_scipy),
        ],
    )
    def test_stat_op_api_float_frame(self, float_frame, axis, opname):
        getattr(float_frame, opname)(axis=axis, numeric_only=False)

    def test_stat_op_calc(self, float_frame_with_na, mixed_float_frame):
        def count(s):
            return notna(s).sum()

        def nunique(s):
            return len(algorithms.unique1d(s.dropna()))

        def mad(x):
            return np.abs(x - x.mean()).mean()

        def var(x):
            return np.var(x, ddof=1)

        def std(x):
            return np.std(x, ddof=1)

        def sem(x):
            return np.std(x, ddof=1) / np.sqrt(len(x))

        assert_stat_op_calc(
            "nunique",
            nunique,
            float_frame_with_na,
            has_skipna=False,
            check_dtype=False,
            check_dates=True,
        )

        # GH#32571 check_less_precise is needed on apparently-random
        #  py37-npdev builds and OSX-PY36-min_version builds
        # mixed types (with upcasting happening)
        assert_stat_op_calc(
            "sum",
            np.sum,
            mixed_float_frame.astype("float32"),
            check_dtype=False,
            rtol=1e-3,
        )

        assert_stat_op_calc(
            "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum
        )
        assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True)
        assert_stat_op_calc(
            "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod
        )

        assert_stat_op_calc("mad", mad, float_frame_with_na)
        assert_stat_op_calc("var", var, float_frame_with_na)
        assert_stat_op_calc("std", std, float_frame_with_na)
        assert_stat_op_calc("sem", sem, float_frame_with_na)

        assert_stat_op_calc(
            "count",
            count,
            float_frame_with_na,
            has_skipna=False,
            check_dtype=False,
            check_dates=True,
        )

    @td.skip_if_no_scipy
    def test_stat_op_calc_skew_kurtosis(self, float_frame_with_na):
        def skewness(x):
            from scipy.stats import skew

            if len(x) < 3:
                return np.nan
            return skew(x, bias=False)

        def kurt(x):
            from scipy.stats import kurtosis

            if len(x) < 4:
                return np.nan
            return kurtosis(x, bias=False)

        assert_stat_op_calc("skew", skewness, float_frame_with_na)
        assert_stat_op_calc("kurt", kurt, float_frame_with_na)

    # TODO: Ensure warning isn't emitted in the first place
    # ignore mean of empty slice and all-NaN
    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
    def test_median(self, float_frame_with_na, int_frame):
        def wrapper(x):
            if isna(x).any():
                return np.nan
            return np.median(x)

        assert_stat_op_calc("median", wrapper, float_frame_with_na, check_dates=True)
        assert_stat_op_calc(
            "median", wrapper, int_frame, check_dtype=False, check_dates=True
        )

    @pytest.mark.parametrize(
        "method", ["sum", "mean", "prod", "var", "std", "skew", "min", "max"]
    )
    @pytest.mark.parametrize(
        "df",
        [
            DataFrame(
                {
                    "a": [
                        -0.00049987540199591344,
                        -0.0016467257772919831,
                        0.00067695870775883013,
                    ],
                    "b": [-0, -0, 0.0],
                    "c": [
                        0.00031111847529610595,
                        0.0014902627951905339,
                        -0.00094099200035979691,
                    ],
                },
                index=["foo", "bar", "baz"],
                dtype="O",
            ),
            DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
        ],
    )
    def test_stat_operators_attempt_obj_array(self, method, df):
        # GH#676
        assert df.values.dtype == np.object_
        result = getattr(df, method)(1)
        expected = getattr(df.astype("f8"), method)(1)

        if method in ["sum", "prod"]:
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])
    def test_mixed_ops(self, op):
        # GH#16116
        df = DataFrame(
            {
                "int": [1, 2, 3, 4],
                "float": [1.0, 2.0, 3.0, 4.0],
                "str": ["a", "b", "c", "d"],
            }
        )
        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = getattr(df, op)()
        assert len(result) == 2

        with pd.option_context("use_bottleneck", False):
            with tm.assert_produces_warning(
                FutureWarning, match="Select only valid columns"
            ):
                result = getattr(df, op)()
            assert len(result) == 2

    def test_reduce_mixed_frame(self):
        # GH 6806
        df = DataFrame(
            {
                "bool_data": [True, True, False, False, False],
                "int_data": [10, 20, 30, 40, 50],
                "string_data": ["a", "b", "c", "d", "e"],
            }
        )
        df.reindex(columns=["bool_data", "int_data", "string_data"])
        test = df.sum(axis=0)
        tm.assert_numpy_array_equal(
            test.values, np.array([2, 150, "abcde"], dtype=object)
        )
        alt = df.T.sum(axis=1)
        tm.assert_series_equal(test, alt)

    def test_nunique(self):
        df = DataFrame({"A": [1, 1, 1], "B": [1, 2, 3], "C": [1, np.nan, 3]})
        tm.assert_series_equal(df.nunique(), Series({"A": 1, "B": 3, "C": 2}))
        tm.assert_series_equal(
            df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3})
        )
        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
        tm.assert_series_equal(
            df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})
        )

    @pytest.mark.parametrize("tz", [None, "UTC"])
    def test_mean_mixed_datetime_numeric(self, tz):
        # https://github.com/pandas-dev/pandas/issues/24752
        df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2})
        with tm.assert_produces_warning(FutureWarning):
            result = df.mean()
        expected = Series([1.0], index=["A"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "UTC"])
    def test_mean_excludes_datetimes(self, tz):
        # https://github.com/pandas-dev/pandas/issues/24752
        # Our long-term desired behavior is unclear, but the behavior in
        # 0.24.0rc1 was buggy.
        df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2})
        with tm.assert_produces_warning(FutureWarning):
            result = df.mean()

        expected = Series(dtype=np.float64)
        tm.assert_series_equal(result, expected)

    def test_mean_mixed_string_decimal(self):
        # GH 11670
        # possible bug when calculating mean of DataFrame?

        d = [
            {"A": 2, "B": None, "C": Decimal("628.00")},
            {"A": 1, "B": None, "C": Decimal("383.00")},
            {"A": 3, "B": None, "C": Decimal("651.00")},
            {"A": 2, "B": None, "C": Decimal("575.00")},
            {"A": 4, "B": None, "C": Decimal("1114.00")},
            {"A": 1, "B": "TEST", "C": Decimal("241.00")},
            {"A": 2, "B": None, "C": Decimal("572.00")},
            {"A": 4, "B": None, "C": Decimal("609.00")},
            {"A": 3, "B": None, "C": Decimal("820.00")},
            {"A": 5, "B": None, "C": Decimal("1223.00")},
        ]

        df = DataFrame(d)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = df.mean()
        expected = Series([2.7, 681.6], index=["A", "C"])
        tm.assert_series_equal(result, expected)

    def test_var_std(self, datetime_frame):
        result = datetime_frame.std(ddof=4)
        expected = datetime_frame.apply(lambda x: x.std(ddof=4))
        tm.assert_almost_equal(result, expected)

        result = datetime_frame.var(ddof=4)
        expected = datetime_frame.apply(lambda x: x.var(ddof=4))
        tm.assert_almost_equal(result, expected)

        arr = np.repeat(np.random.random((1, 1000)), 1000, 0)
        result = nanops.nanvar(arr, axis=0)
        assert not (result < 0).any()

        with pd.option_context("use_bottleneck", False):
            result = nanops.nanvar(arr, axis=0)
            assert not (result < 0).any()

    @pytest.mark.parametrize("meth", ["sem", "var", "std"])
    def test_numeric_only_flag(self, meth):
        # GH 9201
        df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"])
        # set one entry to a number in str format
        df1.loc[0, "foo"] = "100"

        df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"])
        # set one entry to a non-number str
        df2.loc[0, "foo"] = "a"

        result = getattr(df1, meth)(axis=1, numeric_only=True)
        expected = getattr(df1[["bar", "baz"]], meth)(axis=1)
        tm.assert_series_equal(expected, result)

        result = getattr(df2, meth)(axis=1, numeric_only=True)
        expected = getattr(df2[["bar", "baz"]], meth)(axis=1)
        tm.assert_series_equal(expected, result)

        # df1 has all numbers, df2 has a letter inside
        msg = r"unsupported operand type\(s\) for -: 'float' and 'str'"
        with pytest.raises(TypeError, match=msg):
            getattr(df1, meth)(axis=1, numeric_only=False)
        msg = "could not convert string to float: 'a'"
        with pytest.raises(TypeError, match=msg):
            getattr(df2, meth)(axis=1, numeric_only=False)

    def test_sem(self, datetime_frame):
        result = datetime_frame.sem(ddof=4)
        expected = datetime_frame.apply(lambda x: x.std(ddof=4) / np.sqrt(len(x)))
        tm.assert_almost_equal(result, expected)

        arr = np.repeat(np.random.random((1, 1000)), 1000, 0)
        result = nanops.nansem(arr, axis=0)
        assert not (result < 0).any()

        with pd.option_context("use_bottleneck", False):
            result = nanops.nansem(arr, axis=0)
            assert not (result < 0).any()

    @td.skip_if_no_scipy
    def test_kurt(self):
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        df = DataFrame(np.random.randn(6, 3), index=index)

        kurt = df.kurt()
        with tm.assert_produces_warning(FutureWarning):
            kurt2 = df.kurt(level=0).xs("bar")
        tm.assert_series_equal(kurt, kurt2, check_names=False)
        assert kurt.name is None
        assert kurt2.name == "bar"

    @pytest.mark.parametrize(
        "dropna, expected",
        [
            (
                True,
                {
                    "A": [12],
                    "B": [10.0],
                    "C": [1.0],
                    "D": ["a"],
                    "E": Categorical(["a"], categories=["a"]),
                    "F": to_datetime(["2000-1-2"]),
                    "G": to_timedelta(["1 days"]),
                },
            ),
            (
                False,
                {
                    "A": [12],
                    "B": [10.0],
                    "C": [np.nan],
                    "D": np.array([np.nan], dtype=object),
                    "E": Categorical([np.nan], categories=["a"]),
                    "F": [pd.NaT],
                    "G": to_timedelta([pd.NaT]),
                },
            ),
            (
                True,
                {
                    "H": [8, 9, np.nan, np.nan],
                    "I": [8, 9, np.nan, np.nan],
                    "J": [1, np.nan, np.nan, np.nan],
                    "K": Categorical(["a", np.nan, np.nan, np.nan], categories=["a"]),
                    "L": to_datetime(["2000-1-2", "NaT", "NaT", "NaT"]),
                    "M": to_timedelta(["1 days", "nan", "nan", "nan"]),
                    "N": [0, 1, 2, 3],
                },
            ),
            (
                False,
                {
                    "H": [8, 9, np.nan, np.nan],
                    "I": [8, 9, np.nan, np.nan],
                    "J": [1, np.nan, np.nan, np.nan],
                    "K": Categorical([np.nan, "a", np.nan, np.nan], categories=["a"]),
                    "L": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]),
                    "M": to_timedelta(["nan", "1 days", "nan", "nan"]),
                    "N": [0, 1, 2, 3],
                },
            ),
        ],
    )
    def test_mode_dropna(self, dropna, expected):

        df = DataFrame(
            {
                "A": [12, 12, 19, 11],
                "B": [10, 10, np.nan, 3],
                "C": [1, np.nan, np.nan, np.nan],
                "D": [np.nan, np.nan, "a", np.nan],
                "E": Categorical([np.nan, np.nan, "a", np.nan]),
                "F": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]),
                "G": to_timedelta(["1 days", "nan", "nan", "nan"]),
                "H": [8, 8, 9, 9],
                "I": [9, 9, 8, 8],
                "J": [1, 1, np.nan, np.nan],
                "K": Categorical(["a", np.nan, "a", np.nan]),
                "L": to_datetime(["2000-1-2", "2000-1-2", "NaT", "NaT"]),
                "M": to_timedelta(["1 days", "nan", "1 days", "nan"]),
                "N": np.arange(4, dtype="int64"),
            }
        )

        result = df[sorted(expected.keys())].mode(dropna=dropna)
        expected = DataFrame(expected)
        tm.assert_frame_equal(result, expected)

    def test_mode_sortwarning(self):
        # Check for the warning that is raised when the mode
        # results cannot be sorted

        df = DataFrame({"A": [np.nan, np.nan, "a", "a"]})
        expected = DataFrame({"A": ["a", np.nan]})

        with tm.assert_produces_warning(UserWarning):
            result = df.mode(dropna=False)
            result = result.sort_values(by="A").reset_index(drop=True)

        tm.assert_frame_equal(result, expected)

    def test_mode_empty_df(self):
        df = DataFrame([], columns=["a", "b"])
        result = df.mode()
        expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=int))
        tm.assert_frame_equal(result, expected)

    def test_operators_timedelta64(self):
        df = DataFrame(
            {
                "A": date_range("2012-1-1", periods=3, freq="D"),
                "B": date_range("2012-1-2", periods=3, freq="D"),
                "C": Timestamp("20120101") - timedelta(minutes=5, seconds=5),
            }
        )

        diffs = DataFrame({"A": df["A"] - df["C"], "B": df["A"] - df["B"]})

        # min
        result = diffs.min()
        assert result[0] == diffs.loc[0, "A"]
        assert result[1] == diffs.loc[0, "B"]

        result = diffs.min(axis=1)
        assert (result == diffs.loc[0, "B"]).all()

        # max
        result = diffs.max()
        assert result[0] == diffs.loc[2, "A"]
        assert result[1] == diffs.loc[2, "B"]

        result = diffs.max(axis=1)
        assert (result == diffs["A"]).all()

        # abs
        result = diffs.abs()
        result2 = abs(diffs)
        expected = DataFrame({"A": df["A"] - df["C"], "B": df["B"] - df["A"]})
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

        # mixed frame
        mixed = diffs.copy()
        mixed["C"] = "foo"
        mixed["D"] = 1
        mixed["E"] = 1.0
        mixed["F"] = Timestamp("20130101")

        # results in an object array
        result = mixed.min()
        expected = Series(
            [
                pd.Timedelta(timedelta(seconds=5 * 60 + 5)),
                pd.Timedelta(timedelta(days=-1)),
                "foo",
                1,
                1.0,
                Timestamp("20130101"),
            ],
            index=mixed.columns,
        )
        tm.assert_series_equal(result, expected)

        # excludes numeric
        with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
            result = mixed.min(axis=1)
        expected = Series([1, 1, 1.0], index=[0, 1, 2])
        tm.assert_series_equal(result, expected)

        # works when only those columns are selected
        result = mixed[["A", "B"]].min(1)
        expected = Series([timedelta(days=-1)] * 3)
        tm.assert_series_equal(result, expected)

        result = mixed[["A", "B"]].min()
        expected = Series(
            [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"]
        )
        tm.assert_series_equal(result, expected)

        # GH 3106
        df = DataFrame(
            {
                "time": date_range("20130102", periods=5),
                "time2": date_range("20130105", periods=5),
            }
        )
        df["off1"] = df["time2"] - df["time"]
        assert df["off1"].dtype == "timedelta64[ns]"

        df["off2"] = df["time"] - df["time2"]
        df._consolidate_inplace()
        assert df["off1"].dtype == "timedelta64[ns]"
        assert df["off2"].dtype == "timedelta64[ns]"

    def test_std_timedelta64_skipna_false(self):
        # GH#37392
        tdi = pd.timedelta_range("1 Day", periods=10)
        df = DataFrame({"A": tdi, "B": tdi}, copy=True)
        df.iloc[-2, -1] = pd.NaT

        result = df.std(skipna=False)
        expected = Series(
            [df["A"].std(), pd.NaT], index=["A", "B"], dtype="timedelta64[ns]"
        )
        tm.assert_series_equal(result, expected)

        result = df.std(axis=1, skipna=False)
        expected = Series([pd.Timedelta(0)] * 8 + [pd.NaT, pd.Timedelta(0)])
        tm.assert_series_equal(result, expected)

    def test_sum_corner(self):
        empty_frame = DataFrame()

        axis0 = empty_frame.sum(0)
        axis1 = empty_frame.sum(1)
        assert isinstance(axis0, Series)
        assert isinstance(axis1, Series)
        assert len(axis0) == 0
        assert len(axis1) == 0

    @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)])
    @pytest.mark.parametrize("numeric_only", [None, True, False])
    def test_sum_prod_nanops(self, method, unit, numeric_only):
        idx = ["a", "b", "c"]
        df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]})
        # The default
        result = getattr(df, method)(numeric_only=numeric_only)
        expected = Series([unit, unit, unit], index=idx, dtype="float64")
        tm.assert_series_equal(result, expected)

        # min_count=1
        result = getattr(df, method)(numeric_only=numeric_only, min_count=1)
        expected = Series([unit, unit, np.nan], index=idx)
        tm.assert_series_equal(result, expected)

        # min_count=0
        result = getattr(df, method)(numeric_only=numeric_only, min_count=0)
        expected = Series([unit, unit, unit], index=idx, dtype="float64")
        tm.assert_series_equal(result, expected)

        result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1)
        expected = Series([unit, np.nan, np.nan], index=idx)
        tm.assert_series_equal(result, expected)

        # min_count > 1
        df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5})
        result = getattr(df, method)(numeric_only=numeric_only, min_count=5)
        expected = Series(result, index=["A", "B"])
        tm.assert_series_equal(result, expected)

        result = getattr(df, method)(numeric_only=numeric_only, min_count=6)
        expected = Series(result, index=["A", "B"])
        tm.assert_series_equal(result, expected)

    def test_sum_nanops_timedelta(self):
        # prod isn't defined on timedeltas
        idx = ["a", "b", "c"]
        df = DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]})

        df2 = df.apply(to_timedelta)

        # 0 by default
        result = df2.sum()
        expected = Series([0, 0, 0], dtype="m8[ns]", index=idx)
        tm.assert_series_equal(result, expected)

        # min_count=0
        result = df2.sum(min_count=0)
        tm.assert_series_equal(result, expected)

        # min_count=1
        result = df2.sum(min_count=1)
        expected = Series([0, 0, np.nan], dtype="m8[ns]", index=idx)
        tm.assert_series_equal(result, expected)

    def test_sum_nanops_min_count(self):
        # https://github.com/pandas-dev/pandas/issues/39738
        df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
        result = df.sum(min_count=10)
        expected = Series([np.nan, np.nan], index=["x", "y"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"])
    @pytest.mark.parametrize(
        "kwargs, expected_result",
        [
            ({"axis": 1, "min_count": 2}, [3.2, 5.3, np.NaN]),
            ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]),
            ({"axis": 1, "skipna": False}, [3.2, 5.3, np.NaN]),
        ],
    )
    def test_sum_nanops_dtype_min_count(self, float_type, kwargs, expected_result):
        # GH#46947
        df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype=float_type)
        result = df.sum(**kwargs)
        expected = Series(expected_result).astype(float_type)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"])
    @pytest.mark.parametrize(
        "kwargs, expected_result",
        [
            ({"axis": 1, "min_count": 2}, [2.0, 4.0, np.NaN]),
            ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]),
            ({"axis": 1, "skipna": False}, [2.0, 4.0, np.NaN]),
        ],
    )
    def test_prod_nanops_dtype_min_count(self, float_type, kwargs, expected_result):
        # GH#46947
        df = DataFrame(
            {"a": [1.0, 2.0, 4.4], "b": [2.0, 2.0, np.nan]}, dtype=float_type
        )
        result = df.prod(**kwargs)
        expected = Series(expected_result).astype(float_type)
        tm.assert_series_equal(result, expected)

    def test_sum_object(self, float_frame):
        values = float_frame.values.astype(int)
        frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns)
        deltas = frame * timedelta(1)
        deltas.sum()

    def test_sum_bool(self, float_frame):
        # ensure this works, bug report
        bools = np.isnan(float_frame)
        bools.sum(1)
        bools.sum(0)

    def test_sum_mixed_datetime(self):
        # GH#30886
        df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex(
            [2, 3, 4]
        )
        with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
            result = df.sum()

        expected = Series({"B": 7.0})
        tm.assert_series_equal(result, expected)

    def test_mean_corner(self, float_frame, float_string_frame):
        # unit test when have object data
        with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
            the_mean = float_string_frame.mean(axis=0)
        the_sum = float_string_frame.sum(axis=0, numeric_only=True)
        tm.assert_index_equal(the_sum.index, the_mean.index)
        assert len(the_mean.index) < len(float_string_frame.columns)

        # xs sum mixed type, just want to know it works...
        with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
            the_mean = float_string_frame.mean(axis=1)
        the_sum = float_string_frame.sum(axis=1, numeric_only=True)
        tm.assert_index_equal(the_sum.index, the_mean.index)

        # take mean of boolean column
        float_frame["bool"] = float_frame["A"] > 0
        means = float_frame.mean(0)
        assert means["bool"] == float_frame["bool"].values.mean()

    def test_mean_datetimelike(self):
        # GH#24757 check that datetimelike are excluded by default, handled
        #  correctly with numeric_only=True

        df = DataFrame(
            {
                "A": np.arange(3),
                "B": date_range("2016-01-01", periods=3),
                "C": pd.timedelta_range("1D", periods=3),
                "D": pd.period_range("2016", periods=3, freq="A"),
            }
        )
        result = df.mean(numeric_only=True)
        expected = Series({"A": 1.0})
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning):
            # in the future datetime columns will be included
            result = df.mean()
        expected = Series({"A": 1.0, "C": df.loc[1, "C"]})
        tm.assert_series_equal(result, expected)

    def test_mean_datetimelike_numeric_only_false(self):
        df = DataFrame(
            {
                "A": np.arange(3),
                "B": date_range("2016-01-01", periods=3),
                "C": pd.timedelta_range("1D", periods=3),
            }
        )

        # datetime(tz) and timedelta work
        result = df.mean(numeric_only=False)
        expected = Series({"A": 1, "B": df.loc[1, "B"], "C": df.loc[1, "C"]})
        tm.assert_series_equal(result, expected)

        # mean of period is not allowed
        df["D"] = pd.period_range("2016", periods=3, freq="A")

        with pytest.raises(TypeError, match="mean is not implemented for Period"):
            df.mean(numeric_only=False)

    def test_mean_extensionarray_numeric_only_true(self):
        # https://github.com/pandas-dev/pandas/issues/33256
        arr = np.random.randint(1000, size=(10, 5))
        df = DataFrame(arr, dtype="Int64")
        result = df.mean(numeric_only=True)
        expected = DataFrame(arr).mean()
        tm.assert_series_equal(result, expected)

    def test_stats_mixed_type(self, float_string_frame):
        # don't blow up
        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            float_string_frame.std(1)
            float_string_frame.var(1)
            float_string_frame.mean(1)
            float_string_frame.skew(1)

    def test_sum_bools(self):
        df = DataFrame(index=range(1), columns=range(10))
        bools = isna(df)
        assert bools.sum(axis=1)[0] == 10

    # ----------------------------------------------------------------------
    # Index of max / min

    @pytest.mark.parametrize("skipna", [True, False])
    @pytest.mark.parametrize("axis", [0, 1])
    def test_idxmin(self, float_frame, int_frame, skipna, axis):
        frame = float_frame
        frame.iloc[5:10] = np.nan
        frame.iloc[15:20, -2:] = np.nan
        for df in [frame, int_frame]:
            result = df.idxmin(axis=axis, skipna=skipna)
            expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("numeric_only", [True, False])
    def test_idxmin_numeric_only(self, numeric_only):
        df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
        if numeric_only:
            result = df.idxmin(numeric_only=numeric_only)
            expected = Series([2, 1], index=["a", "b"])
            tm.assert_series_equal(result, expected)
        else:
            with pytest.raises(TypeError, match="not allowed for this dtype"):
                df.idxmin(numeric_only=numeric_only)

    def test_idxmin_axis_2(self, float_frame):
        frame = float_frame
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            frame.idxmin(axis=2)

    @pytest.mark.parametrize("skipna", [True, False])
    @pytest.mark.parametrize("axis", [0, 1])
    def test_idxmax(self, float_frame, int_frame, skipna, axis):
        frame = float_frame
        frame.iloc[5:10] = np.nan
        frame.iloc[15:20, -2:] = np.nan
        for df in [frame, int_frame]:
            result = df.idxmax(axis=axis, skipna=skipna)
            expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("numeric_only", [True, False])
    def test_idxmax_numeric_only(self, numeric_only):
        df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
        if numeric_only:
            result = df.idxmax(numeric_only=numeric_only)
            expected = Series([1, 0], index=["a", "b"])
            tm.assert_series_equal(result, expected)
        else:
            with pytest.raises(TypeError, match="not allowed for this dtype"):
                df.idxmin(numeric_only=numeric_only)

    def test_idxmax_axis_2(self, float_frame):
        frame = float_frame
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            frame.idxmax(axis=2)

    def test_idxmax_mixed_dtype(self):
        # don't cast to object, which would raise in nanops
        dti = date_range("2016-01-01", periods=3)

        # Copying dti is needed for ArrayManager otherwise when we set
        #  df.loc[0, 3] = pd.NaT below it edits dti
        df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti.copy(deep=True)})

        result = df.idxmax()
        expected = Series([1, 0, 2], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

        result = df.idxmin()
        expected = Series([0, 2, 0], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

        # with NaTs
        df.loc[0, 3] = pd.NaT
        result = df.idxmax()
        expected = Series([1, 0, 2], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

        result = df.idxmin()
        expected = Series([0, 2, 1], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

        # with multi-column dt64 block
        df[4] = dti[::-1]
        df._consolidate_inplace()

        result = df.idxmax()
        expected = Series([1, 0, 2, 0], index=[1, 2, 3, 4])
        tm.assert_series_equal(result, expected)

        result = df.idxmin()
        expected = Series([0, 2, 1, 2], index=[1, 2, 3, 4])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "op, expected_value",
        [("idxmax", [0, 4]), ("idxmin", [0, 5])],
    )
    def test_idxmax_idxmin_convert_dtypes(self, op, expected_value):
        # GH 40346
        df = DataFrame(
            {
                "ID": [100, 100, 100, 200, 200, 200],
                "value": [0, 0, 0, 1, 2, 0],
            },
            dtype="Int64",
        )
        df = df.groupby("ID")

        result = getattr(df, op)()
        expected = DataFrame(
            {"value": expected_value},
            index=Index([100, 200], name="ID", dtype="Int64"),
        )
        tm.assert_frame_equal(result, expected)

    def test_idxmax_dt64_multicolumn_axis1(self):
        dti = date_range("2016-01-01", periods=3)
        df = DataFrame({3: dti, 4: dti[::-1]}, copy=True)
        df.iloc[0, 0] = pd.NaT

        df._consolidate_inplace()

        result = df.idxmax(axis=1)
        expected = Series([4, 3, 3])
        tm.assert_series_equal(result, expected)

        result = df.idxmin(axis=1)
        expected = Series([4, 3, 4])
        tm.assert_series_equal(result, expected)

    # ----------------------------------------------------------------------
    # Logical reductions

    @pytest.mark.parametrize("opname", ["any", "all"])
    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize("bool_only", [False, True])
    def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame):
        # make sure op works on mixed-type frame
        mixed = float_string_frame
        mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5

        getattr(mixed, opname)(axis=axis, bool_only=bool_only)

    @pytest.mark.parametrize("opname", ["any", "all"])
    @pytest.mark.parametrize("axis", [0, 1])
    def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na):
        getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False)

    @pytest.mark.parametrize("opname", ["any", "all"])
    def test_any_all_bool_frame(self, opname, bool_frame_with_na):
        # GH#12863: numpy gives back non-boolean data for object type
        # so fill NaNs to compare with pandas behavior
        frame = bool_frame_with_na.fillna(True)
        alternative = getattr(np, opname)
        f = getattr(frame, opname)

        def skipna_wrapper(x):
            nona = x.dropna().values
            return alternative(nona)

        def wrapper(x):
            return alternative(x.values)

        result0 = f(axis=0, skipna=False)
        result1 = f(axis=1, skipna=False)

        tm.assert_series_equal(result0, frame.apply(wrapper))
        tm.assert_series_equal(result1, frame.apply(wrapper, axis=1))

        result0 = f(axis=0)
        result1 = f(axis=1)

        tm.assert_series_equal(result0, frame.apply(skipna_wrapper))
        tm.assert_series_equal(
            result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False
        )

        # bad axis
        with pytest.raises(ValueError, match="No axis named 2"):
            f(axis=2)

        # all NA case
        all_na = frame * np.NaN
        r0 = getattr(all_na, opname)(axis=0)
        r1 = getattr(all_na, opname)(axis=1)
        if opname == "any":
            assert not r0.any()
            assert not r1.any()
        else:
            assert r0.all()
            assert r1.all()

    def test_any_all_extra(self):
        df = DataFrame(
            {
                "A": [True, False, False],
                "B": [True, True, False],
                "C": [True, True, True],
            },
            index=["a", "b", "c"],
        )
        result = df[["A", "B"]].any(axis=1)
        expected = Series([True, True, False], index=["a", "b", "c"])
        tm.assert_series_equal(result, expected)

        result = df[["A", "B"]].any(axis=1, bool_only=True)
        tm.assert_series_equal(result, expected)

        result = df.all(1)
        expected = Series([True, False, False], index=["a", "b", "c"])
        tm.assert_series_equal(result, expected)

        result = df.all(1, bool_only=True)
        tm.assert_series_equal(result, expected)

        # Axis is None
        result = df.all(axis=None).item()
        assert result is False

        result = df.any(axis=None).item()
        assert result is True

        result = df[["C"]].all(axis=None).item()
        assert result is True

    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_any_all_object_dtype(self, axis, bool_agg_func, skipna):
        # GH#35450
        df = DataFrame(
            data=[
                [1, np.nan, np.nan, True],
                [np.nan, 2, np.nan, True],
                [np.nan, np.nan, np.nan, True],
                [np.nan, np.nan, "5", np.nan],
            ]
        )
        result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna)
        expected = Series([True, True, True, True])
        tm.assert_series_equal(result, expected)

    def test_any_datetime(self):

        # GH 23070
        float_data = [1, np.nan, 3, np.nan]
        datetime_data = [
            Timestamp("1960-02-15"),
            Timestamp("1960-02-16"),
            pd.NaT,
            pd.NaT,
        ]
        df = DataFrame({"A": float_data, "B": datetime_data})

        result = df.any(axis=1)
        expected = Series([True, True, True, False])
        tm.assert_series_equal(result, expected)

    def test_any_all_bool_only(self):

        # GH 25101
        df = DataFrame(
            {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]}
        )

        result = df.all(bool_only=True)
        expected = Series(dtype=np.bool_)
        tm.assert_series_equal(result, expected)

        df = DataFrame(
            {
                "col1": [1, 2, 3],
                "col2": [4, 5, 6],
                "col3": [None, None, None],
                "col4": [False, False, True],
            }
        )

        result = df.all(bool_only=True)
        expected = Series({"col4": False})
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "func, data, expected",
        [
            (np.any, {}, False),
            (np.all, {}, True),
            (np.any, {"A": []}, False),
            (np.all, {"A": []}, True),
            (np.any, {"A": [False, False]}, False),
            (np.all, {"A": [False, False]}, False),
            (np.any, {"A": [True, False]}, True),
            (np.all, {"A": [True, False]}, False),
            (np.any, {"A": [True, True]}, True),
            (np.all, {"A": [True, True]}, True),
            (np.any, {"A": [False], "B": [False]}, False),
            (np.all, {"A": [False], "B": [False]}, False),
            (np.any, {"A": [False, False], "B": [False, True]}, True),
            (np.all, {"A": [False, False], "B": [False, True]}, False),
            # other types
            (np.all, {"A": Series([0.0, 1.0], dtype="float")}, False),
            (np.any, {"A": Series([0.0, 1.0], dtype="float")}, True),
            (np.all, {"A": Series([0, 1], dtype=int)}, False),
            (np.any, {"A": Series([0, 1], dtype=int)}, True),
            pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns]")}, False),
            pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, False),
            pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns]")}, True),
            pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, True),
            pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns]")}, True),
            pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True),
            pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns]")}, True),
            pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True),
            pytest.param(np.all, {"A": Series([0, 1], dtype="m8[ns]")}, False),
            pytest.param(np.any, {"A": Series([0, 1], dtype="m8[ns]")}, True),
            pytest.param(np.all, {"A": Series([1, 2], dtype="m8[ns]")}, True),
            pytest.param(np.any, {"A": Series([1, 2], dtype="m8[ns]")}, True),
            # np.all on Categorical raises, so the reduction drops the
            #  column, so all is being done on an empty Series, so is True
            (np.all, {"A": Series([0, 1], dtype="category")}, True),
            (np.any, {"A": Series([0, 1], dtype="category")}, False),
            (np.all, {"A": Series([1, 2], dtype="category")}, True),
            (np.any, {"A": Series([1, 2], dtype="category")}, False),
            # Mix GH#21484
            pytest.param(
                np.all,
                {
                    "A": Series([10, 20], dtype="M8[ns]"),
                    "B": Series([10, 20], dtype="m8[ns]"),
                },
                True,
            ),
        ],
    )
    def test_any_all_np_func(self, func, data, expected):
        # GH 19976
        data = DataFrame(data)

        warn = None
        if any(is_categorical_dtype(x) for x in data.dtypes):
            warn = FutureWarning

        with tm.assert_produces_warning(
            warn, match="Select only valid columns", check_stacklevel=False
        ):
            result = func(data)
        assert isinstance(result, np.bool_)
        assert result.item() is expected

        # method version
        with tm.assert_produces_warning(
            warn, match="Select only valid columns", check_stacklevel=False
        ):
            result = getattr(DataFrame(data), func.__name__)(axis=None)
        assert isinstance(result, np.bool_)
        assert result.item() is expected

    def test_any_all_object(self):
        # GH 19976
        result = np.all(DataFrame(columns=["a", "b"])).item()
        assert result is True

        result = np.any(DataFrame(columns=["a", "b"])).item()
        assert result is False

    def test_any_all_object_bool_only(self):
        msg = "object-dtype columns with all-bool values"

        df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
        df._consolidate_inplace()
        df["C"] = Series([True, True])

        # Categorical of bools is _not_ considered booly
        df["D"] = df["C"].astype("category")

        # The underlying bug is in DataFrame._get_bool_data, so we check
        #  that while we're here
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = df._get_bool_data()
        expected = df[["B", "C"]]
        tm.assert_frame_equal(res, expected)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = df.all(bool_only=True, axis=0)
        expected = Series([False, True], index=["B", "C"])
        tm.assert_series_equal(res, expected)

        # operating on a subset of columns should not produce a _larger_ Series
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = df[["B", "C"]].all(bool_only=True, axis=0)
        tm.assert_series_equal(res, expected)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert not df.all(bool_only=True, axis=None)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = df.any(bool_only=True, axis=0)
        expected = Series([True, True], index=["B", "C"])
        tm.assert_series_equal(res, expected)

        # operating on a subset of columns should not produce a _larger_ Series
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = df[["B", "C"]].any(bool_only=True, axis=0)
        tm.assert_series_equal(res, expected)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert df.any(bool_only=True, axis=None)

    @pytest.mark.parametrize("method", ["any", "all"])
    def test_any_all_level_axis_none_raises(self, method):
        df = DataFrame(
            {"A": 1},
            index=MultiIndex.from_product(
                [["A", "B"], ["a", "b"]], names=["out", "in"]
            ),
        )
        xpr = "Must specify 'axis' when aggregating by level."
        with pytest.raises(ValueError, match=xpr):
            with tm.assert_produces_warning(FutureWarning):
                getattr(df, method)(axis=None, level="out")

    # ---------------------------------------------------------------------
    # Unsorted

    def test_series_broadcasting(self):
        # smoke test for numpy warnings
        # GH 16378, GH 16306
        df = DataFrame([1.0, 1.0, 1.0])
        df_nan = DataFrame({"A": [np.nan, 2.0, np.nan]})
        s = Series([1, 1, 1])
        s_nan = Series([np.nan, np.nan, 1])

        with tm.assert_produces_warning(None):
            df_nan.clip(lower=s, axis=0)
            for op in ["lt", "le", "gt", "ge", "eq", "ne"]:
                getattr(df, op)(s_nan, axis=0)


class TestDataFrameReductions:
    def test_min_max_dt64_with_NaT(self):
        # Both NaT and Timestamp are in DataFrame.
        df = DataFrame({"foo": [pd.NaT, pd.NaT, Timestamp("2012-05-01")]})

        res = df.min()
        exp = Series([Timestamp("2012-05-01")], index=["foo"])
        tm.assert_series_equal(res, exp)

        res = df.max()
        exp = Series([Timestamp("2012-05-01")], index=["foo"])
        tm.assert_series_equal(res, exp)

        # GH12941, only NaTs are in DataFrame.
        df = DataFrame({"foo": [pd.NaT, pd.NaT]})

        res = df.min()
        exp = Series([pd.NaT], index=["foo"])
        tm.assert_series_equal(res, exp)

        res = df.max()
        exp = Series([pd.NaT], index=["foo"])
        tm.assert_series_equal(res, exp)

    def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture):
        # GH#36907
        tz = tz_naive_fixture
        if isinstance(tz, tzlocal) and is_platform_windows():
            pytest.skip(
                "GH#37659 OSError raised within tzlocal bc Windows "
                "chokes in times before 1970-01-01"
            )

        df = DataFrame(
            {
                "a": [
                    Timestamp("2020-01-01 08:00:00", tz=tz),
                    Timestamp("1920-02-01 09:00:00", tz=tz),
                ],
                "b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT],
            }
        )
        res = df.min(axis=1, skipna=False)
        expected = Series([df.loc[0, "a"], pd.NaT])
        assert expected.dtype == df["a"].dtype

        tm.assert_series_equal(res, expected)

        res = df.max(axis=1, skipna=False)
        expected = Series([df.loc[0, "b"], pd.NaT])
        assert expected.dtype == df["a"].dtype

        tm.assert_series_equal(res, expected)

    def test_min_max_dt64_api_consistency_with_NaT(self):
        # Calling the following sum functions returned an error for dataframes but
        # returned NaT for series. These tests check that the API is consistent in
        # min/max calls on empty Series/DataFrames. See GH:33704 for more
        # information
        df = DataFrame({"x": to_datetime([])})
        expected_dt_series = Series(to_datetime([]))
        # check axis 0
        assert (df.min(axis=0).x is pd.NaT) == (expected_dt_series.min() is pd.NaT)
        assert (df.max(axis=0).x is pd.NaT) == (expected_dt_series.max() is pd.NaT)

        # check axis 1
        tm.assert_series_equal(df.min(axis=1), expected_dt_series)
        tm.assert_series_equal(df.max(axis=1), expected_dt_series)

    def test_min_max_dt64_api_consistency_empty_df(self):
        # check DataFrame/Series api consistency when calling min/max on an empty
        # DataFrame/Series.
        df = DataFrame({"x": []})
        expected_float_series = Series([], dtype=float)
        # check axis 0
        assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min())
        assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max())
        # check axis 1
        tm.assert_series_equal(df.min(axis=1), expected_float_series)
        tm.assert_series_equal(df.min(axis=1), expected_float_series)

    @pytest.mark.parametrize(
        "initial",
        ["2018-10-08 13:36:45+00:00", "2018-10-08 13:36:45+03:00"],  # Non-UTC timezone
    )
    @pytest.mark.parametrize("method", ["min", "max"])
    def test_preserve_timezone(self, initial: str, method):
        # GH 28552
        initial_dt = to_datetime(initial)
        expected = Series([initial_dt])
        df = DataFrame([expected])
        result = getattr(df, method)(axis=1)
        tm.assert_series_equal(result, expected)

    def test_frame_any_all_with_level(self):
        df = DataFrame(
            {"data": [False, False, True, False, True, False, True]},
            index=[
                ["one", "one", "two", "one", "two", "two", "two"],
                [0, 1, 0, 2, 1, 2, 3],
            ],
        )

        with tm.assert_produces_warning(FutureWarning, match="Using the level"):
            result = df.any(level=0)
        ex = DataFrame({"data": [False, True]}, index=["one", "two"])
        tm.assert_frame_equal(result, ex)

        with tm.assert_produces_warning(FutureWarning, match="Using the level"):
            result = df.all(level=0)
        ex = DataFrame({"data": [False, False]}, index=["one", "two"])
        tm.assert_frame_equal(result, ex)

    def test_frame_any_with_timedelta(self):
        # GH#17667
        df = DataFrame(
            {
                "a": Series([0, 0]),
                "t": Series([to_timedelta(0, "s"), to_timedelta(1, "ms")]),
            }
        )

        result = df.any(axis=0)
        expected = Series(data=[False, True], index=["a", "t"])
        tm.assert_series_equal(result, expected)

        result = df.any(axis=1)
        expected = Series(data=[False, True])
        tm.assert_series_equal(result, expected)

    def test_reductions_deprecation_skipna_none(self, frame_or_series):
        # GH#44580
        obj = frame_or_series([1, 2, 3])
        with tm.assert_produces_warning(
            FutureWarning, match="skipna", raise_on_extra_warnings=False
        ):
            obj.mad(skipna=None)

    def test_reductions_deprecation_level_argument(
        self, frame_or_series, reduction_functions
    ):
        # GH#39983
        obj = frame_or_series(
            [1, 2, 3], index=MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]])
        )
        with tm.assert_produces_warning(FutureWarning, match="level"):
            getattr(obj, reduction_functions)(level=0)

    def test_reductions_skipna_none_raises(
        self, request, frame_or_series, reduction_functions
    ):
        if reduction_functions == "count":
            request.node.add_marker(
                pytest.mark.xfail(reason="Count does not accept skipna")
            )
        elif reduction_functions == "mad":
            pytest.skip("Mad is deprecated: GH#11787")
        obj = frame_or_series([1, 2, 3])
        msg = 'For argument "skipna" expected type bool, received type NoneType.'
        with pytest.raises(ValueError, match=msg):
            getattr(obj, reduction_functions)(skipna=None)


class TestNuisanceColumns:
    @pytest.mark.parametrize("method", ["any", "all"])
    def test_any_all_categorical_dtype_nuisance_column(self, method):
        # GH#36076 DataFrame should match Series behavior
        ser = Series([0, 1], dtype="category", name="A")
        df = ser.to_frame()

        # Double-check the Series behavior is to raise
        with pytest.raises(TypeError, match="does not support reduction"):
            getattr(ser, method)()

        with pytest.raises(TypeError, match="does not support reduction"):
            getattr(np, method)(ser)

        with pytest.raises(TypeError, match="does not support reduction"):
            getattr(df, method)(bool_only=False)

        # With bool_only=None, operating on this column raises and is ignored,
        #  so we expect an empty result.
        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = getattr(df, method)(bool_only=None)
        expected = Series([], index=Index([]), dtype=bool)
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns", check_stacklevel=False
        ):
            result = getattr(np, method)(df, axis=0)
        tm.assert_series_equal(result, expected)

    def test_median_categorical_dtype_nuisance_column(self):
        # GH#21020 DataFrame.median should match Series.median
        df = DataFrame({"A": Categorical([1, 2, 2, 2, 3])})
        ser = df["A"]

        # Double-check the Series behavior is to raise
        with pytest.raises(TypeError, match="does not support reduction"):
            ser.median()

        with pytest.raises(TypeError, match="does not support reduction"):
            df.median(numeric_only=False)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = df.median()
        expected = Series([], index=Index([]), dtype=np.float64)
        tm.assert_series_equal(result, expected)

        # same thing, but with an additional non-categorical column
        df["B"] = df["A"].astype(int)

        with pytest.raises(TypeError, match="does not support reduction"):
            df.median(numeric_only=False)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = df.median()
        expected = Series([2.0], index=["B"])
        tm.assert_series_equal(result, expected)

        # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead
        #  of expected.values

    @pytest.mark.parametrize("method", ["min", "max"])
    def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method):
        # GH#28949 DataFrame.min should behave like Series.min
        cat = Categorical(["a", "b", "c", "b"], ordered=False)
        ser = Series(cat)
        df = ser.to_frame("A")

        # Double-check the Series behavior
        with pytest.raises(TypeError, match="is not ordered for operation"):
            getattr(ser, method)()

        with pytest.raises(TypeError, match="is not ordered for operation"):
            getattr(np, method)(ser)

        with pytest.raises(TypeError, match="is not ordered for operation"):
            getattr(df, method)(numeric_only=False)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = getattr(df, method)()
        expected = Series([], index=Index([]), dtype=np.float64)
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns", check_stacklevel=False
        ):
            result = getattr(np, method)(df)
        tm.assert_series_equal(result, expected)

        # same thing, but with an additional non-categorical column
        df["B"] = df["A"].astype(object)
        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = getattr(df, method)()
        if method == "min":
            expected = Series(["a"], index=["B"])
        else:
            expected = Series(["c"], index=["B"])
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns", check_stacklevel=False
        ):
            result = getattr(np, method)(df)
        tm.assert_series_equal(result, expected)

    def test_reduction_object_block_splits_nuisance_columns(self):
        # GH#37827
        df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object)

        # We should only exclude "B", not "A"
        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = df.mean()
        expected = Series([1.0], index=["A"])
        tm.assert_series_equal(result, expected)

        # Same behavior but heterogeneous dtype
        df["C"] = df["A"].astype(int) + 4

        with tm.assert_produces_warning(
            FutureWarning, match="Select only valid columns"
        ):
            result = df.mean()
        expected = Series([1.0, 5.0], index=["A", "C"])
        tm.assert_series_equal(result, expected)


def test_sum_timedelta64_skipna_false():
    # GH#17235
    arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
    arr[-1, -1] = "Nat"

    df = DataFrame(arr)

    result = df.sum(skipna=False)
    expected = Series([pd.Timedelta(seconds=12), pd.NaT])
    tm.assert_series_equal(result, expected)

    result = df.sum(axis=0, skipna=False)
    tm.assert_series_equal(result, expected)

    result = df.sum(axis=1, skipna=False)
    expected = Series(
        [
            pd.Timedelta(seconds=1),
            pd.Timedelta(seconds=5),
            pd.Timedelta(seconds=9),
            pd.NaT,
        ]
    )
    tm.assert_series_equal(result, expected)


def test_mixed_frame_with_integer_sum():
    # https://github.com/pandas-dev/pandas/issues/34520
    df = DataFrame([["a", 1]], columns=list("ab"))
    df = df.astype({"b": "Int64"})
    result = df.sum()
    expected = Series(["a", 1], index=["a", "b"])
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("numeric_only", [True, False, None])
@pytest.mark.parametrize("method", ["min", "max"])
def test_minmax_extensionarray(method, numeric_only):
    # https://github.com/pandas-dev/pandas/issues/32651
    int64_info = np.iinfo("int64")
    ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype())
    df = DataFrame({"Int64": ser})
    result = getattr(df, method)(numeric_only=numeric_only)
    expected = Series(
        [getattr(int64_info, method)], index=Index(["Int64"], dtype="object")
    )
    tm.assert_series_equal(result, expected)


def test_mad_nullable_integer(any_signed_int_ea_dtype):
    # GH#33036
    df = DataFrame(np.random.randn(100, 4).astype(np.int64))
    df2 = df.astype(any_signed_int_ea_dtype)

    with tm.assert_produces_warning(
        FutureWarning, match="The 'mad' method is deprecated"
    ):
        result = df2.mad()
        expected = df.mad()
    tm.assert_series_equal(result, expected)

    with tm.assert_produces_warning(
        FutureWarning, match="The 'mad' method is deprecated"
    ):
        result = df2.mad(axis=1)
        expected = df.mad(axis=1)
    tm.assert_series_equal(result, expected)

    # case with NAs present
    df2.iloc[::2, 1] = pd.NA

    with tm.assert_produces_warning(
        FutureWarning, match="The 'mad' method is deprecated"
    ):
        result = df2.mad()
        expected = df.mad()
        expected[1] = df.iloc[1::2, 1].mad()
    tm.assert_series_equal(result, expected)

    with tm.assert_produces_warning(
        FutureWarning, match="The 'mad' method is deprecated"
    ):
        result = df2.mad(axis=1)
        expected = df.mad(axis=1)
        expected[::2] = df.T.loc[[0, 2, 3], ::2].mad()
    tm.assert_series_equal(result, expected)


@pytest.mark.xfail(reason="GH#42895 caused by lack of 2D EA")
def test_mad_nullable_integer_all_na(any_signed_int_ea_dtype):
    # GH#33036
    df = DataFrame(np.random.randn(100, 4).astype(np.int64))
    df2 = df.astype(any_signed_int_ea_dtype)

    # case with all-NA row/column
    msg = "will attempt to set the values inplace instead"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df2.iloc[:, 1] = pd.NA  # FIXME(GH#44199): this doesn't operate in-place
        df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype)

    with tm.assert_produces_warning(
        FutureWarning, match="The 'mad' method is deprecated"
    ):
        result = df2.mad()
        expected = df.mad()

    expected[1] = pd.NA
    expected = expected.astype("Float64")
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"])
def test_groupby_regular_arithmetic_equivalent(meth):
    # GH#40660
    df = DataFrame(
        {"a": [pd.Timedelta(hours=6), pd.Timedelta(hours=7)], "b": [12.1, 13.3]}
    )
    expected = df.copy()

    with tm.assert_produces_warning(FutureWarning):
        result = getattr(df, meth)(level=0)
    tm.assert_frame_equal(result, expected)

    result = getattr(df.groupby(level=0), meth)(numeric_only=False)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("ts_value", [Timestamp("2000-01-01"), pd.NaT])
def test_frame_mixed_numeric_object_with_timestamp(ts_value):
    # GH 13912
    df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]})
    with tm.assert_produces_warning(
        FutureWarning, match="The default value of numeric_only"
    ):
        result = df.sum()
    expected = Series([1, 1.1, "foo"], index=list("abc"))
    tm.assert_series_equal(result, expected)


def test_prod_sum_min_count_mixed_object():
    # https://github.com/pandas-dev/pandas/issues/41074
    df = DataFrame([1, "a", True])

    result = df.prod(axis=0, min_count=1, numeric_only=False)
    expected = Series(["a"])
    tm.assert_series_equal(result, expected)

    msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
    with pytest.raises(TypeError, match=msg):
        df.sum(axis=0, min_count=1, numeric_only=False)


@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"])
def test_reduction_axis_none_deprecation(method):
    # GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it
    #  to reducing over all axes.

    df = DataFrame(np.random.randn(4, 4))
    meth = getattr(df, method)

    msg = f"scalar {method} over the entire DataFrame"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        res = meth(axis=None)
    with tm.assert_produces_warning(None):
        expected = meth()
    tm.assert_series_equal(res, expected)
    tm.assert_series_equal(res, meth(axis=0))


@pytest.mark.parametrize(
    "kernel",
    [
        "corr",
        "corrwith",
        "count",
        "cov",
        "idxmax",
        "idxmin",
        "kurt",
        "kurt",
        "max",
        "mean",
        "median",
        "min",
        "mode",
        "prod",
        "prod",
        "quantile",
        "sem",
        "skew",
        "std",
        "sum",
        "var",
    ],
)
def test_numeric_only_deprecation(kernel):
    # GH#46852
    df = DataFrame({"a": [1, 2, 3], "b": object})
    args = (df,) if kernel == "corrwith" else ()
    signature = inspect.signature(getattr(DataFrame, kernel))
    default = signature.parameters["numeric_only"].default
    assert default is not True

    if kernel in ("idxmax", "idxmin"):
        # kernels that default to numeric_only=False and fail on nuisance columns
        assert default is False
        with pytest.raises(TypeError, match="not allowed for this dtype"):
            getattr(df, kernel)(*args)
    else:
        if default is None or default is lib.no_default:
            expected = getattr(df[["a"]], kernel)(*args)
            warn = FutureWarning
        else:
            # default must be False and works on any nuisance columns
            expected = getattr(df, kernel)(*args)
            if kernel == "mode":
                assert "b" in expected.columns
            else:
                assert "b" in expected.index
            warn = None
        msg = f"The default value of numeric_only in DataFrame.{kernel}"
        with tm.assert_produces_warning(warn, match=msg):
            result = getattr(df, kernel)(*args)
        tm.assert_equal(result, expected)