aoc-2022/venv/Lib/site-packages/pandas/tests/base/test_misc.py

import sys

import numpy as np
import pytest

from pandas.compat import (
    IS64,
    PYPY,
)

from pandas.core.dtypes.common import (
    is_categorical_dtype,
    is_dtype_equal,
    is_object_dtype,
)

import pandas as pd
from pandas import (
    Index,
    Series,
)
import pandas._testing as tm


def test_isnull_notnull_docstrings():
    # GH#41855 make sure its clear these are aliases
    doc = pd.DataFrame.notnull.__doc__
    assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n")
    doc = pd.DataFrame.isnull.__doc__
    assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n")

    doc = Series.notnull.__doc__
    assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n")
    doc = Series.isnull.__doc__
    assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n")


@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
def test_binary_ops_docstring(frame_or_series, op_name, op):
    # not using the all_arithmetic_functions fixture with _get_opstr
    # as _get_opstr is used internally in the dynamic implementation of the docstring
    klass = frame_or_series

    operand1 = klass.__name__.lower()
    operand2 = "other"
    expected_str = " ".join([operand1, op, operand2])
    assert expected_str in getattr(klass, op_name).__doc__

    # reverse version of the binary ops
    expected_str = " ".join([operand2, op, operand1])
    assert expected_str in getattr(klass, "r" + op_name).__doc__


def test_ndarray_compat_properties(index_or_series_obj):
    obj = index_or_series_obj

    # Check that we work.
    for p in ["shape", "dtype", "T", "nbytes"]:
        assert getattr(obj, p, None) is not None

    # deprecated properties
    for p in ["strides", "itemsize", "base", "data"]:
        assert not hasattr(obj, p)

    msg = "can only convert an array of size 1 to a Python scalar"
    with pytest.raises(ValueError, match=msg):
        obj.item()  # len > 1

    assert obj.ndim == 1
    assert obj.size == len(obj)

    assert Index([1]).item() == 1
    assert Series([1]).item() == 1


def test_array_wrap_compat():
    # Note: at time of dask 2022.01.0, this is still used by eg dask
    # (https://github.com/dask/dask/issues/8580).
    # This test is a small dummy ensuring coverage
    orig = Series([1, 2, 3], dtype="int64", index=["a", "b", "c"])
    with tm.assert_produces_warning(DeprecationWarning):
        result = orig.__array_wrap__(np.array([2, 4, 6], dtype="int64"))
    expected = orig * 2
    tm.assert_series_equal(result, expected)


@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
def test_memory_usage(index_or_series_obj):
    obj = index_or_series_obj

    res = obj.memory_usage()
    res_deep = obj.memory_usage(deep=True)

    is_ser = isinstance(obj, Series)
    is_object = is_object_dtype(obj) or (
        isinstance(obj, Series) and is_object_dtype(obj.index)
    )
    is_categorical = is_categorical_dtype(obj.dtype) or (
        isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype)
    )
    is_object_string = is_dtype_equal(obj, "string[python]") or (
        is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
    )

    if len(obj) == 0:
        if isinstance(obj, Index):
            expected = 0
        else:
            expected = 108 if IS64 else 64
        assert res_deep == res == expected
    elif is_object or is_categorical or is_object_string:
        # only deep will pick them up
        assert res_deep > res
    else:
        assert res == res_deep

    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    diff = res_deep - sys.getsizeof(obj)
    assert abs(diff) < 100


def test_memory_usage_components_series(series_with_simple_index):
    series = series_with_simple_index
    total_usage = series.memory_usage(index=True)
    non_index_usage = series.memory_usage(index=False)
    index_usage = series.index.memory_usage()
    assert total_usage == non_index_usage + index_usage


@pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)
def test_memory_usage_components_narrow_series(dtype):
    series = tm.make_rand_series(name="a", dtype=dtype)
    total_usage = series.memory_usage(index=True)
    non_index_usage = series.memory_usage(index=False)
    index_usage = series.index.memory_usage()
    assert total_usage == non_index_usage + index_usage


def test_searchsorted(request, index_or_series_obj):
    # numpy.searchsorted calls obj.searchsorted under the hood.
    # See gh-12238
    obj = index_or_series_obj

    if isinstance(obj, pd.MultiIndex):
        # See gh-14833
        request.node.add_marker(
            pytest.mark.xfail(
                reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833"
            )
        )
    elif obj.dtype.kind == "c" and isinstance(obj, Index):
        # TODO: Should Series cases also raise? Looks like they use numpy
        #  comparison semantics https://github.com/numpy/numpy/issues/15981
        mark = pytest.mark.xfail(reason="complex objects are not comparable")
        request.node.add_marker(mark)

    max_obj = max(obj, default=0)
    index = np.searchsorted(obj, max_obj)
    assert 0 <= index <= len(obj)

    index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))
    assert 0 <= index <= len(obj)


def test_access_by_position(index_flat):
    index = index_flat

    if len(index) == 0:
        pytest.skip("Test doesn't make sense on empty data")

    series = Series(index)
    assert index[0] == series.iloc[0]
    assert index[5] == series.iloc[5]
    assert index[-1] == series.iloc[-1]

    size = len(index)
    assert index[-1] == index[size - 1]

    msg = f"index {size} is out of bounds for axis 0 with size {size}"
    if is_dtype_equal(index.dtype, "string[pyarrow]"):
        msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        index[size]
    msg = "single positional indexer is out-of-bounds"
    with pytest.raises(IndexError, match=msg):
        series.iloc[size]
Give it up for day 1 of smooth brain shenanigans! https://pbs.twimg.com/media/E1bxikWWEAEGYHU.png 2022-12-01 16:50:29 +00:00			`import sys`

			`import numpy as np`
			`import pytest`

			`from pandas.compat import (`
			`IS64,`
			`PYPY,`
			`)`

			`from pandas.core.dtypes.common import (`
			`is_categorical_dtype,`
			`is_dtype_equal,`
			`is_object_dtype,`
			`)`

			`import pandas as pd`
			`from pandas import (`
			`Index,`
			`Series,`
			`)`
			`import pandas._testing as tm`


			`def test_isnull_notnull_docstrings():`
			`# GH#41855 make sure its clear these are aliases`
			`doc = pd.DataFrame.notnull.__doc__`
			`assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n")`
			`doc = pd.DataFrame.isnull.__doc__`
			`assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n")`

			`doc = Series.notnull.__doc__`
			`assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n")`
			`doc = Series.isnull.__doc__`
			`assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n")`


			`@pytest.mark.parametrize(`
			`"op_name, op",`
			`[`
			`("add", "+"),`
			`("sub", "-"),`
			`("mul", "*"),`
			`("mod", "%"),`
			`("pow", "**"),`
			`("truediv", "/"),`
			`("floordiv", "//"),`
			`],`
			`)`
			`def test_binary_ops_docstring(frame_or_series, op_name, op):`
			`# not using the all_arithmetic_functions fixture with _get_opstr`
			`# as _get_opstr is used internally in the dynamic implementation of the docstring`
			`klass = frame_or_series`

			`operand1 = klass.__name__.lower()`
			`operand2 = "other"`
			`expected_str = " ".join([operand1, op, operand2])`
			`assert expected_str in getattr(klass, op_name).__doc__`

			`# reverse version of the binary ops`
			`expected_str = " ".join([operand2, op, operand1])`
			`assert expected_str in getattr(klass, "r" + op_name).__doc__`


			`def test_ndarray_compat_properties(index_or_series_obj):`
			`obj = index_or_series_obj`

			`# Check that we work.`
			`for p in ["shape", "dtype", "T", "nbytes"]:`
			`assert getattr(obj, p, None) is not None`

			`# deprecated properties`
			`for p in ["strides", "itemsize", "base", "data"]:`
			`assert not hasattr(obj, p)`

			`msg = "can only convert an array of size 1 to a Python scalar"`
			`with pytest.raises(ValueError, match=msg):`
			`obj.item() # len > 1`

			`assert obj.ndim == 1`
			`assert obj.size == len(obj)`

			`assert Index([1]).item() == 1`
			`assert Series([1]).item() == 1`


			`def test_array_wrap_compat():`
			`# Note: at time of dask 2022.01.0, this is still used by eg dask`
			`# (https://github.com/dask/dask/issues/8580).`
			`# This test is a small dummy ensuring coverage`
			`orig = Series([1, 2, 3], dtype="int64", index=["a", "b", "c"])`
			`with tm.assert_produces_warning(DeprecationWarning):`
			`result = orig.__array_wrap__(np.array([2, 4, 6], dtype="int64"))`
			`expected = orig * 2`
			`tm.assert_series_equal(result, expected)`


			`@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")`
			`def test_memory_usage(index_or_series_obj):`
			`obj = index_or_series_obj`

			`res = obj.memory_usage()`
			`res_deep = obj.memory_usage(deep=True)`

			`is_ser = isinstance(obj, Series)`
			`is_object = is_object_dtype(obj) or (`
			`isinstance(obj, Series) and is_object_dtype(obj.index)`
			`)`
			`is_categorical = is_categorical_dtype(obj.dtype) or (`
			`isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype)`
			`)`
			`is_object_string = is_dtype_equal(obj, "string[python]") or (`
			`is_ser and is_dtype_equal(obj.index.dtype, "string[python]")`
			`)`

			`if len(obj) == 0:`
			`if isinstance(obj, Index):`
			`expected = 0`
			`else:`
			`expected = 108 if IS64 else 64`
			`assert res_deep == res == expected`
			`elif is_object or is_categorical or is_object_string:`
			`# only deep will pick them up`
			`assert res_deep > res`
			`else:`
			`assert res == res_deep`

			`# sys.getsizeof will call the .memory_usage with`
			`# deep=True, and add on some GC overhead`
			`diff = res_deep - sys.getsizeof(obj)`
			`assert abs(diff) < 100`


			`def test_memory_usage_components_series(series_with_simple_index):`
			`series = series_with_simple_index`
			`total_usage = series.memory_usage(index=True)`
			`non_index_usage = series.memory_usage(index=False)`
			`index_usage = series.index.memory_usage()`
			`assert total_usage == non_index_usage + index_usage`


			`@pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)`
			`def test_memory_usage_components_narrow_series(dtype):`
			`series = tm.make_rand_series(name="a", dtype=dtype)`
			`total_usage = series.memory_usage(index=True)`
			`non_index_usage = series.memory_usage(index=False)`
			`index_usage = series.index.memory_usage()`
			`assert total_usage == non_index_usage + index_usage`


			`def test_searchsorted(request, index_or_series_obj):`
			`# numpy.searchsorted calls obj.searchsorted under the hood.`
			`# See gh-12238`
			`obj = index_or_series_obj`

			`if isinstance(obj, pd.MultiIndex):`
			`# See gh-14833`
			`request.node.add_marker(`
			`pytest.mark.xfail(`
			`reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833"`
			`)`
			`)`
			`elif obj.dtype.kind == "c" and isinstance(obj, Index):`
			`# TODO: Should Series cases also raise? Looks like they use numpy`
			`# comparison semantics https://github.com/numpy/numpy/issues/15981`
			`mark = pytest.mark.xfail(reason="complex objects are not comparable")`
			`request.node.add_marker(mark)`

			`max_obj = max(obj, default=0)`
			`index = np.searchsorted(obj, max_obj)`
			`assert 0 <= index <= len(obj)`

			`index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))`
			`assert 0 <= index <= len(obj)`


			`def test_access_by_position(index_flat):`
			`index = index_flat`

			`if len(index) == 0:`
			`pytest.skip("Test doesn't make sense on empty data")`

			`series = Series(index)`
			`assert index[0] == series.iloc[0]`
			`assert index[5] == series.iloc[5]`
			`assert index[-1] == series.iloc[-1]`

			`size = len(index)`
			`assert index[-1] == index[size - 1]`

			`msg = f"index {size} is out of bounds for axis 0 with size {size}"`
			`if is_dtype_equal(index.dtype, "string[pyarrow]"):`
			`msg = "index out of bounds"`
			`with pytest.raises(IndexError, match=msg):`
			`index[size]`
			`msg = "single positional indexer is out-of-bounds"`
			`with pytest.raises(IndexError, match=msg):`
			`series.iloc[size]`