aoc-2022/venv/Lib/site-packages/pandas/tests/arrays/integer/test_construction.py

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.api.types import is_integer
from pandas.core.arrays import IntegerArray
from pandas.core.arrays.integer import (
    Int8Dtype,
    Int32Dtype,
    Int64Dtype,
)


@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])
def constructor(request):
    """Fixture returning parametrized IntegerArray from given sequence.

    Used to test dtype conversions.
    """
    return request.param


def test_uses_pandas_na():
    a = pd.array([1, None], dtype=Int64Dtype())
    assert a[1] is pd.NA


def test_from_dtype_from_float(data):
    # construct from our dtype & string dtype
    dtype = data.dtype

    # from float
    expected = pd.Series(data)
    result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
    tm.assert_series_equal(result, expected)

    # from int / list
    expected = pd.Series(data)
    result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
    tm.assert_series_equal(result, expected)

    # from int / array
    expected = pd.Series(data).dropna().reset_index(drop=True)
    dropped = np.array(data.dropna()).astype(np.dtype(dtype.type))
    result = pd.Series(dropped, dtype=str(dtype))
    tm.assert_series_equal(result, expected)


def test_conversions(data_missing):
    # astype to object series
    df = pd.DataFrame({"A": data_missing})
    result = df["A"].astype("object")
    expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A")
    tm.assert_series_equal(result, expected)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = df["A"].astype("object").values
    expected = np.array([pd.NA, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for r, e in zip(result, expected):
        if pd.isnull(r):
            assert pd.isnull(e)
        elif is_integer(r):
            assert r == e
            assert is_integer(e)
        else:
            assert r == e
            assert type(r) == type(e)


def test_integer_array_constructor():
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    result = IntegerArray(values, mask)
    expected = pd.array([1, 2, 3, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.tolist(), mask)

    with pytest.raises(TypeError, match=msg):
        IntegerArray(values, mask.tolist())

    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.astype(float), mask)
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)


def test_integer_array_constructor_copy():
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    result = IntegerArray(values, mask)
    assert result._data is values
    assert result._mask is mask

    result = IntegerArray(values, mask, copy=True)
    assert result._data is not values
    assert result._mask is not mask


@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_to_integer_array_none_is_nan(a, b):
    result = pd.array(a, dtype="Int64")
    expected = pd.array(b, dtype="Int64")
    tm.assert_extension_array_equal(result, expected)


@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    # error in converting existing arrays to IntegerArrays
    msg = "|".join(
        [
            r"cannot be converted to IntegerDtype",
            r"invalid literal for int\(\) with base 10:",
            r"values must be a 1D list-like",
            r"Cannot pass scalar",
            r"int\(\) argument must be a string",
        ]
    )
    with pytest.raises((ValueError, TypeError), match=msg):
        pd.array(values, dtype="Int64")

    with pytest.raises((ValueError, TypeError), match=msg):
        IntegerArray._from_sequence(values)


def test_to_integer_array_inferred_dtype(constructor):
    # if values has dtype -> respect it
    result = constructor(np.array([1, 2], dtype="int8"))
    assert result.dtype == Int8Dtype()
    result = constructor(np.array([1, 2], dtype="int32"))
    assert result.dtype == Int32Dtype()

    # if values have no dtype -> always int64
    result = constructor([1, 2])
    assert result.dtype == Int64Dtype()


def test_to_integer_array_dtype_keyword(constructor):
    result = constructor([1, 2], dtype="Int8")
    assert result.dtype == Int8Dtype()

    # if values has dtype -> override it
    result = constructor(np.array([1, 2], dtype="int8"), dtype="Int32")
    assert result.dtype == Int32Dtype()


def test_to_integer_array_float():
    result = IntegerArray._from_sequence([1.0, 2.0])
    expected = pd.array([1, 2], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        IntegerArray._from_sequence([1.5, 2.0])

    # for float dtypes, the itemsize is not preserved
    result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32"))
    assert result.dtype == Int64Dtype()


def test_to_integer_array_str():
    result = IntegerArray._from_sequence(["1", "2", None])
    expected = pd.array([1, 2, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    with pytest.raises(
        ValueError, match=r"invalid literal for int\(\) with base 10: .*"
    ):
        IntegerArray._from_sequence(["1", "2", ""])

    with pytest.raises(
        ValueError, match=r"invalid literal for int\(\) with base 10: .*"
    ):
        IntegerArray._from_sequence(["1.5", "2.0"])


@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(
    constructor, bool_values, int_values, target_dtype, expected_dtype
):
    result = constructor(bool_values, dtype=target_dtype)
    assert result.dtype == expected_dtype
    expected = pd.array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(result, expected)


@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    # convert existing arrays to IntegerArrays
    result = IntegerArray._from_sequence(values, dtype=to_dtype)
    assert result.dtype == result_dtype()
    expected = pd.array(values, dtype=result_dtype())
    tm.assert_extension_array_equal(result, expected)
Give it up for day 1 of smooth brain shenanigans! https://pbs.twimg.com/media/E1bxikWWEAEGYHU.png 2022-12-01 16:50:29 +00:00			`import numpy as np`
			`import pytest`

			`import pandas as pd`
			`import pandas._testing as tm`
			`from pandas.api.types import is_integer`
			`from pandas.core.arrays import IntegerArray`
			`from pandas.core.arrays.integer import (`
			`Int8Dtype,`
			`Int32Dtype,`
			`Int64Dtype,`
			`)`


			`@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])`
			`def constructor(request):`
			`"""Fixture returning parametrized IntegerArray from given sequence.`

			`Used to test dtype conversions.`
			`"""`
			`return request.param`


			`def test_uses_pandas_na():`
			`a = pd.array([1, None], dtype=Int64Dtype())`
			`assert a[1] is pd.NA`


			`def test_from_dtype_from_float(data):`
			`# construct from our dtype & string dtype`
			`dtype = data.dtype`

			`# from float`
			`expected = pd.Series(data)`
			`result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))`
			`tm.assert_series_equal(result, expected)`

			`# from int / list`
			`expected = pd.Series(data)`
			`result = pd.Series(np.array(data).tolist(), dtype=str(dtype))`
			`tm.assert_series_equal(result, expected)`

			`# from int / array`
			`expected = pd.Series(data).dropna().reset_index(drop=True)`
			`dropped = np.array(data.dropna()).astype(np.dtype(dtype.type))`
			`result = pd.Series(dropped, dtype=str(dtype))`
			`tm.assert_series_equal(result, expected)`


			`def test_conversions(data_missing):`
			`# astype to object series`
			`df = pd.DataFrame({"A": data_missing})`
			`result = df["A"].astype("object")`
			`expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A")`
			`tm.assert_series_equal(result, expected)`

			`# convert to object ndarray`
			`# we assert that we are exactly equal`
			`# including type conversions of scalars`
			`result = df["A"].astype("object").values`
			`expected = np.array([pd.NA, 1], dtype=object)`
			`tm.assert_numpy_array_equal(result, expected)`

			`for r, e in zip(result, expected):`
			`if pd.isnull(r):`
			`assert pd.isnull(e)`
			`elif is_integer(r):`
			`assert r == e`
			`assert is_integer(e)`
			`else:`
			`assert r == e`
			`assert type(r) == type(e)`


			`def test_integer_array_constructor():`
			`values = np.array([1, 2, 3, 4], dtype="int64")`
			`mask = np.array([False, False, False, True], dtype="bool")`

			`result = IntegerArray(values, mask)`
			`expected = pd.array([1, 2, 3, np.nan], dtype="Int64")`
			`tm.assert_extension_array_equal(result, expected)`

			`msg = r".* should be .* numpy array. Use the 'pd.array' function instead"`
			`with pytest.raises(TypeError, match=msg):`
			`IntegerArray(values.tolist(), mask)`

			`with pytest.raises(TypeError, match=msg):`
			`IntegerArray(values, mask.tolist())`

			`with pytest.raises(TypeError, match=msg):`
			`IntegerArray(values.astype(float), mask)`
			`msg = r"__init__\(\) missing 1 required positional argument: 'mask'"`
			`with pytest.raises(TypeError, match=msg):`
			`IntegerArray(values)`


			`def test_integer_array_constructor_copy():`
			`values = np.array([1, 2, 3, 4], dtype="int64")`
			`mask = np.array([False, False, False, True], dtype="bool")`

			`result = IntegerArray(values, mask)`
			`assert result._data is values`
			`assert result._mask is mask`

			`result = IntegerArray(values, mask, copy=True)`
			`assert result._data is not values`
			`assert result._mask is not mask`


			`@pytest.mark.parametrize(`
			`"a, b",`
			`[`
			`([1, None], [1, np.nan]),`
			`([None], [np.nan]),`
			`([None, np.nan], [np.nan, np.nan]),`
			`([np.nan, np.nan], [np.nan, np.nan]),`
			`],`
			`)`
			`def test_to_integer_array_none_is_nan(a, b):`
			`result = pd.array(a, dtype="Int64")`
			`expected = pd.array(b, dtype="Int64")`
			`tm.assert_extension_array_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"values",`
			`[`
			`["foo", "bar"],`
			`"foo",`
			`1,`
			`1.0,`
			`pd.date_range("20130101", periods=2),`
			`np.array(["foo"]),`
			`[[1, 2], [3, 4]],`
			`[np.nan, {"a": 1}],`
			`],`
			`)`
			`def test_to_integer_array_error(values):`
			`# error in converting existing arrays to IntegerArrays`
			`msg = "\|".join(`
			`[`
			`r"cannot be converted to IntegerDtype",`
			`r"invalid literal for int\(\) with base 10:",`
			`r"values must be a 1D list-like",`
			`r"Cannot pass scalar",`
			`r"int\(\) argument must be a string",`
			`]`
			`)`
			`with pytest.raises((ValueError, TypeError), match=msg):`
			`pd.array(values, dtype="Int64")`

			`with pytest.raises((ValueError, TypeError), match=msg):`
			`IntegerArray._from_sequence(values)`


			`def test_to_integer_array_inferred_dtype(constructor):`
			`# if values has dtype -> respect it`
			`result = constructor(np.array([1, 2], dtype="int8"))`
			`assert result.dtype == Int8Dtype()`
			`result = constructor(np.array([1, 2], dtype="int32"))`
			`assert result.dtype == Int32Dtype()`

			`# if values have no dtype -> always int64`
			`result = constructor([1, 2])`
			`assert result.dtype == Int64Dtype()`


			`def test_to_integer_array_dtype_keyword(constructor):`
			`result = constructor([1, 2], dtype="Int8")`
			`assert result.dtype == Int8Dtype()`

			`# if values has dtype -> override it`
			`result = constructor(np.array([1, 2], dtype="int8"), dtype="Int32")`
			`assert result.dtype == Int32Dtype()`


			`def test_to_integer_array_float():`
			`result = IntegerArray._from_sequence([1.0, 2.0])`
			`expected = pd.array([1, 2], dtype="Int64")`
			`tm.assert_extension_array_equal(result, expected)`

			`with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):`
			`IntegerArray._from_sequence([1.5, 2.0])`

			`# for float dtypes, the itemsize is not preserved`
			`result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32"))`
			`assert result.dtype == Int64Dtype()`


			`def test_to_integer_array_str():`
			`result = IntegerArray._from_sequence(["1", "2", None])`
			`expected = pd.array([1, 2, np.nan], dtype="Int64")`
			`tm.assert_extension_array_equal(result, expected)`

			`with pytest.raises(`
			`ValueError, match=r"invalid literal for int\(\) with base 10: .*"`
			`):`
			`IntegerArray._from_sequence(["1", "2", ""])`

			`with pytest.raises(`
			`ValueError, match=r"invalid literal for int\(\) with base 10: .*"`
			`):`
			`IntegerArray._from_sequence(["1.5", "2.0"])`


			`@pytest.mark.parametrize(`
			`"bool_values, int_values, target_dtype, expected_dtype",`
			`[`
			`([False, True], [0, 1], Int64Dtype(), Int64Dtype()),`
			`([False, True], [0, 1], "Int64", Int64Dtype()),`
			`([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),`
			`],`
			`)`
			`def test_to_integer_array_bool(`
			`constructor, bool_values, int_values, target_dtype, expected_dtype`
			`):`
			`result = constructor(bool_values, dtype=target_dtype)`
			`assert result.dtype == expected_dtype`
			`expected = pd.array(int_values, dtype=target_dtype)`
			`tm.assert_extension_array_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"values, to_dtype, result_dtype",`
			`[`
			`(np.array([1], dtype="int64"), None, Int64Dtype),`
			`(np.array([1, np.nan]), None, Int64Dtype),`
			`(np.array([1, np.nan]), "int8", Int8Dtype),`
			`],`
			`)`
			`def test_to_integer_array(values, to_dtype, result_dtype):`
			`# convert existing arrays to IntegerArrays`
			`result = IntegerArray._from_sequence(values, dtype=to_dtype)`
			`assert result.dtype == result_dtype()`
			`expected = pd.array(values, dtype=result_dtype())`
			`tm.assert_extension_array_equal(result, expected)`