aoc-2022/venv/Lib/site-packages/pandas/tests/frame/indexing/test_coercion.py

189 lines
5.5 KiB
Python
Raw Normal View History

"""
Tests for values coercion in setitem-like operations on DataFrame.
For the most part, these should be multi-column DataFrames, otherwise
we would share the tests with Series.
"""
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDataFrameSetitemCoercion:
@pytest.mark.xfail(reason="Unnecessary cast.")
@pytest.mark.parametrize("consolidate", [True, False])
def test_loc_setitem_multiindex_columns(self, consolidate):
# GH#18415 Setting values in a single column preserves dtype,
# while setting them in multiple columns did unwanted cast.
# Note that A here has 2 blocks, below we do the same thing
# with a consolidated frame.
A = DataFrame(np.zeros((6, 5), dtype=np.float32))
A = pd.concat([A, A], axis=1, keys=[1, 2])
if consolidate:
A = A._consolidate()
A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()
msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()
A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()
# TODO: i think this isn't about MultiIndex and could be done with iloc?
def test_37477():
# fixed by GH#45121
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})
df = orig.copy()
df.at[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)
df = orig.copy()
df.loc[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)
df = orig.copy()
df.iat[1, 1] = 1.2
tm.assert_frame_equal(df, expected)
df = orig.copy()
df.iloc[1, 1] = 1.2
tm.assert_frame_equal(df, expected)
def test_6942(indexer_al):
# check that the .at __setitem__ after setting "Live" actually sets the data
start = Timestamp("2014-04-01")
t1 = Timestamp("2014-04-23 12:42:38.883082")
t2 = Timestamp("2014-04-24 01:33:30.040039")
dti = date_range(start, periods=1)
orig = DataFrame(index=dti, columns=["timenow", "Live"])
df = orig.copy()
indexer_al(df)[start, "timenow"] = t1
df["Live"] = True
df.at[start, "timenow"] = t2
assert df.iloc[0, 0] == t2
def test_26395(indexer_al):
# .at case fixed by GH#45121 (best guess)
df = DataFrame(index=["A", "B", "C"])
df["D"] = 0
indexer_al(df)["C", "D"] = 2
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
tm.assert_frame_equal(df, expected)
indexer_al(df)["C", "D"] = 44.5
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
tm.assert_frame_equal(df, expected)
indexer_al(df)["C", "D"] = "hello"
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
tm.assert_frame_equal(df, expected)
@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
df.loc[2] = Series({"a": 5, "b": 6})
assert (df.dtypes == np.int64).all()
df.loc[3] = Series({"a": 7})
# df["a"] doesn't have any NaNs, should not have been cast
exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
tm.assert_series_equal(df.dtypes, exp_dtypes)
@pytest.mark.xfail(reason="Unnecessarily upcasts to float64")
def test_iloc_setitem_unnecesssary_float_upcasting():
# GH#12255
df = DataFrame(
{
0: np.array([1, 3], dtype=np.float32),
1: np.array([2, 4], dtype=np.float32),
2: ["a", "b"],
}
)
orig = df.copy()
values = df[0].values.reshape(2, 1)
msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.iloc[:, 0:1] = values
tm.assert_frame_equal(df, orig)
@pytest.mark.xfail(reason="unwanted casting to dt64")
def test_12499():
# TODO: OP in GH#12499 used np.datetim64("NaT") instead of pd.NaT,
# which has consequences for the expected df["two"] (though i think at
# the time it might not have because of a separate bug). See if it makes
# a difference which one we use here.
ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")
data = [{"one": 0, "two": ts}]
orig = DataFrame(data)
df = orig.copy()
df.loc[1] = [np.nan, NaT]
expected = DataFrame(
{"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
)
tm.assert_frame_equal(df, expected)
data = [{"one": 0, "two": ts}]
df = orig.copy()
df.loc[1, :] = [np.nan, NaT]
tm.assert_frame_equal(df, expected)
def test_20476():
mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = DataFrame(-1, index=range(3), columns=mi)
filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])
df["A"] = filler
expected = DataFrame(
{
0: [1, 1, 1],
1: [2, 2, 2],
2: [3.0, 3.0, 3.0],
3: [-1, -1, -1],
4: [-1, -1, -1],
5: [-1, -1, -1],
}
)
expected.columns = mi
exp_dtypes = Series(
[np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
index=mi,
)
tm.assert_series_equal(df.dtypes, exp_dtypes)