# Listing metadata (from the code viewer): 293 lines, 10 KiB, Python
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
    timedelta_range,
)
import pandas._testing as tm


class TestDataFrameAppend:
    """Tests for ``DataFrame.append`` (deprecated) and the private ``_append``."""

    @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning")
    def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series):
        """Appending the two halves of a MultiIndexed object rebuilds the original."""
        obj = multiindex_dataframe_random_data
        obj = tm.get_obj(obj, frame_or_series)

        a = obj[:5]
        b = obj[5:]

        # Public ``append`` is exercised on purpose; its deprecation warning is
        # silenced by the filterwarnings mark above.
        result = a.append(b)
        tm.assert_equal(result, obj)

    def test_append_empty_list(self):
        """Appending ``[]`` yields an equal frame that is a distinct object."""
        # GH 28769
        df = DataFrame()
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # ._append() should return a new object

        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # ._append() should return a new object

    def test_append_series_dict(self):
        """Appending a single Series or dict row, including the error paths."""
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        # The row is still named 4, which already exists in df's index.
        series = df.loc[4]
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            df._append(series, verify_integrity=True)

        # An unnamed Series cannot be appended unless ignore_index=True.
        series.name = None
        msg = "Can only append a Series if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series, verify_integrity=True)

        result = df._append(series[::-1], ignore_index=True)
        expected = df._append(
            DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True
        )
        tm.assert_frame_equal(result, expected)

        # dict
        result = df._append(series.to_dict(), ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # Only a subset of the columns is supplied by the appended row.
        result = df._append(series[::-1][:3], ignore_index=True)
        expected = df._append(
            DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True
        )
        # ``expected`` was built with sort=True, so align its column order.
        tm.assert_frame_equal(result, expected.loc[:, result.columns])

        msg = "Can only append a dict if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series.to_dict())

        # can append when name set
        row = df.loc[4]
        row.name = 5
        result = df._append(row)
        expected = df._append(df[-1:], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_of_series_dicts(self):
        """Appending a list of dicts matches appending the equivalent DataFrame."""
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [row.to_dict() for _, row in df.iterrows()]

        result = df._append(dicts, ignore_index=True)
        expected = df._append(df, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # different columns
        dicts = [
            {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4},
            {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8},
        ]
        result = df._append(dicts, ignore_index=True, sort=True)
        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_retain_index_name(self):
        """The index name survives appending a Series or a list of Series."""
        df = DataFrame(
            [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname")
        )

        serc = Series([5, 6], name="c")

        expected = DataFrame(
            [[1, 2], [3, 4], [5, 6]],
            index=pd.Index(["a", "b", "c"], name="keepthisname"),
        )

        # append series
        result = df._append(serc)
        tm.assert_frame_equal(result, expected)

        # append list of series
        result = df._append([serc])
        tm.assert_frame_equal(result, expected)

    def test_append_missing_cols(self):
        """Appending dicts that each lack some of the target frame's columns."""
        # GH22252
        # exercise the conditional branch in append method where the data
        # to be appended is a list and does not contain all columns that are in
        # the target DataFrame
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [{"foo": 9}, {"bar": 10}]
        result = df._append(dicts, ignore_index=True, sort=True)

        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_empty_dataframe(self):
        """Appending an empty DataFrame leaves the left-hand frame unchanged."""

        # Empty df append empty df
        df1 = DataFrame()
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

    def test_append_dtypes(self, using_array_manager):
        """Resulting dtype when a datetime column is appended with other dtypes."""

        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        # datetime + empty frame
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # datetime + str
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
        tm.assert_frame_equal(result, expected)

        # datetime + float NaN
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        if using_array_manager:
            # TODO(ArrayManager) decide on exact casting rules in concat
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        # datetime + object-dtype NaN
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        if using_array_manager:
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        # float NaN + datetime (operands swapped)
        df1 = DataFrame({"bar": np.nan}, index=range(1))
        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
        )
        if using_array_manager:
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        # datetime + object-dtype int
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"]
    )
    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
        """An appended Timestamp row round-trips unchanged."""
        # GH 30238
        tz = tz_naive_fixture
        df = DataFrame([Timestamp(timestamp, tz=tz)])
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(Timestamp(timestamp, tz=tz), name=0)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([1], pd.Int64Dtype()),
            ([1], pd.CategoricalDtype()),
            ([pd.Interval(left=0, right=5)], pd.IntervalDtype()),
            ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")),
            ([1], pd.SparseDtype()),
        ],
    )
    def test_other_dtypes(self, data, dtype, using_array_manager):
        """Appending a frame's own first row keeps the extension dtype."""
        df = DataFrame(data, dtype=dtype)

        # A FutureWarning is expected only for SparseDtype under ArrayManager;
        # ``warn=None`` asserts that no warning is raised otherwise.
        warn = None
        if using_array_manager and isinstance(dtype, pd.SparseDtype):
            warn = FutureWarning

        with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
            result = df._append(df.iloc[0]).iloc[-1]

        expected = Series(data, name=0, dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_append_numpy_bug_1681(self, dtype):
        """Appending to an empty frame preserves datetime/timedelta values."""
        # another datetime64 bug
        if dtype == "datetime64[ns]":
            index = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        else:
            index = timedelta_range("1 days", "10 days", freq="2D")

        df = DataFrame()
        other = DataFrame({"A": "foo", "B": index}, index=index)

        result = df._append(other)
        assert (result["B"] == index).all()

    @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning")
    def test_multiindex_column_append_multiple(self):
        """Repeated appends keep the data under the "multi" column group intact."""
        # GH 29699
        df = DataFrame(
            [[1, 11], [2, 12], [3, 13]],
            columns=pd.MultiIndex.from_tuples(
                [("multi", "col1"), ("multi", "col2")], names=["level1", None]
            ),
        )
        df2 = df.copy()
        for i in range(1, 10):
            # Add a new column, then append the original 3 rows once more;
            # after iteration i the frame holds (i + 1) copies of those rows.
            df[i, "colA"] = 10
            df = df._append(df2, ignore_index=True)
            result = df["multi"]
            expected = DataFrame(
                {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)}
            )
            tm.assert_frame_equal(result, expected)

    def test_append_raises_future_warning(self):
        """Public ``DataFrame.append`` emits its deprecation FutureWarning."""
        # GH#35407
        df1 = DataFrame([[1, 2], [3, 4]])
        df2 = DataFrame([[5, 6], [7, 8]])
        # Match the message text so an unrelated FutureWarning cannot satisfy
        # the check; the same text is used by the filterwarnings mark on
        # test_append_multiindex.
        with tm.assert_produces_warning(
            FutureWarning, match="append method is deprecated"
        ):
            df1.append(df2)