import numpy as np import pytest import pandas as pd from pandas import ( DataFrame, Series, Timestamp, date_range, timedelta_range, ) import pandas._testing as tm class TestDataFrameAppend: @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): obj = multiindex_dataframe_random_data obj = tm.get_obj(obj, frame_or_series) a = obj[:5] b = obj[5:] result = a.append(b) tm.assert_equal(result, obj) def test_append_empty_list(self): # GH 28769 df = DataFrame() result = df._append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) result = df._append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df # ._append() should return a new object def test_append_series_dict(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) series = df.loc[4] msg = "Indexes have overlapping values" with pytest.raises(ValueError, match=msg): df._append(series, verify_integrity=True) series.name = None msg = "Can only append a Series if ignore_index=True" with pytest.raises(TypeError, match=msg): df._append(series, verify_integrity=True) result = df._append(series[::-1], ignore_index=True) expected = df._append( DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True ) tm.assert_frame_equal(result, expected) # dict result = df._append(series.to_dict(), ignore_index=True) tm.assert_frame_equal(result, expected) result = df._append(series[::-1][:3], ignore_index=True) expected = df._append( DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True ) tm.assert_frame_equal(result, expected.loc[:, result.columns]) msg = "Can only append a dict if ignore_index=True" with pytest.raises(TypeError, match=msg): df._append(series.to_dict()) # can append when name set row = df.loc[4] row.name = 5 result = df._append(row) expected = df._append(df[-1:], ignore_index=True) tm.assert_frame_equal(result, expected) def test_append_list_of_series_dicts(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [x.to_dict() for idx, x in df.iterrows()] result = df._append(dicts, ignore_index=True) expected = df._append(df, ignore_index=True) tm.assert_frame_equal(result, expected) # different columns dicts = [ {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, ] result = df._append(dicts, ignore_index=True, sort=True) expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_list_retain_index_name(self): df = DataFrame( [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname") ) serc = Series([5, 6], name="c") expected = DataFrame( [[1, 2], [3, 4], [5, 6]], index=pd.Index(["a", "b", "c"], name="keepthisname"), ) # append series result = df._append(serc) tm.assert_frame_equal(result, expected) # append list of series result = df._append([serc]) tm.assert_frame_equal(result, expected) def test_append_missing_cols(self): # GH22252 # exercise the conditional branch in append method where the data # to be appended is a list and does not contain all columns that are in # the target DataFrame df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [{"foo": 9}, {"bar": 10}] result = df._append(dicts, ignore_index=True, sort=True) expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_empty_dataframe(self): # Empty df append empty df df1 = DataFrame() df2 = DataFrame() result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-empty df append empty df df1 = DataFrame(np.random.randn(5, 2)) df2 = DataFrame() result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Empty df with columns append empty df df1 = DataFrame(columns=["bar", "foo"]) df2 = DataFrame() result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-Empty df with columns append empty df df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) df2 = DataFrame() result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) def test_append_dtypes(self, using_array_manager): # GH 5754 # row appends of different dtypes (so need to do by-item) # can sometimes infer the correct type df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) df2 = DataFrame() result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) result = df1._append(df2) expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) result = df1._append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) if using_array_manager: # TODO(ArrayManager) decide on exact casting rules in concat # With ArrayManager, all-NaN float is not ignored expected = expected.astype(object) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) result = df1._append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) if using_array_manager: # With ArrayManager, all-NaN float is not ignored expected = expected.astype(object) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": np.nan}, index=range(1)) df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) result = df1._append(df2) expected = DataFrame( {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} ) if using_array_manager: # With ArrayManager, all-NaN float is not ignored expected = expected.astype(object) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) result = df1._append(df2) expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] ) def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): # GH 30238 tz = tz_naive_fixture df = DataFrame([Timestamp(timestamp, tz=tz)]) result = df._append(df.iloc[0]).iloc[-1] expected = Series(Timestamp(timestamp, tz=tz), name=0) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "data, dtype", [ ([1], pd.Int64Dtype()), ([1], pd.CategoricalDtype()), ([pd.Interval(left=0, right=5)], pd.IntervalDtype()), ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")), ([1], pd.SparseDtype()), ], ) def test_other_dtypes(self, data, dtype, using_array_manager): df = DataFrame(data, dtype=dtype) warn = None if using_array_manager and isinstance(dtype, pd.SparseDtype): warn = FutureWarning with tm.assert_produces_warning(warn, match="astype from SparseDtype"): result = df._append(df.iloc[0]).iloc[-1] expected = Series(data, name=0, dtype=dtype) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) def test_append_numpy_bug_1681(self, dtype): # another datetime64 bug if dtype == "datetime64[ns]": index = date_range("2011/1/1", "2012/1/1", freq="W-FRI") else: index = timedelta_range("1 days", "10 days", freq="2D") df = DataFrame() other = DataFrame({"A": "foo", "B": index}, index=index) result = df._append(other) assert (result["B"] == index).all() @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") def test_multiindex_column_append_multiple(self): # GH 29699 df = DataFrame( [[1, 11], [2, 12], [3, 13]], columns=pd.MultiIndex.from_tuples( [("multi", "col1"), ("multi", "col2")], names=["level1", None] ), ) df2 = df.copy() for i in range(1, 10): df[i, "colA"] = 10 df = df._append(df2, ignore_index=True) result = df["multi"] expected = DataFrame( {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} ) tm.assert_frame_equal(result, expected) def test_append_raises_future_warning(self): # GH#35407 df1 = DataFrame([[1, 2], [3, 4]]) df2 = DataFrame([[5, 6], [7, 8]]) with tm.assert_produces_warning(FutureWarning): df1.append(df2)