# Source: aoc-2022/venv/Lib/site-packages/pandas/tests/copy_view/test_methods.py
# (217 lines, 7.7 KiB, Python)

import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
def test_copy(using_copy_on_write):
    """Check that a default (deep) ``copy()`` is independent of its source."""
    source = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    duplicate = source.copy()

    # A deep copy must not alias the source column buffer.
    aliased = np.shares_memory(get_array(duplicate, "a"), get_array(source, "a"))
    assert not aliased

    if using_copy_on_write:
        # The copy's manager is expected to carry no refs.
        assert duplicate._mgr.refs is None

    # Writing into the copy leaves the source value untouched.
    duplicate.iloc[0, 0] = 0
    assert source.iloc[0, 0] == 1
def test_copy_shallow(using_copy_on_write):
    """Check how ``copy(deep=False)`` shares data before and after a write."""
    source = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    shallow = source.copy(deep=False)

    # Right after the shallow copy both frames use the same buffer.
    assert np.shares_memory(get_array(shallow, "a"), get_array(source, "a"))

    if not using_copy_on_write:
        # The write is visible through the source ...
        shallow.iloc[0, 0] = 0
        assert source.iloc[0, 0] == 0
        # ... and the buffer is still shared afterwards.
        assert np.shares_memory(get_array(shallow, "a"), get_array(source, "a"))
        return

    # With copy-on-write enabled the shallow copy's manager has refs set.
    assert shallow._mgr.refs is not None

    # The write must stay local to the shallow copy.
    shallow.iloc[0, 0] = 0
    assert source.iloc[0, 0] == 1
    # The written column no longer shares its buffer with the source ...
    assert not np.shares_memory(get_array(shallow, "a"), get_array(source, "a"))
    # ... while an untouched column still does.
    assert np.shares_memory(get_array(shallow, "c"), get_array(source, "c"))
# -----------------------------------------------------------------------------
# DataFrame methods returning new DataFrame using shallow copy
def test_reset_index(using_copy_on_write):
    """Check ``reset_index()`` followed by a write into the returned frame."""
    source = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12]
    )
    snapshot = source.copy()
    result = source.reset_index()
    result._mgr._verify_integrity()

    def aliased(column):
        # True when `column` of the result and of the source share a buffer.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        # Before any write the result still shares its columns with the source.
        assert aliased("b")
        assert aliased("c")

    # Position 2 of the result is column "b" (the former index sits at 0).
    result.iloc[0, 2] = 0
    assert not aliased("b")
    if using_copy_on_write:
        # The column that was not written keeps sharing its buffer.
        assert aliased("c")

    # The source must be unaffected by the write.
    tm.assert_frame_equal(source, snapshot)
def test_rename_columns(using_copy_on_write):
    """Check ``rename(columns=...)`` followed by a write into the result."""
    source = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = source.copy()
    renamed = source.rename(columns=str.upper)

    def aliased(new_label, old_label):
        # True when the renamed column and its source column share a buffer.
        return np.shares_memory(
            get_array(renamed, new_label), get_array(source, old_label)
        )

    if using_copy_on_write:
        assert aliased("A", "a")

    renamed.iloc[0, 0] = 0
    assert not aliased("A", "a")
    if using_copy_on_write:
        # The column that was not written keeps sharing its buffer.
        assert aliased("C", "c")

    # The result shows the write; the source does not.
    tm.assert_frame_equal(
        renamed,
        DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]}),
    )
    tm.assert_frame_equal(source, snapshot)
def test_rename_columns_modify_parent(using_copy_on_write):
    """Check ``rename(columns=...)`` followed by a write into the parent frame."""
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    renamed = parent.rename(columns=str.upper)
    renamed_snapshot = renamed.copy()

    def aliased(new_label, old_label):
        # True when the renamed column and its parent column share a buffer.
        return np.shares_memory(
            get_array(renamed, new_label), get_array(parent, old_label)
        )

    # The first column is shared exactly when copy-on-write is active.
    if using_copy_on_write:
        assert aliased("A", "a")
    else:
        assert not aliased("A", "a")

    parent.iloc[0, 0] = 0
    assert not aliased("A", "a")
    if using_copy_on_write:
        # The column that was not written keeps sharing its buffer.
        assert aliased("C", "c")

    # The parent shows the write; the renamed frame does not.
    tm.assert_frame_equal(
        parent,
        DataFrame({"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}),
    )
    tm.assert_frame_equal(renamed, renamed_snapshot)
def test_reindex_columns(using_copy_on_write):
    """Check ``reindex(columns=...)`` followed by a write into the result."""
    source = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = source.copy()
    subset = source.reindex(columns=["a", "c"])

    def aliased(column):
        # True when `column` of the subset and of the source share a buffer.
        return np.shares_memory(get_array(subset, column), get_array(source, column))

    # Column "a" is shared exactly when copy-on-write is active.
    if using_copy_on_write:
        assert aliased("a")
    else:
        assert not aliased("a")

    # After the write, column "a" is no longer shared in either mode.
    subset.iloc[0, 0] = 0
    assert not aliased("a")
    if using_copy_on_write:
        # The column that was not written keeps sharing its buffer.
        assert aliased("c")

    tm.assert_frame_equal(source, snapshot)
def test_select_dtypes(using_copy_on_write):
    """Check ``select_dtypes()`` followed by a write into the result."""
    source = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = source.copy()
    selected = source.select_dtypes("int64")
    selected._mgr._verify_integrity()

    def aliased(column):
        # True when `column` of the selection and of the source share a buffer.
        return np.shares_memory(
            get_array(selected, column), get_array(source, column)
        )

    # Column "a" is shared exactly when copy-on-write is active.
    if using_copy_on_write:
        assert aliased("a")
    else:
        assert not aliased("a")

    selected.iloc[0, 0] = 0
    if using_copy_on_write:
        # The write must have detached the column from the source.
        assert not aliased("a")

    # In both modes the source keeps its original values.
    tm.assert_frame_equal(source, snapshot)
def test_to_frame(using_copy_on_write):
    """Check ``Series.to_frame()`` with writes on either side of the conversion."""
    series = Series([1, 2, 3])
    series_snapshot = series.copy()

    frame = series[:].to_frame()

    # Immediately after the conversion the frame shares the series' buffer.
    assert np.shares_memory(series.values, get_array(frame, 0))

    # --- write into the frame ---
    frame.iloc[0, 0] = 0

    if using_copy_on_write:
        # The write detaches the frame's column; the series is unchanged.
        assert not np.shares_memory(series.values, get_array(frame, 0))
        tm.assert_series_equal(series, series_snapshot)
    else:
        # Here the frame from to_frame() acts as a view, so the write shows
        # up in the parent series.
        mutated = series_snapshot.copy()
        mutated.iloc[0] = 0
        tm.assert_series_equal(series, mutated)

    # --- write into the series ---
    frame = series[:].to_frame()
    series.iloc[0] = 0

    if using_copy_on_write:
        # The frame keeps the original values.
        tm.assert_frame_equal(frame, series_snapshot.to_frame())
    else:
        # The frame is expected to hold 0 in its first cell.
        mutated_frame = series_snapshot.copy().to_frame()
        mutated_frame.iloc[0, 0] = 0
        tm.assert_frame_equal(frame, mutated_frame)
@pytest.mark.parametrize(
    "method, idx",
    [
        (lambda df: df.copy(deep=False).copy(deep=False), 0),
        (lambda df: df.reset_index().reset_index(), 2),
        (lambda df: df.rename(columns=str.upper).rename(columns=str.lower), 0),
        (lambda df: df.copy(deep=False).select_dtypes(include="number"), 0),
    ],
    ids=["shallow-copy", "reset_index", "rename", "select_dtypes"],
)
def test_chained_methods(request, method, idx, using_copy_on_write):
    """Check that the result of two chained methods is isolated from its parent.

    ``method`` builds the derived frame from the parent and ``idx`` is the
    position in the derived frame of the parent's first column. The isolation
    checks are skipped for the "shallow-copy" case when copy-on-write is off.
    """
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()

    # Without copy-on-write only the plain shallow-copy chain is a view, so
    # isolation is expected in every other combination.
    expect_isolation = (
        using_copy_on_write or request.node.callspec.id != "shallow-copy"
    )

    # Writing to the derived frame must not reach the parent.
    derived = method(parent)
    derived.iloc[0, idx] = 0
    if expect_isolation:
        tm.assert_frame_equal(parent, snapshot)

    # Writing to the parent must not reach a freshly derived frame.
    derived = method(parent)
    parent.iloc[0, 0] = 0
    if expect_isolation:
        tm.assert_frame_equal(derived.iloc[:, idx:], snapshot)