venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,69 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Module-level mark, applied by pytest to every test in this module: ignore
# FutureWarnings whose message matches "Setting a value on a view".
pytestmark = pytest.mark.filterwarnings(
    "ignore:Setting a value on a view:FutureWarning"
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: DatetimeIndex(x),
        lambda x: DatetimeIndex(DatetimeIndex(x)),
    ],
)
def test_datetimeindex(using_copy_on_write, cons):
    """Build a DatetimeIndex from a Series, then overwrite the Series.

    ``cons`` constructs the index either directly or through a nested
    ``DatetimeIndex`` call.  When ``using_copy_on_write`` is set, the index
    must still equal the deep copy taken before the Series was modified.
    """
    stamps = date_range("2019-12-31", periods=3, freq="D")
    source = Series(stamps)
    idx = cons(source)
    expected = idx.copy(deep=True)

    source.iloc[0] = Timestamp("2020-12-31")

    if not using_copy_on_write:
        # Nothing is asserted for the non-CoW mode.
        return
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_tz_convert(using_copy_on_write):
    """Overwrite a tz-aware Series after deriving a ``tz_convert`` index from it.

    When ``using_copy_on_write`` is set, the converted index must still equal
    the deep copy taken before the Series was modified.
    """
    stamps = date_range("2019-12-31", periods=3, freq="D", tz="Europe/Berlin")
    source = Series(stamps)
    idx = DatetimeIndex(source).tz_convert("US/Eastern")
    expected = idx.copy(deep=True)

    source.iloc[0] = Timestamp("2020-12-31", tz="Europe/Berlin")

    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_tz_localize(using_copy_on_write):
    """Overwrite a naive Series after deriving a ``tz_localize`` index from it.

    When ``using_copy_on_write`` is set, the localized index must still equal
    the deep copy taken before the Series was modified.
    """
    stamps = date_range("2019-12-31", periods=3, freq="D")
    source = Series(stamps)
    idx = DatetimeIndex(source).tz_localize("Europe/Berlin")
    expected = idx.copy(deep=True)

    source.iloc[0] = Timestamp("2020-12-31")

    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_isocalendar(using_copy_on_write):
    """Overwrite a Series after calling ``isocalendar`` on an index built from it.

    When ``using_copy_on_write`` is set, the index of the ``isocalendar``
    result must still equal the deep copy taken before the Series changed.
    """
    stamps = date_range("2019-12-31", periods=3, freq="D")
    source = Series(stamps)
    iso = DatetimeIndex(source).isocalendar()
    expected = iso.index.copy(deep=True)

    source.iloc[0] = Timestamp("2020-12-31")

    if not using_copy_on_write:
        return
    tm.assert_index_equal(iso.index, expected)
|
||||
|
||||
|
||||
def test_index_values(using_copy_on_write):
    """``.values`` of a ``date_range`` index is read-only exactly when CoW is on."""
    idx = date_range("2019-12-31", periods=3, freq="D")
    raw = idx.values
    # ``not`` always yields a bool, so the identity check mirrors the
    # ``is False`` / ``is True`` assertions for the two modes.
    assert raw.flags.writeable is (not using_copy_on_write)
|
@ -0,0 +1,184 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def index_view(index_data=None):
    """Return an index together with a slice of the frame it was taken from.

    A two-column frame (``"a"`` from ``index_data``, ``"b"`` filled with
    ``1.5``) is created, sliced with ``df[:]``, and then column ``"a"`` is
    moved into the index via ``set_index``.

    Parameters
    ----------
    index_data : list-like, optional
        Values for column ``"a"``.  Defaults to ``[1, 2]``.

    Returns
    -------
    tuple
        ``(idx, view)`` where ``idx`` is the index of the re-indexed frame and
        ``view`` is the ``df[:]`` slice taken before ``set_index``.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if index_data is None:
        index_data = [1, 2]
    df = DataFrame({"a": index_data, "b": 1.5})
    view = df[:]
    # Rebind ``df`` so the original frame object is no longer referenced here.
    df = df.set_index("a", drop=True)
    idx = df.index
    return idx, view
|
||||
|
||||
|
||||
def test_set_index_update_column(using_copy_on_write, warn_copy_on_write):
    """Write to a column that ``set_index(drop=False)`` also used as the index.

    With CoW the index must keep its original values; otherwise the test
    expects the write to show up in the index.
    """
    frame = DataFrame({"a": [1, 2], "b": 1})
    frame = frame.set_index("a", drop=False)
    expected = frame.index.copy(deep=True)

    with tm.assert_cow_warning(warn_copy_on_write):
        frame.iloc[0, 0] = 100

    if not using_copy_on_write:
        expected = Index([100, 2], name="a")
    tm.assert_index_equal(frame.index, expected)
|
||||
|
||||
|
||||
def test_set_index_drop_update_column(using_copy_on_write):
    """Write through a ``df[:]`` slice taken before ``set_index(drop=True)``.

    The index of the re-indexed frame must equal its earlier deep copy,
    whichever mode is active.
    """
    frame = DataFrame({"a": [1, 2], "b": 1.5})
    sliced = frame[:]
    frame = frame.set_index("a", drop=True)
    expected = frame.index.copy(deep=True)

    sliced.iloc[0, 0] = 100

    tm.assert_index_equal(frame.index, expected)
|
||||
|
||||
|
||||
def test_set_index_series(using_copy_on_write, warn_copy_on_write):
    """Overwrite a Series after passing it to ``DataFrame.set_index``.

    With CoW the frame's index must keep its original values; otherwise the
    test expects the index to reflect the write.
    """
    frame = DataFrame({"a": [1, 2], "b": 1.5})
    labels = Series([10, 11])
    frame = frame.set_index(labels)
    expected = frame.index.copy(deep=True)

    with tm.assert_cow_warning(warn_copy_on_write):
        labels.iloc[0] = 100

    if not using_copy_on_write:
        expected = Index([100, 11])
    tm.assert_index_equal(frame.index, expected)
|
||||
|
||||
|
||||
def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write):
    """Overwrite a Series after assigning it to ``df.index``.

    With CoW the frame's index must keep its original values; otherwise the
    test expects the index to reflect the write.
    """
    frame = DataFrame({"a": [1, 2], "b": 1.5})
    labels = Series([10, 11])
    frame.index = labels
    expected = frame.index.copy(deep=True)

    with tm.assert_cow_warning(warn_copy_on_write):
        labels.iloc[0] = 100

    if not using_copy_on_write:
        expected = Index([100, 11])
    tm.assert_index_equal(frame.index, expected)
|
||||
|
||||
|
||||
def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write):
    """Overwrite a Series after an Index built from it was assigned to ``df.index``.

    The local name for the intermediate Index is dropped before the write so
    that only the frame refers to it.
    """
    frame = DataFrame({"a": [1, 2], "b": 1.5})
    labels = Series([10, 11])
    rhs_index = Index(labels)
    frame.index = rhs_index
    del rhs_index  # drop the local reference
    expected = frame.index.copy(deep=True)

    with tm.assert_cow_warning(warn_copy_on_write):
        labels.iloc[0] = 100

    if not using_copy_on_write:
        expected = Index([100, 11])
    tm.assert_index_equal(frame.index, expected)
|
||||
|
||||
|
||||
def test_index_from_series(using_copy_on_write, warn_copy_on_write):
    """Overwrite a Series after building an ``Index`` from it.

    With CoW the index must keep its original values; otherwise the test
    expects the index to reflect the write.
    """
    source = Series([1, 2])
    idx = Index(source)
    expected = idx.copy(deep=True)

    with tm.assert_cow_warning(warn_copy_on_write):
        source.iloc[0] = 100

    if not using_copy_on_write:
        expected = Index([100, 2])
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_index_from_series_copy(using_copy_on_write):
    """After ``Index(ser, copy=True)``, writing to the Series keeps its buffer.

    The copied index is kept alive (but otherwise unused) while the Series is
    modified; the Series array must still share memory with the array read
    before the write.
    """
    source = Series([1, 2])
    idx = Index(source, copy=True)  # noqa: F841
    before = get_array(source)

    source.iloc[0] = 100

    assert np.shares_memory(get_array(source), before)
|
||||
|
||||
|
||||
def test_index_from_index(using_copy_on_write, warn_copy_on_write):
    """Overwrite a Series after wrapping an Index built from it in a second Index.

    With CoW the re-wrapped index must keep its original values; otherwise the
    test expects it to reflect the write.
    """
    source = Series([1, 2])
    idx = Index(source)
    idx = Index(idx)
    expected = idx.copy(deep=True)

    with tm.assert_cow_warning(warn_copy_on_write):
        source.iloc[0] = 100

    if not using_copy_on_write:
        expected = Index([100, 2])
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        pytest.param(lambda x: x._shallow_copy(x._values), id="_shallow_copy"),
        pytest.param(lambda x: x.view(), id="view"),
        pytest.param(lambda x: x.take([0, 1]), id="take"),
        pytest.param(lambda x: x.repeat([1, 1]), id="repeat"),
        pytest.param(lambda x: x[slice(0, 2)], id="getitem_slice"),
        pytest.param(lambda x: x[[0, 1]], id="getitem_list"),
        pytest.param(lambda x: x._getitem_slice(slice(0, 2)), id="_getitem_slice"),
        pytest.param(lambda x: x.delete([]), id="delete"),
        pytest.param(lambda x: x.rename("b"), id="rename"),
        pytest.param(lambda x: x.astype("Int64", copy=False), id="astype"),
    ],
)
def test_index_ops(using_copy_on_write, func, request):
    """Apply an Index operation, then write through the frame slice.

    ``index_view`` supplies the index and the slice.  Under CoW the result of
    ``func`` must still match the index values captured beforehand (names are
    not compared because ``rename`` changes them).
    """
    idx, view_ = index_view()
    expected = idx.copy(deep=True)
    # The astype case changes dtype, so the expectation is converted as well.
    if "astype" in request.node.callspec.id:
        expected = expected.astype("Int64")

    idx = func(idx)
    view_.iloc[0, 0] = 100

    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, expected, check_names=False)
|
||||
|
||||
|
||||
def test_infer_objects(using_copy_on_write):
    """Call ``infer_objects(copy=False)`` on a string index, then write via the slice.

    Under CoW the inferred index must still match the values captured before
    the write (names are not compared).
    """
    idx, view_ = index_view(["a", "b"])
    expected = idx.copy(deep=True)

    idx = idx.infer_objects(copy=False)
    view_.iloc[0, 0] = "aaaa"

    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, expected, check_names=False)
|
||||
|
||||
|
||||
def test_index_to_frame(using_copy_on_write):
    """Check memory sharing of ``Index.to_frame`` and that writes do not leak back.

    With CoW the frame column shares memory with the index and the frame's
    manager reports a reference for block 0; without CoW no memory is shared.
    In both modes, writing to the frame must leave the index unchanged.
    """
    idx = Index([1, 2, 3], name="a")
    expected = idx.copy(deep=True)
    frame = idx.to_frame()

    shared = np.shares_memory(get_array(frame, "a"), idx._values)
    if using_copy_on_write:
        assert shared
        assert not frame._mgr._has_no_reference(0)
    else:
        assert not shared

    frame.iloc[0, 0] = 100
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_index_values(using_copy_on_write):
    """``.values`` of an integer ``Index`` is read-only exactly when CoW is on."""
    idx = Index([1, 2, 3])
    raw = idx.values
    # ``not`` always yields a bool, so the identity check mirrors the
    # ``is False`` / ``is True`` assertions for the two modes.
    assert raw.flags.writeable is (not using_copy_on_write)
|
@ -0,0 +1,30 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Module-level mark, applied by pytest to every test in this module: ignore
# FutureWarnings whose message matches "Setting a value on a view".
pytestmark = pytest.mark.filterwarnings(
    "ignore:Setting a value on a view:FutureWarning"
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: PeriodIndex(x),
        lambda x: PeriodIndex(PeriodIndex(x)),
    ],
)
def test_periodindex(using_copy_on_write, cons):
    """Build a PeriodIndex from a Series, then overwrite the Series.

    ``cons`` constructs the index either directly or through a nested
    ``PeriodIndex`` call.  When ``using_copy_on_write`` is set, the index must
    still equal the deep copy taken before the Series was modified.
    """
    periods = period_range("2019-12-31", periods=3, freq="D")
    source = Series(periods)
    idx = cons(source)
    expected = idx.copy(deep=True)

    source.iloc[0] = Period("2020-12-31")

    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,30 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Module-level mark, applied by pytest to every test in this module: ignore
# FutureWarnings whose message matches "Setting a value on a view".
pytestmark = pytest.mark.filterwarnings(
    "ignore:Setting a value on a view:FutureWarning"
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: TimedeltaIndex(x),
        lambda x: TimedeltaIndex(TimedeltaIndex(x)),
    ],
)
def test_timedeltaindex(using_copy_on_write, cons):
    """Build a TimedeltaIndex from a Series, then overwrite the Series.

    ``cons`` constructs the index either directly or through a nested
    ``TimedeltaIndex`` call.  When ``using_copy_on_write`` is set, the index
    must still equal the deep copy taken before the Series was modified.
    """
    deltas = timedelta_range("1 day", periods=3)
    source = Series(deltas)
    idx = cons(source)
    expected = idx.copy(deep=True)

    source.iloc[0] = Timedelta("5 days")

    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,190 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for accessing underlying array of Series/DataFrame
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        pytest.param(lambda ser: ser.values, id="values"),
        pytest.param(lambda ser: np.asarray(ser), id="asarray"),
    ],
)
def test_series_values(using_copy_on_write, method):
    """Check writeability of the array obtained from a Series via ``method``."""
    source = Series([1, 2, 3], name="name")
    source_orig = source.copy()

    out = method(source)

    if not using_copy_on_write:
        # Without CoW the array is writeable and writes reach the Series.
        assert out.flags.writeable is True
        out[0] = 0
        assert source.iloc[0] == 0
        return

    # With CoW the array is still a view, but it is flagged read-only ...
    assert np.shares_memory(out, get_array(source, "name"))
    assert out.flags.writeable is False

    # ... so the Series cannot be modified through it
    with pytest.raises(ValueError, match="read-only"):
        out[0] = 0
    tm.assert_series_equal(source, source_orig)

    # while modifying the Series directly keeps working
    source.iloc[0] = 0
    assert source.values[0] == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method",
    [
        pytest.param(lambda df: df.values, id="values"),
        pytest.param(lambda df: np.asarray(df), id="asarray"),
    ],
)
def test_dataframe_values(using_copy_on_write, using_array_manager, method):
    """Check writeability of the array obtained from a DataFrame via ``method``."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    frame_orig = frame.copy()

    out = method(frame)

    if not using_copy_on_write:
        assert out.flags.writeable is True
        out[0, 0] = 0
        if using_array_manager:
            # With the array manager the write is expected not to reach the frame.
            tm.assert_frame_equal(frame, frame_orig)
        else:
            assert frame.iloc[0, 0] == 0
        return

    # With CoW the array is still a view, but it is flagged read-only ...
    assert np.shares_memory(out, get_array(frame, "a"))
    assert out.flags.writeable is False

    # ... so the frame cannot be modified through it
    with pytest.raises(ValueError, match="read-only"):
        out[0, 0] = 0
    tm.assert_frame_equal(frame, frame_orig)

    # while modifying the frame directly keeps working
    frame.iloc[0, 0] = 0
    assert frame.values[0, 0] == 0
|
||||
|
||||
|
||||
def test_series_to_numpy(using_copy_on_write):
    """Check view/copy status and writeability of ``Series.to_numpy`` results."""
    source = Series([1, 2, 3], name="name")
    source_orig = source.copy()

    # Default call: no copy requested, no dtype given, no NAs in the data.
    out = source.to_numpy()
    if not using_copy_on_write:
        assert out.flags.writeable is True
        out[0] = 0
        assert source.iloc[0] == 0
    else:
        # to_numpy still returns a view, but it is flagged read-only ...
        assert np.shares_memory(out, get_array(source, "name"))
        assert out.flags.writeable is False

        # ... so the Series cannot be modified through it
        with pytest.raises(ValueError, match="read-only"):
            out[0] = 0
        tm.assert_series_equal(source, source_orig)

        # while modifying the Series directly keeps working
        source.iloc[0] = 0
        assert source.values[0] == 0

    # An explicit copy=True yields an independent, writeable array.
    source = Series([1, 2, 3], name="name")
    out = source.to_numpy(copy=True)
    assert not np.shares_memory(out, get_array(source, "name"))
    assert out.flags.writeable is True

    # A dtype that forces a conversion likewise yields a writeable array.
    source = Series([1, 2, 3], name="name")
    out = source.to_numpy(dtype="float64")
    assert not np.shares_memory(out, get_array(source, "name"))
    assert out.flags.writeable is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize("order", ["F", "C"])
def test_ravel_read_only(using_copy_on_write, order):
    """``Series.ravel`` warns as deprecated and returns a view of the data.

    The returned array must share memory with the Series in both modes and be
    read-only when CoW is enabled.
    """
    source = Series([1, 2, 3])
    with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
        flat = source.ravel(order=order)

    if using_copy_on_write:
        assert flat.flags.writeable is False
    assert np.shares_memory(get_array(source), flat)
|
||||
|
||||
|
||||
def test_series_array_ea_dtypes(using_copy_on_write):
    """``np.asarray`` on an ``Int64`` Series shares memory with the Series.

    Checked once with an explicit ``int64`` dtype and once without; in both
    cases the array is read-only exactly when CoW is enabled.
    """
    source = Series([1, 2, 3], dtype="Int64")

    for asarray_kwargs in ({"dtype": "int64"}, {}):
        out = np.asarray(source, **asarray_kwargs)
        assert np.shares_memory(out, get_array(source))
        assert out.flags.writeable is (not using_copy_on_write)
|
||||
|
||||
|
||||
def test_dataframe_array_ea_dtypes(using_copy_on_write):
    """``np.asarray`` on a single-column ``Int64`` frame shares memory with it.

    Checked once with an explicit ``int64`` dtype and once without; in both
    cases the array is read-only exactly when CoW is enabled.
    """
    frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")

    for asarray_kwargs in ({"dtype": "int64"}, {}):
        out = np.asarray(frame, **asarray_kwargs)
        assert np.shares_memory(out, get_array(frame, "a"))
        assert out.flags.writeable is (not using_copy_on_write)
|
||||
|
||||
|
||||
def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
    """``np.asarray`` on a ``string``-dtype frame: sharing and writeability.

    Memory sharing is only asserted when the array manager is not in use; the
    array is read-only exactly when CoW is enabled.
    """
    frame = DataFrame({"a": ["a", "b"]}, dtype="string")
    out = np.asarray(frame)

    if not using_array_manager:
        assert np.shares_memory(out, get_array(frame, "a"))
    assert out.flags.writeable is (not using_copy_on_write)
|
||||
|
||||
|
||||
def test_dataframe_multiple_numpy_dtypes():
    """``np.asarray`` on a frame with int and float columns gives a fresh array.

    The result must not share memory with column ``"a"`` and must be writeable.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": 1.5})
    out = np.asarray(frame)

    assert not np.shares_memory(out, get_array(frame, "a"))
    assert out.flags.writeable is True
|
||||
|
||||
|
||||
def test_values_is_ea(using_copy_on_write):
    """``np.asarray`` on a datetime-column frame is read-only exactly under CoW."""
    frame = DataFrame({"a": date_range("2012-01-01", periods=3)})
    out = np.asarray(frame)
    assert out.flags.writeable is (not using_copy_on_write)
|
||||
|
||||
|
||||
def test_empty_dataframe():
    """``np.asarray`` on an empty DataFrame returns a writeable array."""
    empty = DataFrame()
    out = np.asarray(empty)
    assert out.flags.writeable is True
|
@ -0,0 +1,260 @@
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.pyarrow import pa_version_under12p0
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_astype_single_dtype(using_copy_on_write):
    """``astype("float64")`` on a mixed int/float frame.

    Under CoW the float column ``"c"`` is shared with the source until it is
    written to; the int column ``"a"`` is never shared.  The source frame and
    the converted frame must not observe each other's writes.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
    frame_orig = frame.copy()
    converted = frame.astype("float64")

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    else:
        assert np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    # writing to the result must detach the touched column under CoW
    converted.iloc[0, 2] = 5.5
    if using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
    tm.assert_frame_equal(frame, frame_orig)

    # writing to the source must not change a fresh result either
    converted = frame.astype("float64")
    frame.iloc[0, 2] = 5.5
    tm.assert_frame_equal(converted, frame_orig.astype("float64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
    """``astype`` between 64-bit integer dtypes shares memory under CoW.

    The pyarrow-backed target is skipped when pyarrow is not installed.
    """
    if new_dtype == "int64[pyarrow]":
        pytest.importorskip("pyarrow")
    frame = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
    frame_orig = frame.copy()
    converted = frame.astype(new_dtype)

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    else:
        assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    # writing to the result must detach the column under CoW
    converted.iloc[0, 0] = 10
    if using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, frame_orig)

    # writing to the source must not change a fresh result either
    converted = frame.astype(new_dtype)
    frame.iloc[0, 0] = 100
    tm.assert_frame_equal(converted, frame_orig.astype(new_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
def test_astype_different_target_dtype(using_copy_on_write, dtype):
    """``astype`` to a different dtype never shares memory with the source.

    The pyarrow-backed target is skipped when pyarrow is not installed.  Under
    CoW the result's manager must report no reference for block 0.
    """
    if dtype == "int32[pyarrow]":
        pytest.importorskip("pyarrow")
    frame = DataFrame({"a": [1, 2, 3]})
    frame_orig = frame.copy()
    converted = frame.astype(dtype)

    assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert converted._mgr._has_no_reference(0)

    converted.iloc[0, 0] = 5
    tm.assert_frame_equal(frame, frame_orig)

    # writing to the source must not change a fresh result either
    converted = frame.astype(dtype)
    frame.iloc[0, 0] = 100
    tm.assert_frame_equal(converted, frame_orig.astype(dtype))
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
def test_astype_numpy_to_ea():
    """With the CoW option enabled, ``astype("Int64")`` shares memory with the source."""
    source = Series([1, 2, 3])
    # The option is only switched on for the duration of the conversion.
    with pd.option_context("mode.copy_on_write", True):
        converted = source.astype("Int64")
    assert np.shares_memory(get_array(source), get_array(converted))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, new_dtype", [("object", "string"), ("string", "object")]
)
def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype):
    """``astype`` between ``object`` and ``string``: write to the result.

    Memory is shared only under CoW; in both modes the source frame must be
    unchanged after the converted frame is written to.
    """
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
    frame_orig = frame.copy()
    converted = frame.astype(new_dtype)

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    else:
        assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    converted.iloc[0, 0] = "x"
    tm.assert_frame_equal(frame, frame_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, new_dtype", [("object", "string"), ("string", "object")]
)
def test_astype_string_and_object_update_original(
    using_copy_on_write, dtype, new_dtype
):
    """``astype`` between ``object`` and ``string``: write to the source.

    Memory is shared only under CoW; in both modes the converted frame must be
    unchanged after the source frame is written to.
    """
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
    converted = frame.astype(new_dtype)
    converted_orig = converted.copy()

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    else:
        assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    frame.iloc[0, 0] = "x"
    tm.assert_frame_equal(converted, converted_orig)
|
||||
|
||||
|
||||
def test_astype_string_copy_on_pickle_roundrip():
    """``astype(str)`` on an unpickled object Series must not alter that Series.

    Regression test for https://github.com/pandas-dev/pandas/issues/54654:
    ensure_string_array may alter the array in place.
    """
    original = Series(np.array([(1, 2), None, 1], dtype="object"))
    # The pickled bytes are produced right here, so loading them is safe.
    restored = pickle.loads(pickle.dumps(original))
    restored.astype(str)
    tm.assert_series_equal(original, restored)
|
||||
|
||||
|
||||
def test_astype_dict_dtypes(using_copy_on_write):
    """``astype`` with a per-column dtype mapping.

    Under CoW the columns whose dtype is unchanged (``"c"``) or not listed
    (``"b"``) are shared until written to; ``"a"`` is converted and never
    shared.  The source frame must be unchanged at the end.
    """
    frame = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
    )
    frame_orig = frame.copy()
    converted = frame.astype({"a": "float64", "c": "float64"})

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
        assert not np.shares_memory(get_array(converted, "b"), get_array(frame, "b"))
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
    else:
        assert np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
        assert np.shares_memory(get_array(converted, "b"), get_array(frame, "b"))
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))

    # writing to the result must detach each touched column under CoW
    converted.iloc[0, 2] = 5.5
    if using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))

    converted.iloc[0, 1] = 10
    if using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "b"), get_array(frame, "b"))
    tm.assert_frame_equal(frame, frame_orig)
|
||||
|
||||
|
||||
def test_astype_different_datetime_resos(using_copy_on_write):
    """``astype("datetime64[ms]")`` on a datetime column does not share memory.

    Under CoW the result's manager must report no reference for block 0.
    """
    frame = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")})
    converted = frame.astype("datetime64[ms]")

    assert not np.shares_memory(get_array(frame, "a"), get_array(converted, "a"))
    if using_copy_on_write:
        assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_astype_different_timezones(using_copy_on_write):
    """``astype`` to another timezone at the same resolution.

    Only checked under CoW: the result must hold a reference for block 0 and
    share memory with the source column.
    """
    frame = DataFrame(
        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
    )
    converted = frame.astype("datetime64[ns, Europe/Berlin]")

    if not using_copy_on_write:
        return
    assert not converted._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(frame, "a"), get_array(converted, "a"))
|
||||
|
||||
|
||||
def test_astype_different_timezones_different_reso(using_copy_on_write):
    """``astype`` to another timezone and a different resolution.

    Only checked under CoW: the result must hold no reference for block 0 and
    must not share memory with the source column.
    """
    frame = DataFrame(
        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
    )
    converted = frame.astype("datetime64[ms, Europe/Berlin]")

    if not using_copy_on_write:
        return
    assert converted._mgr._has_no_reference(0)
    assert not np.shares_memory(get_array(frame, "a"), get_array(converted, "a"))
|
||||
|
||||
|
||||
def test_astype_arrow_timestamp(using_copy_on_write):
    """``astype("timestamp[ns][pyarrow]")`` on an ``M8[ns]`` column.

    Skipped without pyarrow.  Only checked under CoW: the result must hold a
    reference for block 0, and whether its pyarrow array shares memory with
    the source depends on ``pa_version_under12p0``.
    """
    pytest.importorskip("pyarrow")
    stamp_text = "2020-01-01 01:01:01.000001"
    frame = DataFrame(
        {"a": [Timestamp(stamp_text), Timestamp(stamp_text)]},
        dtype="M8[ns]",
    )
    converted = frame.astype("timestamp[ns][pyarrow]")

    if not using_copy_on_write:
        return
    assert not converted._mgr._has_no_reference(0)
    if pa_version_under12p0:
        assert not np.shares_memory(
            get_array(frame, "a"), get_array(converted, "a")._pa_array
        )
    else:
        assert np.shares_memory(
            get_array(frame, "a"), get_array(converted, "a")._pa_array
        )
|
||||
|
||||
|
||||
def test_convert_dtypes_infer_objects(using_copy_on_write):
    """``convert_dtypes`` with every conversion switched off.

    Memory is shared with the source only under CoW; in both modes the source
    Series must be unchanged after the result is written to.
    """
    source = Series(["a", "b", "c"])
    source_orig = source.copy()
    converted = source.convert_dtypes(
        convert_integer=False,
        convert_boolean=False,
        convert_floating=False,
        convert_string=False,
    )

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(source), get_array(converted))
    else:
        assert np.shares_memory(get_array(source), get_array(converted))

    converted.iloc[0] = "x"
    tm.assert_series_equal(source, source_orig)
|
||||
|
||||
|
||||
def test_convert_dtypes(using_copy_on_write):
    """``DataFrame.convert_dtypes`` on string, int, float and bool columns.

    Every column shares memory with the source under CoW and none does
    otherwise; the source frame must be unchanged after the result is written
    to.
    """
    frame = DataFrame(
        {"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}
    )
    frame_orig = frame.copy()
    converted = frame.convert_dtypes()

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
        assert not np.shares_memory(get_array(converted, "b"), get_array(frame, "b"))
        assert not np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))
        assert not np.shares_memory(get_array(converted, "d"), get_array(frame, "d"))
    else:
        assert np.shares_memory(get_array(converted, "a"), get_array(frame, "a"))
        assert np.shares_memory(get_array(converted, "d"), get_array(frame, "d"))
        assert np.shares_memory(get_array(converted, "b"), get_array(frame, "b"))
        assert np.shares_memory(get_array(converted, "c"), get_array(frame, "c"))

    converted.iloc[0, 0] = "x"
    tm.assert_frame_equal(frame, frame_orig)
|
@ -0,0 +1,174 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
from pandas.errors import (
|
||||
ChainedAssignmentError,
|
||||
SettingWithCopyWarning,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_methods_iloc_warn(using_copy_on_write):
    """In-place methods chained onto ``df.iloc[:, 0]`` warn when CoW is off.

    Nothing runs when ``using_copy_on_write`` is set.  Each call is written out
    explicitly on the temporary ``df.iloc[:, 0]`` object, and each is expected
    to produce a warning matching "A value".
    """
    if not using_copy_on_write:
        df = DataFrame({"a": [1, 2, 3], "b": 1})
        with tm.assert_cow_warning(match="A value"):
            df.iloc[:, 0].replace(1, 5, inplace=True)

        with tm.assert_cow_warning(match="A value"):
            df.iloc[:, 0].fillna(1, inplace=True)

        with tm.assert_cow_warning(match="A value"):
            df.iloc[:, 0].interpolate(inplace=True)

        with tm.assert_cow_warning(match="A value"):
            df.iloc[:, 0].ffill(inplace=True)

        with tm.assert_cow_warning(match="A value"):
            df.iloc[:, 0].bfill(inplace=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, args",
    [
        ("replace", (4, 5)),
        ("fillna", (1,)),
        ("interpolate", ()),
        ("bfill", ()),
        ("ffill", ()),
    ],
)
def test_methods_iloc_getitem_item_cache(
    func, args, using_copy_on_write, warn_copy_on_write
):
    """Run in-place method ``func`` on columns obtained in several different ways.

    The first four scenarios call the method on a named Series and assert
    nothing beyond the call completing.  The last two call it directly on the
    temporary ``df["a"]`` and check for the chained-assignment error/warning.
    """
    # ensure we don't incorrectly raise chained assignment warning because
    # of the item cache / iloc not setting the item cache
    df_orig = DataFrame({"a": [1, 2, 3], "b": 1})

    df = df_orig.copy()
    ser = df.iloc[:, 0]
    getattr(ser, func)(*args, inplace=True)

    # parent that holds item_cache is dead, so don't increase ref count
    df = df_orig.copy()
    ser = df.copy()["a"]
    getattr(ser, func)(*args, inplace=True)

    df = df_orig.copy()
    df["a"]  # populate the item_cache
    ser = df.iloc[:, 0]  # iloc creates a new object
    getattr(ser, func)(*args, inplace=True)

    df = df_orig.copy()
    df["a"]  # populate the item_cache
    ser = df["a"]
    getattr(ser, func)(*args, inplace=True)

    df = df_orig.copy()
    df["a"]  # populate the item_cache
    # TODO(CoW-warn) because of the usage of *args, this doesn't warn on Py3.11+
    if using_copy_on_write:
        with tm.raises_chained_assignment_error(not PY311):
            getattr(df["a"], func)(*args, inplace=True)
    else:
        with tm.assert_cow_warning(not PY311, match="A value"):
            getattr(df["a"], func)(*args, inplace=True)

    df = df_orig.copy()
    ser = df["a"]  # populate the item_cache and keep ref
    if using_copy_on_write:
        with tm.raises_chained_assignment_error(not PY311):
            getattr(df["a"], func)(*args, inplace=True)
    else:
        # ideally also warns on the default mode, but the ser's _cacher
        # messes up the refcount + even in warning mode this doesn't trigger
        # the warning on Py3.11+ (see above)
        with tm.assert_cow_warning(warn_copy_on_write and not PY311, match="A value"):
            getattr(df["a"], func)(*args, inplace=True)
|
||||
|
||||
|
||||
def test_methods_iloc_getitem_item_cache_fillna(
    using_copy_on_write, warn_copy_on_write
):
    """Run ``fillna(1, inplace=True)`` on columns obtained in several different ways.

    Same scenarios as the parametrized item-cache test, but with ``fillna``
    called directly instead of through ``getattr(...)(*args)``.
    """
    # ensure we don't incorrectly raise chained assignment warning because
    # of the item cache / iloc not setting the item cache
    df_orig = DataFrame({"a": [1, 2, 3], "b": 1})

    df = df_orig.copy()
    ser = df.iloc[:, 0]
    ser.fillna(1, inplace=True)

    # parent that holds item_cache is dead, so don't increase ref count
    df = df_orig.copy()
    ser = df.copy()["a"]
    ser.fillna(1, inplace=True)

    df = df_orig.copy()
    df["a"]  # populate the item_cache
    ser = df.iloc[:, 0]  # iloc creates a new object
    ser.fillna(1, inplace=True)

    df = df_orig.copy()
    df["a"]  # populate the item_cache
    ser = df["a"]
    ser.fillna(1, inplace=True)

    df = df_orig.copy()
    df["a"]  # populate the item_cache
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["a"].fillna(1, inplace=True)
    else:
        with tm.assert_cow_warning(match="A value"):
            df["a"].fillna(1, inplace=True)

    df = df_orig.copy()
    ser = df["a"]  # populate the item_cache and keep ref
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["a"].fillna(1, inplace=True)
    else:
        # TODO(CoW-warn) ideally also warns on the default mode, but the ser's
        # _cacher messes up the refcount
        with tm.assert_cow_warning(warn_copy_on_write, match="A value"):
            df["a"].fillna(1, inplace=True)
|
||||
|
||||
|
||||
# TODO(CoW-warn) expand the cases
|
||||
@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write):
    """Chained ``df["a"][indexer] = 0`` must emit exactly one warning.

    The warning category is ``ChainedAssignmentError`` under CoW; otherwise it
    is a ``FutureWarning`` whose message mentions "ChainedAssignmentError".
    """
    # ensure we only get a single warning for those typical cases of chained
    # assignment
    df = DataFrame({"a": [1, 2, 3], "b": 1})

    # using custom check instead of tm.assert_produces_warning because that doesn't
    # fail if multiple warnings are raised
    with pytest.warns() as record:
        df["a"][indexer] = 0
    assert len(record) == 1
    if using_copy_on_write:
        assert record[0].category == ChainedAssignmentError
    else:
        assert record[0].category == FutureWarning
        assert "ChainedAssignmentError" in record[0].message.args[0]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.SettingWithCopyWarning")
@pytest.mark.parametrize(
    "indexer", ["a", ["a", "b"], slice(0, 2), np.array([True, False, True])]
)
def test_frame_setitem(indexer, using_copy_on_write):
    # Case: chained setitem on a row slice of a DataFrame (``df[0:3][indexer] = ...``)
    # must be reported through tm.raises_chained_assignment_error.
    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    # SettingWithCopyWarning is additionally tolerated only when CoW is off
    extra_warnings = () if using_copy_on_write else (SettingWithCopyWarning,)

    with option_context("chained_assignment", "warn"):
        with tm.raises_chained_assignment_error(extra_warnings=extra_warnings):
            # the chained form is what is being tested: keep it as a single statement
            df[0:3][indexer] = 10
|
@ -0,0 +1,101 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write):
    # Case: in-place clip that changes values while a second object
    # (``df[:]``) created from the same frame is still alive.
    df = DataFrame({"a": [1.5, 2, 3]})
    snapshot = df.copy()
    values_before = get_array(df, "a")
    alias = df[:]

    # a CoW warning is only asserted when running in warning mode
    if not warn_copy_on_write:
        df.clip(lower=2, inplace=True)
    else:
        with tm.assert_cow_warning():
            df.clip(lower=2, inplace=True)

    still_shared = np.shares_memory(get_array(df, "a"), values_before)
    if not using_copy_on_write:
        assert still_shared
        return

    # with CoW: df got a new buffer, neither object reports a reference on
    # column 0 any more, and the alias keeps the pre-clip values
    assert not still_shared
    assert df._mgr._has_no_reference(0)
    assert alias._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, alias)
|
||||
|
||||
|
||||
def test_clip_inplace_reference_no_op(using_copy_on_write):
    # Case: in-place clip with a bound (lower=0) below every value, while a
    # second object (``df[:]``) created from the same frame is still alive.
    df = DataFrame({"a": [1.5, 2, 3]})
    snapshot = df.copy()
    values_before = get_array(df, "a")
    alias = df[:]
    df.clip(lower=0, inplace=True)

    # the column buffer is the same one as before the call
    assert np.shares_memory(get_array(df, "a"), values_before)

    if not using_copy_on_write:
        return

    # with CoW: both objects still report a reference on column 0
    assert not df._mgr._has_no_reference(0)
    assert not alias._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, alias)
|
||||
|
||||
|
||||
def test_clip_inplace(using_copy_on_write):
    # Case: in-place clip on a frame that no other object was derived from.
    df = DataFrame({"a": [1.5, 2, 3]})
    values_before = get_array(df, "a")
    df.clip(lower=2, inplace=True)

    # the column buffer is the same one as before the call
    values_after = get_array(df, "a")
    assert np.shares_memory(values_after, values_before)

    if not using_copy_on_write:
        return
    assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_clip(using_copy_on_write):
    # Case: non-inplace clip that changes values returns data that does not
    # share memory with the input, and the input keeps its values.
    df = DataFrame({"a": [1.5, 2, 3]})
    untouched = df.copy()
    clipped = df.clip(lower=2)

    clipped_values = get_array(clipped, "a")
    input_values = get_array(df, "a")
    assert not np.shares_memory(clipped_values, input_values)

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    tm.assert_frame_equal(untouched, df)
|
||||
|
||||
|
||||
def test_clip_no_op(using_copy_on_write):
    # Case: non-inplace clip with a bound (lower=0) below every value.
    df = DataFrame({"a": [1.5, 2, 3]})
    clipped = df.clip(lower=0)

    if not using_copy_on_write:
        # without CoW the result has its own buffer
        assert not np.shares_memory(get_array(clipped, "a"), get_array(df, "a"))
        return

    # with CoW the result shares the buffer and df reports a reference
    assert not df._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(clipped, "a"), get_array(df, "a"))
|
||||
|
||||
|
||||
def test_clip_chained_inplace(using_copy_on_write):
    # Case: calling clip(inplace=True) on a temporary obtained by indexing
    # (``df[...]``). The chained statement forms are what is being tested, so
    # they must stay single expressions.
    df = DataFrame({"a": [1, 4, 2], "b": 1})
    df_orig = df.copy()
    if using_copy_on_write:
        # with CoW each chained call is reported and leaves df unchanged
        with tm.raises_chained_assignment_error():
            df["a"].clip(1, 2, inplace=True)
        tm.assert_frame_equal(df, df_orig)

        with tm.raises_chained_assignment_error():
            df[["a"]].clip(1, 2, inplace=True)
        tm.assert_frame_equal(df, df_orig)
    else:
        # single-column getitem: a FutureWarning matching "inplace method"
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["a"].clip(1, 2, inplace=True)

        # list / boolean-mask getitem: no warning at all once the
        # chained_assignment option is disabled
        with tm.assert_produces_warning(None):
            with option_context("mode.chained_assignment", None):
                df[["a"]].clip(1, 2, inplace=True)

        with tm.assert_produces_warning(None):
            with option_context("mode.chained_assignment", None):
                df[df["a"] > 1].clip(1, 2, inplace=True)
|
@ -0,0 +1,382 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for Series / DataFrame constructors
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write):
    # Case: constructing a Series from another Series object follows CoW rules:
    # a new object is returned and thus mutations are not propagated
    ser = Series([1, 2, 3], name="name")

    # default is copy=False -> new Series is a shallow copy / view of original,
    # so right after construction both share memory
    result = Series(ser, dtype=dtype)
    assert np.shares_memory(get_array(ser), get_array(result))

    # --- mutate the child ---
    if not using_copy_on_write:
        # the write goes through to the original, memory stays shared
        with tm.assert_cow_warning(warn_copy_on_write):
            result.iloc[0] = 0
        assert ser.iloc[0] == 0
        assert np.shares_memory(get_array(ser), get_array(result))
    else:
        assert result._mgr.blocks[0].refs.has_reference()
        # the write triggers a copy: original untouched, memory no longer shared
        result.iloc[0] = 0
        assert ser.iloc[0] == 1
        assert not np.shares_memory(get_array(ser), get_array(result))

    # --- mutate the parent (same name is rebound on purpose) ---
    result = Series(ser, dtype=dtype)

    if not using_copy_on_write:
        with tm.assert_cow_warning(warn_copy_on_write):
            ser.iloc[0] = 0
        assert result.iloc[0] == 0
    else:
        ser.iloc[0] = 0
        assert result.iloc[0] == 1
|
||||
|
||||
|
||||
def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write):
    # Case: constructing a Series from another Series with specifying an index
    # that potentially requires a reindex of the values
    ser = Series([1, 2, 3], name="name")

    # value expected in ser after writing 0 into position 0 of the result
    first_after_write = 1 if using_copy_on_write else 0

    # indexes that don't actually require a reindex of the values
    # -> without CoW we get an actual mutating view
    equivalent_indexes = [
        ser.index,
        ser.index.copy(),
        list(ser.index),
        ser.index.rename("idx"),
    ]
    for index in equivalent_indexes:
        result = Series(ser, index=index)
        assert np.shares_memory(ser.values, result.values)
        with tm.assert_cow_warning(warn_copy_on_write):
            result.iloc[0] = 0
        assert ser.iloc[0] == first_after_write

    # ensure that if an actual reindex is needed, we don't have any refs
    # (mutating the result wouldn't trigger CoW)
    result = Series(ser, index=[0, 1, 2, 3])
    assert not np.shares_memory(ser.values, result.values)
    if not using_copy_on_write:
        return
    assert not result._mgr.blocks[0].refs.has_reference()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fastpath", [False, True])
@pytest.mark.parametrize("dtype", [None, "int64"])
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
@pytest.mark.parametrize(
    "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")]
)
def test_series_from_array(using_copy_on_write, idx, dtype, fastpath, arr):
    # Case: constructing a Series from a numpy / pandas array and then writing
    # into that array afterwards.
    #
    # The parametrized ``arr`` object is reused by pytest for every generated
    # test case and is mutated below, so work on a private copy to keep each
    # case starting from the same [1, 2, 3] input.
    arr = arr.copy()

    # fastpath is only exercised when an index is given and no dtype is passed
    if idx is None or dtype is not None:
        fastpath = False
    msg = "The 'fastpath' keyword in pd.Series is deprecated"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        ser = Series(arr, dtype=dtype, index=idx, fastpath=fastpath)
    ser_orig = ser.copy()
    # use the ``_data`` attribute when the array has one, else the array itself
    data = getattr(arr, "_data", arr)
    if using_copy_on_write:
        assert not np.shares_memory(get_array(ser), data)
    else:
        assert np.shares_memory(get_array(ser), data)

    # write into the source array and check whether the Series sees it
    arr[0] = 100
    if using_copy_on_write:
        tm.assert_series_equal(ser, ser_orig)
    else:
        expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype)
        tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, False, None])
def test_series_from_array_different_dtype(using_copy_on_write, copy):
    # Case: int64 array passed with dtype="int32" -> the Series never shares
    # memory with the input, whatever ``copy`` is.
    source = np.array([1, 2, 3], dtype="int64")
    converted = Series(source, dtype="int32", copy=copy)
    assert not np.shares_memory(get_array(converted), source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2]),
        DatetimeIndex([Timestamp("2019-12-31"), Timestamp("2020-12-31")]),
        PeriodIndex([Period("2019-12-31"), Period("2020-12-31")]),
        TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]),
    ],
)
def test_series_from_index(using_copy_on_write, idx):
    # Case: constructing a Series from an Index; writing into the Series must
    # leave the Index unchanged.
    ser = Series(idx)
    expected = idx.copy(deep=True)

    shared = np.shares_memory(get_array(ser), get_array(idx))
    if not using_copy_on_write:
        assert not shared
    else:
        assert shared
        assert not ser._mgr._has_no_reference(0)

    ser.iloc[0] = ser.iloc[1]
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_series_from_index_different_dtypes(using_copy_on_write):
    # Case: int64 Index passed with dtype="int32" -> no shared memory and,
    # with CoW, no reference recorded on the result.
    source = Index([1, 2, 3], dtype="int64")
    converted = Series(source, dtype="int32")
    assert not np.shares_memory(get_array(converted), get_array(source))
    if not using_copy_on_write:
        return
    assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
@pytest.mark.parametrize("fastpath", [False, True])
@pytest.mark.parametrize("dtype", [None, "int64"])
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath):
    # Case: constructing a Series directly from another Series' manager.
    ser = Series([1, 2, 3], dtype="int64")
    ser_orig = ser.copy()
    msg = "The 'fastpath' keyword in pd.Series is deprecated"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx)

    # both objects start out on the same buffer
    assert np.shares_memory(get_array(ser), get_array(ser2))
    if using_copy_on_write:
        assert not ser2._mgr._has_no_reference(0)

    # write through the new object and check what the original sees
    ser2.iloc[0] = 100
    if not using_copy_on_write:
        expected = Series([100, 2, 3])
        tm.assert_series_equal(ser, expected)
    else:
        tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
|
||||
def test_series_from_block_manager_different_dtype(using_copy_on_write):
    # Case: constructing a Series from a manager with a different dtype
    # -> no shared memory and, with CoW, no reference on the result.
    source = Series([1, 2, 3], dtype="int64")
    msg = "Passing a SingleBlockManager to Series"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        converted = Series(source._mgr, dtype="int32")
    assert not np.shares_memory(get_array(source), get_array(converted))
    if not using_copy_on_write:
        return
    assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_mgr", [True, False])
@pytest.mark.parametrize("columns", [None, ["a"]])
def test_dataframe_constructor_mgr_or_df(
    using_copy_on_write, warn_copy_on_write, columns, use_mgr
):
    # Case: constructing a DataFrame from another DataFrame or from its manager.
    # (``columns`` is parametrized but not referenced in the body)
    df = DataFrame({"a": [1, 2, 3]})
    df_orig = df.copy()

    # passing the manager is expected to emit a DeprecationWarning
    data, warn = (df._mgr, DeprecationWarning) if use_mgr else (df, None)
    msg = "Passing a BlockManager to DataFrame"
    with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
        new_df = DataFrame(data)

    assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
    with tm.assert_cow_warning(warn_copy_on_write and not use_mgr):
        new_df.iloc[0] = 100

    still_shared = np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
    if not using_copy_on_write:
        assert still_shared
        tm.assert_frame_equal(df, new_df)
    else:
        assert not still_shared
        tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
def test_dataframe_from_dict_of_series(
    request, using_copy_on_write, warn_copy_on_write, columns, index, dtype
):
    # Case: constructing a DataFrame from Series objects with copy=False
    # has to do a lazy copy following CoW rules
    # (the default for DataFrame(dict) is still to copy to ensure consolidation)
    ctor_kwargs = {"index": index, "columns": columns, "dtype": dtype}

    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    s1_orig = s1.copy()
    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, **ctor_kwargs)

    result = DataFrame({"a": s1, "b": s2}, copy=False, **ctor_kwargs)

    # right after construction the frame shares memory with the input Series
    assert np.shares_memory(get_array(result, "a"), get_array(s1))

    # --- mutate the new frame ---
    with tm.assert_cow_warning(warn_copy_on_write):
        result.iloc[0, 0] = 10
    if not using_copy_on_write:
        assert s1.iloc[0] == 10
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(s1))
        tm.assert_series_equal(s1, s1_orig)

    # --- mutate the parent series (names are rebound on purpose) ---
    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    result = DataFrame({"a": s1, "b": s2}, copy=False, **ctor_kwargs)
    with tm.assert_cow_warning(warn_copy_on_write):
        s1.iloc[0] = 10
    if not using_copy_on_write:
        assert result.iloc[0, 0] == 10
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(s1))
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
def test_dataframe_from_dict_of_series_with_reindex(dtype):
    # Case: constructing a DataFrame from Series objects with copy=False
    # and passing an index that requires an actual (no-view) reindex -> need
    # to ensure the result doesn't have refs set up to unnecessarily trigger
    # a copy on write
    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    frame = DataFrame({"a": s1, "b": s2}, index=[1, 2, 3], dtype=dtype, copy=False)

    # the frame should own its memory ...
    before = get_array(frame, "a")
    assert not np.shares_memory(before, get_array(s1))

    # ... so writing into it must keep the same buffer
    frame.iloc[0, 0] = 100
    after = get_array(frame, "a")
    assert np.shares_memory(before, after)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cons", [Series, Index])
@pytest.mark.parametrize(
    "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
)
def test_dataframe_from_series_or_index(
    using_copy_on_write, warn_copy_on_write, data, dtype, cons
):
    # Case: constructing a DataFrame from a single Series or Index.
    source = cons(data, dtype=dtype)
    pristine = source.copy()
    frame = DataFrame(source, dtype=dtype)

    # the frame starts out on the same buffer as its input
    assert np.shares_memory(get_array(source), get_array(frame, 0))
    if using_copy_on_write:
        assert not frame._mgr._has_no_reference(0)

    # overwrite the first cell with the last input value
    with tm.assert_cow_warning(warn_copy_on_write):
        frame.iloc[0, 0] = data[-1]
    if not using_copy_on_write:
        return
    tm.assert_equal(source, pristine)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cons", [Series, Index])
def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, cons):
    # Case: int64 Series/Index passed with dtype="int32" -> no shared memory
    # and, with CoW, no reference on the resulting frame.
    source = cons([1, 2], dtype="int64")
    frame = DataFrame(source, dtype="int32")
    assert not np.shares_memory(get_array(source), get_array(frame, 0))
    if not using_copy_on_write:
        return
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_dataframe_from_series_infer_datetime(using_copy_on_write):
    # Case: object-dtype Series of Timestamps passed to DataFrame; a
    # FutureWarning matching "Dtype inference" is expected and the frame does
    # not share memory with the input.
    stamps = [Timestamp("2019-12-31"), Timestamp("2020-12-31")]
    source = Series(stamps, dtype=object)
    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        frame = DataFrame(source)
    assert not np.shares_memory(get_array(source), get_array(frame, 0))
    if not using_copy_on_write:
        return
    assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
def test_dataframe_from_dict_of_series_with_dtype(index):
    # Variant of the reindex case, but now passing a dtype that causes a copy
    # -> need to ensure the result doesn't have refs set up to unnecessarily
    # trigger a copy on write
    s1 = Series([1.0, 2.0, 3.0])
    s2 = Series([4, 5, 6])
    frame = DataFrame({"a": s1, "b": s2}, index=index, dtype="int64", copy=False)

    # the frame should own its memory ...
    before = get_array(frame, "a")
    assert not np.shares_memory(before, get_array(s1))

    # ... so writing into it must keep the same buffer
    frame.iloc[0, 0] = 100
    after = get_array(frame, "a")
    assert np.shares_memory(before, after)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [False, None, True])
def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager):
    # Case: constructing a DataFrame from a 2D numpy array with the different
    # values of the ``copy`` keyword.
    source = np.array([[1, 2], [3, 4]])
    frame = DataFrame(source, copy=copy)

    # same predicate as a bare and/or chain, with the precedence spelled out
    expect_copy = (
        (using_copy_on_write and copy is not False)
        or copy is True
        or (using_array_manager and copy is None)
    )
    shared = np.shares_memory(get_array(frame, 0), source)
    if expect_copy:
        assert not shared
    else:
        assert shared
|
||||
|
||||
|
||||
def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write):
    # Case: DataFrame.from_records called with a DataFrame (expected to emit a
    # FutureWarning); the result starts out sharing memory with the input.
    df = DataFrame({"a": [1, 2, 3]})
    df_orig = df.copy()
    with tm.assert_produces_warning(FutureWarning):
        df2 = DataFrame.from_records(df)
    if using_copy_on_write:
        assert not df._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))

    with tm.assert_cow_warning(warn_copy_on_write):
        df2.iloc[0, 0] = 100

    # with CoW the input keeps its values, otherwise it follows the result
    if not using_copy_on_write:
        tm.assert_frame_equal(df, df2)
    else:
        tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_frame_from_dict_of_index(using_copy_on_write):
    # Case: DataFrame built from a dict holding an Index with copy=False
    # shares memory with the Index; with CoW a write into the frame must not
    # reach the Index.
    idx = Index([1, 2, 3])
    expected = idx.copy(deep=True)
    frame = DataFrame({"a": idx}, copy=False)
    assert np.shares_memory(get_array(frame, "a"), idx._values)

    # the write check only applies with CoW (the buffer is shared, see above)
    if not using_copy_on_write:
        return

    assert not frame._mgr._has_no_reference(0)

    frame.iloc[0, 0] = 100
    tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,106 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_assigning_to_same_variable_removes_references(using_copy_on_write):
    # Case: the result of reset_index() is bound to the same name as its input,
    # so the input object is no longer referenced by this function afterwards.
    df = DataFrame({"a": [1, 2, 3]})
    df = df.reset_index()
    if using_copy_on_write:
        # position 1 is column "a" (position 0 is the former index)
        assert df._mgr._has_no_reference(1)
    arr = get_array(df, "a")
    df.iloc[0, 1] = 100  # Write into a

    # the write must have happened in the buffer captured before it
    assert np.shares_memory(arr, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
    # Case: overwrite one column, then write a single cell of another column;
    # the cell write must reuse the existing buffer of that column.
    frame = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})

    frame["b"] = 100
    buffer_before = get_array(frame, "a")
    # We split the block in setitem, if we are not careful the new blocks will
    # reference each other triggering a copy
    frame.iloc[0, 0] = 100
    buffer_after = get_array(frame, "a")
    assert np.shares_memory(buffer_before, buffer_after)
|
||||
|
||||
|
||||
def test_setitem_with_view_copies(using_copy_on_write, warn_copy_on_write):
    # Case: overwrite one column and then write a cell of another column while
    # a second object (``df[:]``) is still alive.
    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
    alias = df[:]
    expected = df.copy()

    df["b"] = 100
    buffer_before = get_array(df, "a")
    with tm.assert_cow_warning(warn_copy_on_write):
        df.iloc[0, 0] = 100  # Check that we correctly track reference

    if not using_copy_on_write:
        return
    # with CoW the write went to a new buffer and the alias is unchanged
    assert not np.shares_memory(buffer_before, get_array(df, "a"))
    tm.assert_frame_equal(alias, expected)
|
||||
|
||||
|
||||
def test_setitem_with_view_invalidated_does_not_copy(
    using_copy_on_write, warn_copy_on_write, request
):
    # Case: like the view-copies scenario, but the name holding the second
    # object (``df[:]``) is rebound to None before the cell write.
    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
    view = df[:]

    df["b"] = 100
    arr = get_array(df, "a")
    # drop this function's reference to the view before writing
    view = None  # noqa: F841
    # TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100`
    # which introduces additional refs, even when those of `view` go out of scope
    with tm.assert_cow_warning(warn_copy_on_write):
        df.iloc[0, 0] = 100
    if using_copy_on_write:
        # Setitem split the block. Since the old block shared data with view
        # all the new blocks are referencing view and each other. When view
        # goes out of scope, they don't share data with any other block,
        # so we should not trigger a copy
        # The xfail marker is applied before the assertion below is evaluated.
        mark = pytest.mark.xfail(
            reason="blk.delete does not track references correctly"
        )
        request.applymarker(mark)
        assert np.shares_memory(arr, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_out_of_scope(using_copy_on_write):
    # Case: a column subset is returned from a function whose local parent
    # frame is no longer referenced once the function has returned.
    def func():
        df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1})
        # create some subset
        result = df[["a", "b"]]
        return result

    result = func()
    if using_copy_on_write:
        # neither of the first two blocks of the result may report a reference
        assert not result._mgr.blocks[0].refs.has_reference()
        assert not result._mgr.blocks[1].refs.has_reference()
|
||||
|
||||
|
||||
def test_delete(using_copy_on_write):
    # Case: deleting a column, then selecting a subset bound to the same name;
    # with CoW the checked blocks must not report references afterwards.
    df = DataFrame(
        np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"]
    )
    del df["b"]
    if using_copy_on_write:
        assert not df._mgr.blocks[0].refs.has_reference()
        assert not df._mgr.blocks[1].refs.has_reference()

    # rebinding ``df`` drops this function's reference to the previous frame
    df = df[["a"]]
    if using_copy_on_write:
        assert not df._mgr.blocks[0].refs.has_reference()
|
||||
|
||||
|
||||
def test_delete_reference(using_copy_on_write):
    # Case: deleting a column while a second object (``df[:]``) is still
    # alive; with CoW the checked blocks of both objects report references.
    df = DataFrame(
        np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"]
    )
    # ``x`` is kept bound for the rest of the test
    x = df[:]
    del df["b"]
    if using_copy_on_write:
        assert df._mgr.blocks[0].refs.has_reference()
        assert df._mgr.blocks[1].refs.has_reference()
        assert x._mgr.blocks[0].refs.has_reference()
|
@ -0,0 +1,396 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
concat,
|
||||
merge,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_concat_frames(using_copy_on_write):
    # Case: column-wise concat of two frames, then writing into the result one
    # column at a time; the inputs must keep their values.
    shares = np.shares_memory
    df = DataFrame({"b": ["a"] * 3})
    df2 = DataFrame({"a": ["a"] * 3})
    df_orig = df.copy()
    result = concat([df, df2], axis=1)

    if not using_copy_on_write:
        assert not shares(get_array(result, "b"), get_array(df, "b"))
        assert not shares(get_array(result, "a"), get_array(df2, "a"))
    else:
        assert shares(get_array(result, "b"), get_array(df, "b"))
        assert shares(get_array(result, "a"), get_array(df2, "a"))

    # write into column "b" of the result: only that column gets detached
    result.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not shares(get_array(result, "b"), get_array(df, "b"))
        assert shares(get_array(result, "a"), get_array(df2, "a"))

    # write into column "a" of the result: now that one is detached as well
    result.iloc[0, 1] = "d"
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df2, "a"))
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_concat_frames_updating_input(using_copy_on_write):
    # Case: column-wise concat of two frames, then writing into the inputs;
    # the concat result must keep its values.
    shares = np.shares_memory
    df = DataFrame({"b": ["a"] * 3})
    df2 = DataFrame({"a": ["a"] * 3})
    result = concat([df, df2], axis=1)

    if not using_copy_on_write:
        assert not shares(get_array(result, "b"), get_array(df, "b"))
        assert not shares(get_array(result, "a"), get_array(df2, "a"))
    else:
        assert shares(get_array(result, "b"), get_array(df, "b"))
        assert shares(get_array(result, "a"), get_array(df2, "a"))

    expected = result.copy()

    # write into the first input: only column "b" gets detached
    df.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not shares(get_array(result, "b"), get_array(df, "b"))
        assert shares(get_array(result, "a"), get_array(df2, "a"))

    # write into the second input: column "a" gets detached as well
    df2.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df2, "a"))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_series(using_copy_on_write):
    # Case: column-wise concat of two Series, then writing into the result one
    # column at a time; the input Series must keep their values.
    shares = np.shares_memory
    ser = Series([1, 2], name="a")
    ser2 = Series([3, 4], name="b")
    ser_orig = ser.copy()
    ser2_orig = ser2.copy()
    result = concat([ser, ser2], axis=1)

    if not using_copy_on_write:
        assert not shares(get_array(result, "a"), ser.values)
        assert not shares(get_array(result, "b"), ser2.values)
    else:
        assert shares(get_array(result, "a"), ser.values)
        assert shares(get_array(result, "b"), ser2.values)

    # write into column "a" of the result: only that column gets detached
    result.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), ser.values)
        assert shares(get_array(result, "b"), ser2.values)

    # write into column "b" of the result: now that one is detached as well
    result.iloc[0, 1] = 1000
    if using_copy_on_write:
        assert not shares(get_array(result, "b"), ser2.values)
    tm.assert_series_equal(ser, ser_orig)
    tm.assert_series_equal(ser2, ser2_orig)
|
||||
|
||||
|
||||
def test_concat_frames_chained(using_copy_on_write):
    # Case: nested column-wise concat of three frames; writing into the first
    # input must not change the result.
    shares = np.shares_memory
    df1 = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    df2 = DataFrame({"c": [4, 5, 6]})
    df3 = DataFrame({"d": [4, 5, 6]})
    result = concat([concat([df1, df2], axis=1), df3], axis=1)
    expected = result.copy()

    if not using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df1, "a"))
        assert not shares(get_array(result, "c"), get_array(df2, "c"))
        assert not shares(get_array(result, "d"), get_array(df3, "d"))
    else:
        assert shares(get_array(result, "a"), get_array(df1, "a"))
        assert shares(get_array(result, "c"), get_array(df2, "c"))
        assert shares(get_array(result, "d"), get_array(df3, "d"))

    df1.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df1, "a"))

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_series_chained(using_copy_on_write):
    # Case: nested column-wise concat of three Series; writing into the first
    # input must not change the result.
    shares = np.shares_memory
    ser1 = Series([1, 2, 3], name="a")
    ser2 = Series([4, 5, 6], name="c")
    ser3 = Series([4, 5, 6], name="d")
    result = concat([concat([ser1, ser2], axis=1), ser3], axis=1)
    expected = result.copy()

    if not using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(ser1, "a"))
        assert not shares(get_array(result, "c"), get_array(ser2, "c"))
        assert not shares(get_array(result, "d"), get_array(ser3, "d"))
    else:
        assert shares(get_array(result, "a"), get_array(ser1, "a"))
        assert shares(get_array(result, "c"), get_array(ser2, "c"))
        assert shares(get_array(result, "d"), get_array(ser3, "d"))

    ser1.iloc[0] = 100
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(ser1, "a"))

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_series_updating_input(using_copy_on_write):
    # Case: column-wise concat of two Series, then writing into each input in
    # turn; the result must keep its values after every write.
    shares = np.shares_memory
    ser = Series([1, 2], name="a")
    ser2 = Series([3, 4], name="b")
    expected = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = concat([ser, ser2], axis=1)

    if not using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(ser, "a"))
        assert not shares(get_array(result, "b"), get_array(ser2, "b"))
    else:
        assert shares(get_array(result, "a"), get_array(ser, "a"))
        assert shares(get_array(result, "b"), get_array(ser2, "b"))

    # write into the first input: only column "a" gets detached
    ser.iloc[0] = 100
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(ser, "a"))
        assert shares(get_array(result, "b"), get_array(ser2, "b"))
    tm.assert_frame_equal(result, expected)

    # write into the second input: column "b" gets detached as well
    ser2.iloc[0] = 1000
    if using_copy_on_write:
        assert not shares(get_array(result, "b"), get_array(ser2, "b"))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_mixed_series_frame(using_copy_on_write):
    # Case: column-wise concat of a DataFrame and a Series, then writing into
    # each input; the result must keep its values.
    shares = np.shares_memory
    df = DataFrame({"a": [1, 2, 3], "c": 1})
    ser = Series([4, 5, 6], name="d")
    result = concat([df, ser], axis=1)
    expected = result.copy()

    if not using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df, "a"))
        assert not shares(get_array(result, "c"), get_array(df, "c"))
        assert not shares(get_array(result, "d"), get_array(ser, "d"))
    else:
        assert shares(get_array(result, "a"), get_array(df, "a"))
        assert shares(get_array(result, "c"), get_array(df, "c"))
        assert shares(get_array(result, "d"), get_array(ser, "d"))

    # write into the Series input
    ser.iloc[0] = 100
    if using_copy_on_write:
        assert not shares(get_array(result, "d"), get_array(ser, "d"))

    # write into the DataFrame input
    df.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df, "a"))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, None, False])
def test_concat_copy_keyword(using_copy_on_write, copy):
    # Case: column-wise concat with the different values of ``copy``; the
    # result shares memory with the inputs with CoW or when copy is False.
    left = DataFrame({"a": [1, 2]})
    right = DataFrame({"b": [1.5, 2.5]})

    result = concat([left, right], axis=1, copy=copy)

    shares_a = np.shares_memory(get_array(left, "a"), get_array(result, "a"))
    shares_b = np.shares_memory(get_array(right, "b"), get_array(result, "b"))
    if using_copy_on_write or copy is False:
        assert shares_a
        assert shares_b
    else:
        assert not shares_a
        assert not shares_b
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda df1, df2, **kwargs: df1.merge(df2, **kwargs),
        lambda df1, df2, **kwargs: merge(df1, df2, **kwargs),
    ],
)
def test_merge_on_key(using_copy_on_write, func):
    # Case: merging two frames on a key column (method and function form),
    # then writing into the result; the inputs must keep their values.
    shares = np.shares_memory
    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
    df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
    df1_orig = df1.copy()
    df2_orig = df2.copy()

    result = func(df1, df2, on="key")

    if not using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df1, "a"))
        assert not shares(get_array(result, "b"), get_array(df2, "b"))
    else:
        assert shares(get_array(result, "a"), get_array(df1, "a"))
        assert shares(get_array(result, "b"), get_array(df2, "b"))
        # the key column of the result is the one of the left frame
        assert shares(get_array(result, "key"), get_array(df1, "key"))
        assert not shares(get_array(result, "key"), get_array(df2, "key"))

    # write into column "a" of the result: only that column gets detached
    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares(get_array(result, "a"), get_array(df1, "a"))
        assert shares(get_array(result, "b"), get_array(df2, "b"))

    # write into column "b" of the result: now that one is detached as well
    result.iloc[0, 2] = 0
    if using_copy_on_write:
        assert not shares(get_array(result, "b"), get_array(df2, "b"))
    tm.assert_frame_equal(df1, df1_orig)
    tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
def test_merge_on_index(using_copy_on_write):
    """Index-on-index merge shares columns under CoW and copies otherwise."""
    left = DataFrame({"a": [1, 2, 3]})
    right = DataFrame({"b": [4, 5, 6]})
    left_before, right_before = left.copy(), right.copy()

    merged = merge(left, right, left_index=True, right_index=True)

    def shares(col, parent):
        return np.shares_memory(get_array(merged, col), get_array(parent, col))

    if using_copy_on_write:
        assert shares("a", left)
        assert shares("b", right)
    else:
        assert not shares("a", left)
        assert not shares("b", right)

    # Under CoW, writing into "a" must leave "b" still shared.
    merged.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares("a", left)
        assert shares("b", right)

    merged.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares("b", right)
    tm.assert_frame_equal(left, left_before)
    tm.assert_frame_equal(right, right_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, how",
    [
        (lambda x, y, **kw: merge(y, x, on="key", **kw), "right"),
        (lambda x, y, **kw: merge(x, y, on="key", **kw), "left"),
    ],
)
def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
    """Merge where the shorter frame has to be enlarged to match the longer one.

    Under CoW the longer frame's value column is shared with the result, the
    shorter frame's is not and keeps no references, and the key column is
    shared with the longer frame only for ``how="left"``.
    """
    longer = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
    shorter = DataFrame({"key": ["a", "b"], "b": [4, 5]})
    longer_before, shorter_before = longer.copy(), shorter.copy()

    merged = func(longer, shorter, how=how)

    def shares(col, parent):
        return np.shares_memory(get_array(merged, col), get_array(parent, col))

    if using_copy_on_write:
        assert shares("a", longer)
        assert not shares("b", shorter)
        assert shorter._mgr._has_no_reference(1)
        assert shorter._mgr._has_no_reference(0)
        assert shares("key", longer) is (how == "left")
        assert not shares("key", shorter)
    else:
        assert not shares("a", longer)
        assert not shares("b", shorter)

    # Column "a" sits at position 1 for the left merge and 2 for the right one.
    a_position = 1 if how == "left" else 2
    merged.iloc[0, a_position] = 0
    if using_copy_on_write:
        assert not shares("a", longer)
    tm.assert_frame_equal(longer, longer_before)
    tm.assert_frame_equal(shorter, shorter_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, None, False])
def test_merge_copy_keyword(using_copy_on_write, copy):
    """``merge(copy=...)`` shares input columns iff CoW is on or ``copy=False``."""
    df = DataFrame({"a": [1, 2]})
    df2 = DataFrame({"b": [3, 4.5]})

    result = df.merge(df2, copy=copy, left_index=True, right_index=True)

    expect_shared = using_copy_on_write or copy is False
    shared_a = np.shares_memory(get_array(df, "a"), get_array(result, "a"))
    assert shared_a if expect_shared else not shared_a
    shared_b = np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
    assert shared_b if expect_shared else not shared_b
|
||||
|
||||
|
||||
def test_join_on_key(using_copy_on_write):
    """``DataFrame.join(on="key")`` shares columns and the caller's index under CoW."""
    key_index = Index(["a", "b", "c"], name="key")

    left = DataFrame({"a": [1, 2, 3]}, index=key_index.copy(deep=True))
    right = DataFrame({"b": [4, 5, 6]}, index=key_index.copy(deep=True))

    left_before, right_before = left.copy(), right.copy()

    joined = left.join(right, on="key")

    def shares(col, parent):
        return np.shares_memory(get_array(joined, col), get_array(parent, col))

    def shares_index(parent):
        return np.shares_memory(get_array(joined.index), get_array(parent.index))

    if using_copy_on_write:
        assert shares("a", left)
        assert shares("b", right)
        assert shares_index(left)
        assert not shares_index(right)
    else:
        assert not shares("a", left)
        assert not shares("b", right)

    joined.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares("a", left)
        assert shares("b", right)

    joined.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares("b", right)

    tm.assert_frame_equal(left, left_before)
    tm.assert_frame_equal(right, right_before)
|
||||
|
||||
|
||||
def test_join_multiple_dataframes_on_key(using_copy_on_write):
    """Joining a list of frames shares every column under CoW until it is written."""
    key_index = Index(["a", "b", "c"], name="key")

    base = DataFrame({"a": [1, 2, 3]}, index=key_index.copy(deep=True))
    others = [
        DataFrame({"b": [4, 5, 6]}, index=key_index.copy(deep=True)),
        DataFrame({"c": [7, 8, 9]}, index=key_index.copy(deep=True)),
    ]

    base_before = base.copy()
    others_before = [frame.copy() for frame in others]

    joined = base.join(others)

    def shares(col, parent):
        return np.shares_memory(get_array(joined, col), get_array(parent, col))

    def shares_index(parent):
        return np.shares_memory(get_array(joined.index), get_array(parent.index))

    if using_copy_on_write:
        assert shares("a", base)
        assert shares("b", others[0])
        assert shares("c", others[1])
        assert shares_index(base)
        assert not shares_index(others[0])
        assert not shares_index(others[1])
    else:
        assert not shares("a", base)
        assert not shares("b", others[0])
        assert not shares("c", others[1])

    # Write to one column at a time; the remaining ones stay shared under CoW.
    joined.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares("a", base)
        assert shares("b", others[0])
        assert shares("c", others[1])

    joined.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares("b", others[0])
        assert shares("c", others[1])

    joined.iloc[0, 2] = 0
    if using_copy_on_write:
        assert not shares("c", others[1])

    tm.assert_frame_equal(base, base_before)
    for frame, frame_before in zip(others, others_before):
        tm.assert_frame_equal(frame, frame_before)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,151 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
def test_consolidate(using_copy_on_write):
    """Consolidating a viewing subset drops references only for merged blocks."""
    # Build an unconsolidated frame: "c" is added after construction.
    parent = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    parent["c"] = [4, 5, 6]

    # Full slice -> a viewing subset.
    subset = parent[:]

    # Every block of the subset references a block of the parent.
    assert all(block.refs.has_reference() for block in subset._mgr.blocks)

    # Merge the two int64 blocks of the subset.
    subset._consolidate_inplace()

    # The float64 block is still a view, so it still references the parent.
    assert subset._mgr.blocks[0].refs.has_reference()
    # Same as np.shares_memory(parent["b"].values, subset["b"].values),
    # but without caching parent["b"].
    assert np.shares_memory(get_array(parent, "b"), get_array(subset, "b"))

    # The freshly consolidated int64 block references nothing else.
    assert not subset._mgr.blocks[1].refs.has_reference()

    # On the parent side only the float column is still linked.
    assert not parent._mgr.blocks[0].refs.has_reference()
    assert parent._mgr.blocks[1].refs.has_reference()
    assert not parent._mgr.blocks[2].refs.has_reference()

    # Writing to the subset still must not change the parent.
    if using_copy_on_write:
        subset.iloc[0, 1] = 0.0
        assert not parent._mgr.blocks[1].refs.has_reference()
        assert parent.loc[0, "b"] == 0.1
|
||||
|
||||
|
||||
@pytest.mark.single_cpu
@td.skip_array_manager_invalid_test
def test_switch_options():
    """The ``mode.copy_on_write`` option can be toggled within one session.

    Data is always constructed after the switch. Everything runs inside an
    ``option_context`` so the global option value is restored afterwards.
    """

    def first_cell_after_writing_to_view():
        # Build a fresh frame, write through a full-slice view, report parent.
        parent = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
        view = parent[:]
        view.iloc[0, 0] = 0
        return parent.iloc[0, 0]

    with pd.option_context("mode.copy_on_write", False):
        # CoW disabled: the write propagates to the parent.
        assert first_cell_after_writing_to_view() == 0

        pd.options.mode.copy_on_write = True
        # CoW enabled: the parent is untouched.
        assert first_cell_after_writing_to_view() == 1

        pd.options.mode.copy_on_write = False
        # CoW disabled again: the write propagates once more.
        assert first_cell_after_writing_to_view() == 0
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
    "locs, arr",
    [
        ([0], np.array([-1, -2, -3])),
        ([1], np.array([-1, -2, -3])),
        ([5], np.array([-1, -2, -3])),
        ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
        ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        # NOTE(review): this case duplicates the previous one.
        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
    ],
)
def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
    """``BlockManager.iset(..., inplace=True)`` on several locs of a shallow copy.

    Nothing currently calls ``iset`` with more than one loc and
    ``inplace=True`` (that only happens with ``inplace=False``); this test
    makes sure it works anyway.
    """
    source = DataFrame(
        {
            "a": [1, 2, 3],
            "b": [4, 5, 6],
            "c": [7, 8, 9],
            "d": [10, 11, 12],
            "e": [13, 14, 15],
            "f": ["a", "b", "c"],
        },
    )
    arr = arr.astype(dtype)
    source_before = source.copy()
    # Lazy copy under CoW (so the write triggers CoW); a real copy otherwise.
    target = source.copy(deep=None)
    target._mgr.iset(locs, arr, inplace=True)

    tm.assert_frame_equal(source, source_before)

    if using_copy_on_write:
        # Columns that were not written must still be shared.
        untouched = [c for pos, c in enumerate(source.columns) if pos not in locs]
        for col in untouched:
            assert np.shares_memory(get_array(source, col), get_array(target, col))
    else:
        for col in source.columns:
            assert not np.shares_memory(
                get_array(source, col), get_array(target, col)
            )
|
||||
|
||||
|
||||
def test_exponential_backoff():
    """Check the reference tracker's ``clear_counter`` thresholds (GH#55518)."""
    # GH#55518
    df = DataFrame({"a": [1, 2, 3]})
    for _ in range(490):
        df.copy(deep=False)

    assert len(df._mgr.blocks[0].refs.referenced_blocks) == 491

    df = DataFrame({"a": [1, 2, 3]})
    # Keep these shallow copies alive so their references stay valid.
    alive = [df.copy(deep=False) for _ in range(510)]

    for _ in range(20):
        df.copy(deep=False)
    tracker = df._mgr.blocks[0].refs
    assert len(tracker.referenced_blocks) == 531
    assert tracker.clear_counter == 1000

    for _ in range(500):
        df.copy(deep=False)

    # No reduction: more than 500 objects are still alive.
    assert df._mgr.blocks[0].refs.clear_counter == 1000

    alive = alive[:300]
    for _ in range(500):
        df.copy(deep=False)

    # Reduced: fewer than 500 objects are alive now.
    assert df._mgr.blocks[0].refs.clear_counter == 500
|
@ -0,0 +1,432 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
ArrowDtype,
|
||||
DataFrame,
|
||||
Interval,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
def test_interpolate_no_op(using_copy_on_write, method):
    """Interpolating a frame without missing values returns a lazy copy under CoW."""
    df = DataFrame({"a": [1, 2]})
    df_before = df.copy()

    # Only method="pad" is expected to emit the deprecation warning.
    expected_warning = FutureWarning if method == "pad" else None
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(expected_warning, match=msg):
        interpolated = df.interpolate(method=method)

    shared = np.shares_memory(get_array(interpolated, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    interpolated.iloc[0, 0] = 100

    if using_copy_on_write:
        assert not np.shares_memory(
            get_array(interpolated, "a"), get_array(df, "a")
        )
    tm.assert_frame_equal(df, df_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_interp_fill_functions(using_copy_on_write, func):
    """``ffill``/``bfill`` take the same code paths as ``interpolate``."""
    df = DataFrame({"a": [1, 2]})
    df_before = df.copy()

    fill = getattr(df, func)
    filled = fill()

    shared = np.shares_memory(get_array(filled, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    filled.iloc[0, 0] = 100

    if using_copy_on_write:
        assert not np.shares_memory(get_array(filled, "a"), get_array(df, "a"))
    tm.assert_frame_equal(df, df_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_triggers_copy(using_copy_on_write, vals, func):
    """Filling actual missing values yields a result that shares no memory."""
    df = DataFrame({"a": vals})
    fill = getattr(df, func)
    filled = fill()

    assert not np.shares_memory(get_array(filled, "a"), get_array(df, "a"))
    if using_copy_on_write:
        # The copy that was made must not carry any references.
        assert filled._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
    """In-place interpolate on an unreferenced frame reuses the same buffer."""
    df = DataFrame({"a": vals})
    buffer_before = get_array(df, "a")
    df.interpolate(method="linear", inplace=True)

    assert np.shares_memory(buffer_before, get_array(df, "a"))
    if using_copy_on_write:
        # The frame must not have picked up any references.
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_write):
    """In-place interpolate while a view exists must copy under CoW."""
    # NOTE(review): `vals` is parametrized but unused; the frame below is
    # always built from the float values.
    df = DataFrame({"a": [1, np.nan, 2]})
    df_before = df.copy()
    buffer_before = get_array(df, "a")
    view = df[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        df.interpolate(method="linear", inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(buffer_before, get_array(df, "a"))
    else:
        # interpolate must have copied, and no references may be left over.
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
        tm.assert_frame_equal(df_before, view)
        assert df._mgr._has_no_reference(0)
        assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_interp_fill_functions_inplace(
    using_copy_on_write, func, warn_copy_on_write, dtype
):
    """In-place ``ffill``/``bfill`` with a live view follow the interpolate paths."""
    df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype)
    df_before = df.copy()
    buffer_before = get_array(df, "a")
    view = df[:]

    is_numpy_float = dtype == "float64"
    with tm.assert_cow_warning(warn_copy_on_write and is_numpy_float):
        getattr(df, func)(inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(buffer_before, get_array(df, "a")) is (
            dtype == "float64"
        )
    else:
        # The fill must have copied, and no references may be left over.
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
        tm.assert_frame_equal(df_before, view)
        assert df._mgr._has_no_reference(0)
        assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_interpolate_cleaned_fill_method(using_copy_on_write):
    """Cover the "method is set to None" case of ``interpolate``."""
    df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
    df_before = df.copy()

    msg = "DataFrame.interpolate with object dtype"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        interpolated = df.interpolate(method="linear")

    shared = np.shares_memory(get_array(interpolated, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    interpolated.iloc[0, 0] = Timestamp("2021-12-31")

    if using_copy_on_write:
        assert not np.shares_memory(
            get_array(interpolated, "a"), get_array(df, "a")
        )
    tm.assert_frame_equal(df, df_before)
|
||||
|
||||
|
||||
def test_interpolate_object_convert_no_op(using_copy_on_write):
    """In-place pad-interpolate on string data keeps the buffer under CoW."""
    df = DataFrame({"a": ["a", "b", "c"], "b": 1})
    buffer_before = get_array(df, "a")
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True)

    # Now CoW makes a copy, it should not!
    # NOTE(review): indentation was lost in the source; both asserts are
    # assumed to belong to the CoW branch.
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
        assert np.shares_memory(buffer_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_interpolate_object_convert_copies(using_copy_on_write):
    """In-place pad-interpolate on object-dtype integers replaces the buffer."""
    df = DataFrame({"a": Series([1, 2], dtype=object), "b": 1})
    buffer_before = get_array(df, "a")
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True)

    # NOTE(review): indentation was lost in the source; both asserts are
    # assumed to belong to the CoW branch.
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_interpolate_downcast(using_copy_on_write):
    """In-place pad-interpolate with ``downcast="infer"`` keeps the float buffer."""
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    buffer_before = get_array(df, "a")
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True, downcast="infer")

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    # NOTE(review): indentation was lost in the source; this assert is
    # assumed to run in both modes.
    assert np.shares_memory(buffer_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write):
    """With a live view, in-place pad-interpolate + downcast must copy under CoW."""
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    df_before = df.copy()
    buffer_before = get_array(df, "a")
    view = df[:]
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True, downcast="infer")

    if not using_copy_on_write:
        # Without CoW the view sees the in-place change.
        tm.assert_frame_equal(df, view)
    else:
        assert df._mgr._has_no_reference(0)
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
        tm.assert_frame_equal(df_before, view)
|
||||
|
||||
|
||||
def test_fillna(using_copy_on_write):
    """``fillna`` shares the column that had nothing to fill under CoW."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    df_before = df.copy()

    filled = df.fillna(5.5)
    shared_b = np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        assert shared_b
    else:
        assert not shared_b

    # Writing to the result must not leak into the original.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(df_before, df)
|
||||
|
||||
|
||||
def test_fillna_dict(using_copy_on_write):
    """``fillna`` with a per-column dict copies only the column it fills under CoW."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    df_before = df.copy()

    filled = df.fillna({"a": 100.5})
    if not using_copy_on_write:
        assert not np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    else:
        assert np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
        assert not np.shares_memory(get_array(df, "a"), get_array(filled, "a"))

    # Writing to the result must not leak into the original.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(df_before, df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("downcast", [None, False])
def test_fillna_inplace(using_copy_on_write, downcast):
    """In-place ``fillna`` on an unreferenced frame keeps both column buffers."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    buffers_before = {col: get_array(df, col) for col in ("a", "b")}

    msg = "The 'downcast' keyword in fillna is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.fillna(5.5, inplace=True, downcast=downcast)
    for col in ("a", "b"):
        assert np.shares_memory(get_array(df, col), buffers_before[col])
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
        assert df._mgr._has_no_reference(1)
|
||||
|
||||
|
||||
def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write):
    """In-place ``fillna`` with a live view copies only the filled column under CoW."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    df_before = df.copy()
    buf_a = get_array(df, "a")
    buf_b = get_array(df, "b")
    view = df[:]

    with tm.assert_cow_warning(warn_copy_on_write):
        df.fillna(5.5, inplace=True)
    if not using_copy_on_write:
        assert np.shares_memory(get_array(df, "a"), buf_a)
        assert np.shares_memory(get_array(df, "b"), buf_b)
    else:
        assert not np.shares_memory(get_array(df, "a"), buf_a)
        assert np.shares_memory(get_array(df, "b"), buf_b)
        assert view._mgr._has_no_reference(0)
        assert df._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, df_before)
    # The filled frame itself looks the same in both modes.
    expected = DataFrame({"a": [1.5, 5.5], "b": 1})
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write):
    """In-place ``fillna`` on an interval Series with a live view."""
    # Explicit dtype avoids an implicit cast when nan is set below.
    ser = Series(
        interval_range(start=0, end=5), name="a", dtype="interval[float64, right]"
    )
    ser.iloc[1] = np.nan

    ser_before = ser.copy()
    view = ser[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.fillna(value=Interval(left=0, right=5), inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(
            get_array(ser, "a").left.values, get_array(view, "a").left.values
        )
    else:
        assert not np.shares_memory(
            get_array(ser, "a").left.values, get_array(view, "a").left.values
        )
        tm.assert_series_equal(view, ser_before)
|
||||
|
||||
|
||||
def test_fillna_series_empty_arg(using_copy_on_write):
    """``Series.fillna({})`` returns a lazy copy under CoW, a real copy otherwise."""
    ser = Series([1, np.nan, 2])
    ser_before = ser.copy()
    filled = ser.fillna({})

    shared = np.shares_memory(get_array(ser), get_array(filled))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Writing to the original must not change the result.
    ser.iloc[0] = 100.5
    tm.assert_series_equal(ser_before, filled)
|
||||
|
||||
|
||||
def test_fillna_series_empty_arg_inplace(using_copy_on_write):
    """In-place ``Series.fillna({})`` keeps the buffer and adds no references."""
    ser = Series([1, np.nan, 2])
    buffer_before = get_array(ser)
    ser.fillna({}, inplace=True)

    assert np.shares_memory(get_array(ser), buffer_before)
    if using_copy_on_write:
        assert ser._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_fillna_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    """``fillna`` on extension dtypes: the column with nothing to fill is a no-op."""
    df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    df_before = df.copy()
    filled = df.fillna(100)

    assert not np.shares_memory(get_array(df, "a"), get_array(filled, "a"))

    if using_copy_on_write:
        assert np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
        assert not filled._mgr._has_no_reference(1)
    elif isinstance(df.dtypes.iloc[0], ArrowDtype):
        # arrow is immutable, so no-ops need not copy the underlying array
        assert np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    else:
        assert not np.shares_memory(get_array(df, "b"), get_array(filled, "b"))

    tm.assert_frame_equal(df_before, df)

    # Writing to the result detaches "b" and clears references on both sides.
    filled.iloc[0, 1] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
        assert filled._mgr._has_no_reference(1)
        assert df._mgr._has_no_reference(1)
    tm.assert_frame_equal(df_before, df)
|
||||
|
||||
|
||||
def test_fillna_inplace_ea_noop_shares_memory(
    using_copy_on_write, warn_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    """In-place ``fillna`` on extension dtypes while a view of the frame exists."""
    df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    df_before = df.copy()
    view = df[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        df.fillna(100, inplace=True)

    if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
        assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
    else:
        # MaskedArray can actually respect inplace=True
        assert np.shares_memory(get_array(df, "a"), get_array(view, "a"))

    # "b" had nothing to fill, so it stays shared with the view.
    assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
    if using_copy_on_write:
        assert not df._mgr._has_no_reference(1)
        assert not view._mgr._has_no_reference(1)

    expect_warning = (
        warn_copy_on_write and "pyarrow" not in any_numeric_ea_and_arrow_dtype
    )
    with tm.assert_cow_warning(expect_warning):
        df.iloc[0, 1] = 100
    if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
        tm.assert_frame_equal(df_before, view)
    else:
        # we actually have a view
        tm.assert_frame_equal(df, view)
|
||||
|
||||
|
||||
def test_fillna_chained_assignment(using_copy_on_write):
    """Chained in-place ``fillna`` errors under CoW; otherwise only warnings are checked."""
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    df_before = df.copy()
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["a"].fillna(100, inplace=True)
        tm.assert_frame_equal(df, df_before)

        with tm.raises_chained_assignment_error():
            df[["a"]].fillna(100, inplace=True)
        tm.assert_frame_equal(df, df_before)
    else:
        # Frame-level subsets: no warning once chained_assignment is off.
        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            df[["a"]].fillna(100, inplace=True)

        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            df[df.a > 5].fillna(100, inplace=True)

        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["a"].fillna(100, inplace=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["interpolate", "ffill", "bfill"])
def test_interpolate_chained_assignment(using_copy_on_write, func):
    """Chained in-place interpolate/ffill/bfill errors under CoW, else may warn."""
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    df_before = df.copy()
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            getattr(df["a"], func)(inplace=True)
        tm.assert_frame_equal(df, df_before)

        with tm.raises_chained_assignment_error():
            getattr(df[["a"]], func)(inplace=True)
        tm.assert_frame_equal(df, df_before)
    else:
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            getattr(df["a"], func)(inplace=True)

        # Frame-level subsets: no warning once chained_assignment is off.
        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            getattr(df[["a"]], func)(inplace=True)

        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            getattr(df[df["a"] > 1], func)(inplace=True)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,481 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "replace_kwargs",
    [
        {"to_replace": {"a": 1, "b": 4}, "value": -1},
        # Test CoW splits blocks to avoid copying unchanged columns
        {"to_replace": {"a": 1}, "value": -1},
        {"to_replace": {"b": 4}, "value": -1},
        {"to_replace": {"b": {4: 1}}},
        # TODO: Add these in a further optimization
        # We would need to see which columns got replaced in the mask
        # which could be expensive
        # {"to_replace": {"b": 1}},
        # 1
    ],
)
def test_replace(using_copy_on_write, replace_kwargs):
    """``DataFrame.replace`` leaves untouched columns shared under CoW."""
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]})
    df_before = df.copy()

    replaced = df.replace(**replace_kwargs)

    if using_copy_on_write:
        # "b" is only expected to be shared when nothing in it was replaced.
        if (replaced["b"] == df["b"]).all():
            assert np.shares_memory(get_array(replaced, "b"), get_array(df, "b"))
        assert np.shares_memory(get_array(replaced, "c"), get_array(df, "c"))

    # Writing to the result triggers a copy-on-write for that column/block.
    replaced.loc[0, "c"] = -1
    if using_copy_on_write:
        assert not np.shares_memory(get_array(replaced, "c"), get_array(df, "c"))

    if "a" in replace_kwargs["to_replace"]:
        # "a" was already replaced (and copied), so this write reuses its buffer.
        buffer_a = get_array(replaced, "a")
        replaced.loc[0, "a"] = 100
        assert np.shares_memory(get_array(replaced, "a"), buffer_a)
    tm.assert_frame_equal(df, df_before)
|
||||
|
||||
|
||||
def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write):
    """In-place regex replace with a live view copies under CoW."""
    df = DataFrame({"a": ["aaa", "bbb"]})
    df_before = df.copy()
    view = df[:]
    buffer_before = get_array(df, "a")
    with tm.assert_cow_warning(warn_copy_on_write):
        df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
    if not using_copy_on_write:
        assert np.shares_memory(buffer_before, get_array(df, "a"))
    else:
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
        assert df._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, df_before)
|
||||
|
||||
|
||||
def test_replace_regex_inplace(using_copy_on_write):
    """In-place regex replace without references keeps the buffer."""
    df = DataFrame({"a": ["aaa", "bbb"]})
    buffer_before = get_array(df, "a")
    df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    # NOTE(review): indentation was lost in the source; this assert is
    # assumed to run in both modes.
    assert np.shares_memory(buffer_before, get_array(df, "a"))

    # A non-inplace regex replace that matches must hand back a new buffer.
    df_before = df.copy()
    replaced = df.replace(to_replace=r"^b.*$", value="new", regex=True)
    tm.assert_frame_equal(df_before, df)
    assert not np.shares_memory(get_array(replaced, "a"), get_array(df, "a"))
|
||||
|
||||
|
||||
def test_replace_regex_inplace_no_op(using_copy_on_write):
    """Check regex replace that matches nothing on an integer column."""
    frame = DataFrame({"a": [1, 2]})
    before = get_array(frame, "a")

    frame.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True)
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
    assert np.shares_memory(before, get_array(frame, "a"))

    snapshot = frame.copy()
    result = frame.replace(to_replace=r"^x.$", value="new", regex=True)
    tm.assert_frame_equal(snapshot, frame)

    # memory is expected to be shared with the source only under copy-on-write
    shared = np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared
|
||||
|
||||
|
||||
def test_replace_mask_all_false_second_block(using_copy_on_write):
    """Check replace where the value only occurs in column "a".

    Column "c" is expected to share memory with the source only under
    copy-on-write, and to stop sharing once the result is written to.
    """
    frame = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2})
    snapshot = frame.copy()

    replaced = frame.replace(to_replace=1.5, value=55.5)

    c_shared = np.shares_memory(get_array(frame, "c"), get_array(replaced, "c"))
    a_shared = np.shares_memory(get_array(frame, "a"), get_array(replaced, "a"))
    # TODO: Block splitting would allow us to avoid copying b
    if using_copy_on_write:
        assert c_shared
    else:
        assert not c_shared
    assert not a_shared

    replaced.loc[0, "c"] = 1
    tm.assert_frame_equal(frame, snapshot)  # Original is unchanged

    if not using_copy_on_write:
        return
    assert not np.shares_memory(get_array(frame, "c"), get_array(replaced, "c"))
    # TODO: This should split and not copy the whole block
    # assert np.shares_memory(get_array(df, "d"), get_array(df2, "d"))
|
||||
|
||||
|
||||
def test_replace_coerce_single_column(using_copy_on_write, using_array_manager):
    """Check replace with a string value that only affects column "a"."""
    frame = DataFrame({"a": [1.5, 2, 3], "b": 100.5})
    snapshot = frame.copy()

    replaced = frame.replace(to_replace=1.5, value="a")

    # Same expectations for copy-on-write and for the non-array-manager case;
    # nothing is asserted for the array manager without copy-on-write.
    if using_copy_on_write or not using_array_manager:
        assert np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
        assert not np.shares_memory(get_array(frame, "a"), get_array(replaced, "a"))

    if not using_copy_on_write:
        return

    replaced.loc[0, "b"] = 0.5
    tm.assert_frame_equal(frame, snapshot)  # Original is unchanged
    assert not np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
|
||||
|
||||
|
||||
def test_replace_to_replace_wrong_dtype(using_copy_on_write):
    """Check replace with a string ``to_replace`` on float-only columns."""
    frame = DataFrame({"a": [1.5, 2, 3], "b": 100.5})
    snapshot = frame.copy()

    replaced = frame.replace(to_replace="xxx", value=1.5)

    # both columns are expected to share memory only under copy-on-write
    b_shared = np.shares_memory(get_array(frame, "b"), get_array(replaced, "b"))
    a_shared = np.shares_memory(get_array(frame, "a"), get_array(replaced, "a"))
    if using_copy_on_write:
        assert b_shared
        assert a_shared
    else:
        assert not b_shared
        assert not a_shared

    replaced.loc[0, "b"] = 0.5
    tm.assert_frame_equal(frame, snapshot)  # Original is unchanged

    if using_copy_on_write:
        assert not np.shares_memory(
            get_array(frame, "b"), get_array(replaced, "b")
        )
|
||||
|
||||
|
||||
def test_replace_list_categorical(using_copy_on_write):
    """Check list-based replace on a categorical column, in place and not."""
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    before = get_array(frame, "a")
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )

    with tm.assert_produces_warning(FutureWarning, match=msg):
        frame.replace(["c"], value="a", inplace=True)
    # the in-place call is expected to keep the same codes buffer
    assert np.shares_memory(before.codes, get_array(frame, "a").codes)
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)

    snapshot = frame.copy()
    with tm.assert_produces_warning(FutureWarning, match=msg):
        replaced = frame.replace(["b"], value="a")
    assert not np.shares_memory(before.codes, get_array(replaced, "a").codes)

    tm.assert_frame_equal(frame, snapshot)
|
||||
|
||||
|
||||
def test_replace_list_inplace_refs_categorical(using_copy_on_write):
    """Check in-place list replace on a categorical column that has a view."""
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    view = frame[:]
    snapshot = frame.copy()
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        frame.replace(["c"], value="a", inplace=True)

    # Asserted in both modes; without copy-on-write this could be inplace.
    assert not np.shares_memory(
        get_array(view, "a").codes, get_array(frame, "a").codes
    )
    if using_copy_on_write:
        tm.assert_frame_equal(snapshot, view)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
def test_replace_inplace(using_copy_on_write, to_replace):
    """Check that in-place replace on an unreferenced frame keeps its buffer.

    Parameters
    ----------
    using_copy_on_write : bool
        Fixture; when true, ``_has_no_reference(0)`` is also asserted.
    to_replace : scalar or list
        Value(s) handed to ``DataFrame.replace``, including an empty list.
    """
    df = DataFrame({"a": [1.5, 2, 3]})
    arr_a = get_array(df, "a")
    # Pass the parametrized value: the call used to hard-code 1.5, so every
    # parameter case ran exactly the same code.
    df.replace(to_replace=to_replace, value=15.5, inplace=True)

    assert np.shares_memory(get_array(df, "a"), arr_a)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1.5, [1.5]])
def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write):
    """Check in-place replace on a frame that has a live ``df[:]`` view."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    before = get_array(frame, "a")
    view = frame[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        frame.replace(to_replace=to_replace, value=15.5, inplace=True)

    still_shared = np.shares_memory(get_array(frame, "a"), before)
    if not using_copy_on_write:
        assert still_shared
        return

    # under copy-on-write the frame gets new memory and neither object is
    # expected to hold a reference afterwards
    assert not still_shared
    assert frame._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", ["a", 100.5])
def test_replace_inplace_reference_no_op(using_copy_on_write, to_replace):
    """Check in-place replace with a value that is absent from the column."""
    frame = DataFrame({"a": [1.5, 2, 3]})
    before = get_array(frame, "a")
    view = frame[:]
    frame.replace(to_replace=to_replace, value=15.5, inplace=True)

    assert np.shares_memory(get_array(frame, "a"), before)
    if not using_copy_on_write:
        return
    # frame and view are both expected to still report a reference
    assert not frame._mgr._has_no_reference(0)
    assert not view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1, [1]])
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_replace):
    """Check in-place replace on a categorical column that has a view."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    snapshot = frame.copy()
    before = get_array(frame, "a")
    view = frame[:]
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    # a FutureWarning is only expected for the 1.5 replacement value
    if val == 1.5:
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(warn, match=msg):
        frame.replace(to_replace=to_replace, value=val, inplace=True)

    codes_shared = np.shares_memory(get_array(frame, "a").codes, before.codes)
    if not using_copy_on_write:
        assert codes_shared
        return

    assert not codes_shared
    assert frame._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
    tm.assert_frame_equal(view, snapshot)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace(using_copy_on_write, val):
    """Check in-place replace on a categorical column without a view."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    before = get_array(frame, "a")
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    # a FutureWarning is only expected for the 1.5 replacement value
    if val == 1.5:
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(warn, match=msg):
        frame.replace(to_replace=1, value=val, inplace=True)

    assert np.shares_memory(get_array(frame, "a").codes, before.codes)
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)

    tm.assert_frame_equal(frame, DataFrame({"a": Categorical([val, 2, 3])}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical(using_copy_on_write, val):
    """Check non-inplace replace on a categorical column."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    snapshot = frame.copy()
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    # a FutureWarning is only expected for the 1.5 replacement value
    if val == 1.5:
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(warn, match=msg):
        replaced = frame.replace(to_replace=1, value=val)

    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
        assert replaced._mgr._has_no_reference(0)
    assert not np.shares_memory(
        get_array(frame, "a").codes, get_array(replaced, "a").codes
    )
    tm.assert_frame_equal(frame, snapshot)

    # writing into the result is expected to reuse the result's codes buffer
    result_codes = get_array(replaced, "a").codes
    replaced.iloc[0, 0] = 2.0
    assert np.shares_memory(get_array(replaced, "a").codes, result_codes)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["where", "mask"])
def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write):
    """Check in-place ``where``/``mask`` on a frame that has a live view."""
    df = DataFrame({"a": [1.5, 2, 3]})
    snapshot = df.copy()
    before = get_array(df, "a")
    view = df[:]

    # resolve the bound method under test from its parametrized name
    method = getattr(df, method)
    if not warn_copy_on_write:
        method(df["a"] > 1.6, -1, inplace=True)
    else:
        with tm.assert_cow_warning():
            method(df["a"] > 1.6, -1, inplace=True)

    still_shared = np.shares_memory(get_array(df, "a"), before)
    if not using_copy_on_write:
        assert still_shared
        return

    assert not still_shared
    assert df._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
    tm.assert_frame_equal(view, snapshot)
|
||||
|
||||
|
||||
def test_replace_empty_list(using_copy_on_write):
    """Check replace with empty ``to_replace`` and ``value`` lists."""
    frame = DataFrame({"a": [1, 2]})

    result = frame.replace([], [])
    shared = np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert shared
        assert not frame._mgr._has_no_reference(0)
    else:
        assert not shared

    before = get_array(frame, "a")
    # second call: the returned frame is discarded on purpose
    frame.replace([], [])
    if not using_copy_on_write:
        return
    assert np.shares_memory(get_array(frame, "a"), before)
    assert not frame._mgr._has_no_reference(0)
    assert not result._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["d", None])
def test_replace_object_list_inplace(using_copy_on_write, value):
    """Check in-place list replace on an object column without a view."""
    frame = DataFrame({"a": ["a", "b", "c"]})
    before = get_array(frame, "a")
    frame.replace(["c"], value, inplace=True)

    shared = np.shares_memory(before, get_array(frame, "a"))
    if using_copy_on_write or value is None:
        assert shared
    else:
        # This could be inplace
        assert not shared

    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_replace_list_multiple_elements_inplace(using_copy_on_write):
    """Check in-place replace of several values with one scalar."""
    frame = DataFrame({"a": [1, 2, 3]})
    before = get_array(frame, "a")
    frame.replace([1, 2], 4, inplace=True)

    # the buffer is expected to be reused in both modes
    assert np.shares_memory(before, get_array(frame, "a"))
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_replace_list_none(using_copy_on_write):
    """Check non-inplace list replace with ``value=None`` on an object column."""
    frame = DataFrame({"a": ["a", "b", "c"]})
    snapshot = frame.copy()

    replaced = frame.replace(["b"], value=None)

    tm.assert_frame_equal(frame, snapshot)
    source_values = get_array(frame, "a")
    result_values = get_array(replaced, "a")
    assert not np.shares_memory(source_values, result_values)
|
||||
|
||||
|
||||
def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write):
    """Check in-place list replace with ``value=None`` while a view exists."""
    frame = DataFrame({"a": ["a", "b", "c"]})
    before = get_array(frame, "a")
    snapshot = frame.copy()
    view = frame[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        frame.replace(["a"], value=None, inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(before, get_array(frame, "a"))
        return

    assert frame._mgr._has_no_reference(0)
    assert not np.shares_memory(before, get_array(frame, "a"))
    tm.assert_frame_equal(snapshot, view)
|
||||
|
||||
|
||||
def test_replace_columnwise_no_op_inplace(using_copy_on_write):
    """Check in-place dict replace whose value is absent from the column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    view = frame[:]
    snapshot = frame.copy()
    frame.replace({"a": 10}, 100, inplace=True)

    if not using_copy_on_write:
        return
    assert np.shares_memory(get_array(view, "a"), get_array(frame, "a"))
    # a later write to the frame must not show up in the view
    frame.iloc[0, 0] = 100
    tm.assert_frame_equal(view, snapshot)
|
||||
|
||||
|
||||
def test_replace_columnwise_no_op(using_copy_on_write):
    """Check non-inplace dict replace whose value is absent from the column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    snapshot = frame.copy()
    result = frame.replace({"a": 10}, 100)

    if using_copy_on_write:
        shared = np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
        assert shared

    # writing to the result must not change the source frame
    result.iloc[0, 0] = 100
    tm.assert_frame_equal(frame, snapshot)
|
||||
|
||||
|
||||
def test_replace_chained_assignment(using_copy_on_write):
    """Check in-place replace called on a temporary selection of a frame.

    The chained expressions are kept exactly as written because the test is
    about calling the method directly on the indexing result.
    """
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    df_orig = df.copy()

    if not using_copy_on_write:
        # frame selections: no warning once the option is switched off
        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            df[["a"]].replace(1, 100, inplace=True)

        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            df[df.a > 5].replace(1, 100, inplace=True)

        # single-column selection: a FutureWarning is expected
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["a"].replace(1, 100, inplace=True)
        return

    with tm.raises_chained_assignment_error():
        df["a"].replace(1, 100, inplace=True)
    tm.assert_frame_equal(df, df_orig)

    with tm.raises_chained_assignment_error():
        df[["a"]].replace(1, 100, inplace=True)
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_listlike(using_copy_on_write):
    """Check non-inplace replace with list-like ``to_replace`` and ``value``.

    Parameters
    ----------
    using_copy_on_write : bool
        Fixture; decides whether a replace that matches nothing is expected
        to share memory with the source frame.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    df_orig = df.copy()

    # none of the values occur in the frame
    result = df.replace([200, 201], [11, 11])
    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    result.iloc[0, 0] = 100
    # Compare with the saved copy: the check used to compare df with itself,
    # which can never fail.
    tm.assert_frame_equal(df, df_orig)

    # one of the values (2) does occur, so new memory is expected
    result = df.replace([200, 2], [10, 10])
    assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write):
    """Check in-place list-like replace before and after a view is created."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    before = get_array(frame, "a")

    # no view yet: the buffer is expected to be reused
    frame.replace([200, 2], [10, 11], inplace=True)
    assert np.shares_memory(get_array(frame, "a"), before)

    view = frame[:]
    snapshot = frame.copy()
    with tm.assert_cow_warning(warn_copy_on_write):
        frame.replace([200, 3], [10, 11], inplace=True)

    still_shared = np.shares_memory(get_array(frame, "a"), before)
    if using_copy_on_write:
        assert not still_shared
        tm.assert_frame_equal(view, snapshot)
    else:
        assert still_shared
        tm.assert_frame_equal(frame, view)
|
@ -0,0 +1,156 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for the values that are set in a DataFrame
|
||||
|
||||
|
||||
def test_set_column_with_array():
    """Setting an array as a new column (``df[col] = arr``) copies the data."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    values = np.array([1, 2, 3], dtype="int64")

    frame["c"] = values

    # the column must not be backed by the array that was assigned
    assert not np.shares_memory(get_array(frame, "c"), values)

    # so a later write to the array must not reach the DataFrame
    values[0] = 0
    expected = Series([1, 2, 3], name="c")
    tm.assert_series_equal(frame["c"], expected)
|
||||
|
||||
|
||||
def test_set_column_with_series(using_copy_on_write):
    """Setting a Series as a new column (``df[col] = s``) copies the data.

    With copy-on-write the memory is expected to be shared right after the
    assignment (delayed copy).
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = Series([1, 2, 3])

    frame["c"] = ser

    shared = np.shares_memory(get_array(frame, "c"), get_array(ser))
    if using_copy_on_write:
        assert shared
    else:
        # the series data is copied
        assert not shared

    # modifying the series changes the series but not the DataFrame
    ser.iloc[0] = 0
    assert ser.iloc[0] == 0
    tm.assert_series_equal(frame["c"], Series([1, 2, 3], name="c"))
|
||||
|
||||
|
||||
def test_set_column_with_index(using_copy_on_write):
    """Setting an index as a new column (``df[col] = idx``) copies the data."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # plain Index
    plain_index = Index([1, 2, 3])
    frame["c"] = plain_index
    assert not np.shares_memory(get_array(frame, "c"), plain_index.values)

    # RangeIndex: take its values before the assignment
    range_index = RangeIndex(1, 4)
    range_values = range_index.values
    frame["d"] = range_index
    assert not np.shares_memory(get_array(frame, "d"), range_values)
|
||||
|
||||
|
||||
def test_set_columns_with_dataframe(using_copy_on_write):
    """Setting a DataFrame as new columns copies the data.

    With copy-on-write the memory is expected to be shared right after the
    assignment (delayed copy).
    """
    target = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    source = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    target[["c", "d"]] = source

    shared = np.shares_memory(get_array(target, "c"), get_array(source, "c"))
    if using_copy_on_write:
        assert shared
    else:
        # the data is copied
        assert not shared

    # modifying the assigned DataFrame must not change the target DataFrame
    source.iloc[0, 0] = 0
    tm.assert_series_equal(target["c"], Series([7, 8, 9], name="c"))
|
||||
|
||||
|
||||
def test_setitem_series_no_copy(using_copy_on_write):
    """Setting a Series as a new column can delay copying its data."""
    frame = DataFrame({"a": [1, 2, 3]})
    rhs = Series([4, 5, 6])
    rhs_snapshot = rhs.copy()

    # adding a new column
    frame["b"] = rhs
    if using_copy_on_write:
        assert np.shares_memory(get_array(rhs), get_array(frame, "b"))

    # writing through the frame must leave the assigned Series unchanged
    frame.iloc[0, 1] = 100
    tm.assert_series_equal(rhs, rhs_snapshot)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy_single_block(using_copy_on_write):
    """Overwrite an existing column that is a single block with a Series."""
    frame = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    rhs = Series([4, 5, 6])
    rhs_snapshot = rhs.copy()

    frame["a"] = rhs
    if using_copy_on_write:
        assert np.shares_memory(get_array(rhs), get_array(frame, "a"))

    # writing through the frame must leave the assigned Series unchanged
    frame.iloc[0, 0] = 100
    tm.assert_series_equal(rhs, rhs_snapshot)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy_split_block(using_copy_on_write):
    """Overwrite an existing column that is part of a larger block."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1})
    rhs = Series([4, 5, 6])
    rhs_snapshot = rhs.copy()

    frame["b"] = rhs
    if using_copy_on_write:
        assert np.shares_memory(get_array(rhs), get_array(frame, "b"))

    # writing through the frame must leave the assigned Series unchanged
    frame.iloc[0, 1] = 100
    tm.assert_series_equal(rhs, rhs_snapshot)
|
||||
|
||||
|
||||
def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
    """Setting a Series to multiple columns repeats the data.

    The data is currently copied eagerly, so no memory sharing is expected.
    """
    columns = MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]])
    frame = DataFrame([[1, 2, 3], [3, 4, 5]], columns=columns)
    rhs = Series([10, 11])

    frame["a"] = rhs

    assert not np.shares_memory(get_array(rhs), frame._get_column_array(0))
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write):
    """Check warnings for ``+=`` on a column, written chained and unchained."""
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # the chained form must not raise any warning
    with tm.assert_produces_warning(None):
        df["a"] += 1

    # outside a chain, a warning is expected when ``warn_copy_on_write`` is set
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = df["a"]
    with tm.assert_cow_warning(warn_copy_on_write):
        ser += 1
|
@ -0,0 +1,14 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import DataFrame
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_get_array_numpy():
    """Two ``get_array`` calls on the same int column share memory."""
    frame = DataFrame({"a": [1, 2, 3]})
    first = get_array(frame, "a")
    second = get_array(frame, "a")
    assert np.shares_memory(first, second)
|
||||
|
||||
|
||||
def test_get_array_masked():
    """Two ``get_array`` calls on the same "Int64" column share memory."""
    frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    first = get_array(frame, "a")
    second = get_array(frame, "a")
    assert np.shares_memory(first, second)
|
@ -0,0 +1,30 @@
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
from pandas.core.arrays import BaseMaskedArray
|
||||
|
||||
|
||||
def get_array(obj, col=None):
    """
    Return the array behind an Index, a Series or one DataFrame column.

    Equivalent of df[col].values, but without going through normal getitem,
    which triggers tracking references / CoW (and we might be testing that
    this is done by some other operation).

    An Index, or a Series whose name matches ``col`` (or when ``col`` is
    None), is read through ``_values``. Anything else is treated as a
    DataFrame and ``col`` is required. Masked arrays are unwrapped to their
    ``_data``, Categoricals are returned as they are, and other arrays are
    unwrapped to ``_ndarray`` when they have that attribute.
    """
    if isinstance(obj, Index) or (
        isinstance(obj, Series) and (col is None or obj.name == col)
    ):
        values = obj._values
    else:
        assert col is not None
        position = obj.columns.get_loc(col)
        assert isinstance(position, int)
        values = obj._get_column_array(position)

    if isinstance(values, BaseMaskedArray):
        return values._data
    if isinstance(values, Categorical):
        return values
    return getattr(values, "_ndarray", values)
|
Reference in New Issue
Block a user