venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,413 @@
|
||||
"""Tests dealing with the NDFrame.allows_duplicates."""
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
not_implemented = pytest.mark.xfail(reason="Not implemented.")
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Preservation
|
||||
|
||||
|
||||
class TestPreserves:
|
||||
@pytest.mark.parametrize(
|
||||
"cls, data",
|
||||
[
|
||||
(pd.Series, np.array([])),
|
||||
(pd.Series, [1, 2]),
|
||||
(pd.DataFrame, {}),
|
||||
(pd.DataFrame, {"A": [1, 2]}),
|
||||
],
|
||||
)
|
||||
def test_construction_ok(self, cls, data):
|
||||
result = cls(data)
|
||||
assert result.flags.allows_duplicate_labels is True
|
||||
|
||||
result = cls(data).set_flags(allows_duplicate_labels=False)
|
||||
assert result.flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
operator.itemgetter(["a"]),
|
||||
operator.methodcaller("add", 1),
|
||||
operator.methodcaller("rename", str.upper),
|
||||
operator.methodcaller("rename", "name"),
|
||||
operator.methodcaller("abs"),
|
||||
np.abs,
|
||||
],
|
||||
)
|
||||
def test_preserved_series(self, func):
|
||||
s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
|
||||
assert func(s).flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])]
|
||||
)
|
||||
# TODO: frame
|
||||
@not_implemented
|
||||
def test_align(self, other):
|
||||
s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
|
||||
a, b = s.align(other)
|
||||
assert a.flags.allows_duplicate_labels is False
|
||||
assert b.flags.allows_duplicate_labels is False
|
||||
|
||||
def test_preserved_frame(self):
|
||||
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
assert df.loc[["a"]].flags.allows_duplicate_labels is False
|
||||
assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False
|
||||
|
||||
def test_to_frame(self):
|
||||
ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False)
|
||||
assert ser.to_frame().flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize("func", ["add", "sub"])
|
||||
@pytest.mark.parametrize("frame", [False, True])
|
||||
@pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")])
|
||||
def test_binops(self, func, other, frame):
|
||||
df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
if frame:
|
||||
df = df.to_frame()
|
||||
if isinstance(other, pd.Series) and frame:
|
||||
other = other.to_frame()
|
||||
func = operator.methodcaller(func, other)
|
||||
assert df.flags.allows_duplicate_labels is False
|
||||
assert func(df).flags.allows_duplicate_labels is False
|
||||
|
||||
def test_preserve_getitem(self):
|
||||
df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
|
||||
assert df[["A"]].flags.allows_duplicate_labels is False
|
||||
assert df["A"].flags.allows_duplicate_labels is False
|
||||
assert df.loc[0].flags.allows_duplicate_labels is False
|
||||
assert df.loc[[0]].flags.allows_duplicate_labels is False
|
||||
assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False
|
||||
|
||||
def test_ndframe_getitem_caching_issue(
|
||||
self, request, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
if not (using_copy_on_write or warn_copy_on_write):
|
||||
request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
|
||||
# NDFrame.__getitem__ will cache the first df['A']. May need to
|
||||
# invalidate that cache? Update the cached entries?
|
||||
df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
|
||||
assert df["A"].flags.allows_duplicate_labels is False
|
||||
df.flags.allows_duplicate_labels = True
|
||||
assert df["A"].flags.allows_duplicate_labels is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objs, kwargs",
|
||||
[
|
||||
# Series
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=["a", "b"]),
|
||||
pd.Series(2, index=["c", "d"]),
|
||||
],
|
||||
{},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=["a", "b"]),
|
||||
pd.Series(2, index=["a", "b"]),
|
||||
],
|
||||
{"ignore_index": True},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=["a", "b"]),
|
||||
pd.Series(2, index=["a", "b"]),
|
||||
],
|
||||
{"axis": 1},
|
||||
),
|
||||
# Frame
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.DataFrame({"A": [1, 2]}, index=["c", "d"]),
|
||||
],
|
||||
{},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
],
|
||||
{"ignore_index": True},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.DataFrame({"B": [1, 2]}, index=["a", "b"]),
|
||||
],
|
||||
{"axis": 1},
|
||||
),
|
||||
# Series / Frame
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.Series([1, 2], index=["a", "b"], name="B"),
|
||||
],
|
||||
{"axis": 1},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_concat(self, objs, kwargs):
|
||||
objs = [x.set_flags(allows_duplicate_labels=False) for x in objs]
|
||||
result = pd.concat(objs, **kwargs)
|
||||
assert result.flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"left, right, expected",
|
||||
[
|
||||
# false false false
|
||||
pytest.param(
|
||||
pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
),
|
||||
pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
),
|
||||
False,
|
||||
marks=not_implemented,
|
||||
),
|
||||
# false true false
|
||||
pytest.param(
|
||||
pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
),
|
||||
pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
|
||||
False,
|
||||
marks=not_implemented,
|
||||
),
|
||||
# true true true
|
||||
(
|
||||
pd.DataFrame({"A": [0, 1]}, index=["a", "b"]),
|
||||
pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
|
||||
True,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_merge(self, left, right, expected):
|
||||
result = pd.merge(left, right, left_index=True, right_index=True)
|
||||
assert result.flags.allows_duplicate_labels is expected
|
||||
|
||||
@not_implemented
|
||||
def test_groupby(self):
|
||||
# XXX: This is under tested
|
||||
# TODO:
|
||||
# - apply
|
||||
# - transform
|
||||
# - Should passing a grouper that disallows duplicates propagate?
|
||||
df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False)
|
||||
result = df.groupby([0, 0, 1]).agg("count")
|
||||
assert result.flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize("frame", [True, False])
|
||||
@not_implemented
|
||||
def test_window(self, frame):
|
||||
df = pd.Series(
|
||||
1,
|
||||
index=pd.date_range("2000", periods=12),
|
||||
name="A",
|
||||
allows_duplicate_labels=False,
|
||||
)
|
||||
if frame:
|
||||
df = df.to_frame()
|
||||
assert df.rolling(3).mean().flags.allows_duplicate_labels is False
|
||||
assert df.ewm(3).mean().flags.allows_duplicate_labels is False
|
||||
assert df.expanding(3).mean().flags.allows_duplicate_labels is False
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Raises
|
||||
|
||||
|
||||
class TestRaises:
|
||||
@pytest.mark.parametrize(
|
||||
"cls, axes",
|
||||
[
|
||||
(pd.Series, {"index": ["a", "a"], "dtype": float}),
|
||||
(pd.DataFrame, {"index": ["a", "a"]}),
|
||||
(pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}),
|
||||
(pd.DataFrame, {"columns": ["b", "b"]}),
|
||||
],
|
||||
)
|
||||
def test_set_flags_with_duplicates(self, cls, axes):
|
||||
result = cls(**axes)
|
||||
assert result.flags.allows_duplicate_labels is True
|
||||
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
cls(**axes).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
pd.Series(index=[0, 0], dtype=float),
|
||||
pd.DataFrame(index=[0, 0]),
|
||||
pd.DataFrame(columns=[0, 0]),
|
||||
],
|
||||
)
|
||||
def test_setting_allows_duplicate_labels_raises(self, data):
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
data.flags.allows_duplicate_labels = False
|
||||
|
||||
assert data.flags.allows_duplicate_labels is True
|
||||
|
||||
def test_series_raises(self):
|
||||
a = pd.Series(0, index=["a", "b"])
|
||||
b = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.concat([a, b])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"getter, target",
|
||||
[
|
||||
(operator.itemgetter(["A", "A"]), None),
|
||||
# loc
|
||||
(operator.itemgetter(["a", "a"]), "loc"),
|
||||
pytest.param(operator.itemgetter(("a", ["A", "A"])), "loc"),
|
||||
(operator.itemgetter((["a", "a"], "A")), "loc"),
|
||||
# iloc
|
||||
(operator.itemgetter([0, 0]), "iloc"),
|
||||
pytest.param(operator.itemgetter((0, [0, 0])), "iloc"),
|
||||
pytest.param(operator.itemgetter(([0, 0], 0)), "iloc"),
|
||||
],
|
||||
)
|
||||
def test_getitem_raises(self, getter, target):
|
||||
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
if target:
|
||||
# df, df.loc, or df.iloc
|
||||
target = getattr(df, target)
|
||||
else:
|
||||
target = df
|
||||
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
getter(target)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objs, kwargs",
|
||||
[
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=[0, 1], name="a"),
|
||||
pd.Series(2, index=[0, 1], name="a"),
|
||||
],
|
||||
{"axis": 1},
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_concat_raises(self, objs, kwargs):
|
||||
objs = [x.set_flags(allows_duplicate_labels=False) for x in objs]
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.concat(objs, **kwargs)
|
||||
|
||||
@not_implemented
|
||||
def test_merge_raises(self):
|
||||
a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
b = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"])
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.merge(a, b, left_index=True, right_index=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx",
|
||||
[
|
||||
pd.Index([1, 1]),
|
||||
pd.Index(["a", "a"]),
|
||||
pd.Index([1.1, 1.1]),
|
||||
pd.PeriodIndex([pd.Period("2000", "D")] * 2),
|
||||
pd.DatetimeIndex([pd.Timestamp("2000")] * 2),
|
||||
pd.TimedeltaIndex([pd.Timedelta("1D")] * 2),
|
||||
pd.CategoricalIndex(["a", "a"]),
|
||||
pd.IntervalIndex([pd.Interval(0, 1)] * 2),
|
||||
pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]),
|
||||
],
|
||||
ids=lambda x: type(x).__name__,
|
||||
)
|
||||
def test_raises_basic(idx):
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.DataFrame({"A": [1, 1]}, index=idx).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
|
||||
def test_format_duplicate_labels_message():
|
||||
idx = pd.Index(["a", "b", "a", "b", "c"])
|
||||
result = idx._format_duplicate_message()
|
||||
expected = pd.DataFrame(
|
||||
{"positions": [[0, 2], [1, 3]]}, index=pd.Index(["a", "b"], name="label")
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_format_duplicate_labels_message_multi():
|
||||
idx = pd.MultiIndex.from_product([["A"], ["a", "b", "a", "b", "c"]])
|
||||
result = idx._format_duplicate_message()
|
||||
expected = pd.DataFrame(
|
||||
{"positions": [[0, 2], [1, 3]]},
|
||||
index=pd.MultiIndex.from_product([["A"], ["a", "b"]]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_dataframe_insert_raises():
|
||||
df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
|
||||
msg = "Cannot specify"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.insert(0, "A", [3, 4], allow_duplicates=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, frame_only",
|
||||
[
|
||||
(operator.methodcaller("set_index", "A", inplace=True), True),
|
||||
(operator.methodcaller("reset_index", inplace=True), True),
|
||||
(operator.methodcaller("rename", lambda x: x, inplace=True), False),
|
||||
],
|
||||
)
|
||||
def test_inplace_raises(method, frame_only):
|
||||
df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
s = df["A"]
|
||||
s.flags.allows_duplicate_labels = False
|
||||
msg = "Cannot specify"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
method(df)
|
||||
if not frame_only:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
method(s)
|
||||
|
||||
|
||||
def test_pickle():
|
||||
a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False)
|
||||
b = tm.round_trip_pickle(a)
|
||||
tm.assert_series_equal(a, b)
|
||||
|
||||
a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False)
|
||||
b = tm.round_trip_pickle(a)
|
||||
tm.assert_frame_equal(a, b)
|
@ -0,0 +1,767 @@
|
||||
"""
|
||||
An exhaustive list of pandas methods exercising NDFrame.__finalize__.
|
||||
"""
|
||||
import operator
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
# TODO:
|
||||
# * Binary methods (mul, div, etc.)
|
||||
# * Binary outputs (align, etc.)
|
||||
# * top-level methods (concat, merge, get_dummies, etc.)
|
||||
# * window
|
||||
# * cumulative reductions
|
||||
|
||||
not_implemented_mark = pytest.mark.xfail(reason="not implemented")
|
||||
|
||||
mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"])
|
||||
|
||||
frame_data = ({"A": [1]},)
|
||||
frame_mi_data = ({"A": [1, 2, 3, 4]}, mi)
|
||||
|
||||
|
||||
# Tuple of
|
||||
# - Callable: Constructor (Series, DataFrame)
|
||||
# - Tuple: Constructor args
|
||||
# - Callable: pass the constructed value with attrs set to this.
|
||||
|
||||
_all_methods = [
|
||||
(pd.Series, ([0],), operator.methodcaller("take", [])),
|
||||
(pd.Series, ([0],), operator.methodcaller("__getitem__", [True])),
|
||||
(pd.Series, ([0],), operator.methodcaller("repeat", 2)),
|
||||
(pd.Series, ([0],), operator.methodcaller("reset_index")),
|
||||
(pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)),
|
||||
(pd.Series, ([0],), operator.methodcaller("to_frame")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("duplicated")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("round")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("rename", "name")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("drop", [0])),
|
||||
(pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("shift")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("isna")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("isnull")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("notna")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("notnull")),
|
||||
(pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))),
|
||||
# TODO: mul, div, etc.
|
||||
(
|
||||
pd.Series,
|
||||
([0], pd.period_range("2000", periods=1)),
|
||||
operator.methodcaller("to_timestamp"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
([0], pd.date_range("2000", periods=1)),
|
||||
operator.methodcaller("to_period"),
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("dot", pd.DataFrame(index=["A"])),
|
||||
),
|
||||
marks=pytest.mark.xfail(reason="Implement binary finalize"),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("transpose")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))),
|
||||
(pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("reset_index")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("isna")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("isnull")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("notna")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("notnull")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("dropna")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("duplicated")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sort_index")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("add", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
# TODO: div, mul, etc.
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("combine_first", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("update", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
marks=not_implemented_mark,
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1], "B": [1]},),
|
||||
operator.methodcaller("pivot_table", columns="A"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1], "B": [1]},),
|
||||
operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("stack")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("explode", "A")),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},),
|
||||
operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)),
|
||||
pytest.param(
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
|
||||
),
|
||||
marks=not_implemented_mark,
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("corr")),
|
||||
pytest.param(
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cov")),
|
||||
marks=[
|
||||
pytest.mark.filterwarnings("ignore::RuntimeWarning"),
|
||||
],
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("corrwith", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("count")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("nunique")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("idxmin")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("idxmax")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("mode")),
|
||||
(pd.Series, [0], operator.methodcaller("mode")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("median")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("quantile", numeric_only=True),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},),
|
||||
operator.methodcaller("quantile", numeric_only=False),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [np.datetime64("2022-01-01"), np.datetime64("2022-01-02")]},),
|
||||
operator.methodcaller("quantile", numeric_only=True),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1]}, [pd.Period("2000", "D")]),
|
||||
operator.methodcaller("to_timestamp"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1]}, [pd.Timestamp("2000")]),
|
||||
operator.methodcaller("to_period", freq="D"),
|
||||
),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_mi_data,
|
||||
operator.methodcaller("isin", pd.DataFrame({"A": [1]})),
|
||||
),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("pop", "A")),
|
||||
# Squeeze on columns, otherwise we'll end up with a scalar
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("squeeze")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")),
|
||||
# Unary ops
|
||||
(pd.DataFrame, frame_data, operator.neg),
|
||||
(pd.Series, [1], operator.neg),
|
||||
(pd.DataFrame, frame_data, operator.pos),
|
||||
(pd.Series, [1], operator.pos),
|
||||
(pd.DataFrame, frame_data, operator.inv),
|
||||
(pd.Series, [1], operator.inv),
|
||||
(pd.DataFrame, frame_data, abs),
|
||||
(pd.Series, [1], abs),
|
||||
(pd.DataFrame, frame_data, round),
|
||||
(pd.Series, [1], round),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")),
|
||||
(pd.Series, (1, mi), operator.methodcaller("xs", "a")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("get", "A")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
frame_data,
|
||||
operator.methodcaller("reindex_like", pd.Series([0, 1, 2])),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")),
|
||||
(pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")),
|
||||
(pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")),
|
||||
(pd.Series, ([3, 2],), operator.methodcaller("sort_values")),
|
||||
(pd.Series, ([1] * 10,), operator.methodcaller("head")),
|
||||
(pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")),
|
||||
(pd.Series, ([1] * 10,), operator.methodcaller("tail")),
|
||||
(pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)),
|
||||
(pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("astype", float)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("astype", float)),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("copy")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("copy")),
|
||||
(pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": np.array([1, 2], dtype=object)},),
|
||||
operator.methodcaller("infer_objects"),
|
||||
),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")),
|
||||
(pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")),
|
||||
(pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("clip", lower=1)),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("asfreq", "h"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("asfreq", "h"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("at_time", "12:00"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("at_time", "12:00"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("between_time", "12:00", "13:00"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("between_time", "12:00", "13:00"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("last", "3D"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("last", "3D"),
|
||||
),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("rank")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rank")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4, tz="UTC")),
|
||||
operator.methodcaller("tz_convert", "CET"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")),
|
||||
operator.methodcaller("tz_convert", "CET"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("tz_localize", "CET"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("tz_localize", "CET"),
|
||||
),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("describe")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("describe")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("pct_change")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("pct_change")),
|
||||
(pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_mi_data,
|
||||
operator.methodcaller("transform", lambda x: x - x.min()),
|
||||
),
|
||||
(pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)),
|
||||
# Cumulative reductions
|
||||
(pd.Series, ([1],), operator.methodcaller("cumsum")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cumsum")),
|
||||
(pd.Series, ([1],), operator.methodcaller("cummin")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cummin")),
|
||||
(pd.Series, ([1],), operator.methodcaller("cummax")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cummax")),
|
||||
(pd.Series, ([1],), operator.methodcaller("cumprod")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cumprod")),
|
||||
# Reductions
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("any")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("all")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("min")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("max")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sum")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("std")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("mean")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("prod")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sem")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("skew")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("kurt")),
|
||||
]
|
||||
|
||||
|
||||
def idfn(x):
|
||||
xpr = re.compile(r"'(.*)?'")
|
||||
m = xpr.search(str(x))
|
||||
if m:
|
||||
return m.group(1)
|
||||
else:
|
||||
return str(x)
|
||||
|
||||
|
||||
@pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1]))
|
||||
def ndframe_method(request):
|
||||
"""
|
||||
An NDFrame method returning an NDFrame.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning",
|
||||
"ignore:last is deprecated:FutureWarning",
|
||||
)
|
||||
def test_finalize_called(ndframe_method):
|
||||
cls, init_args, method = ndframe_method
|
||||
ndframe = cls(*init_args)
|
||||
|
||||
ndframe.attrs = {"a": 1}
|
||||
result = method(ndframe)
|
||||
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
pd.Series(1, pd.date_range("2000", periods=4)),
|
||||
pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
],
|
||||
)
|
||||
def test_finalize_first(data):
|
||||
deprecated_msg = "first is deprecated"
|
||||
|
||||
data.attrs = {"a": 1}
|
||||
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
|
||||
result = data.first("3D")
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
pd.Series(1, pd.date_range("2000", periods=4)),
|
||||
pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
],
|
||||
)
|
||||
def test_finalize_last(data):
|
||||
# GH 53710
|
||||
deprecated_msg = "last is deprecated"
|
||||
|
||||
data.attrs = {"a": 1}
|
||||
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
|
||||
result = data.last("3D")
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@not_implemented_mark
|
||||
def test_finalize_called_eval_numexpr():
|
||||
pytest.importorskip("numexpr")
|
||||
df = pd.DataFrame({"A": [1, 2]})
|
||||
df.attrs["A"] = 1
|
||||
result = df.eval("A + 1", engine="numexpr")
|
||||
assert result.attrs == {"A": 1}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Binary operations
|
||||
|
||||
|
||||
@pytest.mark.parametrize("annotate", ["left", "right", "both"])
|
||||
@pytest.mark.parametrize(
|
||||
"args",
|
||||
[
|
||||
(1, pd.Series([1])),
|
||||
(1, pd.DataFrame({"A": [1]})),
|
||||
(pd.Series([1]), 1),
|
||||
(pd.DataFrame({"A": [1]}), 1),
|
||||
(pd.Series([1]), pd.Series([1])),
|
||||
(pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})),
|
||||
(pd.Series([1]), pd.DataFrame({"A": [1]})),
|
||||
(pd.DataFrame({"A": [1]}), pd.Series([1])),
|
||||
],
|
||||
ids=lambda x: f"({type(x[0]).__name__},{type(x[1]).__name__})",
|
||||
)
|
||||
def test_binops(request, args, annotate, all_binary_operators):
|
||||
# This generates 624 tests... Is that needed?
|
||||
left, right = args
|
||||
if isinstance(left, (pd.DataFrame, pd.Series)):
|
||||
left.attrs = {}
|
||||
if isinstance(right, (pd.DataFrame, pd.Series)):
|
||||
right.attrs = {}
|
||||
|
||||
if annotate == "left" and isinstance(left, int):
|
||||
pytest.skip("left is an int and doesn't support .attrs")
|
||||
if annotate == "right" and isinstance(right, int):
|
||||
pytest.skip("right is an int and doesn't support .attrs")
|
||||
|
||||
if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both":
|
||||
if not all_binary_operators.__name__.startswith("r"):
|
||||
if annotate == "right" and isinstance(left, type(right)):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when right has "
|
||||
f"attrs and both are {type(left)}"
|
||||
)
|
||||
)
|
||||
if not isinstance(left, type(right)):
|
||||
if annotate == "left" and isinstance(left, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
elif annotate == "right" and isinstance(right, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
else:
|
||||
if annotate == "left" and isinstance(left, type(right)):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when left has "
|
||||
f"attrs and both are {type(left)}"
|
||||
)
|
||||
)
|
||||
if not isinstance(left, type(right)):
|
||||
if annotate == "right" and isinstance(right, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
elif annotate == "left" and isinstance(left, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
if annotate in {"left", "both"} and not isinstance(left, int):
|
||||
left.attrs = {"a": 1}
|
||||
if annotate in {"right", "both"} and not isinstance(right, int):
|
||||
right.attrs = {"a": 1}
|
||||
|
||||
is_cmp = all_binary_operators in [
|
||||
operator.eq,
|
||||
operator.ne,
|
||||
operator.gt,
|
||||
operator.ge,
|
||||
operator.lt,
|
||||
operator.le,
|
||||
]
|
||||
if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
|
||||
# in 2.0 silent alignment on comparisons was removed xref GH#28759
|
||||
left, right = left.align(right, axis=1, copy=False)
|
||||
elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
|
||||
right, left = right.align(left, axis=1, copy=False)
|
||||
|
||||
result = all_binary_operators(left, right)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Accessors
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("capitalize"),
|
||||
operator.methodcaller("casefold"),
|
||||
operator.methodcaller("cat", ["a"]),
|
||||
operator.methodcaller("contains", "a"),
|
||||
operator.methodcaller("count", "a"),
|
||||
operator.methodcaller("encode", "utf-8"),
|
||||
operator.methodcaller("endswith", "a"),
|
||||
operator.methodcaller("extract", r"(\w)(\d)"),
|
||||
operator.methodcaller("extract", r"(\w)(\d)", expand=False),
|
||||
operator.methodcaller("find", "a"),
|
||||
operator.methodcaller("findall", "a"),
|
||||
operator.methodcaller("get", 0),
|
||||
operator.methodcaller("index", "a"),
|
||||
operator.methodcaller("len"),
|
||||
operator.methodcaller("ljust", 4),
|
||||
operator.methodcaller("lower"),
|
||||
operator.methodcaller("lstrip"),
|
||||
operator.methodcaller("match", r"\w"),
|
||||
operator.methodcaller("normalize", "NFC"),
|
||||
operator.methodcaller("pad", 4),
|
||||
operator.methodcaller("partition", "a"),
|
||||
operator.methodcaller("repeat", 2),
|
||||
operator.methodcaller("replace", "a", "b"),
|
||||
operator.methodcaller("rfind", "a"),
|
||||
operator.methodcaller("rindex", "a"),
|
||||
operator.methodcaller("rjust", 4),
|
||||
operator.methodcaller("rpartition", "a"),
|
||||
operator.methodcaller("rstrip"),
|
||||
operator.methodcaller("slice", 4),
|
||||
operator.methodcaller("slice_replace", 1, repl="a"),
|
||||
operator.methodcaller("startswith", "a"),
|
||||
operator.methodcaller("strip"),
|
||||
operator.methodcaller("swapcase"),
|
||||
operator.methodcaller("translate", {"a": "b"}),
|
||||
operator.methodcaller("upper"),
|
||||
operator.methodcaller("wrap", 4),
|
||||
operator.methodcaller("zfill", 4),
|
||||
operator.methodcaller("isalnum"),
|
||||
operator.methodcaller("isalpha"),
|
||||
operator.methodcaller("isdigit"),
|
||||
operator.methodcaller("isspace"),
|
||||
operator.methodcaller("islower"),
|
||||
operator.methodcaller("isupper"),
|
||||
operator.methodcaller("istitle"),
|
||||
operator.methodcaller("isnumeric"),
|
||||
operator.methodcaller("isdecimal"),
|
||||
operator.methodcaller("get_dummies"),
|
||||
],
|
||||
ids=idfn,
|
||||
)
|
||||
def test_string_method(method):
|
||||
s = pd.Series(["a1"])
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.str)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("to_period"),
|
||||
operator.methodcaller("tz_localize", "CET"),
|
||||
operator.methodcaller("normalize"),
|
||||
operator.methodcaller("strftime", "%Y"),
|
||||
operator.methodcaller("round", "h"),
|
||||
operator.methodcaller("floor", "h"),
|
||||
operator.methodcaller("ceil", "h"),
|
||||
operator.methodcaller("month_name"),
|
||||
operator.methodcaller("day_name"),
|
||||
],
|
||||
ids=idfn,
|
||||
)
|
||||
def test_datetime_method(method):
|
||||
s = pd.Series(pd.date_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.dt)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"attr",
|
||||
[
|
||||
"date",
|
||||
"time",
|
||||
"timetz",
|
||||
"year",
|
||||
"month",
|
||||
"day",
|
||||
"hour",
|
||||
"minute",
|
||||
"second",
|
||||
"microsecond",
|
||||
"nanosecond",
|
||||
"dayofweek",
|
||||
"day_of_week",
|
||||
"dayofyear",
|
||||
"day_of_year",
|
||||
"quarter",
|
||||
"is_month_start",
|
||||
"is_month_end",
|
||||
"is_quarter_start",
|
||||
"is_quarter_end",
|
||||
"is_year_start",
|
||||
"is_year_end",
|
||||
"is_leap_year",
|
||||
"daysinmonth",
|
||||
"days_in_month",
|
||||
],
|
||||
)
|
||||
def test_datetime_property(attr):
|
||||
s = pd.Series(pd.date_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = getattr(s.dt, attr)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"attr", ["days", "seconds", "microseconds", "nanoseconds", "components"]
|
||||
)
|
||||
def test_timedelta_property(attr):
|
||||
s = pd.Series(pd.timedelta_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = getattr(s.dt, attr)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")])
|
||||
def test_timedelta_methods(method):
|
||||
s = pd.Series(pd.timedelta_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.dt)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("add_categories", ["c"]),
|
||||
operator.methodcaller("as_ordered"),
|
||||
operator.methodcaller("as_unordered"),
|
||||
lambda x: getattr(x, "codes"),
|
||||
operator.methodcaller("remove_categories", "a"),
|
||||
operator.methodcaller("remove_unused_categories"),
|
||||
operator.methodcaller("rename_categories", {"a": "A", "b": "B"}),
|
||||
operator.methodcaller("reorder_categories", ["b", "a"]),
|
||||
operator.methodcaller("set_categories", ["A", "B"]),
|
||||
],
|
||||
)
|
||||
@not_implemented_mark
|
||||
def test_categorical_accessor(method):
|
||||
s = pd.Series(["a", "b"], dtype="category")
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.cat)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Groupby
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("sum"),
|
||||
lambda x: x.apply(lambda y: y),
|
||||
lambda x: x.agg("sum"),
|
||||
lambda x: x.agg("mean"),
|
||||
lambda x: x.agg("median"),
|
||||
],
|
||||
)
|
||||
def test_groupby_finalize(obj, method):
|
||||
obj.attrs = {"a": 1}
|
||||
result = method(obj.groupby([0, 0], group_keys=False))
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda x: x.agg(["sum", "count"]),
|
||||
lambda x: x.agg("std"),
|
||||
lambda x: x.agg("var"),
|
||||
lambda x: x.agg("sem"),
|
||||
lambda x: x.agg("size"),
|
||||
lambda x: x.agg("ohlc"),
|
||||
],
|
||||
)
|
||||
@not_implemented_mark
|
||||
def test_groupby_finalize_not_implemented(obj, method):
|
||||
obj.attrs = {"a": 1}
|
||||
result = method(obj.groupby([0, 0]))
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
def test_finalize_frame_series_name():
|
||||
# https://github.com/pandas-dev/pandas/pull/37186/files#r506978889
|
||||
# ensure we don't copy the column `name` to the Series.
|
||||
df = pd.DataFrame({"name": [1, 2]})
|
||||
result = pd.Series([1, 2]).__finalize__(df)
|
||||
assert result.name is None
|
@ -0,0 +1,209 @@
|
||||
from copy import deepcopy
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrame:
|
||||
@pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"])
|
||||
def test_set_axis_name(self, func):
|
||||
df = DataFrame([[1, 2], [3, 4]])
|
||||
|
||||
result = methodcaller(func, "foo")(df)
|
||||
assert df.index.name is None
|
||||
assert result.index.name == "foo"
|
||||
|
||||
result = methodcaller(func, "cols", axis=1)(df)
|
||||
assert df.columns.name is None
|
||||
assert result.columns.name == "cols"
|
||||
|
||||
@pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"])
|
||||
def test_set_axis_name_mi(self, func):
|
||||
df = DataFrame(
|
||||
np.empty((3, 3)),
|
||||
index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]),
|
||||
columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]),
|
||||
)
|
||||
|
||||
level_names = ["L1", "L2"]
|
||||
|
||||
result = methodcaller(func, level_names)(df)
|
||||
assert result.index.names == level_names
|
||||
assert result.columns.names == [None, None]
|
||||
|
||||
result = methodcaller(func, level_names, axis=1)(df)
|
||||
assert result.columns.names == ["L1", "L2"]
|
||||
assert result.index.names == [None, None]
|
||||
|
||||
def test_nonzero_single_element(self):
|
||||
# allow single item via bool method
|
||||
msg_warn = (
|
||||
"DataFrame.bool is now deprecated and will be removed "
|
||||
"in future version of pandas"
|
||||
)
|
||||
df = DataFrame([[True]])
|
||||
df1 = DataFrame([[False]])
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg_warn):
|
||||
assert df.bool()
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg_warn):
|
||||
assert not df1.bool()
|
||||
|
||||
df = DataFrame([[False, False]])
|
||||
msg_err = "The truth value of a DataFrame is ambiguous"
|
||||
with pytest.raises(ValueError, match=msg_err):
|
||||
bool(df)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg_warn):
|
||||
with pytest.raises(ValueError, match=msg_err):
|
||||
df.bool()
|
||||
|
||||
def test_metadata_propagation_indiv_groupby(self):
|
||||
# groupby
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
|
||||
"B": ["one", "one", "two", "three", "two", "two", "one", "three"],
|
||||
"C": np.random.default_rng(2).standard_normal(8),
|
||||
"D": np.random.default_rng(2).standard_normal(8),
|
||||
}
|
||||
)
|
||||
result = df.groupby("A").sum()
|
||||
tm.assert_metadata_equivalent(df, result)
|
||||
|
||||
def test_metadata_propagation_indiv_resample(self):
|
||||
# resample
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((1000, 2)),
|
||||
index=date_range("20130101", periods=1000, freq="s"),
|
||||
)
|
||||
result = df.resample("1min")
|
||||
tm.assert_metadata_equivalent(df, result)
|
||||
|
||||
def test_metadata_propagation_indiv(self, monkeypatch):
|
||||
# merging with override
|
||||
# GH 6923
|
||||
|
||||
def finalize(self, other, method=None, **kwargs):
|
||||
for name in self._metadata:
|
||||
if method == "merge":
|
||||
left, right = other.left, other.right
|
||||
value = getattr(left, name, "") + "|" + getattr(right, name, "")
|
||||
object.__setattr__(self, name, value)
|
||||
elif method == "concat":
|
||||
value = "+".join(
|
||||
[getattr(o, name) for o in other.objs if getattr(o, name, None)]
|
||||
)
|
||||
object.__setattr__(self, name, value)
|
||||
else:
|
||||
object.__setattr__(self, name, getattr(other, name, ""))
|
||||
|
||||
return self
|
||||
|
||||
with monkeypatch.context() as m:
|
||||
m.setattr(DataFrame, "_metadata", ["filename"])
|
||||
m.setattr(DataFrame, "__finalize__", finalize)
|
||||
|
||||
df1 = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["a", "b"]
|
||||
)
|
||||
df2 = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["c", "d"]
|
||||
)
|
||||
DataFrame._metadata = ["filename"]
|
||||
df1.filename = "fname1.csv"
|
||||
df2.filename = "fname2.csv"
|
||||
|
||||
result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner")
|
||||
assert result.filename == "fname1.csv|fname2.csv"
|
||||
|
||||
# concat
|
||||
# GH#6927
|
||||
df1 = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 4, (3, 2)), columns=list("ab")
|
||||
)
|
||||
df1.filename = "foo"
|
||||
|
||||
result = pd.concat([df1, df1])
|
||||
assert result.filename == "foo+foo"
|
||||
|
||||
def test_set_attribute(self):
|
||||
# Test for consistent setattr behavior when an attribute and a column
|
||||
# have the same name (Issue #8994)
|
||||
df = DataFrame({"x": [1, 2, 3]})
|
||||
|
||||
df.y = 2
|
||||
df["y"] = [2, 4, 6]
|
||||
df.y = 5
|
||||
|
||||
assert df.y == 5
|
||||
tm.assert_series_equal(df["y"], Series([2, 4, 6], name="y"))
|
||||
|
||||
def test_deepcopy_empty(self):
|
||||
# This test covers empty frame copying with non-empty column sets
|
||||
# as reported in issue GH15370
|
||||
empty_frame = DataFrame(data=[], index=[], columns=["A"])
|
||||
empty_frame_copy = deepcopy(empty_frame)
|
||||
|
||||
tm.assert_frame_equal(empty_frame_copy, empty_frame)
|
||||
|
||||
|
||||
# formerly in Generic but only test DataFrame
|
||||
class TestDataFrame2:
|
||||
@pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
|
||||
def test_validate_bool_args(self, value):
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
|
||||
msg = 'For argument "inplace" expected type bool, received type'
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().rename_axis(mapper={"a": "x", "b": "y"}, axis=1, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().drop("a", axis=1, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().fillna(value=0, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().replace(to_replace=1, value=7, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().interpolate(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy()._where(cond=df.a > 2, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().mask(cond=df.a > 2, inplace=value)
|
||||
|
||||
def test_unexpected_keyword(self):
|
||||
# GH8597
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((5, 2)), columns=["jim", "joe"]
|
||||
)
|
||||
ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
|
||||
ts = df["joe"].copy()
|
||||
ts[2] = np.nan
|
||||
|
||||
msg = "unexpected keyword"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.drop("joe", axis=1, in_place=True)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.reindex([1, 0], inplace=True)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ca.fillna(0, inplace=True)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ts.fillna(0, in_place=True)
|
@ -0,0 +1,504 @@
|
||||
from copy import (
|
||||
copy,
|
||||
deepcopy,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Generic types test cases
|
||||
|
||||
|
||||
def construct(box, shape, value=None, dtype=None, **kwargs):
|
||||
"""
|
||||
construct an object for the given shape
|
||||
if value is specified use that if its a scalar
|
||||
if value is an array, repeat it as needed
|
||||
"""
|
||||
if isinstance(shape, int):
|
||||
shape = tuple([shape] * box._AXIS_LEN)
|
||||
if value is not None:
|
||||
if is_scalar(value):
|
||||
if value == "empty":
|
||||
arr = None
|
||||
dtype = np.float64
|
||||
|
||||
# remove the info axis
|
||||
kwargs.pop(box._info_axis_name, None)
|
||||
else:
|
||||
arr = np.empty(shape, dtype=dtype)
|
||||
arr.fill(value)
|
||||
else:
|
||||
fshape = np.prod(shape)
|
||||
arr = value.ravel()
|
||||
new_shape = fshape / arr.shape[0]
|
||||
if fshape % arr.shape[0] != 0:
|
||||
raise Exception("invalid value passed in construct")
|
||||
|
||||
arr = np.repeat(arr, new_shape).reshape(shape)
|
||||
else:
|
||||
arr = np.random.default_rng(2).standard_normal(shape)
|
||||
return box(arr, dtype=dtype, **kwargs)
|
||||
|
||||
|
||||
class TestGeneric:
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
str.lower,
|
||||
{x: x.lower() for x in list("ABCD")},
|
||||
Series({x: x.lower() for x in list("ABCD")}),
|
||||
],
|
||||
)
|
||||
def test_rename(self, frame_or_series, func):
|
||||
# single axis
|
||||
idx = list("ABCD")
|
||||
|
||||
for axis in frame_or_series._AXIS_ORDERS:
|
||||
kwargs = {axis: idx}
|
||||
obj = construct(frame_or_series, 4, **kwargs)
|
||||
|
||||
# rename a single axis
|
||||
result = obj.rename(**{axis: func})
|
||||
expected = obj.copy()
|
||||
setattr(expected, axis, list("abcd"))
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_get_numeric_data(self, frame_or_series):
|
||||
n = 4
|
||||
kwargs = {
|
||||
frame_or_series._get_axis_name(i): list(range(n))
|
||||
for i in range(frame_or_series._AXIS_LEN)
|
||||
}
|
||||
|
||||
# get the numeric data
|
||||
o = construct(frame_or_series, n, **kwargs)
|
||||
result = o._get_numeric_data()
|
||||
tm.assert_equal(result, o)
|
||||
|
||||
# non-inclusion
|
||||
result = o._get_bool_data()
|
||||
expected = construct(frame_or_series, n, value="empty", **kwargs)
|
||||
if isinstance(o, DataFrame):
|
||||
# preserve columns dtype
|
||||
expected.columns = o.columns[:0]
|
||||
# https://github.com/pandas-dev/pandas/issues/50862
|
||||
tm.assert_equal(result.reset_index(drop=True), expected)
|
||||
|
||||
# get the bool data
|
||||
arr = np.array([True, True, False, True])
|
||||
o = construct(frame_or_series, n, value=arr, **kwargs)
|
||||
result = o._get_numeric_data()
|
||||
tm.assert_equal(result, o)
|
||||
|
||||
def test_nonzero(self, frame_or_series):
|
||||
# GH 4633
|
||||
# look at the boolean/nonzero behavior for objects
|
||||
obj = construct(frame_or_series, shape=4)
|
||||
msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
obj = construct(frame_or_series, shape=4, value=1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
obj = construct(frame_or_series, shape=4, value=np.nan)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
# empty
|
||||
obj = construct(frame_or_series, shape=0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
# invalid behaviors
|
||||
|
||||
obj1 = construct(frame_or_series, shape=4, value=1)
|
||||
obj2 = construct(frame_or_series, shape=4, value=1)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
if obj1:
|
||||
pass
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
obj1 and obj2
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
obj1 or obj2
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
not obj1
|
||||
|
||||
def test_frame_or_series_compound_dtypes(self, frame_or_series):
|
||||
# see gh-5191
|
||||
# Compound dtypes should raise NotImplementedError.
|
||||
|
||||
def f(dtype):
|
||||
return construct(frame_or_series, shape=3, value=1, dtype=dtype)
|
||||
|
||||
msg = (
|
||||
"compound dtypes are not implemented "
|
||||
f"in the {frame_or_series.__name__} constructor"
|
||||
)
|
||||
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
|
||||
|
||||
# these work (though results may be unexpected)
|
||||
f("int64")
|
||||
f("float64")
|
||||
f("M8[ns]")

    def test_metadata_propagation(self, frame_or_series):
        # check that the metadata matches up on the resulting ops

        o = construct(frame_or_series, shape=3)
        o.name = "foo"
        o2 = construct(frame_or_series, shape=3)
        o2.name = "bar"

        # ----------
        # preserving
        # ----------

        # simple ops with scalars
        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
            result = getattr(o, op)(1)
            tm.assert_metadata_equivalent(o, result)

        # ops with like
        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
            result = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, result)

        # simple boolean
        for op in ["__eq__", "__le__", "__ge__"]:
            v1 = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, v1)
            tm.assert_metadata_equivalent(o, v1 & v1)
            tm.assert_metadata_equivalent(o, v1 | v1)

        # combine_first
        result = o.combine_first(o2)
        tm.assert_metadata_equivalent(o, result)

        # ---------------------------
        # non-preserving (by default)
        # ---------------------------

        # add non-like
        result = o + o2
        tm.assert_metadata_equivalent(result)

        # simple boolean
        for op in ["__eq__", "__le__", "__ge__"]:
            # this is a name matching op
            v1 = getattr(o, op)(o)
            v2 = getattr(o, op)(o2)
            tm.assert_metadata_equivalent(v2)
            tm.assert_metadata_equivalent(v1 & v2)
            tm.assert_metadata_equivalent(v1 | v2)
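        # Name propagation runs through NDFrame.__finalize__, which copies the
        # attributes listed in _metadata (e.g. Series.name); ops between the
        # differently named "foo" and "bar" objects drop the name, which is why
        # assert_metadata_equivalent is called with only the result above.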

    def test_size_compat(self, frame_or_series):
        # GH8846
        # size property should be defined

        o = construct(frame_or_series, shape=10)
        assert o.size == np.prod(o.shape)
        assert o.size == 10 ** len(o.axes)
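        # construct() (the helper defined earlier in this module) builds the
        # object with the same length along every axis, so a DataFrame here is
        # 10x10 and size == 10 ** len(axes) holds for Series and DataFrame alike.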

    def test_split_compat(self, frame_or_series):
        # xref GH8846
        o = construct(frame_or_series, shape=10)
        with tm.assert_produces_warning(
            FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False
        ):
            assert len(np.array_split(o, 5)) == 5
            assert len(np.array_split(o, 2)) == 2

    # See gh-12301
    def test_stat_unexpected_keyword(self, frame_or_series):
        obj = construct(frame_or_series, 5)
        starwars = "Star Wars"
        errmsg = "unexpected keyword"

        with pytest.raises(TypeError, match=errmsg):
            obj.max(epic=starwars)  # stat_function
        with pytest.raises(TypeError, match=errmsg):
            obj.var(epic=starwars)  # stat_function_ddof
        with pytest.raises(TypeError, match=errmsg):
            obj.sum(epic=starwars)  # cum_function
        with pytest.raises(TypeError, match=errmsg):
            obj.any(epic=starwars)  # logical_function

    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
    def test_api_compat(self, func, frame_or_series):
        # GH 12021
        # compat for __name__, __qualname__

        obj = construct(frame_or_series, 5)
        f = getattr(obj, func)
        assert f.__name__ == func
        assert f.__qualname__.endswith(func)

    def test_stat_non_defaults_args(self, frame_or_series):
        obj = construct(frame_or_series, 5)
        out = np.array([0])
        errmsg = "the 'out' parameter is not supported"

        with pytest.raises(ValueError, match=errmsg):
            obj.max(out=out)  # stat_function
        with pytest.raises(ValueError, match=errmsg):
            obj.var(out=out)  # stat_function_ddof
        with pytest.raises(ValueError, match=errmsg):
            obj.sum(out=out)  # cum_function
        with pytest.raises(ValueError, match=errmsg):
            obj.any(out=out)  # logical_function

    def test_truncate_out_of_bounds(self, frame_or_series):
        # GH11382

        # small
        shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        small = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(small.truncate(), small)
        tm.assert_equal(small.truncate(before=0, after=3e3), small)
        tm.assert_equal(small.truncate(before=-1, after=2e3), small)

        # big
        shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        big = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(big.truncate(), big)
        tm.assert_equal(big.truncate(before=0, after=3e6), big)
        tm.assert_equal(big.truncate(before=-1, after=2e6), big)

    @pytest.mark.parametrize(
        "func",
        [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
    )
    @pytest.mark.parametrize("shape", [0, 1, 2])
    def test_copy_and_deepcopy(self, frame_or_series, shape, func):
        # GH 15444
        obj = construct(frame_or_series, shape)
        obj_copy = func(obj)
        assert obj_copy is not obj
        tm.assert_equal(obj_copy, obj)

    def test_data_deprecated(self, frame_or_series):
        obj = frame_or_series()
        msg = "(Series|DataFrame)._data is deprecated"
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            mgr = obj._data
        assert mgr is obj._mgr


class TestNDFrame:
    # tests that don't fit elsewhere

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_squeeze_series_noop(self, ser):
        # noop
        tm.assert_series_equal(ser.squeeze(), ser)

    def test_squeeze_frame_noop(self):
        # noop
        df = DataFrame(np.eye(2))
        tm.assert_frame_equal(df.squeeze(), df)

    def test_squeeze_frame_reindex(self):
        # squeezing
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        ).reindex(columns=["A"])
        tm.assert_series_equal(df.squeeze(), df["A"])

    def test_squeeze_0_len_dim(self):
        # don't fail with 0 length dimensions GH11229 & GH8999
        empty_series = Series([], name="five", dtype=np.float64)
        empty_frame = DataFrame([empty_series])
        tm.assert_series_equal(empty_series, empty_series.squeeze())
        tm.assert_series_equal(empty_series, empty_frame.squeeze())

    def test_squeeze_axis(self):
        # axis argument
        df = DataFrame(
            np.random.default_rng(2).standard_normal((1, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=1, freq="B"),
        ).iloc[:, :1]
        assert df.shape == (1, 1)
        tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
        tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
        assert df.squeeze() == df.iloc[0, 0]
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis=2)
        msg = "No axis named x for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis="x")

    def test_squeeze_axis_len_3(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=3, freq="B"),
        )
        tm.assert_frame_equal(df.squeeze(axis=0), df)
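        # squeeze only collapses axes of length 1; with three rows nothing is
        # dropped and the frame comes back unchanged.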

    def test_numpy_squeeze(self):
        s = Series(range(2), dtype=np.float64)
        tm.assert_series_equal(np.squeeze(s), s)

        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        ).reindex(columns=["A"])
        tm.assert_series_equal(np.squeeze(df), df["A"])

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_transpose_series(self, ser):
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    def test_transpose_frame(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        tm.assert_frame_equal(df.transpose().transpose(), df)

    def test_numpy_transpose(self, frame_or_series):
        obj = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        obj = tm.get_obj(obj, frame_or_series)

        if frame_or_series is Series:
            # 1D -> np.transpose is no-op
            tm.assert_series_equal(np.transpose(obj), obj)

        # round-trip preserved
        tm.assert_equal(np.transpose(np.transpose(obj)), obj)

        msg = "the 'axes' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.transpose(obj, axes=1)

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_take_series(self, ser):
        indices = [1, 5, -2, 6, 3, -1]
        out = ser.take(indices)
        expected = Series(
            data=ser.values.take(indices),
            index=ser.index.take(indices),
            dtype=ser.dtype,
        )
        tm.assert_series_equal(out, expected)

    def test_take_frame(self):
        indices = [1, 5, -2, 6, 3, -1]
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        out = df.take(indices)
        expected = DataFrame(
            data=df.values.take(indices, axis=0),
            index=df.index.take(indices),
            columns=df.columns,
        )
        tm.assert_frame_equal(out, expected)

    def test_take_invalid_kwargs(self, frame_or_series):
        indices = [-3, 2, 0, 1]

        obj = DataFrame(range(5))
        obj = tm.get_obj(obj, frame_or_series)

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            obj.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, mode="clip")

    def test_axis_classmethods(self, frame_or_series):
        box = frame_or_series
        obj = box(dtype=object)
        values = box._AXIS_TO_AXIS_NUMBER.keys()
        for v in values:
            assert obj._get_axis_number(v) == box._get_axis_number(v)
            assert obj._get_axis_name(v) == box._get_axis_name(v)
            assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)

    def test_flags_identity(self, frame_or_series):
        obj = Series([1, 2])
        if frame_or_series is DataFrame:
            obj = obj.to_frame()

        assert obj.flags is obj.flags
        obj2 = obj.copy()
        assert obj2.flags is not obj.flags

    def test_bool_dep(self) -> None:
        # GH-51749
        msg_warn = (
            "DataFrame.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            DataFrame({"col": [False]}).bool()
@ -0,0 +1,336 @@
import pytest

from pandas.core.dtypes.missing import array_equivalent

import pandas as pd


# Fixtures
# ========
@pytest.fixture
def df():
    """DataFrame with columns 'L1', 'L2', and 'L3'"""
    return pd.DataFrame({"L1": [1, 2, 3], "L2": [11, 12, 13], "L3": ["A", "B", "C"]})


@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]])
def df_levels(request, df):
    """DataFrame with columns or index levels 'L1', 'L2', and 'L3'"""
    levels = request.param

    if levels:
        df = df.set_index(levels)

    return df


@pytest.fixture
def df_ambig(df):
    """DataFrame with levels 'L1' and 'L2' and labels 'L1' and 'L3'"""
    df = df.set_index(["L1", "L2"])

    df["L1"] = df["L3"]

    return df


@pytest.fixture
def df_duplabels(df):
    """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2'"""
    df = df.set_index(["L1"])
    df = pd.concat([df, df["L2"]], axis=1)

    return df


# Test is label/level reference
# =============================
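# Here a "label" is an entry of the off-axis (a column name for axis=0, an index
# entry for axis=1), while a "level" is a named level of the on-axis index itself;
# the helpers below assert that a key is recognized as one or the other.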
def get_labels_levels(df_levels):
    expected_labels = list(df_levels.columns)
    expected_levels = [name for name in df_levels.index.names if name is not None]
    return expected_labels, expected_levels


def assert_label_reference(frame, labels, axis):
    for label in labels:
        assert frame._is_label_reference(label, axis=axis)
        assert not frame._is_level_reference(label, axis=axis)
        assert frame._is_label_or_level_reference(label, axis=axis)


def assert_level_reference(frame, levels, axis):
    for level in levels:
        assert frame._is_level_reference(level, axis=axis)
        assert not frame._is_label_reference(level, axis=axis)
        assert frame._is_label_or_level_reference(level, axis=axis)


# DataFrame
# ---------
def test_is_level_or_label_reference_df_simple(df_levels, axis):
    axis = df_levels._get_axis_number(axis)
    # Compute expected labels and levels
    expected_labels, expected_levels = get_labels_levels(df_levels)

    # Transpose frame if axis == 1
    if axis == 1:
        df_levels = df_levels.T

    # Perform checks
    assert_level_reference(df_levels, expected_levels, axis=axis)
    assert_label_reference(df_levels, expected_labels, axis=axis)


def test_is_level_reference_df_ambig(df_ambig, axis):
    axis = df_ambig._get_axis_number(axis)

    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T

    # df has both an on-axis level and off-axis label named L1
    # Therefore L1 should reference the label, not the level
    assert_label_reference(df_ambig, ["L1"], axis=axis)

    # df has an on-axis level named L2 and it is not ambiguous
    # Therefore L2 is a level reference
    assert_level_reference(df_ambig, ["L2"], axis=axis)

    # df has a column named L3, and it is not a level reference
    assert_label_reference(df_ambig, ["L3"], axis=axis)


# Series
# ------
def test_is_level_reference_series_simple_axis0(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2
    assert_level_reference(s, ["L1"], axis=0)
    assert not s._is_level_reference("L2")

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    assert_level_reference(s, ["L1", "L2"], axis=0)
    assert not s._is_level_reference("L3")


def test_is_level_reference_series_axis1_error(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2

    with pytest.raises(ValueError, match="No axis named 1"):
        s._is_level_reference("L1", axis=1)


# Test _check_label_or_level_ambiguity_df
# =======================================


# DataFrame
# ---------
def test_check_label_or_level_ambiguity_df(df_ambig, axis):
    axis = df_ambig._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T
        msg = "'L1' is both a column level and an index label"

    else:
        msg = "'L1' is both an index level and a column label"
    # df_ambig has both an on-axis level and off-axis label named L1
    # Therefore, L1 is ambiguous.
    with pytest.raises(ValueError, match=msg):
        df_ambig._check_label_or_level_ambiguity("L1", axis=axis)

    # df_ambig has an on-axis level named L2, and it is not ambiguous.
    df_ambig._check_label_or_level_ambiguity("L2", axis=axis)

    # df_ambig has an off-axis label named L3, and it is not ambiguous
    assert not df_ambig._check_label_or_level_ambiguity("L3", axis=axis)
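    # _check_label_or_level_ambiguity raises only for ambiguous keys and returns
    # None otherwise, so the bare call for 'L2' and the falsy return for 'L3' are
    # the assertions here.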


# Series
# ------
def test_check_label_or_level_ambiguity_series(df):
    # A series has no columns and therefore references are never ambiguous

    # Make series with L1 as index
    s = df.set_index("L1").L2
    s._check_label_or_level_ambiguity("L1", axis=0)
    s._check_label_or_level_ambiguity("L2", axis=0)

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    s._check_label_or_level_ambiguity("L1", axis=0)
    s._check_label_or_level_ambiguity("L2", axis=0)
    s._check_label_or_level_ambiguity("L3", axis=0)


def test_check_label_or_level_ambiguity_series_axis1_error(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2

    with pytest.raises(ValueError, match="No axis named 1"):
        s._check_label_or_level_ambiguity("L1", axis=1)


# Test _get_label_or_level_values
# ===============================
def assert_label_values(frame, labels, axis):
    axis = frame._get_axis_number(axis)
    for label in labels:
        if axis == 0:
            expected = frame[label]._values
        else:
            expected = frame.loc[label]._values

        result = frame._get_label_or_level_values(label, axis=axis)
        assert array_equivalent(expected, result)


def assert_level_values(frame, levels, axis):
    axis = frame._get_axis_number(axis)
    for level in levels:
        if axis == 0:
            expected = frame.index.get_level_values(level=level)._values
        else:
            expected = frame.columns.get_level_values(level=level)._values

        result = frame._get_label_or_level_values(level, axis=axis)
        assert array_equivalent(expected, result)


# DataFrame
# ---------
def test_get_label_or_level_values_df_simple(df_levels, axis):
    # Compute expected labels and levels
    expected_labels, expected_levels = get_labels_levels(df_levels)

    axis = df_levels._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_levels = df_levels.T

    # Perform checks
    assert_label_values(df_levels, expected_labels, axis=axis)
    assert_level_values(df_levels, expected_levels, axis=axis)


def test_get_label_or_level_values_df_ambig(df_ambig, axis):
    axis = df_ambig._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T

    # df has an on-axis level named L2, and it is not ambiguous.
    assert_level_values(df_ambig, ["L2"], axis=axis)

    # df has an off-axis label named L3, and it is not ambiguous.
    assert_label_values(df_ambig, ["L3"], axis=axis)


def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
    axis = df_duplabels._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_duplabels = df_duplabels.T

    # df has unambiguous level 'L1'
    assert_level_values(df_duplabels, ["L1"], axis=axis)

    # df has unique label 'L3'
    assert_label_values(df_duplabels, ["L3"], axis=axis)

    # df has duplicate labels 'L2'
    if axis == 0:
        expected_msg = "The column label 'L2' is not unique"
    else:
        expected_msg = "The index label 'L2' is not unique"

    with pytest.raises(ValueError, match=expected_msg):
        assert_label_values(df_duplabels, ["L2"], axis=axis)
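    # A duplicated label cannot be resolved to a single 1-D array of values, so
    # _get_label_or_level_values rejects 'L2' with the "not unique" message.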


# Series
# ------
def test_get_label_or_level_values_series_axis0(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2
    assert_level_values(s, ["L1"], axis=0)

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    assert_level_values(s, ["L1", "L2"], axis=0)


def test_get_label_or_level_values_series_axis1_error(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2

    with pytest.raises(ValueError, match="No axis named 1"):
        s._get_label_or_level_values("L1", axis=1)


# Test _drop_labels_or_levels
# ===========================
def assert_labels_dropped(frame, labels, axis):
    axis = frame._get_axis_number(axis)
    for label in labels:
        df_dropped = frame._drop_labels_or_levels(label, axis=axis)

        if axis == 0:
            assert label in frame.columns
            assert label not in df_dropped.columns
        else:
            assert label in frame.index
            assert label not in df_dropped.index


def assert_levels_dropped(frame, levels, axis):
    axis = frame._get_axis_number(axis)
    for level in levels:
        df_dropped = frame._drop_labels_or_levels(level, axis=axis)

        if axis == 0:
            assert level in frame.index.names
            assert level not in df_dropped.index.names
        else:
            assert level in frame.columns.names
            assert level not in df_dropped.columns.names


# DataFrame
# ---------
def test_drop_labels_or_levels_df(df_levels, axis):
    # Compute expected labels and levels
    expected_labels, expected_levels = get_labels_levels(df_levels)

    axis = df_levels._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_levels = df_levels.T

    # Perform checks
    assert_labels_dropped(df_levels, expected_labels, axis=axis)
    assert_levels_dropped(df_levels, expected_levels, axis=axis)

    with pytest.raises(ValueError, match="not valid labels or levels"):
        df_levels._drop_labels_or_levels("L4", axis=axis)


# Series
# ------
def test_drop_labels_or_levels_series(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2
    assert_levels_dropped(s, ["L1"], axis=0)

    with pytest.raises(ValueError, match="not valid labels or levels"):
        s._drop_labels_or_levels("L4", axis=0)

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    assert_levels_dropped(s, ["L1", "L2"], axis=0)

    with pytest.raises(ValueError, match="not valid labels or levels"):
        s._drop_labels_or_levels("L4", axis=0)
@ -0,0 +1,159 @@
from operator import methodcaller

import numpy as np
import pytest

import pandas as pd
from pandas import (
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm


class TestSeries:
    @pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"])
    def test_set_axis_name_mi(self, func):
        ser = Series(
            [11, 21, 31],
            index=MultiIndex.from_tuples(
                [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"]
            ),
        )

        result = methodcaller(func, ["L1", "L2"])(ser)
        assert ser.index.name is None
        assert ser.index.names == ["l1", "l2"]
        assert result.index.name is None
        assert result.index.names == ["L1", "L2"]
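        # Both rename_axis and _set_axis_name return a new object and leave the
        # original index names ("l1", "l2") untouched, which the first two
        # asserts above verify.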

    def test_set_axis_name_raises(self):
        ser = Series([1])
        msg = "No axis named 1 for object type Series"
        with pytest.raises(ValueError, match=msg):
            ser._set_axis_name(name="a", axis=1)

    def test_get_bool_data_preserve_dtype(self):
        ser = Series([True, False, True])
        result = ser._get_bool_data()
        tm.assert_series_equal(result, ser)

    def test_nonzero_single_element(self):
        # allow single item via bool method
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        ser = Series([True])
        ser1 = Series([False])
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            assert ser.bool()
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            assert not ser1.bool()

    @pytest.mark.parametrize("data", [np.nan, pd.NaT, True, False])
    def test_nonzero_single_element_raise_1(self, data):
        # single item nan to raise
        series = Series([data])

        msg = "The truth value of a Series is ambiguous"
        with pytest.raises(ValueError, match=msg):
            bool(series)

    @pytest.mark.parametrize("data", [np.nan, pd.NaT])
    def test_nonzero_single_element_raise_2(self, data):
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        msg_err = "bool cannot act on a non-boolean single element Series"
        series = Series([data])
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            with pytest.raises(ValueError, match=msg_err):
                series.bool()

    @pytest.mark.parametrize("data", [(True, True), (False, False)])
    def test_nonzero_multiple_element_raise(self, data):
        # multiple bool are still an error
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        msg_err = "The truth value of a Series is ambiguous"
        series = Series([data])
        with pytest.raises(ValueError, match=msg_err):
            bool(series)
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            with pytest.raises(ValueError, match=msg_err):
                series.bool()

    @pytest.mark.parametrize("data", [1, 0, "a", 0.0])
    def test_nonbool_single_element_raise(self, data):
        # single non-bool are an error
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        msg_err1 = "The truth value of a Series is ambiguous"
        msg_err2 = "bool cannot act on a non-boolean single element Series"
        series = Series([data])
        with pytest.raises(ValueError, match=msg_err1):
            bool(series)
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            with pytest.raises(ValueError, match=msg_err2):
                series.bool()

    def test_metadata_propagation_indiv_resample(self):
        # resample
        ts = Series(
            np.random.default_rng(2).random(1000),
            index=date_range("20130101", periods=1000, freq="s"),
            name="foo",
        )
        result = ts.resample("1min").mean()
        tm.assert_metadata_equivalent(ts, result)

        result = ts.resample("1min").min()
        tm.assert_metadata_equivalent(ts, result)

        result = ts.resample("1min").apply(lambda x: x.sum())
        tm.assert_metadata_equivalent(ts, result)

    def test_metadata_propagation_indiv(self, monkeypatch):
        # check that the metadata matches up on the resulting ops

        ser = Series(range(3), range(3))
        ser.name = "foo"
        ser2 = Series(range(3), range(3))
        ser2.name = "bar"

        result = ser.T
        tm.assert_metadata_equivalent(ser, result)

        def finalize(self, other, method=None, **kwargs):
            for name in self._metadata:
                if method == "concat" and name == "filename":
                    value = "+".join(
                        [
                            getattr(obj, name)
                            for obj in other.objs
                            if getattr(obj, name, None)
                        ]
                    )
                    object.__setattr__(self, name, value)
                else:
                    object.__setattr__(self, name, getattr(other, name, None))

            return self

        with monkeypatch.context() as m:
            m.setattr(Series, "_metadata", ["name", "filename"])
            m.setattr(Series, "__finalize__", finalize)

            ser.filename = "foo"
            ser2.filename = "bar"

            result = pd.concat([ser, ser2])
            assert result.filename == "foo+bar"
            assert result.name is None
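            # With the patched __finalize__, concat joins the custom "filename"
            # metadata across the inputs, while "name" ends up None because the
            # concatenation object passed to __finalize__ has no "name" attribute.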
@ -0,0 +1,130 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    DataFrame,
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm

pytest.importorskip("xarray")


class TestDataFrameToXArray:
    @pytest.fixture
    def df(self):
        return DataFrame(
            {
                "a": list("abcd"),
                "b": list(range(1, 5)),
                "c": np.arange(3, 7).astype("u1"),
                "d": np.arange(4.0, 8.0, dtype="float64"),
                "e": [True, False, True, False],
                "f": Categorical(list("abcd")),
                "g": date_range("20130101", periods=4),
                "h": date_range("20130101", periods=4, tz="US/Eastern"),
            }
        )

    def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
        index = index_flat
        # MultiIndex is tested in test_to_xarray_with_multiindex
        if len(index) == 0:
            pytest.skip("Test doesn't make sense for empty index")

        from xarray import Dataset

        df.index = index[:4]
        df.index.name = "foo"
        df.columns.name = "bar"
        result = df.to_xarray()
        assert result.sizes["foo"] == 4
        assert len(result.coords) == 1
        assert len(result.data_vars) == 8
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, Dataset)

        # idempotency
        # datetimes w/tz are preserved
        # column names are lost
        expected = df.copy()
        expected["f"] = expected["f"].astype(
            object if not using_infer_string else "string[pyarrow_numpy]"
        )
        expected.columns.name = None
        tm.assert_frame_equal(result.to_dataframe(), expected)

    def test_to_xarray_empty(self, df):
        from xarray import Dataset

        df.index.name = "foo"
        result = df[0:0].to_xarray()
        assert result.sizes["foo"] == 0
        assert isinstance(result, Dataset)

    def test_to_xarray_with_multiindex(self, df, using_infer_string):
        from xarray import Dataset

        # MultiIndex
        df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
        result = df.to_xarray()
        assert result.sizes["one"] == 1
        assert result.sizes["two"] == 4
        assert len(result.coords) == 2
        assert len(result.data_vars) == 8
        tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, Dataset)

        result = result.to_dataframe()
        expected = df.copy()
        expected["f"] = expected["f"].astype(
            object if not using_infer_string else "string[pyarrow_numpy]"
        )
        expected.columns.name = None
        tm.assert_frame_equal(result, expected)


class TestSeriesToXArray:
    def test_to_xarray_index_types(self, index_flat):
        index = index_flat
        # MultiIndex is tested in test_to_xarray_with_multiindex

        from xarray import DataArray

        ser = Series(range(len(index)), index=index, dtype="int64")
        ser.index.name = "foo"
        result = ser.to_xarray()
        repr(result)
        assert len(result) == len(index)
        assert len(result.coords) == 1
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

        # idempotency
        tm.assert_series_equal(result.to_series(), ser)

    def test_to_xarray_empty(self):
        from xarray import DataArray

        ser = Series([], dtype=object)
        ser.index.name = "foo"
        result = ser.to_xarray()
        assert len(result) == 0
        assert len(result.coords) == 1
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

    def test_to_xarray_with_multiindex(self):
        from xarray import DataArray

        mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"])
        ser = Series(range(6), dtype="int64", index=mi)
        result = ser.to_xarray()
        assert len(result) == 2
        tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, DataArray)
        res = result.to_series()
        tm.assert_series_equal(res, ser)