forked from Alsan/Post_finder
venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,199 @@
|
||||
"""
|
||||
Tests for values coercion in setitem-like operations on DataFrame.
|
||||
|
||||
For the most part, these should be multi-column DataFrames, otherwise
|
||||
we would share the tests with Series.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameSetitemCoercion:
    """Dtype checks for ``.loc`` assignment on frames with MultiIndex columns."""

    @pytest.mark.parametrize("consolidate", [True, False])
    def test_loc_setitem_multiindex_columns(self, consolidate):
        # GH#18415 Setting values in a single column preserves dtype,
        #  while setting them in multiple columns did unwanted cast.

        # The concatenated frame starts out with 2 blocks; the
        #  consolidate=True case repeats the same checks on a consolidated
        #  frame.
        base = DataFrame(np.zeros((6, 5), dtype=np.float32))
        frame = pd.concat([base, base], axis=1, keys=[1, 2])
        if consolidate:
            frame = frame._consolidate()

        col_key = (1, slice(2, 3))
        # (row selector, number of rows written): a partial row range, the
        #  full range with explicit bounds, and the full range via ":".
        cases = [(slice(2, 3), 2), (slice(0, 5), 6), (slice(None), 6)]
        for row_key, nrows in cases:
            frame.loc[row_key, col_key] = np.ones((nrows, 2), dtype=np.float32)
            assert (frame.dtypes == np.float32).all()

        # TODO: i think this isn't about MultiIndex and could be done with iloc?
|
||||
|
||||
|
||||
def test_37477():
|
||||
# fixed by GH#45121
|
||||
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
|
||||
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})
|
||||
|
||||
df = orig.copy()
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="Setting an item of incompatible dtype"
|
||||
):
|
||||
df.at[1, "B"] = 1.2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = orig.copy()
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="Setting an item of incompatible dtype"
|
||||
):
|
||||
df.loc[1, "B"] = 1.2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = orig.copy()
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="Setting an item of incompatible dtype"
|
||||
):
|
||||
df.iat[1, 1] = 1.2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = orig.copy()
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="Setting an item of incompatible dtype"
|
||||
):
|
||||
df.iloc[1, 1] = 1.2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_6942(indexer_al):
    """Check that an ``.at`` write still lands after a column is overwritten."""
    # check that the .at __setitem__ after setting "Live" actually sets the data
    start = Timestamp("2014-04-01")
    first_stamp = Timestamp("2014-04-23 12:42:38.883082")
    second_stamp = Timestamp("2014-04-24 01:33:30.040039")

    template = DataFrame(
        index=date_range(start, periods=1), columns=["timenow", "Live"]
    )
    df = template.copy()

    # Seed "timenow" through the indexer under test, then overwrite "Live".
    indexer_al(df)[start, "timenow"] = first_stamp
    df["Live"] = True

    # The later .at write must be what a positional read returns.
    df.at[start, "timenow"] = second_stamp
    observed = df.iloc[0, 0]
    assert observed == second_stamp
|
||||
|
||||
|
||||
def test_26395(indexer_al):
    """Check scalar assignment through the indexer for same and other dtypes."""
    # .at case fixed by GH#45121 (best guess)
    labels = ["A", "B", "C"]
    df = DataFrame(index=labels)
    df["D"] = 0

    # An integer written into the integer column is expected to stay int64.
    indexer_al(df)["C", "D"] = 2
    tm.assert_frame_equal(
        df, DataFrame({"D": [0, 0, 2]}, index=labels, dtype=np.int64)
    )

    # Each incompatible value must warn and leave the column with the new dtype.
    for new_value, new_dtype in [(44.5, np.float64), ("hello", object)]:
        with tm.assert_produces_warning(
            FutureWarning, match="Setting an item of incompatible dtype"
        ):
            indexer_al(df)["C", "D"] = new_value
        expected = DataFrame(
            {"D": [0, 0, new_value]}, index=labels, dtype=new_dtype
        )
        tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
    """Check column dtypes after appending rows with ``.loc`` (marked xfail)."""
    frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])

    # Appending a complete row: both columns are expected to stay int64.
    frame.loc[2] = Series({"a": 5, "b": 6})
    all_int = (frame.dtypes == np.int64).all()
    assert all_int

    # Appending a row that only supplies "a".
    frame.loc[3] = Series({"a": 7})

    # df["a"] doesn't have any NaNs, should not have been cast
    wanted = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
    tm.assert_series_equal(frame.dtypes, wanted)
|
||||
|
||||
|
||||
def test_iloc_setitem_unnecesssary_float_upcasting():
    """Check that writing a column back onto itself via iloc changes nothing."""
    # GH#12255
    float_cols = {
        0: np.array([1, 3], dtype=np.float32),
        1: np.array([2, 4], dtype=np.float32),
    }
    df = DataFrame({**float_cols, 2: ["a", "b"]})
    snapshot = df.copy()

    # Column 0's own values, reshaped to (2, 1), are assigned to the
    #  one-column slice 0:1; the frame must compare equal to the snapshot.
    column_block = df[0].values.reshape(2, 1)
    df.iloc[:, 0:1] = column_block

    tm.assert_frame_equal(df, snapshot)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="unwanted casting to dt64")
def test_12499():
    """Check that appending a ``[nan, NaT]`` row keeps the tz-aware dtype.

    The same row is appended twice, once with ``df.loc[1]`` and once with
    ``df.loc[1, :]``, and both results are compared against one expected
    frame whose "two" column is ``datetime64[ns, UTC]``.
    """
    # TODO: OP in GH#12499 used np.datetime64("NaT") instead of pd.NaT,
    #  which has consequences for the expected df["two"] (though i think at
    #  the time it might not have because of a separate bug). See if it makes
    #  a difference which one we use here.
    ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")

    orig = DataFrame([{"one": 0, "two": ts}])
    expected = DataFrame(
        {"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
    )

    # Row-label form of the assignment.
    df = orig.copy()
    df.loc[1] = [np.nan, NaT]
    tm.assert_frame_equal(df, expected)

    # (row, all-columns) form of the assignment.
    df = orig.copy()
    df.loc[1, :] = [np.nan, NaT]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_20476():
    """Check values and dtypes after assigning a frame under a top-level key.

    A three-column frame (int, int, float) is assigned to the "A" group of a
    frame with two-level columns. The per-column dtypes and the resulting
    values are both verified.
    """
    mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
    df = DataFrame(-1, index=range(3), columns=mi)
    filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])
    df["A"] = filler

    expected = DataFrame(
        {
            0: [1, 1, 1],
            1: [2, 2, 2],
            2: [3.0, 3.0, 3.0],
            3: [-1, -1, -1],
            4: [-1, -1, -1],
            5: [-1, -1, -1],
        }
    )
    expected.columns = mi
    exp_dtypes = Series(
        [np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
        index=mi,
    )
    tm.assert_series_equal(df.dtypes, exp_dtypes)
    # The expected frame was previously built but never compared.
    tm.assert_frame_equal(df, expected)
|
@ -0,0 +1,60 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestDataFrameDelItem:
    """Tests for ``del frame[key]``."""

    def test_delitem(self, float_frame):
        del float_frame["A"]
        assert "A" not in float_frame

    def test_delitem_multiindex(self):
        columns = MultiIndex.from_product([["A", "B"], [1, 2]])
        values = np.random.default_rng(2).standard_normal((4, 4))
        df = DataFrame(values, columns=columns)
        assert len(df.columns) == 4
        assert ("A",) in df.columns
        assert "A" in df.columns

        # Selecting the top-level key yields a sub-frame; then drop it.
        sub_frame = df["A"]
        assert isinstance(sub_frame, DataFrame)
        del df["A"]

        assert len(df.columns) == 2

        missing_pattern = re.escape("('A',)")

        # A still in the levels, BUT get a KeyError if trying
        #  to delete
        assert ("A",) not in df.columns
        with pytest.raises(KeyError, match=missing_pattern):
            del df[("A",)]

        # behavior of dropped/deleted MultiIndex levels changed from
        #  GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
        #  levels which are dropped/deleted
        assert "A" not in df.columns
        with pytest.raises(KeyError, match=missing_pattern):
            del df["A"]

    def test_delitem_corner(self, float_frame):
        frame = float_frame.copy()

        del frame["D"]
        assert len(frame.columns) == 3

        # Deleting the same column a second time must raise.
        with pytest.raises(KeyError, match=r"^'D'$"):
            del frame["D"]

        del frame["B"]
        assert len(frame.columns) == 2

    def test_delitem_col_still_multiindex(self):
        arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]

        column_tuples = list(zip(*arrays))
        column_tuples.sort()
        columns = MultiIndex.from_tuples(column_tuples)

        values = np.random.default_rng(2).standard_normal((3, 4))
        df = DataFrame(values, columns=columns)
        del df[("a", "", "")]
        assert isinstance(df.columns, MultiIndex)
|
@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGet:
    """Tests for ``DataFrame.get``."""

    def test_get(self, float_frame):
        # Existing key: same result as item access.
        tm.assert_series_equal(float_frame.get("B"), float_frame["B"])

        # Missing key: None without a default, the default otherwise.
        assert float_frame.get("foo") is None
        fallback = float_frame["B"]
        tm.assert_series_equal(float_frame.get("foo", fallback), fallback)

    @pytest.mark.parametrize(
        "df",
        [
            DataFrame(),
            DataFrame(columns=list("AB")),
            DataFrame(columns=list("AB"), index=range(3)),
        ],
    )
    def test_get_none(self, df):
        # see gh-5652
        looked_up = df.get(None)
        assert looked_up is None
|
@ -0,0 +1,22 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestGetValue:
    """Tests for the private ``DataFrame._get_value`` accessor."""

    def test_get_set_value_no_partial_indexing(self):
        # partial w/ MultiIndex raise exception
        row_index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
        df = DataFrame(index=row_index, columns=range(4))
        with pytest.raises(KeyError, match=r"^0$"):
            df._get_value(0, 1)

    def test_get_value(self, float_frame):
        # Every cell, row by row, must match chained item access.
        cells = ((idx, col) for idx in float_frame.index for col in float_frame.columns)
        for idx, col in cells:
            result = float_frame._get_value(idx, col)
            expected = float_frame[col][idx]
            assert result == expected
|
@ -0,0 +1,472 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
DateOffset,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
get_dummies,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import SparseArray
|
||||
|
||||
|
||||
class TestGetitem:
    """Tests for ``DataFrame.__getitem__`` with a single column key."""

    def test_getitem_unused_level_raises(self):
        # GH#20410
        columns = MultiIndex(
            levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
            codes=[[1, 0], [1, 0]],
        )
        df = DataFrame(-1, index=range(3), columns=columns)

        # "notevenone" is in the levels but no code refers to it.
        with pytest.raises(KeyError, match="notevenone"):
            df["notevenone"]

    def test_getitem_periodindex(self):
        rng = period_range("1/1/2000", periods=5)
        df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)), columns=rng)

        # The Period itself and the string "1/1/2000" both select column 0.
        for key in (rng[0], "1/1/2000"):
            selected = df[key]
            tm.assert_series_equal(selected, df.iloc[:, 0])

    def test_getitem_list_of_labels_categoricalindex_cols(self):
        # GH#16115
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        expected = DataFrame([[1, 0], [0, 1]], dtype="bool", index=[0, 1], columns=cats)
        dummies = get_dummies(cats)
        all_labels = list(dummies.columns)
        tm.assert_frame_equal(dummies[all_labels], expected)

    def test_getitem_sparse_column_return_type_and_dtype(self):
        # https://github.com/pandas-dev/pandas/issues/23559
        data = SparseArray([0, 1])
        df = DataFrame({"A": data})
        expected = Series(data, name="A")

        tm.assert_series_equal(df["A"], expected)

        # Also check iloc and loc while we're here
        tm.assert_series_equal(df.iloc[:, 0], expected)
        tm.assert_series_equal(df.loc[:, "A"], expected)

    def test_getitem_string_columns(self):
        # GH#46185
        columns = Index(["A", "B"], dtype="string")
        df = DataFrame([[1, 2]], columns=columns)

        # Attribute access and item access must agree.
        via_attribute = df.A
        via_item = df["A"]
        tm.assert_series_equal(via_attribute, via_item)
|
||||
|
||||
|
||||
class TestGetitemListLike:
    """Tests for ``DataFrame.__getitem__`` / ``iloc`` / ``loc`` with list-like keys."""

    def test_getitem_list_missing_key(self):
        # GH#13822, incorrect error string with non-unique columns when missing
        #  column is accessed
        df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
        df.columns = ["x", "x", "z"]

        # Check that we get the correct value in the KeyError
        with pytest.raises(KeyError, match=r"\['y'\] not in index"):
            df[["x", "y", "z"]]

    def test_getitem_list_duplicates(self):
        # GH#1943
        values = np.random.default_rng(2).standard_normal((4, 4))
        df = DataFrame(values, columns=list("AABC"))
        df.columns.name = "foo"

        picked = df[["B", "C"]]
        assert picked.columns.name == "foo"

        # "B" and "C" are the last two columns.
        tm.assert_frame_equal(picked, df.iloc[:, 2:])

    def test_getitem_dupe_cols(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
        msg = "\"None of [Index(['baf'], dtype="
        pattern = re.escape(msg)
        with pytest.raises(KeyError, match=pattern):
            df[["baf"]]

    @pytest.mark.parametrize(
        "idx_type",
        [
            list,
            iter,
            Index,
            set,
            lambda keys: dict(zip(keys, range(len(keys)))),
            lambda keys: dict(zip(keys, range(len(keys)))).keys(),
        ],
        ids=["list", "iter", "Index", "set", "dict", "dict_keys"],
    )
    @pytest.mark.parametrize("levels", [1, 2])
    def test_getitem_listlike(self, idx_type, levels, float_frame):
        # GH#21294

        if levels == 1:
            frame = float_frame
            missing = "food"
        else:
            # MultiIndex columns
            mi_columns = Index(
                [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")],
                name=("sth", "sth2"),
            )
            frame = DataFrame(
                np.random.default_rng(2).standard_normal((8, 3)),
                columns=mi_columns,
            )
            missing = ("good", "food")

        keys = [frame.columns[1], frame.columns[0]]
        idx = idx_type(keys)
        # Built from a second idx_type(keys) call so that a one-shot
        #  iterator in ``idx`` is not consumed here.
        idx_check = list(idx_type(keys))

        if isinstance(idx, (set, dict)):
            # Sets and dicts are rejected; nothing further to check.
            with pytest.raises(TypeError, match="as an indexer is not supported"):
                frame[idx]
            return

        result = frame[idx]

        expected = frame.loc[:, idx_check]
        expected.columns.names = frame.columns.names

        tm.assert_frame_equal(result, expected)

        idx = idx_type(keys + [missing])
        with pytest.raises(KeyError, match="not in index"):
            frame[idx]

    def test_getitem_iloc_generator(self):
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        positions = (x for x in [1, 2])
        expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2])
        tm.assert_frame_equal(df.iloc[positions], expected)

    def test_getitem_iloc_two_dimensional_generator(self):
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        positions = (x for x in [1, 2])
        expected = Series([5, 6], name="b", index=[1, 2])
        tm.assert_series_equal(df.iloc[positions, 1], expected)

    def test_getitem_iloc_dateoffset_days(self):
        # GH 46671
        window = slice("2022-01-01", "2022-01-03")

        # Frequency of exactly one day.
        daily = DateOffset(days=1)
        df = DataFrame(
            list(range(10)),
            index=date_range("01-01-2022", periods=10, freq=daily),
        )
        result = df.loc[window]
        expected = DataFrame(
            [0, 1, 2],
            index=DatetimeIndex(
                ["2022-01-01", "2022-01-02", "2022-01-03"],
                dtype="datetime64[ns]",
                freq=DateOffset(days=1),
            ),
        )
        tm.assert_frame_equal(result, expected)

        # Frequency of one day plus two hours.
        df = DataFrame(
            list(range(10)),
            index=date_range(
                "01-01-2022", periods=10, freq=DateOffset(days=1, hours=2)
            ),
        )
        result = df.loc[window]
        expected = DataFrame(
            [0, 1, 2],
            index=DatetimeIndex(
                ["2022-01-01 00:00:00", "2022-01-02 02:00:00", "2022-01-03 04:00:00"],
                dtype="datetime64[ns]",
                freq=DateOffset(days=1, hours=2),
            ),
        )
        tm.assert_frame_equal(result, expected)

        # Three-minute frequency: the slice is expected to return every row.
        df = DataFrame(
            list(range(10)),
            index=date_range("01-01-2022", periods=10, freq=DateOffset(minutes=3)),
        )
        result = df.loc[window]
        tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
class TestGetitemCallable:
    """Tests for ``DataFrame.__getitem__`` with callables and one-level MultiIndex columns."""

    def test_getitem_callable(self, float_frame):
        # GH#12533
        # Callable returning a single label.
        result = float_frame[lambda x: "A"]
        tm.assert_series_equal(result, float_frame.loc[:, "A"])

        # Callable returning a list of labels.
        result = float_frame[lambda x: ["A", "B"]]
        expected = float_frame.loc[:, ["A", "B"]]
        tm.assert_frame_equal(result, expected)

        # Callable returning a boolean list selects rows.
        head = float_frame[:3]
        result = head[lambda x: [True, False, True]]
        expected = float_frame.iloc[[0, 2], :]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_columns_one_level(self):
        # GH#29749
        df = DataFrame([[1, 2]], columns=[["a", "b"]])
        expected = DataFrame([1], columns=[["a"]])

        # Item access and .loc must give the same one-column frame.
        tm.assert_frame_equal(df["a"], expected)
        tm.assert_frame_equal(df.loc[:, "a"], expected)
|
||||
|
||||
|
||||
class TestGetitemBooleanMask:
    """Tests for ``DataFrame.__getitem__`` with boolean masks and related cases."""

    def test_getitem_bool_mask_categorical_index(self):
        def build(ordered):
            # Frames differ only in whether the index categories are ordered.
            index = CategoricalIndex(
                [1, 1, 2, 1, 3, 2],
                dtype=CategoricalDtype([3, 2, 1], ordered=ordered),
                name="B",
            )
            return DataFrame({"A": np.arange(6, dtype="int64")}, index=index)

        ordered_df = build(True)
        unordered_df = build(False)

        # Equality masks are checked on both frames.
        for frame in (ordered_df, unordered_df):
            result = frame[frame.index == "a"]
            tm.assert_frame_equal(result, frame.iloc[[]])

            result = frame[frame.index == 1]
            tm.assert_frame_equal(result, frame.iloc[[0, 1, 3]])

        # since we have an ordered categorical

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=True,
        #         name='B')
        result = ordered_df[ordered_df.index < 2]
        tm.assert_frame_equal(result, ordered_df.iloc[[4]])

        result = ordered_df[ordered_df.index > 1]
        tm.assert_frame_equal(result, ordered_df.iloc[[]])

        # unordered
        # cannot be compared

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=False,
        #         name='B')
        msg = "Unordered Categoricals can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            unordered_df[unordered_df.index < 2]
        with pytest.raises(TypeError, match=msg):
            unordered_df[unordered_df.index > 1]

    @pytest.mark.parametrize(
        "data1,data2,expected_data",
        (
            (
                [[1, 2], [3, 4]],
                [[0.5, 6], [7, 8]],
                [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
            ),
            (
                [[1, 2], [3, 4]],
                [[5, 6], [7, 8]],
                [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
            ),
        ),
    )
    def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
        self,
        data1,
        data2,
        expected_data,
    ):
        # GH#31954

        pieces = [DataFrame(np.array(data)) for data in (data1, data2)]
        df = concat(pieces, axis=1)

        result = df[df > 2]

        # expected_data is given column by column; columns 2 and 3 are then
        #  relabelled 0 and 1 to reproduce the duplicated labels.
        columns = dict(enumerate(np.array(col) for col in expected_data))
        expected = DataFrame(columns).rename(columns={2: 0, 3: 1})
        tm.assert_frame_equal(result, expected)

    @pytest.fixture
    def df_dup_cols(self):
        """Return a 3x4 float64 frame whose first two columns are both "A"."""
        labels = ["A", "A", "C", "D"]
        return DataFrame(np.arange(12).reshape(3, 4), columns=labels, dtype="float64")

    def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
        # `df.A > 6` is a DataFrame with a different shape from df

        # boolean with the duplicate raises
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            df_dup_cols[df_dup_cols.A > 6]

    def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
        # boolean indexing
        # GH#4879
        unique_df = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        # Expected: the same row filter on unique labels, relabelled after.
        expected = unique_df[unique_df.C > 6]
        expected.columns = df_dup_cols.columns

        result = df_dup_cols[df_dup_cols.C > 6]

        tm.assert_frame_equal(result, expected)

    def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
        # where
        unique_df = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        # `df > 6` is a DataFrame with the same shape+alignment as df
        expected = unique_df[unique_df > 6]
        expected.columns = df_dup_cols.columns

        result = df_dup_cols[df_dup_cols > 6]

        tm.assert_frame_equal(result, expected)

    def test_getitem_empty_frame_with_boolean(self):
        # Test for issue GH#11859

        empty = DataFrame()
        masked = empty[empty > 0]
        tm.assert_frame_equal(empty, masked)

    def test_getitem_returns_view_when_column_is_unique_in_df(
        self, using_copy_on_write, warn_copy_on_write
    ):
        # GH#45316
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
        untouched = df.copy()

        column_b = df["b"]
        with tm.assert_cow_warning(warn_copy_on_write):
            column_b.loc[:] = 100

        # With copy-on-write the parent must be unchanged; otherwise the
        #  write is expected to show up in column "b" of the parent.
        if not using_copy_on_write:
            expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
        else:
            expected = untouched
        tm.assert_frame_equal(df, expected)

    def test_getitem_frozenset_unique_in_column(self):
        # GH#41062
        key = frozenset(["KEY"])
        df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"])
        result = df[key]
        expected = Series([1], name=frozenset(["KEY"]))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemSlice:
    """Tests for slicing a DataFrame (or Series) with ``[]``, ``loc`` and ``iloc``."""

    def test_getitem_slice_float64(self, frame_or_series):
        labels = np.arange(10.0, 50.0, 2)
        index = Index(labels)

        lo, hi = labels[5], labels[15]

        data = np.random.default_rng(2).standard_normal((20, 3))
        if frame_or_series is not DataFrame:
            data = data[:, 0]

        obj = frame_or_series(data, index=index)

        # Label slices include the end label, hence positions 5 through 15.
        expected = obj.iloc[5:16]
        tm.assert_equal(obj[lo:hi], expected)
        tm.assert_equal(obj.loc[lo:hi], expected)

    def test_getitem_datetime_slice(self):
        # GH#43223
        stamps = [
            "11.01.2011 22:00",
            "11.01.2011 23:00",
            "12.01.2011 00:00",
            "2011-01-13 00:00",
        ]
        df = DataFrame({"a": 0}, index=DatetimeIndex(stamps))
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            df["2011-01-01":"2011-11-01"]

    def test_getitem_slice_same_dim_only_one_axis(self):
        # GH#54622
        df = DataFrame(np.random.default_rng(2).standard_normal((10, 8)))
        every_other = slice(None, None, 2)

        # A one-element tuple key must act on the rows only.
        result = df.iloc[(every_other,)]
        assert result.shape == (5, 8)
        expected = df.iloc[every_other, slice(None)]
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemDeprecatedIndexers:
    """Tests that set and dict keys are rejected by ``DataFrame.__getitem__``."""

    @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825 enforced in 2.0
        columns = MultiIndex.from_tuples([("a", 1), ("b", 2)])
        df = DataFrame([[1, 2], [3, 4]], columns=columns)
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            df[key]
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,120 @@
|
||||
"""
|
||||
test_insert is specifically for the DataFrame.insert method; not to be
|
||||
confused with tests with "insert" in their names that are really testing
|
||||
__setitem__.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameInsert:
    """Tests for ``DataFrame.insert`` and column insertion side effects."""

    def test_insert(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((5, 3)),
            index=np.arange(5),
            columns=["c", "b", "a"],
        )

        # Insert at the front.
        df.insert(0, "foo", df["a"])
        tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
        tm.assert_series_equal(df["a"], df["foo"], check_names=False)

        # Insert in the middle.
        df.insert(2, "bar", df["c"])
        tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
        tm.assert_almost_equal(df["c"], df["bar"], check_names=False)

        # Existing labels are rejected.
        with pytest.raises(ValueError, match="already exists"):
            df.insert(1, "a", df["b"])

        msg = "cannot insert c, already exists"
        with pytest.raises(ValueError, match=msg):
            df.insert(1, "c", df["b"])

        df.columns.name = "some_name"
        # preserve columns name field
        df.insert(0, "baz", df["c"])
        assert df.columns.name == "some_name"

    def test_insert_column_bug_4032(self):
        # GH#4032, inserting a column and renaming causing errors
        df = DataFrame({"b": [1.1, 2.2]})

        df = df.rename(columns={})
        df.insert(0, "a", [1, 2])
        renamed = df.rename(columns={})

        expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
        tm.assert_frame_equal(renamed, expected)

        df.insert(0, "c", [1.3, 2.3])
        renamed = df.rename(columns={})

        expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
        tm.assert_frame_equal(renamed, expected)

    def test_insert_with_columns_dups(self):
        # GH#14291
        df = DataFrame()
        # Each insert goes to position 0, so the last one ends up first.
        for letters in (["g", "h", "i"], ["d", "e", "f"], ["a", "b", "c"]):
            df.insert(0, "A", letters, allow_duplicates=True)
        exp = DataFrame(
            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
        )
        tm.assert_frame_equal(df, exp)

    def test_insert_item_cache(self, using_array_manager, using_copy_on_write):
        df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
        ser = df[0]

        # with BlockManager warn about high fragmentation of single dtype
        expected_warning = None if using_array_manager else PerformanceWarning

        with tm.assert_produces_warning(expected_warning):
            for n in range(100):
                df[n + 3] = df[1] * n

        # Write through the previously fetched column: via iloc under
        #  copy-on-write, via the raw values otherwise.
        if using_copy_on_write:
            ser.iloc[0] = 99
        else:
            ser.values[0] = 99

        top_left = df.iloc[0, 0]
        assert top_left == df[0][0]
        if using_copy_on_write:
            assert top_left != 99
        else:
            assert top_left == 99

    def test_insert_EA_no_warning(self):
        # PerformanceWarning about fragmented frame should not be raised when
        # using EAs (https://github.com/pandas-dev/pandas/issues/44098)
        values = np.random.default_rng(2).integers(0, 100, size=(3, 100))
        df = DataFrame(values, dtype="Int64")
        with tm.assert_produces_warning(None):
            df["a"] = np.array([1, 2, 3])

    def test_insert_frame(self):
        # GH#42403
        df = DataFrame({"col1": [1, 2], "col2": [3, 4]})

        msg = (
            "Expected a one-dimensional object, got a DataFrame with 2 columns instead."
        )
        with pytest.raises(ValueError, match=msg):
            df.insert(1, "newcol", df)

    def test_insert_int64_loc(self):
        # GH#53193
        df = DataFrame({"a": [1, 2]})
        position = np.int64(0)
        df.insert(position, "b", 0)
        expected = DataFrame({"b": [0, 0], "a": [1, 2]})
        tm.assert_frame_equal(df, expected)
|
@ -0,0 +1,152 @@
|
||||
"""
|
||||
Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
Float64Dtype,
|
||||
Series,
|
||||
StringDtype,
|
||||
Timedelta,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameMask:
    """Tests comparing ``DataFrame.mask`` against ``DataFrame.where``."""

    def test_mask(self):
        df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
        positive = df > 0

        # where(cond) must equal mask(inverted cond), however the inverse
        #  is spelled.
        kept = df.where(positive, np.nan)
        tm.assert_frame_equal(kept, df.mask(df <= 0))
        tm.assert_frame_equal(kept, df.mask(~positive))

        # Same comparison with an explicit replacement frame.
        other = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
        kept = df.where(positive, other)
        tm.assert_frame_equal(kept, df.mask(df <= 0, other))
        tm.assert_frame_equal(kept, df.mask(~positive, other))

    def test_mask2(self):
        # see GH#21891
        df = DataFrame([1, 2])
        masked = df.mask([[True], [False]])

        tm.assert_frame_equal(masked, DataFrame([np.nan, 2]))

    def test_mask_inplace(self):
        # GH#8801
        df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
        cond = df > 0

        # In-place where without a replacement.
        work = df.copy()
        outcome = work.where(cond, inplace=True)
        assert outcome is None
        tm.assert_frame_equal(work, df.where(cond))
        tm.assert_frame_equal(work, df.mask(~cond))

        # In-place where with -df as the replacement.
        work = df.copy()
        outcome = work.where(cond, -df, inplace=True)
        assert outcome is None
        tm.assert_frame_equal(work, df.where(cond, -df))
        tm.assert_frame_equal(work, df.mask(~cond, -df))

    def test_mask_edge_case_1xN_frame(self):
        # GH#4071
        df = DataFrame([[1, 2]])
        flags = DataFrame([[True, False]])
        tm.assert_frame_equal(df.mask(flags), DataFrame([[np.nan, 2]]))

    def test_mask_callable(self):
        # GH#12533
        df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

        # Callable condition and callable replacement.
        result = df.mask(lambda x: x > 4, lambda x: x + 1)
        exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
        tm.assert_frame_equal(result, exp)
        tm.assert_frame_equal(result, df.mask(df > 4, df + 1))

        # return ndarray and scalar
        result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
        exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
        tm.assert_frame_equal(result, exp)
        tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))

        # chain
        result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
        exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
        tm.assert_frame_equal(result, exp)
        shifted = df + 2
        tm.assert_frame_equal(result, shifted.mask(shifted > 8, shifted + 10))

    def test_mask_dtype_bool_conversion(self):
        # GH#3733
        df = DataFrame(data=np.random.default_rng(2).standard_normal((100, 50)))
        df = df.where(df > 0)  # create nans
        bools = df > 0
        missing = isna(df)

        # Masking the bool frame must match masking its object-cast copy.
        expected = bools.astype(object).mask(missing)
        tm.assert_frame_equal(bools.mask(missing), expected)
|
||||
|
||||
|
||||
def test_mask_stringdtype(frame_or_series):
|
||||
# GH 40824
|
||||
obj = DataFrame(
|
||||
{"A": ["foo", "bar", "baz", NA]},
|
||||
index=["id1", "id2", "id3", "id4"],
|
||||
dtype=StringDtype(),
|
||||
)
|
||||
filtered_obj = DataFrame(
|
||||
{"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"A": [NA, "this", "that", NA]},
|
||||
index=["id1", "id2", "id3", "id4"],
|
||||
dtype=StringDtype(),
|
||||
)
|
||||
if frame_or_series is Series:
|
||||
obj = obj["A"]
|
||||
filtered_obj = filtered_obj["A"]
|
||||
expected = expected["A"]
|
||||
|
||||
filter_ser = Series([False, True, True, False])
|
||||
result = obj.mask(filter_ser, filtered_obj)
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_mask_where_dtype_timedelta():
    """``mask`` and ``where`` on a frame of five consecutive day Timedeltas."""
    # https://github.com/pandas-dev/pandas/issues/39548
    frame = DataFrame([Timedelta(days, unit="d") for days in range(5)])

    # Masking on notna() is compared against an all-NaN timedelta64[ns] column.
    all_missing = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
    tm.assert_frame_equal(frame.mask(frame.notna()), all_missing)

    # Only the entries strictly above two days are expected to survive.
    threshold = Timedelta(2, unit="d")
    above_threshold = DataFrame(
        [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
    )
    tm.assert_frame_equal(frame.where(frame > threshold), above_threshold)
|
||||
|
||||
|
||||
def test_mask_return_dtype():
    """``mask`` with a bool ``other`` keeps the caller's Float64 dtype (GH#50488)."""
    ser = Series([0.0, 1.0, 2.0, 3.0], dtype=Float64Dtype())
    replacement = Series([True, False, True, False])
    expected = Series([1.0, 0.0, 1.0, 0.0], dtype=ser.dtype)

    # The condition is the negation of isna(), evaluated on ``ser`` itself.
    result = ser.mask(~ser.isna(), replacement)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_mask_inplace_no_other():
    """In-place ``mask`` without ``other`` on a float/string frame (GH#51685)."""
    frame = DataFrame({"a": [1.0, 2.0], "b": ["x", "y"]})
    to_blank = DataFrame({"a": [True, False], "b": [False, True]})
    expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]})

    frame.mask(to_blank, inplace=True)
    tm.assert_frame_equal(frame, expected)
|
@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_float_dtype
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSetValue:
|
||||
def test_set_value(self, float_frame):
|
||||
for idx in float_frame.index:
|
||||
for col in float_frame.columns:
|
||||
float_frame._set_value(idx, col, 1)
|
||||
assert float_frame[col][idx] == 1
|
||||
|
||||
def test_set_value_resize(self, float_frame, using_infer_string):
|
||||
res = float_frame._set_value("foobar", "B", 0)
|
||||
assert res is None
|
||||
assert float_frame.index[-1] == "foobar"
|
||||
assert float_frame._get_value("foobar", "B") == 0
|
||||
|
||||
float_frame.loc["foobar", "qux"] = 0
|
||||
assert float_frame._get_value("foobar", "qux") == 0
|
||||
|
||||
res = float_frame.copy()
|
||||
res._set_value("foobar", "baz", "sam")
|
||||
if using_infer_string:
|
||||
assert res["baz"].dtype == "string"
|
||||
else:
|
||||
assert res["baz"].dtype == np.object_
|
||||
res = float_frame.copy()
|
||||
res._set_value("foobar", "baz", True)
|
||||
assert res["baz"].dtype == np.object_
|
||||
|
||||
res = float_frame.copy()
|
||||
res._set_value("foobar", "baz", 5)
|
||||
assert is_float_dtype(res["baz"])
|
||||
assert isna(res["baz"].drop(["foobar"])).all()
|
||||
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="Setting an item of incompatible dtype"
|
||||
):
|
||||
res._set_value("foobar", "baz", "sam")
|
||||
assert res.loc["foobar", "baz"] == "sam"
|
||||
|
||||
def test_set_value_with_index_dtype_change(self):
|
||||
df_orig = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
index=range(3),
|
||||
columns=list("ABC"),
|
||||
)
|
||||
|
||||
# this is actually ambiguous as the 2 is interpreted as a positional
|
||||
# so column is not created
|
||||
df = df_orig.copy()
|
||||
df._set_value("C", 2, 1.0)
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
# assert list(df.columns) == list(df_orig.columns) + [2]
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["C", 2] = 1.0
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
# assert list(df.columns) == list(df_orig.columns) + [2]
|
||||
|
||||
# create both new
|
||||
df = df_orig.copy()
|
||||
df._set_value("C", "D", 1.0)
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
assert list(df.columns) == list(df_orig.columns) + ["D"]
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["C", "D"] = 1.0
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
assert list(df.columns) == list(df_orig.columns) + ["D"]
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,92 @@
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameTake:
|
||||
def test_take_slices_deprecated(self, float_frame):
|
||||
# GH#51539
|
||||
df = float_frame
|
||||
|
||||
slc = slice(0, 4, 1)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.take(slc, axis=0)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.take(slc, axis=1)
|
||||
|
||||
def test_take(self, float_frame):
|
||||
# homogeneous
|
||||
order = [3, 1, 2, 0]
|
||||
for df in [float_frame]:
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["D", "B", "C", "A"]]
|
||||
tm.assert_frame_equal(result, expected, check_names=False)
|
||||
|
||||
# negative indices
|
||||
order = [2, 1, -1]
|
||||
for df in [float_frame]:
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["C", "B", "D"]]
|
||||
tm.assert_frame_equal(result, expected, check_names=False)
|
||||
|
||||
# illegal indices
|
||||
msg = "indices are out-of-bounds"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, 30], axis=0)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, -31], axis=0)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, 5], axis=1)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, -5], axis=1)
|
||||
|
||||
def test_take_mixed_type(self, float_string_frame):
|
||||
# mixed-dtype
|
||||
order = [4, 1, 2, 0, 3]
|
||||
for df in [float_string_frame]:
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# negative indices
|
||||
order = [4, 1, -2]
|
||||
for df in [float_string_frame]:
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["foo", "B", "D"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
|
||||
# by dtype
|
||||
order = [1, 2, 0, 3]
|
||||
for df in [mixed_float_frame, mixed_int_frame]:
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["B", "C", "A", "D"]]
|
||||
tm.assert_frame_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,444 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SettingWithCopyError
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def four_level_index_dataframe():
    """Return a 3x5 float frame whose rows carry a four-level MultiIndex.

    The index levels are named "one" to "four" and the columns are the
    letters "A" to "E".
    """
    values = np.array(
        [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
            [-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
        ]
    )
    level_values = [["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]]
    level_codes = [[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]]
    row_index = MultiIndex(
        levels=level_values,
        codes=level_codes,
        names=["one", "two", "three", "four"],
    )
    return DataFrame(values, index=row_index, columns=list("ABCDE"))
|
||||
|
||||
|
||||
class TestXS:
|
||||
def test_xs(
|
||||
self, float_frame, datetime_frame, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
float_frame_orig = float_frame.copy()
|
||||
idx = float_frame.index[5]
|
||||
xs = float_frame.xs(idx)
|
||||
for item, value in xs.items():
|
||||
if np.isnan(value):
|
||||
assert np.isnan(float_frame[item][idx])
|
||||
else:
|
||||
assert value == float_frame[item][idx]
|
||||
|
||||
# mixed-type xs
|
||||
test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
|
||||
frame = DataFrame(test_data)
|
||||
xs = frame.xs("1")
|
||||
assert xs.dtype == np.object_
|
||||
assert xs["A"] == 1
|
||||
assert xs["B"] == "1"
|
||||
|
||||
with pytest.raises(
|
||||
KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')")
|
||||
):
|
||||
datetime_frame.xs(datetime_frame.index[0] - BDay())
|
||||
|
||||
# xs get column
|
||||
series = float_frame.xs("A", axis=1)
|
||||
expected = float_frame["A"]
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
# view is returned if possible
|
||||
series = float_frame.xs("A", axis=1)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
series[:] = 5
|
||||
if using_copy_on_write:
|
||||
# but with CoW the view shouldn't propagate mutations
|
||||
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
|
||||
assert not (expected == 5).all()
|
||||
else:
|
||||
assert (expected == 5).all()
|
||||
|
||||
def test_xs_corner(self):
|
||||
# pathological mixed-type reordering case
|
||||
df = DataFrame(index=[0])
|
||||
df["A"] = 1.0
|
||||
df["B"] = "foo"
|
||||
df["C"] = 2.0
|
||||
df["D"] = "bar"
|
||||
df["E"] = 3.0
|
||||
|
||||
xs = df.xs(0)
|
||||
exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0)
|
||||
tm.assert_series_equal(xs, exp)
|
||||
|
||||
# no columns but Index(dtype=object)
|
||||
df = DataFrame(index=["a", "b", "c"])
|
||||
result = df.xs("a")
|
||||
expected = Series([], name="a", dtype=np.float64)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_xs_duplicates(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((5, 2)),
|
||||
index=["b", "b", "c", "b", "a"],
|
||||
)
|
||||
|
||||
cross = df.xs("c")
|
||||
exp = df.iloc[2]
|
||||
tm.assert_series_equal(cross, exp)
|
||||
|
||||
def test_xs_keep_level(self):
|
||||
df = DataFrame(
|
||||
{
|
||||
"day": {0: "sat", 1: "sun"},
|
||||
"flavour": {0: "strawberry", 1: "strawberry"},
|
||||
"sales": {0: 10, 1: 12},
|
||||
"year": {0: 2008, 1: 2008},
|
||||
}
|
||||
).set_index(["year", "flavour", "day"])
|
||||
result = df.xs("sat", level="day", drop_level=False)
|
||||
expected = df[:1]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_xs_view(
|
||||
self, using_array_manager, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# in 0.14 this will return a view if possible a copy otherwise, but
|
||||
# this is numpy dependent
|
||||
|
||||
dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))
|
||||
df_orig = dm.copy()
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
dm.xs(2)[:] = 20
|
||||
tm.assert_frame_equal(dm, df_orig)
|
||||
elif using_array_manager:
|
||||
# INFO(ArrayManager) with ArrayManager getting a row as a view is
|
||||
# not possible
|
||||
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
dm.xs(2)[:] = 20
|
||||
assert not (dm.xs(2) == 20).any()
|
||||
else:
|
||||
with tm.raises_chained_assignment_error():
|
||||
dm.xs(2)[:] = 20
|
||||
assert (dm.xs(2) == 20).all()
|
||||
|
||||
|
||||
class TestXSWithMultiIndex:
    """Tests for ``xs`` on objects whose index or columns are a MultiIndex."""

    def test_xs_doc_example(self):
        """Select one column by naming both column levels in reverse order."""
        # TODO: more descriptive name
        # based on example in advanced.rst
        first = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
        second = ["one", "two", "one", "two", "one", "two", "one", "two"]
        columns = MultiIndex.from_tuples(
            list(zip(first, second)), names=["first", "second"]
        )
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 8)),
            index=["A", "B", "C"],
            columns=columns,
        )

        result = df.xs(("one", "bar"), level=("second", "first"), axis=1)

        # ("bar", "one") is the first column of the frame built above.
        tm.assert_frame_equal(result, df.iloc[:, [0]])

    def test_xs_integer_key(self):
        """An integer key on a named level matches ``loc`` (see GH#2107)."""
        dates = range(20111201, 20111205)
        ids = list("abcde")
        index = MultiIndex.from_product([dates, ids], names=["date", "secid"])
        values = np.random.default_rng(2).standard_normal((len(index), 3))
        df = DataFrame(values, index, ["X", "Y", "Z"])

        tm.assert_frame_equal(df.xs(20111201, level="date"), df.loc[20111201, :])

    def test_xs_level(self, multiindex_dataframe_random_data):
        """Selecting "two" on level "second" leaves only level "first"."""
        df = multiindex_dataframe_random_data
        expected = df[df.index.get_level_values(1) == "two"]
        expected.index = Index(["foo", "bar", "baz", "qux"], name="first")

        tm.assert_frame_equal(df.xs("two", level="second"), expected)

    def test_xs_level_eq_2(self):
        """Select on the last of three unnamed levels by position."""
        arr = np.random.default_rng(2).standard_normal((3, 5))
        index = MultiIndex(
            levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
            codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
        )
        df = DataFrame(arr, index=index)

        # Code 0 ("a", "b", "c") sits in the second row of ``arr``.
        expected = DataFrame(arr[1:2], index=[["a"], ["b"]])
        tm.assert_frame_equal(df.xs("c", level=2), expected)

    def test_xs_setting_with_copy_error(
        self,
        multiindex_dataframe_random_data,
        using_copy_on_write,
        warn_copy_on_write,
    ):
        """Writing into a single-level ``xs`` result leaves the parent intact."""
        # this is a copy in 0.14
        df = multiindex_dataframe_random_data
        untouched = df.copy()
        result = df.xs("two", level="second")

        if not (using_copy_on_write or warn_copy_on_write):
            # setting this will give a SettingWithCopyError
            # as we are trying to write a view
            msg = "A value is trying to be set on a copy of a slice from a DataFrame"
            with pytest.raises(SettingWithCopyError, match=msg):
                result[:] = 10
        else:
            result[:] = 10
        tm.assert_frame_equal(df, untouched)

    def test_xs_setting_with_copy_error_multiple(
        self, four_level_index_dataframe, using_copy_on_write, warn_copy_on_write
    ):
        """Writing into a two-level ``xs`` result leaves the parent intact."""
        # this is a copy in 0.14
        df = four_level_index_dataframe
        untouched = df.copy()
        result = df.xs(("a", 4), level=["one", "four"])

        if not (using_copy_on_write or warn_copy_on_write):
            # setting this will give a SettingWithCopyError
            # as we are trying to write a view
            msg = "A value is trying to be set on a copy of a slice from a DataFrame"
            with pytest.raises(SettingWithCopyError, match=msg):
                result[:] = 10
        else:
            result[:] = 10
        tm.assert_frame_equal(df, untouched)

    @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
    def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
        """``xs`` on a non-unique MultiIndex returns every match (see GH#13719)."""
        frame = multiindex_dataframe_random_data
        df = concat([frame] * 2)
        assert df.index.is_unique is False
        expected = concat([frame.xs("one", level="second")] * 2)

        # A list key is handed to ``xs`` as a tuple.
        lookup = tuple(key) if isinstance(key, list) else key
        result = df.xs(lookup, level=level)
        tm.assert_frame_equal(result, expected)

    def test_xs_missing_values_in_index(self):
        """Missing labels in the remaining level are kept (see GH#6574)."""
        # missing values in returned index should be preserved
        acc = [
            ("a", "abcde", 1),
            ("b", "bbcde", 2),
            ("y", "yzcde", 25),
            ("z", "xbcde", 24),
            ("z", None, 26),
            ("z", "zbcde", 25),
            ("z", "ybcde", 26),
        ]
        df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"])
        remaining_level = Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2")
        expected = DataFrame({"cnt": [24, 26, 25, 26]}, index=remaining_level)

        tm.assert_frame_equal(df.xs("z", level="a1"), expected)

    @pytest.mark.parametrize(
        "key, level, exp_arr, exp_index",
        [
            ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
            ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
        ],
    )
    def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index):
        """Column cross-section by named level (see GH#2903).

        ``exp_arr`` slices the raw array down to the expected values and
        ``exp_index`` is the expected remaining column index.
        """
        arr = np.random.default_rng(2).standard_normal((4, 4))
        columns = MultiIndex(
            levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
            codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
            names=["lvl0", "lvl1"],
        )
        df = DataFrame(arr, columns=columns)

        expected = DataFrame(exp_arr(arr), columns=exp_index)
        tm.assert_frame_equal(df.xs(key, level=level, axis=1), expected)

    @pytest.mark.parametrize(
        "indexer",
        [
            lambda df: df.xs(("a", 4), level=["one", "four"]),
            lambda df: df.xs("a").xs(4, level="four"),
        ],
    )
    def test_xs_level_multiple(self, indexer, four_level_index_dataframe):
        """One two-level ``xs`` and two chained ``xs`` calls give the same row."""
        df = four_level_index_dataframe
        remaining_levels = MultiIndex(
            levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
        )
        expected = DataFrame(
            [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]],
            index=remaining_levels,
            columns=list("ABCDE"),
        )

        tm.assert_frame_equal(indexer(df), expected)

    @pytest.mark.parametrize(
        "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
    )
    def test_xs_level0(self, indexer, four_level_index_dataframe):
        """``xs("a")`` with and without ``level=0`` gives the same two rows."""
        df = four_level_index_dataframe
        remaining_levels = MultiIndex(
            levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
            codes=[[0, 1], [0, 1], [1, 0]],
            names=["two", "three", "four"],
        )
        expected = DataFrame(
            [
                [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
                [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
            ],
            index=remaining_levels,
            columns=list("ABCDE"),
        )

        tm.assert_frame_equal(indexer(df), expected)

    def test_xs_values(self, multiindex_dataframe_random_data):
        """The values of a full-key ``xs`` equal the fifth row of ``.values``."""
        df = multiindex_dataframe_random_data
        selected = df.xs(("bar", "two")).values
        tm.assert_almost_equal(selected, df.values[4])

    def test_xs_loc_equality(self, multiindex_dataframe_random_data):
        """A full-key ``xs`` and ``loc`` with the same tuple agree."""
        df = multiindex_dataframe_random_data
        key = ("bar", "two")
        tm.assert_series_equal(df.xs(key), df.loc[key])

    def test_xs_IndexSlice_argument_not_implemented(self, frame_or_series):
        """``xs`` and ``loc`` accept the same ``IndexSlice`` key (GH#35301)."""
        index = MultiIndex(
            levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        )

        obj = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
        if frame_or_series is Series:
            obj = obj[0]

        # The last two rows carry the ("foo", "qux", 0) label on level 0.
        expected = obj.iloc[-2:].droplevel(0)
        key = IndexSlice[("foo", "qux", 0), :]

        tm.assert_equal(obj.xs(key), expected)
        tm.assert_equal(obj.loc[key], expected)

    def test_xs_levels_raises(self, frame_or_series):
        """Passing ``level`` on an object without a MultiIndex raises TypeError."""
        obj = DataFrame({"A": [1, 2, 3]})
        if frame_or_series is Series:
            obj = obj["A"]

        with pytest.raises(TypeError, match="Index must be a MultiIndex"):
            obj.xs(0, level="as")

    def test_xs_multiindex_droplevel_false(self):
        """``drop_level=False`` keeps both column levels (GH#19056)."""
        level_names = ["level1", "level2"]
        mi = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y"), ("b", "x")], names=level_names
        )
        df = DataFrame([[1, 2, 3]], columns=mi)

        kept_columns = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y")], names=level_names
        )
        expected = DataFrame([[1, 2]], columns=kept_columns)
        result = df.xs("a", axis=1, drop_level=False)
        tm.assert_frame_equal(result, expected)

    def test_xs_droplevel_false(self):
        """``drop_level=False`` on flat columns returns a frame (GH#19056)."""
        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        tm.assert_frame_equal(result, DataFrame({"a": [1]}))

    def test_xs_droplevel_false_view(
        self, using_array_manager, using_copy_on_write, warn_copy_on_write
    ):
        """Whether a parent write shows up in a ``drop_level=False`` result (GH#37832)."""
        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        # check that result still views the same data as df
        assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)

        with tm.assert_cow_warning(warn_copy_on_write):
            df.iloc[0, 0] = 2
        if not using_copy_on_write:
            # modifying original df also modifies result when having a single block
            expected = DataFrame({"a": [2]})
        else:
            # with copy on write the subset is never modified
            expected = DataFrame({"a": [1]})
        tm.assert_frame_equal(result, expected)

        # with mixed dataframe, modifying the parent doesn't modify result
        # TODO the "split" path behaves differently here as with single block
        df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        df.iloc[0, 0] = 2
        if using_copy_on_write:
            # with copy on write the subset is never modified
            expected_value = 1
        elif using_array_manager:
            # Here the behavior is consistent
            expected_value = 2
        else:
            # FIXME: iloc does not update the array inplace using
            # "split" path
            expected_value = 1
        tm.assert_frame_equal(result, DataFrame({"a": [expected_value]}))

    def test_xs_list_indexer_droplevel_false(self):
        """A key whose second entry is missing from its level raises (GH#41760)."""
        mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")])
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
        with pytest.raises(KeyError, match="y"):
            df.xs(("x", "y"), drop_level=False, axis=1)
|
Reference in New Issue
Block a user