forked from Alsan/Post_finder
venv
Binary file not shown.
@@ -0,0 +1,63 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from pandas import (
    DataFrame,
    concat,
)

if TYPE_CHECKING:
    from pandas._typing import AxisInt


def _check_mixed_float(df, dtype=None):
    # float16 are most likely to be upcasted to float32
    dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}
    if isinstance(dtype, str):
        dtypes = {k: dtype for k, v in dtypes.items()}
    elif isinstance(dtype, dict):
        dtypes.update(dtype)
    if dtypes.get("A"):
        assert df.dtypes["A"] == dtypes["A"]
    if dtypes.get("B"):
        assert df.dtypes["B"] == dtypes["B"]
    if dtypes.get("C"):
        assert df.dtypes["C"] == dtypes["C"]
    if dtypes.get("D"):
        assert df.dtypes["D"] == dtypes["D"]


def _check_mixed_int(df, dtype=None):
    dtypes = {"A": "int32", "B": "uint64", "C": "uint8", "D": "int64"}
    if isinstance(dtype, str):
        dtypes = {k: dtype for k, v in dtypes.items()}
    elif isinstance(dtype, dict):
        dtypes.update(dtype)
    if dtypes.get("A"):
        assert df.dtypes["A"] == dtypes["A"]
    if dtypes.get("B"):
        assert df.dtypes["B"] == dtypes["B"]
    if dtypes.get("C"):
        assert df.dtypes["C"] == dtypes["C"]
    if dtypes.get("D"):
        assert df.dtypes["D"] == dtypes["D"]


def zip_frames(frames: list[DataFrame], axis: AxisInt = 1) -> DataFrame:
    """
    take a list of frames, zip them together under the
    assumption that these all have the first frames' index/columns.

    Returns
    -------
    new_frame : DataFrame
    """
    if axis == 1:
        columns = frames[0].columns
        zipped = [f.loc[:, c] for c in columns for f in frames]
        return concat(zipped, axis=1)
    else:
        index = frames[0].index
        zipped = [f.loc[i, :] for i in index for f in frames]
        return DataFrame(zipped)
100
venv/lib/python3.12/site-packages/pandas/tests/frame/conftest.py
Normal file
@@ -0,0 +1,100 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    NaT,
    date_range,
)


@pytest.fixture
def datetime_frame() -> DataFrame:
    """
    Fixture for DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']
    """
    return DataFrame(
        np.random.default_rng(2).standard_normal((100, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=100, freq="B"),
    )


@pytest.fixture
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'].
    """
    df = DataFrame(
        np.random.default_rng(2).standard_normal((30, 4)),
        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
        columns=Index(list("ABCD"), dtype=object),
    )
    df["foo"] = "bar"
    return df


@pytest.fixture
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    df = DataFrame(
        {
            col: np.random.default_rng(2).random(30, dtype=dtype)
            for col, dtype in zip(
                list("ABCD"), ["float32", "float32", "float32", "float64"]
            )
        },
        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
    )
    # not supported by numpy random
    df["C"] = df["C"].astype("float16")
    return df


@pytest.fixture
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    return DataFrame(
        {
            col: np.ones(30, dtype=dtype)
            for col, dtype in zip(list("ABCD"), ["int32", "uint64", "uint8", "int64"])
        },
        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
    )


@pytest.fixture
def timezone_frame():
    """
    Fixture for DataFrame of date_range Series with different time zones

    Columns are ['A', 'B', 'C']; some entries are missing

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    df = DataFrame(
        {
            "A": date_range("20130101", periods=3),
            "B": date_range("20130101", periods=3, tz="US/Eastern"),
            "C": date_range("20130101", periods=3, tz="CET"),
        }
    )
    df.iloc[1, 1] = NaT
    df.iloc[1, 2] = NaT
    return df
Binary file not shown.
@@ -0,0 +1,228 @@
from collections import OrderedDict

import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    RangeIndex,
    Series,
)
import pandas._testing as tm


class TestFromDict:
    # Note: these tests are specific to the from_dict method, not for
    # passing dictionaries to DataFrame.__init__

    def test_constructor_list_of_odicts(self):
        data = [
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
            OrderedDict([["a", 1.5], ["d", 6]]),
            OrderedDict(),
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
        ]

        result = DataFrame(data)
        expected = DataFrame.from_dict(
            dict(zip(range(len(data)), data)), orient="index"
        )
        tm.assert_frame_equal(result, expected.reindex(result.index))

    def test_constructor_single_row(self):
        data = [OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])]

        result = DataFrame(data)
        expected = DataFrame.from_dict(dict(zip([0], data)), orient="index").reindex(
            result.index
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.skipif(
        using_pyarrow_string_dtype(), reason="columns inferring logic broken"
    )
    def test_constructor_list_of_series(self):
        data = [
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
        ]
        sdict = OrderedDict(zip(["x", "y"], data))
        idx = Index(["a", "b", "c"])

        # all named
        data2 = [
            Series([1.5, 3, 4], idx, dtype="O", name="x"),
            Series([1.5, 3, 6], idx, name="y"),
        ]
        result = DataFrame(data2)
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected)

        # some unnamed
        data2 = [
            Series([1.5, 3, 4], idx, dtype="O", name="x"),
            Series([1.5, 3, 6], idx),
        ]
        result = DataFrame(data2)

        sdict = OrderedDict(zip(["x", "Unnamed 0"], data))
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected)

        # none named
        data = [
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
            OrderedDict([["a", 1.5], ["d", 6]]),
            OrderedDict(),
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
        ]
        data = [Series(d) for d in data]

        result = DataFrame(data)
        sdict = OrderedDict(zip(range(len(data)), data))
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected.reindex(result.index))

        result2 = DataFrame(data, index=np.arange(6, dtype=np.int64))
        tm.assert_frame_equal(result, result2)

        result = DataFrame([Series(dtype=object)])
        expected = DataFrame(index=[0])
        tm.assert_frame_equal(result, expected)

        data = [
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
        ]
        sdict = OrderedDict(zip(range(len(data)), data))

        idx = Index(["a", "b", "c"])
        data2 = [Series([1.5, 3, 4], idx, dtype="O"), Series([1.5, 3, 6], idx)]
        result = DataFrame(data2)
        expected = DataFrame.from_dict(sdict, orient="index")
        tm.assert_frame_equal(result, expected)

    def test_constructor_orient(self, float_string_frame):
        data_dict = float_string_frame.T._series
        recons = DataFrame.from_dict(data_dict, orient="index")
        expected = float_string_frame.reindex(index=recons.index)
        tm.assert_frame_equal(recons, expected)

        # dict of sequence
        a = {"hi": [32, 3, 3], "there": [3, 5, 3]}
        rs = DataFrame.from_dict(a, orient="index")
        xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
        tm.assert_frame_equal(rs, xp)

    def test_constructor_from_ordered_dict(self):
        # GH#8425
        a = OrderedDict(
            [
                ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])),
                ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])),
                ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])),
            ]
        )
        expected = DataFrame.from_dict(a, orient="columns").T
        result = DataFrame.from_dict(a, orient="index")
        tm.assert_frame_equal(result, expected)

    def test_from_dict_columns_parameter(self):
        # GH#18529
        # Test new columns parameter for from_dict that was added to make
        # from_items(..., orient='index', columns=[...]) easier to replicate
        result = DataFrame.from_dict(
            OrderedDict([("A", [1, 2]), ("B", [4, 5])]),
            orient="index",
            columns=["one", "two"],
        )
        expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=["one", "two"])
        tm.assert_frame_equal(result, expected)

        msg = "cannot use columns parameter with orient='columns'"
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict(
                {"A": [1, 2], "B": [4, 5]},
                orient="columns",
                columns=["one", "two"],
            )
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])

    @pytest.mark.parametrize(
        "data_dict, orient, expected",
        [
            ({}, "index", RangeIndex(0)),
            (
                [{("a",): 1}, {("a",): 2}],
                "columns",
                Index([("a",)], tupleize_cols=False),
            ),
            (
                [OrderedDict([(("a",), 1), (("b",), 2)])],
                "columns",
                Index([("a",), ("b",)], tupleize_cols=False),
            ),
            ([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)),
        ],
    )
    def test_constructor_from_dict_tuples(self, data_dict, orient, expected):
        # GH#16769
        df = DataFrame.from_dict(data_dict, orient)
        result = df.columns
        tm.assert_index_equal(result, expected)

    def test_frame_dict_constructor_empty_series(self):
        s1 = Series(
            [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])
        )
        s2 = Series(
            [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])
        )
        s3 = Series(dtype=object)

        # it works!
        DataFrame({"foo": s1, "bar": s2, "baz": s3})
        DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2})

    def test_from_dict_scalars_requires_index(self):
        msg = "If using all scalar values, you must pass an index"
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict(OrderedDict([("b", 8), ("a", 5), ("a", 6)]))

    def test_from_dict_orient_invalid(self):
        msg = (
            "Expected 'index', 'columns' or 'tight' for orient parameter. "
            "Got 'abc' instead"
        )
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict({"foo": 1, "baz": 3, "bar": 2}, orient="abc")

    def test_from_dict_order_with_single_column(self):
        data = {
            "alpha": {
                "value2": 123,
                "value1": 532,
                "animal": 222,
                "plant": False,
                "name": "test",
            }
        }
        result = DataFrame.from_dict(
            data,
            orient="columns",
        )
        expected = DataFrame(
            [[123], [532], [222], [False], ["test"]],
            index=["value2", "value1", "animal", "plant", "name"],
            columns=["alpha"],
        )
        tm.assert_frame_equal(result, expected)
@@ -0,0 +1,505 @@
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.compat import is_platform_little_endian
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
RangeIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestFromRecords:
|
||||
def test_from_records_dt64tz_frame(self):
|
||||
# GH#51162 don't lose tz when calling from_records with DataFrame input
|
||||
dti = date_range("2016-01-01", periods=10, tz="US/Pacific")
|
||||
df = DataFrame({i: dti for i in range(4)})
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
res = DataFrame.from_records(df)
|
||||
tm.assert_frame_equal(res, df)
|
||||
|
||||
def test_from_records_with_datetimes(self):
|
||||
# this may fail on certain platforms because of a numpy issue
|
||||
# related GH#6140
|
||||
if not is_platform_little_endian():
|
||||
pytest.skip("known failure of test on non-little endian")
|
||||
|
||||
# construction with a null in a recarray
|
||||
# GH#6140
|
||||
expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]})
|
||||
|
||||
arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
|
||||
dtypes = [("EXPIRY", "<M8[ns]")]
|
||||
|
||||
recarray = np.rec.fromarrays(arrdata, dtype=dtypes)
|
||||
|
||||
result = DataFrame.from_records(recarray)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# coercion should work too
|
||||
arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
|
||||
dtypes = [("EXPIRY", "<M8[m]")]
|
||||
recarray = np.rec.fromarrays(arrdata, dtype=dtypes)
|
||||
result = DataFrame.from_records(recarray)
|
||||
# we get the closest supported unit, "s"
|
||||
expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
using_pyarrow_string_dtype(), reason="dtype checking logic doesn't work"
|
||||
)
|
||||
def test_from_records_sequencelike(self):
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float64
|
||||
),
|
||||
"A1": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float64
|
||||
),
|
||||
"B": np.array(np.arange(6), dtype=np.int64),
|
||||
"C": ["foo"] * 6,
|
||||
"D": np.array([True, False] * 3, dtype=bool),
|
||||
"E": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float32
|
||||
),
|
||||
"E1": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float32
|
||||
),
|
||||
"F": np.array(np.arange(6), dtype=np.int32),
|
||||
}
|
||||
)
|
||||
|
||||
# this is actually tricky to create the recordlike arrays and
|
||||
# have the dtypes be intact
|
||||
blocks = df._to_dict_of_blocks()
|
||||
tuples = []
|
||||
columns = []
|
||||
dtypes = []
|
||||
for dtype, b in blocks.items():
|
||||
columns.extend(b.columns)
|
||||
dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])
|
||||
for i in range(len(df.index)):
|
||||
tup = []
|
||||
for _, b in blocks.items():
|
||||
tup.extend(b.iloc[i].values)
|
||||
tuples.append(tuple(tup))
|
||||
|
||||
recarray = np.array(tuples, dtype=dtypes).view(np.rec.recarray)
|
||||
recarray2 = df.to_records()
|
||||
lists = [list(x) for x in tuples]
|
||||
|
||||
# tuples (lose the dtype info)
|
||||
result = DataFrame.from_records(tuples, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
|
||||
# created recarray and with to_records recarray (have dtype info)
|
||||
result2 = DataFrame.from_records(recarray, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
result3 = DataFrame.from_records(recarray2, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
|
||||
# list of tuples (no dtype info)
|
||||
result4 = DataFrame.from_records(lists, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, df, check_dtype=False)
|
||||
tm.assert_frame_equal(result2, df)
|
||||
tm.assert_frame_equal(result3, df)
|
||||
tm.assert_frame_equal(result4, df, check_dtype=False)
|
||||
|
||||
# tuples is in the order of the columns
|
||||
result = DataFrame.from_records(tuples)
|
||||
tm.assert_index_equal(result.columns, RangeIndex(8))
|
||||
|
||||
# test exclude parameter & we are casting the results here (as we don't
|
||||
# have dtype info to recover)
|
||||
columns_to_test = [columns.index("C"), columns.index("E1")]
|
||||
|
||||
exclude = list(set(range(8)) - set(columns_to_test))
|
||||
result = DataFrame.from_records(tuples, exclude=exclude)
|
||||
result.columns = [columns[i] for i in sorted(columns_to_test)]
|
||||
tm.assert_series_equal(result["C"], df["C"])
|
||||
tm.assert_series_equal(result["E1"], df["E1"])
|
||||
|
||||
def test_from_records_sequencelike_empty(self):
|
||||
# empty case
|
||||
result = DataFrame.from_records([], columns=["foo", "bar", "baz"])
|
||||
assert len(result) == 0
|
||||
tm.assert_index_equal(result.columns, Index(["foo", "bar", "baz"]))
|
||||
|
||||
result = DataFrame.from_records([])
|
||||
assert len(result) == 0
|
||||
assert len(result.columns) == 0
|
||||
|
||||
def test_from_records_dictlike(self):
|
||||
# test the dict methods
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float64
|
||||
),
|
||||
"A1": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float64
|
||||
),
|
||||
"B": np.array(np.arange(6), dtype=np.int64),
|
||||
"C": ["foo"] * 6,
|
||||
"D": np.array([True, False] * 3, dtype=bool),
|
||||
"E": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float32
|
||||
),
|
||||
"E1": np.array(
|
||||
np.random.default_rng(2).standard_normal(6), dtype=np.float32
|
||||
),
|
||||
"F": np.array(np.arange(6), dtype=np.int32),
|
||||
}
|
||||
)
|
||||
|
||||
# columns is in a different order here than the actual items iterated
|
||||
# from the dict
|
||||
blocks = df._to_dict_of_blocks()
|
||||
columns = []
|
||||
for b in blocks.values():
|
||||
columns.extend(b.columns)
|
||||
|
||||
asdict = dict(df.items())
|
||||
asdict2 = {x: y.values for x, y in df.items()}
|
||||
|
||||
# dict of series & dict of ndarrays (have dtype info)
|
||||
results = []
|
||||
results.append(DataFrame.from_records(asdict).reindex(columns=df.columns))
|
||||
results.append(
|
||||
DataFrame.from_records(asdict, columns=columns).reindex(columns=df.columns)
|
||||
)
|
||||
results.append(
|
||||
DataFrame.from_records(asdict2, columns=columns).reindex(columns=df.columns)
|
||||
)
|
||||
|
||||
for r in results:
|
||||
tm.assert_frame_equal(r, df)
|
||||
|
||||
def test_from_records_with_index_data(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"]
|
||||
)
|
||||
|
||||
data = np.random.default_rng(2).standard_normal(10)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df1 = DataFrame.from_records(df, index=data)
|
||||
tm.assert_index_equal(df1.index, Index(data))
|
||||
|
||||
def test_from_records_bad_index_column(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"]
|
||||
)
|
||||
|
||||
# should pass
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df1 = DataFrame.from_records(df, index=["C"])
|
||||
tm.assert_index_equal(df1.index, Index(df.C))
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df1 = DataFrame.from_records(df, index="C")
|
||||
tm.assert_index_equal(df1.index, Index(df.C))
|
||||
|
||||
# should fail
|
||||
msg = "|".join(
|
||||
[
|
||||
r"'None of \[2\] are in the columns'",
|
||||
]
|
||||
)
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
DataFrame.from_records(df, index=[2])
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
DataFrame.from_records(df, index=2)
|
||||
|
||||
def test_from_records_non_tuple(self):
|
||||
class Record:
|
||||
def __init__(self, *args) -> None:
|
||||
self.args = args
|
||||
|
||||
def __getitem__(self, i):
|
||||
return self.args[i]
|
||||
|
||||
def __iter__(self) -> Iterator:
|
||||
return iter(self.args)
|
||||
|
||||
recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)]
|
||||
tups = [tuple(rec) for rec in recs]
|
||||
|
||||
result = DataFrame.from_records(recs)
|
||||
expected = DataFrame.from_records(tups)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_len0_with_columns(self):
|
||||
# GH#2633
|
||||
result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])
|
||||
expected = Index(["bar"])
|
||||
|
||||
assert len(result) == 0
|
||||
assert result.index.name == "foo"
|
||||
tm.assert_index_equal(result.columns, expected)
|
||||
|
||||
def test_from_records_series_list_dict(self):
|
||||
# GH#27358
|
||||
expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T
|
||||
data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]])
|
||||
result = DataFrame.from_records(data)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_series_categorical_index(self):
|
||||
# GH#32805
|
||||
index = CategoricalIndex(
|
||||
[Interval(-20, -10), Interval(-10, 0), Interval(0, 10)]
|
||||
)
|
||||
series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index)
|
||||
frame = DataFrame.from_records(series_of_dicts, index=index)
|
||||
expected = DataFrame(
|
||||
{"a": [1, 2, np.nan], "b": [np.nan, np.nan, 3]}, index=index
|
||||
)
|
||||
tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_frame_from_records_utc(self):
|
||||
rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)}
|
||||
|
||||
# it works
|
||||
DataFrame.from_records([rec], index="begin_time")
|
||||
|
||||
def test_from_records_to_records(self):
|
||||
# from numpy documentation
|
||||
arr = np.zeros((2,), dtype=("i4,f4,S10"))
|
||||
arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
|
||||
|
||||
DataFrame.from_records(arr)
|
||||
|
||||
index = Index(np.arange(len(arr))[::-1])
|
||||
indexed_frame = DataFrame.from_records(arr, index=index)
|
||||
tm.assert_index_equal(indexed_frame.index, index)
|
||||
|
||||
# without names, it should go to last ditch
|
||||
arr2 = np.zeros((2, 3))
|
||||
tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))
|
||||
|
||||
# wrong length
|
||||
msg = "|".join(
|
||||
[
|
||||
r"Length of values \(2\) does not match length of index \(1\)",
|
||||
]
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DataFrame.from_records(arr, index=index[:-1])
|
||||
|
||||
indexed_frame = DataFrame.from_records(arr, index="f1")
|
||||
|
||||
# what to do?
|
||||
records = indexed_frame.to_records()
|
||||
assert len(records.dtype.names) == 3
|
||||
|
||||
records = indexed_frame.to_records(index=False)
|
||||
assert len(records.dtype.names) == 2
|
||||
assert "index" not in records.dtype.names
|
||||
|
||||
def test_from_records_nones(self):
|
||||
tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)]
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"])
|
||||
assert np.isnan(df["c"][0])
|
||||
|
||||
def test_from_records_iterator(self):
|
||||
arr = np.array(
|
||||
[(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)],
|
||||
dtype=[
|
||||
("x", np.float64),
|
||||
("u", np.float32),
|
||||
("y", np.int64),
|
||||
("z", np.int32),
|
||||
],
|
||||
)
|
||||
df = DataFrame.from_records(iter(arr), nrows=2)
|
||||
xp = DataFrame(
|
||||
{
|
||||
"x": np.array([1.0, 3.0], dtype=np.float64),
|
||||
"u": np.array([1.0, 3.0], dtype=np.float32),
|
||||
"y": np.array([2, 4], dtype=np.int64),
|
||||
"z": np.array([2, 4], dtype=np.int32),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(df.reindex_like(xp), xp)
|
||||
|
||||
# no dtypes specified here, so just compare with the default
|
||||
arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)]
|
||||
df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2)
|
||||
tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False)
|
||||
|
||||
def test_from_records_tuples_generator(self):
|
||||
def tuple_generator(length):
|
||||
for i in range(length):
|
||||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
yield (i, letters[i % len(letters)], i / length)
|
||||
|
||||
columns_names = ["Integer", "String", "Float"]
|
||||
columns = [
|
||||
[i[j] for i in tuple_generator(10)] for j in range(len(columns_names))
|
||||
]
|
||||
data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
|
||||
expected = DataFrame(data, columns=columns_names)
|
||||
|
||||
generator = tuple_generator(10)
|
||||
result = DataFrame.from_records(generator, columns=columns_names)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_lists_generator(self):
|
||||
def list_generator(length):
|
||||
for i in range(length):
|
||||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
yield [i, letters[i % len(letters)], i / length]
|
||||
|
||||
columns_names = ["Integer", "String", "Float"]
|
||||
columns = [
|
||||
[i[j] for i in list_generator(10)] for j in range(len(columns_names))
|
||||
]
|
||||
data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
|
||||
expected = DataFrame(data, columns=columns_names)
|
||||
|
||||
generator = list_generator(10)
|
||||
result = DataFrame.from_records(generator, columns=columns_names)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_columns_not_modified(self):
|
||||
tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)]
|
||||
|
||||
columns = ["a", "b", "c"]
|
||||
original_columns = list(columns)
|
||||
|
||||
DataFrame.from_records(tuples, columns=columns, index="a")
|
||||
|
||||
assert columns == original_columns
|
||||
|
||||
def test_from_records_decimal(self):
|
||||
tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)]
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=["a"])
|
||||
assert df["a"].dtype == object
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True)
|
||||
assert df["a"].dtype == np.float64
|
||||
assert np.isnan(df["a"].values[-1])
|
||||
|
||||
def test_from_records_duplicates(self):
|
||||
result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
|
||||
|
||||
expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_set_index_name(self):
|
||||
def create_dict(order_id):
|
||||
return {
|
||||
"order_id": order_id,
|
||||
"quantity": np.random.default_rng(2).integers(1, 10),
|
||||
"price": np.random.default_rng(2).integers(1, 10),
|
||||
}
|
||||
|
||||
documents = [create_dict(i) for i in range(10)]
|
||||
# demo missing data
|
||||
documents.append({"order_id": 10, "quantity": 5})
|
||||
|
||||
result = DataFrame.from_records(documents, index="order_id")
|
||||
assert result.index.name == "order_id"
|
||||
|
||||
# MultiIndex
|
||||
result = DataFrame.from_records(documents, index=["order_id", "quantity"])
|
||||
assert result.index.names == ("order_id", "quantity")
|
||||
|
||||
def test_from_records_misc_brokenness(self):
|
||||
# GH#2179
|
||||
|
||||
data = {1: ["foo"], 2: ["bar"]}
|
||||
|
||||
result = DataFrame.from_records(data, columns=["a", "b"])
|
||||
exp = DataFrame(data, columns=["a", "b"])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
|
||||
# overlap in index/index_names
|
||||
|
||||
data = {"a": [1, 2, 3], "b": [4, 5, 6]}
|
||||
|
||||
result = DataFrame.from_records(data, index=["a", "b", "c"])
|
||||
exp = DataFrame(data, index=["a", "b", "c"])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
|
||||
def test_from_records_misc_brokenness2(self):
|
||||
# GH#2623
|
||||
rows = []
|
||||
rows.append([datetime(2010, 1, 1), 1])
|
||||
rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj
|
||||
result = DataFrame.from_records(rows, columns=["date", "test"])
|
||||
expected = DataFrame(
|
||||
{"date": [row[0] for row in rows], "test": [row[1] for row in rows]}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
assert result.dtypes["test"] == np.dtype(object)
|
||||
|
||||
def test_from_records_misc_brokenness3(self):
|
||||
rows = []
|
||||
rows.append([datetime(2010, 1, 1), 1])
|
||||
rows.append([datetime(2010, 1, 2), 1])
|
||||
result = DataFrame.from_records(rows, columns=["date", "test"])
|
||||
expected = DataFrame(
|
||||
{"date": [row[0] for row in rows], "test": [row[1] for row in rows]}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_empty(self):
|
||||
# GH#3562
|
||||
result = DataFrame.from_records([], columns=["a", "b", "c"])
|
||||
expected = DataFrame(columns=["a", "b", "c"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = DataFrame.from_records([], columns=["a", "b", "b"])
|
||||
expected = DataFrame(columns=["a", "b", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_empty_with_nonempty_fields_gh3682(self):
|
||||
a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])
|
||||
df = DataFrame.from_records(a, index="id")
|
||||
|
||||
ex_index = Index([1], name="id")
|
||||
expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
b = a[:0]
|
||||
df2 = DataFrame.from_records(b, index="id")
|
||||
tm.assert_frame_equal(df2, df.iloc[:0])
|
||||
|
||||
def test_from_records_empty2(self):
|
||||
# GH#42456
|
||||
dtype = [("prop", int)]
|
||||
shape = (0, len(dtype))
|
||||
arr = np.empty(shape, dtype=dtype)
|
||||
|
||||
result = DataFrame.from_records(arr)
|
||||
expected = DataFrame({"prop": np.array([], dtype=int)})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
alt = DataFrame(arr)
|
||||
tm.assert_frame_equal(alt, expected)
|
Binary file not shown.
@@ -0,0 +1,199 @@
"""
Tests for values coercion in setitem-like operations on DataFrame.

For the most part, these should be multi-column DataFrames, otherwise
we would share the tests with Series.
"""
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    MultiIndex,
    NaT,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestDataFrameSetitemCoercion:
    @pytest.mark.parametrize("consolidate", [True, False])
    def test_loc_setitem_multiindex_columns(self, consolidate):
        # GH#18415 Setting values in a single column preserves dtype,
        # while setting them in multiple columns did unwanted cast.

        # Note that A here has 2 blocks, below we do the same thing
        # with a consolidated frame.
        A = DataFrame(np.zeros((6, 5), dtype=np.float32))
        A = pd.concat([A, A], axis=1, keys=[1, 2])
        if consolidate:
            A = A._consolidate()

        A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
        assert (A.dtypes == np.float32).all()

        A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)

        assert (A.dtypes == np.float32).all()

        A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
        assert (A.dtypes == np.float32).all()

        # TODO: i think this isn't about MultiIndex and could be done with iloc?


def test_37477():
    # fixed by GH#45121
    orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
    expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})

    df = orig.copy()
    with tm.assert_produces_warning(
        FutureWarning, match="Setting an item of incompatible dtype"
    ):
        df.at[1, "B"] = 1.2
    tm.assert_frame_equal(df, expected)

    df = orig.copy()
    with tm.assert_produces_warning(
        FutureWarning, match="Setting an item of incompatible dtype"
    ):
        df.loc[1, "B"] = 1.2
    tm.assert_frame_equal(df, expected)

    df = orig.copy()
    with tm.assert_produces_warning(
        FutureWarning, match="Setting an item of incompatible dtype"
    ):
        df.iat[1, 1] = 1.2
    tm.assert_frame_equal(df, expected)

    df = orig.copy()
    with tm.assert_produces_warning(
        FutureWarning, match="Setting an item of incompatible dtype"
    ):
        df.iloc[1, 1] = 1.2
    tm.assert_frame_equal(df, expected)


def test_6942(indexer_al):
    # check that the .at __setitem__ after setting "Live" actually sets the data
    start = Timestamp("2014-04-01")
    t1 = Timestamp("2014-04-23 12:42:38.883082")
    t2 = Timestamp("2014-04-24 01:33:30.040039")

    dti = date_range(start, periods=1)
    orig = DataFrame(index=dti, columns=["timenow", "Live"])

    df = orig.copy()
    indexer_al(df)[start, "timenow"] = t1

    df["Live"] = True

    df.at[start, "timenow"] = t2
    assert df.iloc[0, 0] == t2


def test_26395(indexer_al):
    # .at case fixed by GH#45121 (best guess)
    df = DataFrame(index=["A", "B", "C"])
    df["D"] = 0

    indexer_al(df)["C", "D"] = 2
    expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
    tm.assert_frame_equal(df, expected)

    with tm.assert_produces_warning(
        FutureWarning, match="Setting an item of incompatible dtype"
    ):
        indexer_al(df)["C", "D"] = 44.5
    expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
    tm.assert_frame_equal(df, expected)

    with tm.assert_produces_warning(
        FutureWarning, match="Setting an item of incompatible dtype"
    ):
        indexer_al(df)["C", "D"] = "hello"
    expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
    tm.assert_frame_equal(df, expected)


@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
    df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
    df.loc[2] = Series({"a": 5, "b": 6})
    assert (df.dtypes == np.int64).all()

    df.loc[3] = Series({"a": 7})

    # df["a"] doesn't have any NaNs, should not have been cast
    exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
    tm.assert_series_equal(df.dtypes, exp_dtypes)


def test_iloc_setitem_unnecesssary_float_upcasting():
    # GH#12255
    df = DataFrame(
        {
            0: np.array([1, 3], dtype=np.float32),
            1: np.array([2, 4], dtype=np.float32),
            2: ["a", "b"],
        }
    )
    orig = df.copy()

    values = df[0].values.reshape(2, 1)
    df.iloc[:, 0:1] = values

    tm.assert_frame_equal(df, orig)


@pytest.mark.xfail(reason="unwanted casting to dt64")
def test_12499():
    # TODO: OP in GH#12499 used np.datetim64("NaT") instead of pd.NaT,
    # which has consequences for the expected df["two"] (though i think at
    # the time it might not have because of a separate bug). See if it makes
    # a difference which one we use here.
    ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")

    data = [{"one": 0, "two": ts}]
    orig = DataFrame(data)
    df = orig.copy()
    df.loc[1] = [np.nan, NaT]

    expected = DataFrame(
        {"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
    )
    tm.assert_frame_equal(df, expected)

    data = [{"one": 0, "two": ts}]
    df = orig.copy()
    df.loc[1, :] = [np.nan, NaT]
    tm.assert_frame_equal(df, expected)


def test_20476():
    mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
    df = DataFrame(-1, index=range(3), columns=mi)
    filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])
    df["A"] = filler

    expected = DataFrame(
        {
            0: [1, 1, 1],
            1: [2, 2, 2],
            2: [3.0, 3.0, 3.0],
            3: [-1, -1, -1],
            4: [-1, -1, -1],
            5: [-1, -1, -1],
        }
    )
    expected.columns = mi
    exp_dtypes = Series(
        [np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
        index=mi,
    )
    tm.assert_series_equal(df.dtypes, exp_dtypes)
@@ -0,0 +1,60 @@
import re

import numpy as np
import pytest

from pandas import (
    DataFrame,
    MultiIndex,
)


class TestDataFrameDelItem:
    def test_delitem(self, float_frame):
        del float_frame["A"]
        assert "A" not in float_frame

    def test_delitem_multiindex(self):
        midx = MultiIndex.from_product([["A", "B"], [1, 2]])
        df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), columns=midx)
        assert len(df.columns) == 4
        assert ("A",) in df.columns
        assert "A" in df.columns

        result = df["A"]
        assert isinstance(result, DataFrame)
        del df["A"]

        assert len(df.columns) == 2

        # A still in the levels, BUT get a KeyError if trying
        # to delete
        assert ("A",) not in df.columns
        with pytest.raises(KeyError, match=re.escape("('A',)")):
            del df[("A",)]

        # behavior of dropped/deleted MultiIndex levels changed from
        # GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
        # levels which are dropped/deleted
        assert "A" not in df.columns
        with pytest.raises(KeyError, match=re.escape("('A',)")):
            del df["A"]

    def test_delitem_corner(self, float_frame):
        f = float_frame.copy()
        del f["D"]
        assert len(f.columns) == 3
        with pytest.raises(KeyError, match=r"^'D'$"):
            del f["D"]
        del f["B"]
        assert len(f.columns) == 2

    def test_delitem_col_still_multiindex(self):
        arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]

        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)

        df = DataFrame(np.random.default_rng(2).standard_normal((3, 4)), columns=index)
        del df[("a", "", "")]
        assert isinstance(df.columns, MultiIndex)
@@ -0,0 +1,27 @@
import pytest

from pandas import DataFrame
import pandas._testing as tm


class TestGet:
    def test_get(self, float_frame):
        b = float_frame.get("B")
        tm.assert_series_equal(b, float_frame["B"])

        assert float_frame.get("foo") is None
        tm.assert_series_equal(
            float_frame.get("foo", float_frame["B"]), float_frame["B"]
        )

    @pytest.mark.parametrize(
        "df",
        [
            DataFrame(),
            DataFrame(columns=list("AB")),
            DataFrame(columns=list("AB"), index=range(3)),
        ],
    )
    def test_get_none(self, df):
        # see gh-5652
        assert df.get(None) is None
@@ -0,0 +1,22 @@
import pytest

from pandas import (
    DataFrame,
    MultiIndex,
)


class TestGetValue:
    def test_get_set_value_no_partial_indexing(self):
        # partial w/ MultiIndex raise exception
        index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
        df = DataFrame(index=index, columns=range(4))
        with pytest.raises(KeyError, match=r"^0$"):
            df._get_value(0, 1)

    def test_get_value(self, float_frame):
        for idx in float_frame.index:
            for col in float_frame.columns:
                result = float_frame._get_value(idx, col)
                expected = float_frame[col][idx]
                assert result == expected
@@ -0,0 +1,472 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
DateOffset,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
get_dummies,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import SparseArray
|
||||
|
||||
|
||||
class TestGetitem:
|
||||
def test_getitem_unused_level_raises(self):
|
||||
# GH#20410
|
||||
mi = MultiIndex(
|
||||
levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
|
||||
codes=[[1, 0], [1, 0]],
|
||||
)
|
||||
df = DataFrame(-1, index=range(3), columns=mi)
|
||||
|
||||
with pytest.raises(KeyError, match="notevenone"):
|
||||
df["notevenone"]
|
||||
|
||||
def test_getitem_periodindex(self):
|
||||
rng = period_range("1/1/2000", periods=5)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)), columns=rng)
|
||||
|
||||
ts = df[rng[0]]
|
||||
tm.assert_series_equal(ts, df.iloc[:, 0])
|
||||
|
||||
ts = df["1/1/2000"]
|
||||
tm.assert_series_equal(ts, df.iloc[:, 0])
|
||||
|
||||
def test_getitem_list_of_labels_categoricalindex_cols(self):
|
||||
# GH#16115
|
||||
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
|
||||
|
||||
expected = DataFrame([[1, 0], [0, 1]], dtype="bool", index=[0, 1], columns=cats)
|
||||
dummies = get_dummies(cats)
|
||||
result = dummies[list(dummies.columns)]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_sparse_column_return_type_and_dtype(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23559
|
||||
data = SparseArray([0, 1])
|
||||
df = DataFrame({"A": data})
|
||||
expected = Series(data, name="A")
|
||||
result = df["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Also check iloc and loc while we're here
|
||||
result = df.iloc[:, 0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc[:, "A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_getitem_string_columns(self):
|
||||
# GH#46185
|
||||
df = DataFrame([[1, 2]], columns=Index(["A", "B"], dtype="string"))
|
||||
result = df.A
|
||||
expected = df["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemListLike:
|
||||
def test_getitem_list_missing_key(self):
|
||||
# GH#13822, incorrect error string with non-unique columns when missing
|
||||
# column is accessed
|
||||
df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
|
||||
df.columns = ["x", "x", "z"]
|
||||
|
||||
# Check that we get the correct value in the KeyError
|
||||
with pytest.raises(KeyError, match=r"\['y'\] not in index"):
|
||||
df[["x", "y", "z"]]
|
||||
|
||||
def test_getitem_list_duplicates(self):
|
||||
# GH#1943
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)), columns=list("AABC")
|
||||
)
|
||||
df.columns.name = "foo"
|
||||
|
||||
result = df[["B", "C"]]
|
||||
assert result.columns.name == "foo"
|
||||
|
||||
expected = df.iloc[:, 2:]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_dupe_cols(self):
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
|
||||
msg = "\"None of [Index(['baf'], dtype="
|
||||
with pytest.raises(KeyError, match=re.escape(msg)):
|
||||
df[["baf"]]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx_type",
|
||||
[
|
||||
list,
|
||||
iter,
|
||||
Index,
|
||||
set,
|
||||
lambda keys: dict(zip(keys, range(len(keys)))),
|
||||
lambda keys: dict(zip(keys, range(len(keys)))).keys(),
|
||||
],
|
||||
ids=["list", "iter", "Index", "set", "dict", "dict_keys"],
|
||||
)
|
||||
@pytest.mark.parametrize("levels", [1, 2])
|
||||
def test_getitem_listlike(self, idx_type, levels, float_frame):
|
||||
# GH#21294
|
||||
|
||||
if levels == 1:
|
||||
frame, missing = float_frame, "food"
|
||||
else:
|
||||
# MultiIndex columns
|
||||
frame = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((8, 3)),
|
||||
columns=Index(
|
||||
[("foo", "bar"), ("baz", "qux"), ("peek", "aboo")],
|
||||
name=("sth", "sth2"),
|
||||
),
|
||||
)
|
||||
missing = ("good", "food")
|
||||
|
||||
keys = [frame.columns[1], frame.columns[0]]
|
||||
idx = idx_type(keys)
|
||||
idx_check = list(idx_type(keys))
|
||||
|
||||
if isinstance(idx, (set, dict)):
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
frame[idx]
|
||||
|
||||
return
|
||||
else:
|
||||
result = frame[idx]
|
||||
|
||||
expected = frame.loc[:, idx_check]
|
||||
expected.columns.names = frame.columns.names
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
idx = idx_type(keys + [missing])
|
||||
with pytest.raises(KeyError, match="not in index"):
|
||||
frame[idx]
|
||||
|
||||
def test_getitem_iloc_generator(self):
|
||||
# GH#39614
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
indexer = (x for x in [1, 2])
|
||||
result = df.iloc[indexer]
|
||||
expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_iloc_two_dimensional_generator(self):
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
indexer = (x for x in [1, 2])
|
||||
result = df.iloc[indexer, 1]
|
||||
expected = Series([5, 6], name="b", index=[1, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_getitem_iloc_dateoffset_days(self):
|
||||
# GH 46671
|
||||
df = DataFrame(
|
||||
list(range(10)),
|
||||
index=date_range("01-01-2022", periods=10, freq=DateOffset(days=1)),
|
||||
)
|
||||
result = df.loc["2022-01-01":"2022-01-03"]
|
||||
expected = DataFrame(
|
||||
[0, 1, 2],
|
||||
index=DatetimeIndex(
|
||||
["2022-01-01", "2022-01-02", "2022-01-03"],
|
||||
dtype="datetime64[ns]",
|
||||
freq=DateOffset(days=1),
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(
|
||||
list(range(10)),
|
||||
index=date_range(
|
||||
"01-01-2022", periods=10, freq=DateOffset(days=1, hours=2)
|
||||
),
|
||||
)
|
||||
result = df.loc["2022-01-01":"2022-01-03"]
|
||||
expected = DataFrame(
|
||||
[0, 1, 2],
|
||||
index=DatetimeIndex(
|
||||
["2022-01-01 00:00:00", "2022-01-02 02:00:00", "2022-01-03 04:00:00"],
|
||||
dtype="datetime64[ns]",
|
||||
freq=DateOffset(days=1, hours=2),
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(
|
||||
list(range(10)),
|
||||
index=date_range("01-01-2022", periods=10, freq=DateOffset(minutes=3)),
|
||||
)
|
||||
result = df.loc["2022-01-01":"2022-01-03"]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
class TestGetitemCallable:
|
||||
def test_getitem_callable(self, float_frame):
|
||||
# GH#12533
|
||||
result = float_frame[lambda x: "A"]
|
||||
expected = float_frame.loc[:, "A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = float_frame[lambda x: ["A", "B"]]
|
||||
expected = float_frame.loc[:, ["A", "B"]]
|
||||
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
|
||||
|
||||
df = float_frame[:3]
|
||||
result = df[lambda x: [True, False, True]]
|
||||
expected = float_frame.iloc[[0, 2], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_columns_one_level(self):
|
||||
# GH#29749
|
||||
df = DataFrame([[1, 2]], columns=[["a", "b"]])
|
||||
expected = DataFrame([1], columns=[["a"]])
|
||||
|
||||
result = df["a"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[:, "a"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemBooleanMask:
|
||||
def test_getitem_bool_mask_categorical_index(self):
|
||||
df3 = DataFrame(
|
||||
{
|
||||
"A": np.arange(6, dtype="int64"),
|
||||
},
|
||||
index=CategoricalIndex(
|
||||
[1, 1, 2, 1, 3, 2],
|
||||
dtype=CategoricalDtype([3, 2, 1], ordered=True),
|
||||
name="B",
|
||||
),
|
||||
)
|
||||
df4 = DataFrame(
|
||||
{
|
||||
"A": np.arange(6, dtype="int64"),
|
||||
},
|
||||
index=CategoricalIndex(
|
||||
[1, 1, 2, 1, 3, 2],
|
||||
dtype=CategoricalDtype([3, 2, 1], ordered=False),
|
||||
name="B",
|
||||
),
|
||||
)
|
||||
|
||||
result = df3[df3.index == "a"]
|
||||
expected = df3.iloc[[]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df4[df4.index == "a"]
|
||||
expected = df4.iloc[[]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df3[df3.index == 1]
|
||||
expected = df3.iloc[[0, 1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df4[df4.index == 1]
|
||||
expected = df4.iloc[[0, 1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# since we have an ordered categorical
|
||||
|
||||
# CategoricalIndex([1, 1, 2, 1, 3, 2],
|
||||
# categories=[3, 2, 1],
|
||||
# ordered=True,
|
||||
# name='B')
|
||||
result = df3[df3.index < 2]
|
||||
expected = df3.iloc[[4]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df3[df3.index > 1]
|
||||
expected = df3.iloc[[]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# unordered
|
||||
# cannot be compared
|
||||
|
||||
# CategoricalIndex([1, 1, 2, 1, 3, 2],
|
||||
# categories=[3, 2, 1],
|
||||
# ordered=False,
|
||||
# name='B')
|
||||
msg = "Unordered Categoricals can only compare equality or not"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df4[df4.index < 2]
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df4[df4.index > 1]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data1,data2,expected_data",
|
||||
(
|
||||
(
|
||||
[[1, 2], [3, 4]],
|
||||
[[0.5, 6], [7, 8]],
|
||||
[[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
|
||||
),
|
||||
(
|
||||
[[1, 2], [3, 4]],
|
||||
[[5, 6], [7, 8]],
|
||||
[[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
|
||||
self,
|
||||
data1,
|
||||
data2,
|
||||
expected_data,
|
||||
):
|
||||
# GH#31954
|
||||
|
||||
df1 = DataFrame(np.array(data1))
|
||||
df2 = DataFrame(np.array(data2))
|
||||
df = concat([df1, df2], axis=1)
|
||||
|
||||
result = df[df > 2]
|
||||
|
||||
exdict = {i: np.array(col) for i, col in enumerate(expected_data)}
|
||||
expected = DataFrame(exdict).rename(columns={2: 0, 3: 1})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.fixture
|
||||
def df_dup_cols(self):
|
||||
dups = ["A", "A", "C", "D"]
|
||||
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
|
||||
return df
|
||||
|
||||
def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
|
||||
# `df.A > 6` is a DataFrame with a different shape from df
|
||||
|
||||
# boolean with the duplicate raises
|
||||
df = df_dup_cols
|
||||
msg = "cannot reindex on an axis with duplicate labels"
|
||||
        with pytest.raises(ValueError, match=msg):
            df[df.A > 6]

    def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
        # boolean indexing
        # GH#4879
        df = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        expected = df[df.C > 6]
        expected.columns = df_dup_cols.columns

        df = df_dup_cols
        result = df[df.C > 6]

        tm.assert_frame_equal(result, expected)

    def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
        # where
        df = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        # `df > 6` is a DataFrame with the same shape+alignment as df
        expected = df[df > 6]
        expected.columns = df_dup_cols.columns

        df = df_dup_cols
        result = df[df > 6]

        tm.assert_frame_equal(result, expected)

    def test_getitem_empty_frame_with_boolean(self):
        # Test for issue GH#11859

        df = DataFrame()
        df2 = df[df > 0]
        tm.assert_frame_equal(df, df2)

    def test_getitem_returns_view_when_column_is_unique_in_df(
        self, using_copy_on_write, warn_copy_on_write
    ):
        # GH#45316
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
        df_orig = df.copy()
        view = df["b"]
        with tm.assert_cow_warning(warn_copy_on_write):
            view.loc[:] = 100
        if using_copy_on_write:
            expected = df_orig
        else:
            expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
        tm.assert_frame_equal(df, expected)

    def test_getitem_frozenset_unique_in_column(self):
        # GH#41062
        df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"])
        result = df[frozenset(["KEY"])]
        expected = Series([1], name=frozenset(["KEY"]))
        tm.assert_series_equal(result, expected)


class TestGetitemSlice:
    def test_getitem_slice_float64(self, frame_or_series):
        values = np.arange(10.0, 50.0, 2)
        index = Index(values)

        start, end = values[[5, 15]]

        data = np.random.default_rng(2).standard_normal((20, 3))
        if frame_or_series is not DataFrame:
            data = data[:, 0]

        obj = frame_or_series(data, index=index)

        result = obj[start:end]
        expected = obj.iloc[5:16]
        tm.assert_equal(result, expected)

        result = obj.loc[start:end]
        tm.assert_equal(result, expected)

    def test_getitem_datetime_slice(self):
        # GH#43223
        df = DataFrame(
            {"a": 0},
            index=DatetimeIndex(
                [
                    "11.01.2011 22:00",
                    "11.01.2011 23:00",
                    "12.01.2011 00:00",
                    "2011-01-13 00:00",
                ]
            ),
        )
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            df["2011-01-01":"2011-11-01"]

    def test_getitem_slice_same_dim_only_one_axis(self):
        # GH#54622
        df = DataFrame(np.random.default_rng(2).standard_normal((10, 8)))
        result = df.iloc[(slice(None, None, 2),)]
        assert result.shape == (5, 8)
        expected = df.iloc[slice(None, None, 2), slice(None)]
        tm.assert_frame_equal(result, expected)


class TestGetitemDeprecatedIndexers:
    @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825 enforced in 2.0
        df = DataFrame(
            [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)])
        )
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            df[key]
File diff suppressed because it is too large
@ -0,0 +1,120 @@
"""
test_insert is specifically for the DataFrame.insert method; not to be
confused with tests with "insert" in their names that are really testing
__setitem__.
"""
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

from pandas import (
    DataFrame,
    Index,
)
import pandas._testing as tm


class TestDataFrameInsert:
    def test_insert(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((5, 3)),
            index=np.arange(5),
            columns=["c", "b", "a"],
        )

        df.insert(0, "foo", df["a"])
        tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
        tm.assert_series_equal(df["a"], df["foo"], check_names=False)

        df.insert(2, "bar", df["c"])
        tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
        tm.assert_almost_equal(df["c"], df["bar"], check_names=False)

        with pytest.raises(ValueError, match="already exists"):
            df.insert(1, "a", df["b"])

        msg = "cannot insert c, already exists"
        with pytest.raises(ValueError, match=msg):
            df.insert(1, "c", df["b"])

        df.columns.name = "some_name"
        # preserve columns name field
        df.insert(0, "baz", df["c"])
        assert df.columns.name == "some_name"

    def test_insert_column_bug_4032(self):
        # GH#4032, inserting a column and renaming causing errors
        df = DataFrame({"b": [1.1, 2.2]})

        df = df.rename(columns={})
        df.insert(0, "a", [1, 2])
        result = df.rename(columns={})

        expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

        df.insert(0, "c", [1.3, 2.3])
        result = df.rename(columns={})

        expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
        tm.assert_frame_equal(result, expected)

    def test_insert_with_columns_dups(self):
        # GH#14291
        df = DataFrame()
        df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
        df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
        df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
        exp = DataFrame(
            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
        )
        tm.assert_frame_equal(df, exp)

    def test_insert_item_cache(self, using_array_manager, using_copy_on_write):
        df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
        ser = df[0]

        if using_array_manager:
            expected_warning = None
        else:
            # with BlockManager warn about high fragmentation of single dtype
            expected_warning = PerformanceWarning

        with tm.assert_produces_warning(expected_warning):
            for n in range(100):
                df[n + 3] = df[1] * n

        if using_copy_on_write:
            ser.iloc[0] = 99
            assert df.iloc[0, 0] == df[0][0]
            assert df.iloc[0, 0] != 99
        else:
            ser.values[0] = 99
            assert df.iloc[0, 0] == df[0][0]
            assert df.iloc[0, 0] == 99

    def test_insert_EA_no_warning(self):
        # PerformanceWarning about fragmented frame should not be raised when
        # using EAs (https://github.com/pandas-dev/pandas/issues/44098)
        df = DataFrame(
            np.random.default_rng(2).integers(0, 100, size=(3, 100)), dtype="Int64"
        )
        with tm.assert_produces_warning(None):
            df["a"] = np.array([1, 2, 3])

    def test_insert_frame(self):
        # GH#42403
        df = DataFrame({"col1": [1, 2], "col2": [3, 4]})

        msg = (
            "Expected a one-dimensional object, got a DataFrame with 2 columns instead."
        )
        with pytest.raises(ValueError, match=msg):
            df.insert(1, "newcol", df)

    def test_insert_int64_loc(self):
        # GH#53193
        df = DataFrame({"a": [1, 2]})
        df.insert(np.int64(0), "b", 0)
        tm.assert_frame_equal(df, DataFrame({"b": [0, 0], "a": [1, 2]}))
@ -0,0 +1,152 @@
"""
Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
"""

import numpy as np

from pandas import (
    NA,
    DataFrame,
    Float64Dtype,
    Series,
    StringDtype,
    Timedelta,
    isna,
)
import pandas._testing as tm


class TestDataFrameMask:
    def test_mask(self):
        df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
        cond = df > 0

        rs = df.where(cond, np.nan)
        tm.assert_frame_equal(rs, df.mask(df <= 0))
        tm.assert_frame_equal(rs, df.mask(~cond))

        other = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
        rs = df.where(cond, other)
        tm.assert_frame_equal(rs, df.mask(df <= 0, other))
        tm.assert_frame_equal(rs, df.mask(~cond, other))

    def test_mask2(self):
        # see GH#21891
        df = DataFrame([1, 2])
        res = df.mask([[True], [False]])

        exp = DataFrame([np.nan, 2])
        tm.assert_frame_equal(res, exp)

    def test_mask_inplace(self):
        # GH#8801
        df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
        cond = df > 0

        rdf = df.copy()

        return_value = rdf.where(cond, inplace=True)
        assert return_value is None
        tm.assert_frame_equal(rdf, df.where(cond))
        tm.assert_frame_equal(rdf, df.mask(~cond))

        rdf = df.copy()
        return_value = rdf.where(cond, -df, inplace=True)
        assert return_value is None
        tm.assert_frame_equal(rdf, df.where(cond, -df))
        tm.assert_frame_equal(rdf, df.mask(~cond, -df))

    def test_mask_edge_case_1xN_frame(self):
        # GH#4071
        df = DataFrame([[1, 2]])
        res = df.mask(DataFrame([[True, False]]))
        expec = DataFrame([[np.nan, 2]])
        tm.assert_frame_equal(res, expec)

    def test_mask_callable(self):
        # GH#12533
        df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        result = df.mask(lambda x: x > 4, lambda x: x + 1)
        exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
        tm.assert_frame_equal(result, exp)
        tm.assert_frame_equal(result, df.mask(df > 4, df + 1))

        # return ndarray and scalar
        result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
        exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
        tm.assert_frame_equal(result, exp)
        tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))

        # chain
        result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
        exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
        tm.assert_frame_equal(result, exp)
        tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))

    def test_mask_dtype_bool_conversion(self):
        # GH#3733
        df = DataFrame(data=np.random.default_rng(2).standard_normal((100, 50)))
        df = df.where(df > 0)  # create nans
        bools = df > 0
        mask = isna(df)
        expected = bools.astype(object).mask(mask)
        result = bools.mask(mask)
        tm.assert_frame_equal(result, expected)


def test_mask_stringdtype(frame_or_series):
    # GH 40824
    obj = DataFrame(
        {"A": ["foo", "bar", "baz", NA]},
        index=["id1", "id2", "id3", "id4"],
        dtype=StringDtype(),
    )
    filtered_obj = DataFrame(
        {"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
    )
    expected = DataFrame(
        {"A": [NA, "this", "that", NA]},
        index=["id1", "id2", "id3", "id4"],
        dtype=StringDtype(),
    )
    if frame_or_series is Series:
        obj = obj["A"]
        filtered_obj = filtered_obj["A"]
        expected = expected["A"]

    filter_ser = Series([False, True, True, False])
    result = obj.mask(filter_ser, filtered_obj)

    tm.assert_equal(result, expected)


def test_mask_where_dtype_timedelta():
    # https://github.com/pandas-dev/pandas/issues/39548
    df = DataFrame([Timedelta(i, unit="d") for i in range(5)])

    expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
    tm.assert_frame_equal(df.mask(df.notna()), expected)

    expected = DataFrame(
        [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
    )
    tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)


def test_mask_return_dtype():
    # GH#50488
    ser = Series([0.0, 1.0, 2.0, 3.0], dtype=Float64Dtype())
    cond = ~ser.isna()
    other = Series([True, False, True, False])
    excepted = Series([1.0, 0.0, 1.0, 0.0], dtype=ser.dtype)
    result = ser.mask(cond, other)
    tm.assert_series_equal(result, excepted)


def test_mask_inplace_no_other():
    # GH#51685
    df = DataFrame({"a": [1.0, 2.0], "b": ["x", "y"]})
    cond = DataFrame({"a": [True, False], "b": [False, True]})
    df.mask(cond, inplace=True)
    expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]})
    tm.assert_frame_equal(df, expected)
@ -0,0 +1,77 @@
import numpy as np

from pandas.core.dtypes.common import is_float_dtype

from pandas import (
    DataFrame,
    isna,
)
import pandas._testing as tm


class TestSetValue:
    def test_set_value(self, float_frame):
        for idx in float_frame.index:
            for col in float_frame.columns:
                float_frame._set_value(idx, col, 1)
                assert float_frame[col][idx] == 1

    def test_set_value_resize(self, float_frame, using_infer_string):
        res = float_frame._set_value("foobar", "B", 0)
        assert res is None
        assert float_frame.index[-1] == "foobar"
        assert float_frame._get_value("foobar", "B") == 0

        float_frame.loc["foobar", "qux"] = 0
        assert float_frame._get_value("foobar", "qux") == 0

        res = float_frame.copy()
        res._set_value("foobar", "baz", "sam")
        if using_infer_string:
            assert res["baz"].dtype == "string"
        else:
            assert res["baz"].dtype == np.object_
        res = float_frame.copy()
        res._set_value("foobar", "baz", True)
        assert res["baz"].dtype == np.object_

        res = float_frame.copy()
        res._set_value("foobar", "baz", 5)
        assert is_float_dtype(res["baz"])
        assert isna(res["baz"].drop(["foobar"])).all()

        with tm.assert_produces_warning(
            FutureWarning, match="Setting an item of incompatible dtype"
        ):
            res._set_value("foobar", "baz", "sam")
        assert res.loc["foobar", "baz"] == "sam"

    def test_set_value_with_index_dtype_change(self):
        df_orig = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            index=range(3),
            columns=list("ABC"),
        )

        # this is actually ambiguous as the 2 is interpreted as a positional
        # so column is not created
        df = df_orig.copy()
        df._set_value("C", 2, 1.0)
        assert list(df.index) == list(df_orig.index) + ["C"]
        # assert list(df.columns) == list(df_orig.columns) + [2]

        df = df_orig.copy()
        df.loc["C", 2] = 1.0
        assert list(df.index) == list(df_orig.index) + ["C"]
        # assert list(df.columns) == list(df_orig.columns) + [2]

        # create both new
        df = df_orig.copy()
        df._set_value("C", "D", 1.0)
        assert list(df.index) == list(df_orig.index) + ["C"]
        assert list(df.columns) == list(df_orig.columns) + ["D"]

        df = df_orig.copy()
        df.loc["C", "D"] = 1.0
        assert list(df.index) == list(df_orig.index) + ["C"]
        assert list(df.columns) == list(df_orig.columns) + ["D"]
File diff suppressed because it is too large
@ -0,0 +1,92 @@
import pytest

import pandas._testing as tm


class TestDataFrameTake:
    def test_take_slices_deprecated(self, float_frame):
        # GH#51539
        df = float_frame

        slc = slice(0, 4, 1)
        with tm.assert_produces_warning(FutureWarning):
            df.take(slc, axis=0)
        with tm.assert_produces_warning(FutureWarning):
            df.take(slc, axis=1)

    def test_take(self, float_frame):
        # homogeneous
        order = [3, 1, 2, 0]
        for df in [float_frame]:
            result = df.take(order, axis=0)
            expected = df.reindex(df.index.take(order))
            tm.assert_frame_equal(result, expected)

            # axis = 1
            result = df.take(order, axis=1)
            expected = df.loc[:, ["D", "B", "C", "A"]]
            tm.assert_frame_equal(result, expected, check_names=False)

        # negative indices
        order = [2, 1, -1]
        for df in [float_frame]:
            result = df.take(order, axis=0)
            expected = df.reindex(df.index.take(order))
            tm.assert_frame_equal(result, expected)

            result = df.take(order, axis=0)
            tm.assert_frame_equal(result, expected)

            # axis = 1
            result = df.take(order, axis=1)
            expected = df.loc[:, ["C", "B", "D"]]
            tm.assert_frame_equal(result, expected, check_names=False)

        # illegal indices
        msg = "indices are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.take([3, 1, 2, 30], axis=0)
        with pytest.raises(IndexError, match=msg):
            df.take([3, 1, 2, -31], axis=0)
        with pytest.raises(IndexError, match=msg):
            df.take([3, 1, 2, 5], axis=1)
        with pytest.raises(IndexError, match=msg):
            df.take([3, 1, 2, -5], axis=1)

    def test_take_mixed_type(self, float_string_frame):
        # mixed-dtype
        order = [4, 1, 2, 0, 3]
        for df in [float_string_frame]:
            result = df.take(order, axis=0)
            expected = df.reindex(df.index.take(order))
            tm.assert_frame_equal(result, expected)

            # axis = 1
            result = df.take(order, axis=1)
            expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
            tm.assert_frame_equal(result, expected)

        # negative indices
        order = [4, 1, -2]
        for df in [float_string_frame]:
            result = df.take(order, axis=0)
            expected = df.reindex(df.index.take(order))
            tm.assert_frame_equal(result, expected)

            # axis = 1
            result = df.take(order, axis=1)
            expected = df.loc[:, ["foo", "B", "D"]]
            tm.assert_frame_equal(result, expected)

    def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
        # by dtype
        order = [1, 2, 0, 3]
        for df in [mixed_float_frame, mixed_int_frame]:
            result = df.take(order, axis=0)
            expected = df.reindex(df.index.take(order))
            tm.assert_frame_equal(result, expected)

            # axis = 1
            result = df.take(order, axis=1)
            expected = df.loc[:, ["B", "C", "A", "D"]]
            tm.assert_frame_equal(result, expected)
File diff suppressed because it is too large
@ -0,0 +1,444 @@
import re

import numpy as np
import pytest

from pandas.errors import SettingWithCopyError

from pandas import (
    DataFrame,
    Index,
    IndexSlice,
    MultiIndex,
    Series,
    concat,
)
import pandas._testing as tm

from pandas.tseries.offsets import BDay


@pytest.fixture
def four_level_index_dataframe():
    arr = np.array(
        [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
            [-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
        ]
    )
    index = MultiIndex(
        levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]],
        codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]],
        names=["one", "two", "three", "four"],
    )
    return DataFrame(arr, index=index, columns=list("ABCDE"))


class TestXS:
    def test_xs(
        self, float_frame, datetime_frame, using_copy_on_write, warn_copy_on_write
    ):
        float_frame_orig = float_frame.copy()
        idx = float_frame.index[5]
        xs = float_frame.xs(idx)
        for item, value in xs.items():
            if np.isnan(value):
                assert np.isnan(float_frame[item][idx])
            else:
                assert value == float_frame[item][idx]

        # mixed-type xs
        test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
        frame = DataFrame(test_data)
        xs = frame.xs("1")
        assert xs.dtype == np.object_
        assert xs["A"] == 1
        assert xs["B"] == "1"

        with pytest.raises(
            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')")
        ):
            datetime_frame.xs(datetime_frame.index[0] - BDay())

        # xs get column
        series = float_frame.xs("A", axis=1)
        expected = float_frame["A"]
        tm.assert_series_equal(series, expected)

        # view is returned if possible
        series = float_frame.xs("A", axis=1)
        with tm.assert_cow_warning(warn_copy_on_write):
            series[:] = 5
        if using_copy_on_write:
            # but with CoW the view shouldn't propagate mutations
            tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
            assert not (expected == 5).all()
        else:
            assert (expected == 5).all()

    def test_xs_corner(self):
        # pathological mixed-type reordering case
        df = DataFrame(index=[0])
        df["A"] = 1.0
        df["B"] = "foo"
        df["C"] = 2.0
        df["D"] = "bar"
        df["E"] = 3.0

        xs = df.xs(0)
        exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0)
        tm.assert_series_equal(xs, exp)

        # no columns but Index(dtype=object)
        df = DataFrame(index=["a", "b", "c"])
        result = df.xs("a")
        expected = Series([], name="a", dtype=np.float64)
        tm.assert_series_equal(result, expected)

    def test_xs_duplicates(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((5, 2)),
            index=["b", "b", "c", "b", "a"],
        )

        cross = df.xs("c")
        exp = df.iloc[2]
        tm.assert_series_equal(cross, exp)

    def test_xs_keep_level(self):
        df = DataFrame(
            {
                "day": {0: "sat", 1: "sun"},
                "flavour": {0: "strawberry", 1: "strawberry"},
                "sales": {0: 10, 1: 12},
                "year": {0: 2008, 1: 2008},
            }
        ).set_index(["year", "flavour", "day"])
        result = df.xs("sat", level="day", drop_level=False)
        expected = df[:1]
        tm.assert_frame_equal(result, expected)

        result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False)
        tm.assert_frame_equal(result, expected)

    def test_xs_view(
        self, using_array_manager, using_copy_on_write, warn_copy_on_write
    ):
        # in 0.14 this will return a view if possible a copy otherwise, but
        # this is numpy dependent

        dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))
        df_orig = dm.copy()

        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                dm.xs(2)[:] = 20
            tm.assert_frame_equal(dm, df_orig)
        elif using_array_manager:
            # INFO(ArrayManager) with ArrayManager getting a row as a view is
            # not possible
            msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
            with pytest.raises(SettingWithCopyError, match=msg):
                dm.xs(2)[:] = 20
            assert not (dm.xs(2) == 20).any()
        else:
            with tm.raises_chained_assignment_error():
                dm.xs(2)[:] = 20
            assert (dm.xs(2) == 20).all()


class TestXSWithMultiIndex:
    def test_xs_doc_example(self):
        # TODO: more descriptive name
        # based on example in advanced.rst
        arrays = [
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
        tuples = list(zip(*arrays))

        index = MultiIndex.from_tuples(tuples, names=["first", "second"])
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 8)),
            index=["A", "B", "C"],
            columns=index,
        )

        result = df.xs(("one", "bar"), level=("second", "first"), axis=1)

        expected = df.iloc[:, [0]]
        tm.assert_frame_equal(result, expected)

    def test_xs_integer_key(self):
        # see GH#2107
        dates = range(20111201, 20111205)
        ids = list("abcde")
        index = MultiIndex.from_product([dates, ids], names=["date", "secid"])
        df = DataFrame(
            np.random.default_rng(2).standard_normal((len(index), 3)),
            index,
            ["X", "Y", "Z"],
        )

        result = df.xs(20111201, level="date")
        expected = df.loc[20111201, :]
        tm.assert_frame_equal(result, expected)

    def test_xs_level(self, multiindex_dataframe_random_data):
        df = multiindex_dataframe_random_data
        result = df.xs("two", level="second")
        expected = df[df.index.get_level_values(1) == "two"]
        expected.index = Index(["foo", "bar", "baz", "qux"], name="first")
        tm.assert_frame_equal(result, expected)

    def test_xs_level_eq_2(self):
        arr = np.random.default_rng(2).standard_normal((3, 5))
        index = MultiIndex(
            levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
            codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
        )
        df = DataFrame(arr, index=index)
        expected = DataFrame(arr[1:2], index=[["a"], ["b"]])
        result = df.xs("c", level=2)
        tm.assert_frame_equal(result, expected)

    def test_xs_setting_with_copy_error(
        self,
        multiindex_dataframe_random_data,
        using_copy_on_write,
        warn_copy_on_write,
    ):
        # this is a copy in 0.14
        df = multiindex_dataframe_random_data
        df_orig = df.copy()
        result = df.xs("two", level="second")

        if using_copy_on_write or warn_copy_on_write:
            result[:] = 10
        else:
            # setting this will give a SettingWithCopyError
            # as we are trying to write a view
            msg = "A value is trying to be set on a copy of a slice from a DataFrame"
            with pytest.raises(SettingWithCopyError, match=msg):
                result[:] = 10
        tm.assert_frame_equal(df, df_orig)

    def test_xs_setting_with_copy_error_multiple(
        self, four_level_index_dataframe, using_copy_on_write, warn_copy_on_write
    ):
        # this is a copy in 0.14
        df = four_level_index_dataframe
        df_orig = df.copy()
        result = df.xs(("a", 4), level=["one", "four"])

        if using_copy_on_write or warn_copy_on_write:
            result[:] = 10
        else:
            # setting this will give a SettingWithCopyError
            # as we are trying to write a view
            msg = "A value is trying to be set on a copy of a slice from a DataFrame"
            with pytest.raises(SettingWithCopyError, match=msg):
                result[:] = 10
        tm.assert_frame_equal(df, df_orig)

    @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
    def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
        # see GH#13719
        frame = multiindex_dataframe_random_data
        df = concat([frame] * 2)
        assert df.index.is_unique is False
        expected = concat([frame.xs("one", level="second")] * 2)

        if isinstance(key, list):
            result = df.xs(tuple(key), level=level)
        else:
            result = df.xs(key, level=level)
        tm.assert_frame_equal(result, expected)

    def test_xs_missing_values_in_index(self):
        # see GH#6574
        # missing values in returned index should be preserved
        acc = [
            ("a", "abcde", 1),
            ("b", "bbcde", 2),
            ("y", "yzcde", 25),
            ("z", "xbcde", 24),
            ("z", None, 26),
            ("z", "zbcde", 25),
            ("z", "ybcde", 26),
        ]
        df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"])
        expected = DataFrame(
            {"cnt": [24, 26, 25, 26]},
            index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"),
        )

        result = df.xs("z", level="a1")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "key, level, exp_arr, exp_index",
        [
            ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
            ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
        ],
    )
    def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index):
        # see GH#2903
        arr = np.random.default_rng(2).standard_normal((4, 4))
        index = MultiIndex(
            levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
            codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
            names=["lvl0", "lvl1"],
        )
        df = DataFrame(arr, columns=index)
        result = df.xs(key, level=level, axis=1)
        expected = DataFrame(exp_arr(arr), columns=exp_index)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer",
        [
            lambda df: df.xs(("a", 4), level=["one", "four"]),
            lambda df: df.xs("a").xs(4, level="four"),
        ],
    )
    def test_xs_level_multiple(self, indexer, four_level_index_dataframe):
        df = four_level_index_dataframe
        expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
        expected_index = MultiIndex(
            levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
        )
        expected = DataFrame(
            expected_values, index=expected_index, columns=list("ABCDE")
        )
        result = indexer(df)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
    )
    def test_xs_level0(self, indexer, four_level_index_dataframe):
        df = four_level_index_dataframe
        expected_values = [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
        ]
        expected_index = MultiIndex(
            levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
            codes=[[0, 1], [0, 1], [1, 0]],
            names=["two", "three", "four"],
        )
        expected = DataFrame(
            expected_values, index=expected_index, columns=list("ABCDE")
        )

        result = indexer(df)
        tm.assert_frame_equal(result, expected)

    def test_xs_values(self, multiindex_dataframe_random_data):
        df = multiindex_dataframe_random_data
        result = df.xs(("bar", "two")).values
        expected = df.values[4]
        tm.assert_almost_equal(result, expected)

    def test_xs_loc_equality(self, multiindex_dataframe_random_data):
        df = multiindex_dataframe_random_data
        result = df.xs(("bar", "two"))
        expected = df.loc[("bar", "two")]
        tm.assert_series_equal(result, expected)

    def test_xs_IndexSlice_argument_not_implemented(self, frame_or_series):
        # GH#35301

        index = MultiIndex(
            levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        )

        obj = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
        if frame_or_series is Series:
            obj = obj[0]

        expected = obj.iloc[-2:].droplevel(0)

        result = obj.xs(IndexSlice[("foo", "qux", 0), :])
        tm.assert_equal(result, expected)

        result = obj.loc[IndexSlice[("foo", "qux", 0), :]]
        tm.assert_equal(result, expected)

    def test_xs_levels_raises(self, frame_or_series):
        obj = DataFrame({"A": [1, 2, 3]})
        if frame_or_series is Series:
            obj = obj["A"]

        msg = "Index must be a MultiIndex"
        with pytest.raises(TypeError, match=msg):
            obj.xs(0, level="as")

    def test_xs_multiindex_droplevel_false(self):
        # GH#19056
        mi = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
        )
        df = DataFrame([[1, 2, 3]], columns=mi)
        result = df.xs("a", axis=1, drop_level=False)
        expected = DataFrame(
            [[1, 2]],
            columns=MultiIndex.from_tuples(
                [("a", "x"), ("a", "y")], names=["level1", "level2"]
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_xs_droplevel_false(self):
        # GH#19056
        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        expected = DataFrame({"a": [1]})
        tm.assert_frame_equal(result, expected)

    def test_xs_droplevel_false_view(
        self, using_array_manager, using_copy_on_write, warn_copy_on_write
    ):
        # GH#37832
        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        # check that result still views the same data as df
        assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)

        with tm.assert_cow_warning(warn_copy_on_write):
            df.iloc[0, 0] = 2
        if using_copy_on_write:
            # with copy on write the subset is never modified
            expected = DataFrame({"a": [1]})
        else:
            # modifying original df also modifies result when having a single block
            expected = DataFrame({"a": [2]})
        tm.assert_frame_equal(result, expected)

        # with mixed dataframe, modifying the parent doesn't modify result
        # TODO the "split" path behaves differently here as with single block
        df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        df.iloc[0, 0] = 2
        if using_copy_on_write:
            # with copy on write the subset is never modified
            expected = DataFrame({"a": [1]})
        elif using_array_manager:
            # Here the behavior is consistent
            expected = DataFrame({"a": [2]})
        else:
            # FIXME: iloc does not update the array inplace using
            # "split" path
            expected = DataFrame({"a": [1]})
        tm.assert_frame_equal(result, expected)

    def test_xs_list_indexer_droplevel_false(self):
        # GH#41760
        mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")])
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
        with pytest.raises(KeyError, match="y"):
            df.xs(("x", "y"), drop_level=False, axis=1)
@ -0,0 +1,7 @@
"""
Test files dedicated to individual (stand-alone) DataFrame methods

Ideally these files/tests should correspond 1-to-1 with tests.series.methods

These may also present opportunities for sharing/de-duplicating test code.
"""
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff