venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,40 @@
|
||||
""" common utilities """
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Literal,
|
||||
)
|
||||
|
||||
|
||||
def _mklbl(prefix: str, n: int):
|
||||
return [f"{prefix}{i}" for i in range(n)]
|
||||
|
||||
|
||||
def check_indexing_smoketest_or_raises(
|
||||
obj,
|
||||
method: Literal["iloc", "loc"],
|
||||
key: Any,
|
||||
axes: Literal[0, 1] | None = None,
|
||||
fails=None,
|
||||
) -> None:
|
||||
if axes is None:
|
||||
axes_list = [0, 1]
|
||||
else:
|
||||
assert axes in [0, 1]
|
||||
axes_list = [axes]
|
||||
|
||||
for ax in axes_list:
|
||||
if ax < obj.ndim:
|
||||
# create a tuple accessor
|
||||
new_axes = [slice(None)] * obj.ndim
|
||||
new_axes[ax] = key
|
||||
axified = tuple(new_axes)
|
||||
try:
|
||||
getattr(obj, method).__getitem__(axified)
|
||||
except (IndexError, TypeError, KeyError) as detail:
|
||||
# if we are in fails, the ok, otherwise raise it
|
||||
if fails is not None:
|
||||
if isinstance(detail, fails):
|
||||
return
|
||||
raise
|
@ -0,0 +1,127 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_ints():
|
||||
return Series(np.random.default_rng(2).random(4), index=np.arange(0, 8, 2))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_ints():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)),
|
||||
index=np.arange(0, 8, 2),
|
||||
columns=np.arange(0, 12, 3),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_uints():
|
||||
return Series(
|
||||
np.random.default_rng(2).random(4),
|
||||
index=Index(np.arange(0, 8, 2, dtype=np.uint64)),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_uints():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)),
|
||||
index=Index(range(0, 8, 2), dtype=np.uint64),
|
||||
columns=Index(range(0, 12, 3), dtype=np.uint64),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_labels():
|
||||
return Series(np.random.default_rng(2).standard_normal(4), index=list("abcd"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_labels():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)),
|
||||
index=list("abcd"),
|
||||
columns=list("ABCD"),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_ts():
|
||||
return Series(
|
||||
np.random.default_rng(2).standard_normal(4),
|
||||
index=date_range("20130101", periods=4),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_ts():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)),
|
||||
index=date_range("20130101", periods=4),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_floats():
|
||||
return Series(
|
||||
np.random.default_rng(2).random(4),
|
||||
index=Index(range(0, 8, 2), dtype=np.float64),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_floats():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)),
|
||||
index=Index(range(0, 8, 2), dtype=np.float64),
|
||||
columns=Index(range(0, 12, 3), dtype=np.float64),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_mixed():
|
||||
return Series(np.random.default_rng(2).standard_normal(4), index=[2, 4, "null", 8])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_mixed():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)), index=[2, 4, "null", 8]
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_empty():
|
||||
return DataFrame()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_empty():
|
||||
return Series(dtype=object)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_multi():
|
||||
return DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)),
|
||||
index=MultiIndex.from_product([[1, 2], [3, 4]]),
|
||||
columns=MultiIndex.from_product([[5, 6], [7, 8]]),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_multi():
|
||||
return Series(
|
||||
np.random.default_rng(2).random(4),
|
||||
index=MultiIndex.from_product([[1, 2], [3, 4]]),
|
||||
)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,225 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
|
||||
@pytest.fixture
|
||||
def series_with_interval_index(self):
|
||||
return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
|
||||
|
||||
def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl):
|
||||
ser = series_with_interval_index.copy()
|
||||
|
||||
expected = ser.iloc[:3]
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[:3])
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[:2.5])
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[0.1:2.5])
|
||||
if indexer_sl is tm.loc:
|
||||
tm.assert_series_equal(expected, ser.loc[-1:3])
|
||||
|
||||
expected = ser.iloc[1:4]
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])
|
||||
|
||||
expected = ser.iloc[2:5]
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])
|
||||
|
||||
@pytest.mark.parametrize("direction", ["increasing", "decreasing"])
|
||||
def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl):
|
||||
tpls = [(0, 1), (2, 3), (4, 5)]
|
||||
if direction == "decreasing":
|
||||
tpls = tpls[::-1]
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
ser = Series(list("abc"), idx)
|
||||
|
||||
for key, expected in zip(idx.left, ser):
|
||||
if idx.closed_left:
|
||||
assert indexer_sl(ser)[key] == expected
|
||||
else:
|
||||
with pytest.raises(KeyError, match=str(key)):
|
||||
indexer_sl(ser)[key]
|
||||
|
||||
for key, expected in zip(idx.right, ser):
|
||||
if idx.closed_right:
|
||||
assert indexer_sl(ser)[key] == expected
|
||||
else:
|
||||
with pytest.raises(KeyError, match=str(key)):
|
||||
indexer_sl(ser)[key]
|
||||
|
||||
for key, expected in zip(idx.mid, ser):
|
||||
assert indexer_sl(ser)[key] == expected
|
||||
|
||||
def test_getitem_non_matching(self, series_with_interval_index, indexer_sl):
|
||||
ser = series_with_interval_index.copy()
|
||||
|
||||
# this is a departure from our current
|
||||
# indexing scheme, but simpler
|
||||
with pytest.raises(KeyError, match=r"\[-1\] not in index"):
|
||||
indexer_sl(ser)[[-1, 3, 4, 5]]
|
||||
|
||||
with pytest.raises(KeyError, match=r"\[-1\] not in index"):
|
||||
indexer_sl(ser)[[-1, 3]]
|
||||
|
||||
def test_loc_getitem_large_series(self, monkeypatch):
|
||||
size_cutoff = 20
|
||||
with monkeypatch.context():
|
||||
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
|
||||
ser = Series(
|
||||
np.arange(size_cutoff),
|
||||
index=IntervalIndex.from_breaks(np.arange(size_cutoff + 1)),
|
||||
)
|
||||
|
||||
result1 = ser.loc[:8]
|
||||
result2 = ser.loc[0:8]
|
||||
result3 = ser.loc[0:8:1]
|
||||
tm.assert_series_equal(result1, result2)
|
||||
tm.assert_series_equal(result1, result3)
|
||||
|
||||
def test_loc_getitem_frame(self):
|
||||
# CategoricalIndex with IntervalIndex categories
|
||||
df = DataFrame({"A": range(10)})
|
||||
ser = pd.cut(df.A, 5)
|
||||
df["B"] = ser
|
||||
df = df.set_index("B")
|
||||
|
||||
result = df.loc[4]
|
||||
expected = df.iloc[4:6]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with pytest.raises(KeyError, match="10"):
|
||||
df.loc[10]
|
||||
|
||||
# single list-like
|
||||
result = df.loc[[4]]
|
||||
expected = df.iloc[4:6]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# non-unique
|
||||
result = df.loc[[4, 5]]
|
||||
expected = df.take([4, 5, 4, 5])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
msg = (
|
||||
r"None of \[Index\(\[10\], dtype='object', name='B'\)\] "
|
||||
r"are in the \[index\]"
|
||||
)
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df.loc[[10]]
|
||||
|
||||
# partial missing
|
||||
with pytest.raises(KeyError, match=r"\[10\] not in index"):
|
||||
df.loc[[10, 4]]
|
||||
|
||||
def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
|
||||
# GH#41831
|
||||
|
||||
index = IntervalIndex([np.nan, np.nan])
|
||||
key = index[:-1]
|
||||
|
||||
obj = frame_or_series(range(2), index=index)
|
||||
if frame_or_series is DataFrame and indexer_sl is tm.setitem:
|
||||
obj = obj.T
|
||||
|
||||
result = indexer_sl(obj)[key]
|
||||
expected = obj
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_setitem_interval_with_slice(self):
|
||||
# GH#54722
|
||||
ii = IntervalIndex.from_breaks(range(4, 15))
|
||||
ser = Series(range(10), index=ii)
|
||||
|
||||
orig = ser.copy()
|
||||
|
||||
# This should be a no-op (used to raise)
|
||||
ser.loc[1:3] = 20
|
||||
tm.assert_series_equal(ser, orig)
|
||||
|
||||
ser.loc[6:8] = 19
|
||||
orig.iloc[1:4] = 19
|
||||
tm.assert_series_equal(ser, orig)
|
||||
|
||||
ser2 = Series(range(5), index=ii[::2])
|
||||
orig2 = ser2.copy()
|
||||
|
||||
# this used to raise
|
||||
ser2.loc[6:8] = 22 # <- raises on main, sets on branch
|
||||
orig2.iloc[1] = 22
|
||||
tm.assert_series_equal(ser2, orig2)
|
||||
|
||||
ser2.loc[5:7] = 21
|
||||
orig2.iloc[:2] = 21
|
||||
tm.assert_series_equal(ser2, orig2)
|
||||
|
||||
|
||||
class TestIntervalIndexInsideMultiIndex:
|
||||
def test_mi_intervalindex_slicing_with_scalar(self):
|
||||
# GH#27456
|
||||
ii = IntervalIndex.from_arrays(
|
||||
[0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP"
|
||||
)
|
||||
idx = pd.MultiIndex.from_arrays(
|
||||
[
|
||||
pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]),
|
||||
pd.Index(
|
||||
["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"]
|
||||
),
|
||||
ii,
|
||||
]
|
||||
)
|
||||
|
||||
idx.names = ["Item", "RID", "MP"]
|
||||
df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]})
|
||||
df.index = idx
|
||||
|
||||
query_df = DataFrame(
|
||||
{
|
||||
"Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
|
||||
"RID": ["RID1", "RID1", "RID1", "RID2", "RID2"],
|
||||
"MP": [0.2, 1.5, 1.6, 11.1, 10.9],
|
||||
}
|
||||
)
|
||||
|
||||
query_df = query_df.sort_index()
|
||||
|
||||
idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
|
||||
query_df.index = idx
|
||||
result = df.value.loc[query_df.index]
|
||||
|
||||
# the IntervalIndex level is indexed with floats, which map to
|
||||
# the intervals containing them. Matching the behavior we would get
|
||||
# with _only_ an IntervalIndex, we get an IntervalIndex level back.
|
||||
sliced_level = ii.take([0, 1, 1, 3, 2])
|
||||
expected_index = pd.MultiIndex.from_arrays(
|
||||
[idx.get_level_values(0), idx.get_level_values(1), sliced_level]
|
||||
)
|
||||
expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"base",
|
||||
[101, 1010],
|
||||
)
|
||||
def test_reindex_behavior_with_interval_index(self, base):
|
||||
# GH 51826
|
||||
|
||||
ser = Series(
|
||||
range(base),
|
||||
index=IntervalIndex.from_arrays(range(base), range(1, base + 1)),
|
||||
)
|
||||
expected_result = Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)
|
||||
result = ser.reindex(index=[np.nan, 1.0])
|
||||
tm.assert_series_equal(result, expected_result)
|
@ -0,0 +1,229 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
|
||||
@pytest.fixture
|
||||
def series_with_interval_index(self):
|
||||
return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
|
||||
|
||||
def test_loc_with_interval(self, series_with_interval_index, indexer_sl):
|
||||
# loc with single label / list of labels:
|
||||
# - Intervals: only exact matches
|
||||
# - scalars: those that contain it
|
||||
|
||||
ser = series_with_interval_index.copy()
|
||||
|
||||
expected = 0
|
||||
result = indexer_sl(ser)[Interval(0, 1)]
|
||||
assert result == expected
|
||||
|
||||
expected = ser.iloc[3:5]
|
||||
result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
# missing or not exact
|
||||
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")):
|
||||
indexer_sl(ser)[Interval(3, 5, closed="left")]
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
|
||||
indexer_sl(ser)[Interval(3, 5)]
|
||||
|
||||
with pytest.raises(
|
||||
KeyError, match=re.escape("Interval(-2, 0, closed='right')")
|
||||
):
|
||||
indexer_sl(ser)[Interval(-2, 0)]
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")):
|
||||
indexer_sl(ser)[Interval(5, 6)]
|
||||
|
||||
def test_loc_with_scalar(self, series_with_interval_index, indexer_sl):
|
||||
# loc with single label / list of labels:
|
||||
# - Intervals: only exact matches
|
||||
# - scalars: those that contain it
|
||||
|
||||
ser = series_with_interval_index.copy()
|
||||
|
||||
assert indexer_sl(ser)[1] == 0
|
||||
assert indexer_sl(ser)[1.5] == 1
|
||||
assert indexer_sl(ser)[2] == 1
|
||||
|
||||
expected = ser.iloc[1:4]
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])
|
||||
|
||||
expected = ser.iloc[[1, 1, 2, 1]]
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]])
|
||||
|
||||
expected = ser.iloc[2:5]
|
||||
tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])
|
||||
|
||||
def test_loc_with_slices(self, series_with_interval_index, indexer_sl):
|
||||
# loc with slices:
|
||||
# - Interval objects: only works with exact matches
|
||||
# - scalars: only works for non-overlapping, monotonic intervals,
|
||||
# and start/stop select location based on the interval that
|
||||
# contains them:
|
||||
# (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))
|
||||
|
||||
ser = series_with_interval_index.copy()
|
||||
|
||||
# slice of interval
|
||||
|
||||
expected = ser.iloc[:3]
|
||||
result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = ser.iloc[3:]
|
||||
result = indexer_sl(ser)[Interval(3, 4) :]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
msg = "Interval objects are not currently supported"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
indexer_sl(ser)[Interval(3, 6) :]
|
||||
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
indexer_sl(ser)[Interval(3, 4, closed="left") :]
|
||||
|
||||
def test_slice_step_ne1(self, series_with_interval_index):
|
||||
# GH#31658 slice of scalar with step != 1
|
||||
ser = series_with_interval_index.copy()
|
||||
expected = ser.iloc[0:4:2]
|
||||
|
||||
result = ser[0:4:2]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result2 = ser[0:4][::2]
|
||||
tm.assert_series_equal(result2, expected)
|
||||
|
||||
def test_slice_float_start_stop(self, series_with_interval_index):
|
||||
# GH#31658 slicing with integers is positional, with floats is not
|
||||
# supported
|
||||
ser = series_with_interval_index.copy()
|
||||
|
||||
msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser[1.5:9.5:2]
|
||||
|
||||
def test_slice_interval_step(self, series_with_interval_index):
|
||||
# GH#31658 allows for integer step!=1, not Interval step
|
||||
ser = series_with_interval_index.copy()
|
||||
msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser[0 : 4 : Interval(0, 1)]
|
||||
|
||||
def test_loc_with_overlap(self, indexer_sl):
|
||||
idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
|
||||
ser = Series(range(len(idx)), index=idx)
|
||||
|
||||
# scalar
|
||||
expected = ser
|
||||
result = indexer_sl(ser)[4]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = indexer_sl(ser)[[4]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
# interval
|
||||
expected = 0
|
||||
result = indexer_sl(ser)[Interval(1, 5)]
|
||||
assert expected == result
|
||||
|
||||
expected = ser
|
||||
result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
|
||||
indexer_sl(ser)[Interval(3, 5)]
|
||||
|
||||
msg = (
|
||||
r"None of \[IntervalIndex\(\[\(3, 5\]\], "
|
||||
r"dtype='interval\[int64, right\]'\)\] are in the \[index\]"
|
||||
)
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
indexer_sl(ser)[[Interval(3, 5)]]
|
||||
|
||||
# slices with interval (only exact matches)
|
||||
expected = ser
|
||||
result = indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
msg = (
|
||||
"'can only get slices from an IntervalIndex if bounds are "
|
||||
"non-overlapping and all monotonic increasing or decreasing'"
|
||||
)
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)]
|
||||
|
||||
if indexer_sl is tm.loc:
|
||||
# slices with scalar raise for overlapping intervals
|
||||
# TODO KeyError is the appropriate error?
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
ser.loc[1:4]
|
||||
|
||||
def test_non_unique(self, indexer_sl):
|
||||
idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
|
||||
ser = Series(range(len(idx)), index=idx)
|
||||
|
||||
result = indexer_sl(ser)[Interval(1, 3)]
|
||||
assert result == 0
|
||||
|
||||
result = indexer_sl(ser)[[Interval(1, 3)]]
|
||||
expected = ser.iloc[0:1]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_non_unique_moar(self, indexer_sl):
|
||||
idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
|
||||
ser = Series(range(len(idx)), index=idx)
|
||||
|
||||
expected = ser.iloc[[0, 1]]
|
||||
result = indexer_sl(ser)[Interval(1, 3)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = ser
|
||||
result = indexer_sl(ser)[Interval(1, 3) :]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = ser.iloc[[0, 1]]
|
||||
result = indexer_sl(ser)[[Interval(1, 3)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_loc_getitem_missing_key_error_message(
|
||||
self, frame_or_series, series_with_interval_index
|
||||
):
|
||||
# GH#27365
|
||||
ser = series_with_interval_index.copy()
|
||||
obj = frame_or_series(ser)
|
||||
with pytest.raises(KeyError, match=r"\[6\]"):
|
||||
obj.loc[[4, 5, 6]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"intervals",
|
||||
[
|
||||
([Interval(-np.inf, 0.0), Interval(0.0, 1.0)]),
|
||||
([Interval(-np.inf, -2.0), Interval(-2.0, -1.0)]),
|
||||
([Interval(-1.0, 0.0), Interval(0.0, np.inf)]),
|
||||
([Interval(1.0, 2.0), Interval(2.0, np.inf)]),
|
||||
],
|
||||
)
|
||||
def test_repeating_interval_index_with_infs(intervals):
|
||||
# GH 46658
|
||||
|
||||
interval_index = Index(intervals * 51)
|
||||
|
||||
expected = np.arange(1, 102, 2, dtype=np.intp)
|
||||
result = interval_index.get_indexer_for([intervals[1]])
|
||||
|
||||
tm.assert_equal(result, expected)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,87 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
from pandas.errors import SettingWithCopyError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):
|
||||
# Inplace ops, originally from:
|
||||
# https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
|
||||
a = [12, 23]
|
||||
b = [123, None]
|
||||
c = [1234, 2345]
|
||||
d = [12345, 23456]
|
||||
tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
|
||||
events = {
|
||||
("eyes", "left"): a,
|
||||
("eyes", "right"): b,
|
||||
("ears", "left"): c,
|
||||
("ears", "right"): d,
|
||||
}
|
||||
multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
|
||||
zed = DataFrame(events, index=["a", "b"], columns=multiind)
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
elif warn_copy_on_write:
|
||||
with tm.assert_produces_warning(None):
|
||||
zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
else:
|
||||
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.assert_produces_warning(None):
|
||||
zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view
|
||||
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
|
||||
# 5216
|
||||
# make sure that we don't try to set a dead cache
|
||||
a = np.random.default_rng(2).random((10, 3))
|
||||
df = DataFrame(a, columns=["x", "y", "z"])
|
||||
df_original = df.copy()
|
||||
tuples = [(i, j) for i in range(5) for j in range(2)]
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df.index = index
|
||||
|
||||
# setting via chained assignment
|
||||
# but actually works, since everything is a view
|
||||
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.loc[0]["z"].iloc[0] = 1.0
|
||||
|
||||
if using_copy_on_write:
|
||||
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
|
||||
else:
|
||||
result = df.loc[(0, 0), "z"]
|
||||
assert result == 1
|
||||
|
||||
# correct setting
|
||||
df.loc[(0, 0), "z"] = 2
|
||||
result = df.loc[(0, 0), "z"]
|
||||
assert result == 2
|
||||
|
||||
|
||||
def test_indexer_caching(monkeypatch):
|
||||
# GH5727
|
||||
# make sure that indexers are in the _internal_names_set
|
||||
size_cutoff = 20
|
||||
with monkeypatch.context():
|
||||
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
|
||||
index = MultiIndex.from_arrays([np.arange(size_cutoff), np.arange(size_cutoff)])
|
||||
s = Series(np.zeros(size_cutoff), index=index)
|
||||
|
||||
# setitem
|
||||
s[s == 0] = 1
|
||||
expected = Series(np.ones(size_cutoff), index=index)
|
||||
tm.assert_series_equal(s, expected)
|
@ -0,0 +1,50 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Period,
|
||||
Series,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
|
||||
# GH4861, using datetime in period of multiindex raises exception
|
||||
|
||||
idx1 = Index(["a", "a", "a", "b", "b"])
|
||||
idx2 = period_range("2012-01", periods=len(idx1), freq="M")
|
||||
s = Series(np.random.default_rng(2).standard_normal(len(idx1)), [idx1, idx2])
|
||||
|
||||
# try Period as index
|
||||
expected = s.iloc[0]
|
||||
result = s.loc["a", Period("2012-01")]
|
||||
assert result == expected
|
||||
|
||||
# try datetime as index
|
||||
result = s.loc["a", datetime(2012, 1, 1)]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_multiindex_datetime_columns():
|
||||
# GH35015, using datetime as column indices raises exception
|
||||
|
||||
mi = MultiIndex.from_tuples(
|
||||
[(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=["a", "b"]
|
||||
)
|
||||
|
||||
df = DataFrame([], columns=mi)
|
||||
|
||||
expected_df = DataFrame(
|
||||
[],
|
||||
columns=MultiIndex.from_arrays(
|
||||
[[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=["a", "b"]
|
||||
),
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(df, expected_df)
|
@ -0,0 +1,410 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"access_method",
|
||||
[lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"level1_value, expected",
|
||||
[(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
|
||||
)
|
||||
def test_series_getitem_multiindex(access_method, level1_value, expected):
|
||||
# GH 6018
|
||||
# series regression getitem with a multi-index
|
||||
|
||||
mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"])
|
||||
ser = Series([1, 2, 3], index=mi)
|
||||
expected.index.name = "A"
|
||||
|
||||
result = access_method(ser, level1_value)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0_value", ["D", "A"])
|
||||
def test_series_getitem_duplicates_multiindex(level0_value):
|
||||
# GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise
|
||||
# the appropriate error, only in PY3 of course!
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
|
||||
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
|
||||
names=["tag", "day"],
|
||||
)
|
||||
arr = np.random.default_rng(2).standard_normal((len(index), 1))
|
||||
df = DataFrame(arr, index=index, columns=["val"])
|
||||
|
||||
# confirm indexing on missing value raises KeyError
|
||||
if level0_value != "A":
|
||||
with pytest.raises(KeyError, match=r"^'A'$"):
|
||||
df.val["A"]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^'X'$"):
|
||||
df.val["X"]
|
||||
|
||||
result = df.val[level0_value]
|
||||
expected = Series(
|
||||
arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day")
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
|
||||
s = multiindex_year_month_day_dataframe_random_data["A"]
|
||||
expected = s.reindex(s.index[42:65])
|
||||
expected.index = expected.index.droplevel(0).droplevel(0)
|
||||
|
||||
result = indexer_sl(s)[2000, 3]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem_returns_scalar(
|
||||
multiindex_year_month_day_dataframe_random_data, indexer_sl
|
||||
):
|
||||
s = multiindex_year_month_day_dataframe_random_data["A"]
|
||||
expected = s.iloc[49]
|
||||
|
||||
result = indexer_sl(s)[2000, 3, 10]
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer,expected_error,expected_error_msg",
|
||||
[
|
||||
(lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
|
||||
(lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
|
||||
(lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
|
||||
(lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
|
||||
(lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s)
|
||||
(lambda s: s[len(s)], KeyError, ""), # match should include len(s)
|
||||
(
|
||||
lambda s: s.iloc[len(s)],
|
||||
IndexError,
|
||||
"single positional indexer is out-of-bounds",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_series_getitem_indexing_errors(
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
indexer,
|
||||
expected_error,
|
||||
expected_error_msg,
|
||||
):
|
||||
s = multiindex_year_month_day_dataframe_random_data["A"]
|
||||
with pytest.raises(expected_error, match=expected_error_msg):
|
||||
indexer(s)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
):
|
||||
s = multiindex_year_month_day_dataframe_random_data["A"]
|
||||
result = s[(x > 0 for x in s)]
|
||||
expected = s[s > 0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data.T
|
||||
expected = df.values[:, 0]
|
||||
result = df["foo", "one"].values
|
||||
tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer,expected_error_msg",
|
||||
[
|
||||
(lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
|
||||
(lambda df: df["foobar"], r"^'foobar'$"),
|
||||
],
|
||||
)
|
||||
def test_frame_getitem_simple_key_error(
|
||||
multiindex_dataframe_random_data, indexer, expected_error_msg
|
||||
):
|
||||
df = multiindex_dataframe_random_data.T
|
||||
with pytest.raises(KeyError, match=expected_error_msg):
|
||||
indexer(df)
|
||||
|
||||
|
||||
def test_tuple_string_column_names():
|
||||
# GH#50372
|
||||
mi = MultiIndex.from_tuples([("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")])
|
||||
df = DataFrame([range(4), range(1, 5), range(2, 6)], columns=mi)
|
||||
df["single_index"] = 0
|
||||
|
||||
df_flat = df.copy()
|
||||
df_flat.columns = df_flat.columns.to_flat_index()
|
||||
df_flat["new_single_index"] = 0
|
||||
|
||||
result = df_flat[[("a", "aa"), "new_single_index"]]
|
||||
expected = DataFrame(
|
||||
[[0, 0], [1, 0], [2, 0]], columns=Index([("a", "aa"), "new_single_index"])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
|
||||
df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
|
||||
df.columns = [
|
||||
["level1 item1", "level1 item2"],
|
||||
["", "level2 item2"],
|
||||
["level3 item1", "level3 item2"],
|
||||
]
|
||||
|
||||
result = df["level1 item1"]
|
||||
expected = DataFrame(
|
||||
[["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer,expected_slice",
|
||||
[
|
||||
(lambda df: df["foo"], slice(3)),
|
||||
(lambda df: df["bar"], slice(3, 5)),
|
||||
(lambda df: df.loc[:, "bar"], slice(3, 5)),
|
||||
],
|
||||
)
|
||||
def test_frame_getitem_toplevel(
|
||||
multiindex_dataframe_random_data, indexer, expected_slice
|
||||
):
|
||||
df = multiindex_dataframe_random_data.T
|
||||
expected = df.reindex(columns=df.columns[expected_slice])
|
||||
expected.columns = expected.columns.droplevel(0)
|
||||
result = indexer(df)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mixed_depth_get():
|
||||
arrays = [
|
||||
["a", "top", "top", "routine1", "routine1", "routine2"],
|
||||
["", "OD", "OD", "result1", "result2", "result1"],
|
||||
["", "wx", "wy", "", "", ""],
|
||||
]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
|
||||
|
||||
result = df["a"]
|
||||
expected = df["a", "", ""].rename("a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df["routine1", "result1"]
|
||||
expected = df["routine1", "result1", ""]
|
||||
expected = expected.rename(("routine1", "result1"))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_getitem_nan_multiindex(nulls_fixture):
|
||||
# GH#29751
|
||||
# loc on a multiindex containing nan values
|
||||
n = nulls_fixture # for code readability
|
||||
cols = ["a", "b", "c"]
|
||||
df = DataFrame(
|
||||
[[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]],
|
||||
columns=cols,
|
||||
).set_index(["a", "b"])
|
||||
df["c"] = df["c"].astype("int64")
|
||||
|
||||
idx = (21, n)
|
||||
result = df.loc[:idx]
|
||||
expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"])
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[idx:]
|
||||
expected = DataFrame(
|
||||
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols
|
||||
).set_index(["a", "b"])
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
idx1, idx2 = (21, n), (31, n)
|
||||
result = df.loc[idx1:idx2]
|
||||
expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"])
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer,expected",
|
||||
[
|
||||
(
|
||||
(["b"], ["bar", np.nan]),
|
||||
(
|
||||
DataFrame(
|
||||
[[2, 3], [5, 6]],
|
||||
columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["a", "b"]),
|
||||
(
|
||||
DataFrame(
|
||||
[[1, 2, 3], [4, 5, 6]],
|
||||
columns=MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("b", "bar"), ("b", np.nan)]
|
||||
),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["b"]),
|
||||
(
|
||||
DataFrame(
|
||||
[[2, 3], [5, 6]],
|
||||
columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["b"], ["bar"]),
|
||||
(
|
||||
DataFrame(
|
||||
[[2], [5]],
|
||||
columns=MultiIndex.from_tuples([("b", "bar")]),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["b"], [np.nan]),
|
||||
(
|
||||
DataFrame(
|
||||
[[3], [6]],
|
||||
columns=MultiIndex(
|
||||
codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
|
||||
),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
|
||||
],
|
||||
)
|
||||
def test_frame_getitem_nan_cols_multiindex(
|
||||
indexer,
|
||||
expected,
|
||||
nulls_fixture,
|
||||
):
|
||||
# Slicing MultiIndex including levels with nan values, for more information
|
||||
# see GH#25154
|
||||
df = DataFrame(
|
||||
[[1, 2, 3], [4, 5, 6]],
|
||||
columns=MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
|
||||
),
|
||||
dtype="int64",
|
||||
)
|
||||
|
||||
result = df.loc[:, indexer]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dataframe_with_duplicate_index():
|
||||
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
|
||||
data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
|
||||
index = ["h1", "h3", "h5"]
|
||||
columns = MultiIndex(
|
||||
levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
|
||||
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
|
||||
names=["main", "sub"],
|
||||
)
|
||||
return DataFrame(data, index=index, columns=columns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
|
||||
)
|
||||
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
|
||||
# GH 4145
|
||||
df = dataframe_with_duplicate_index
|
||||
index = Index(["h1", "h3", "h5"])
|
||||
columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
|
||||
expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T
|
||||
|
||||
result = indexer(df)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
|
||||
# GH 4146, not returning a block manager when selecting a unique index
|
||||
# from a duplicate index
|
||||
# as of 4879, this returns a Series (which is similar to what happens
|
||||
# with a non-unique)
|
||||
df = dataframe_with_duplicate_index
|
||||
expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
|
||||
result = df["A"]["A1"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
|
||||
# selecting a non_unique from the 2nd level
|
||||
df = dataframe_with_duplicate_index
|
||||
expected = DataFrame(
|
||||
[["d", 4, 4], ["e", 5, 5]],
|
||||
index=Index(["B2", "B2"], name="sub"),
|
||||
columns=["h1", "h3", "h5"],
|
||||
).T
|
||||
result = df["A"]["B2"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_empty_slice():
|
||||
# GH 15454
|
||||
df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]]))
|
||||
result = df[[]]
|
||||
expected = DataFrame(
|
||||
index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_empty_multiindex():
|
||||
# GH#36936
|
||||
arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]]
|
||||
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
|
||||
df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
|
||||
|
||||
# loc on empty multiindex == loc with False mask
|
||||
empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index
|
||||
result = df.loc[empty_multiindex, :]
|
||||
expected = df.loc[[False] * len(df.index), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# replacing value with loc on empty multiindex
|
||||
df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5
|
||||
result = df
|
||||
expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,171 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def simple_multiindex_dataframe():
|
||||
"""
|
||||
Factory function to create simple 3 x 3 dataframe with
|
||||
both columns and row MultiIndex using supplied data or
|
||||
random data by default.
|
||||
"""
|
||||
|
||||
data = np.random.default_rng(2).standard_normal((3, 3))
|
||||
return DataFrame(
|
||||
data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, expected",
|
||||
[
|
||||
(
|
||||
lambda df: df.iloc[0],
|
||||
lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
|
||||
),
|
||||
(
|
||||
lambda df: df.iloc[2],
|
||||
lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
|
||||
),
|
||||
(
|
||||
lambda df: df.iloc[:, 2],
|
||||
lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe
|
||||
arr = df.values
|
||||
result = indexer(df)
|
||||
expected = expected(arr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe
|
||||
result = df.iloc[[0, 1]]
|
||||
expected = df.xs(4, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe
|
||||
arr = df.values
|
||||
result = df.iloc[2, 2]
|
||||
expected = arr[2, 2]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
|
||||
# GH 5528
|
||||
tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]])
|
||||
index = MultiIndex.from_tuples(tup)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=index)
|
||||
result = df.iloc[[2, 3]]
|
||||
expected = df.xs("b", drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
|
||||
# this is basically regular indexing
|
||||
arr = np.random.default_rng(2).standard_normal((4, 3))
|
||||
df = DataFrame(
|
||||
arr,
|
||||
columns=[["i", "i", "j"], ["A", "A", "B"]],
|
||||
index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
|
||||
)
|
||||
result = df.iloc[2, 2]
|
||||
expected = arr[2, 2]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
result = df.iloc[:4]
|
||||
expected = df[:4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
df.iloc[:4] = 0
|
||||
|
||||
assert (df.values[:4] == 0).all()
|
||||
assert (df.values[4:] != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
|
||||
# GH 1678
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
|
||||
)
|
||||
index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
|
||||
|
||||
df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns)
|
||||
|
||||
result = df.iloc[:, 1]
|
||||
expected = df.loc[:, ("Ohio", "Red")]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
|
||||
# GH 13797
|
||||
data = [
|
||||
["str00", "str01"],
|
||||
["str10", "str11"],
|
||||
["str20", "srt21"],
|
||||
["str30", "str31"],
|
||||
["str40", "str41"],
|
||||
]
|
||||
|
||||
index = MultiIndex.from_tuples(
|
||||
[("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
|
||||
)
|
||||
|
||||
expected = DataFrame(data)
|
||||
df = DataFrame(data, index=index)
|
||||
|
||||
result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, indexes, values, expected_k",
|
||||
[
|
||||
# test without indexer value in first level of MultiIndex
|
||||
([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
|
||||
# test like code sample 1 in the issue
|
||||
([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
|
||||
# test like code sample 2 in the issue
|
||||
([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
|
||||
# test like code sample 3 in the issue
|
||||
([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
|
||||
],
|
||||
)
|
||||
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
|
||||
# GH17148
|
||||
df = DataFrame(data=data, columns=["i", "j", "k"])
|
||||
df = df.set_index(["i", "j"])
|
||||
|
||||
series = df.k.copy()
|
||||
for i, v in zip(indexes, values):
|
||||
series.iloc[i] += v
|
||||
|
||||
df["k"] = expected_k
|
||||
expected = df.k
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
result = df.iloc[2]
|
||||
expected = df.xs(df.index[2])
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def m():
|
||||
return 5
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def n():
|
||||
return 100
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def cols():
|
||||
return ["jim", "joe", "jolie", "joline", "jolia"]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def vals(n):
|
||||
vals = [
|
||||
np.random.default_rng(2).integers(0, 10, n),
|
||||
np.random.default_rng(2).choice(list("abcdefghij"), n),
|
||||
np.random.default_rng(2).choice(
|
||||
pd.date_range("20141009", periods=10).tolist(), n
|
||||
),
|
||||
np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n),
|
||||
np.random.default_rng(2).standard_normal(n),
|
||||
]
|
||||
vals = list(map(tuple, zip(*vals)))
|
||||
return vals
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def keys(n, m, vals):
|
||||
# bunch of keys for testing
|
||||
keys = [
|
||||
np.random.default_rng(2).integers(0, 11, m),
|
||||
np.random.default_rng(2).choice(list("abcdefghijk"), m),
|
||||
np.random.default_rng(2).choice(
|
||||
pd.date_range("20141009", periods=11).tolist(), m
|
||||
),
|
||||
np.random.default_rng(2).choice(list("ZYXWVUTSRQP"), m),
|
||||
]
|
||||
keys = list(map(tuple, zip(*keys)))
|
||||
keys += [t[:-1] for t in vals[:: n // m]]
|
||||
return keys
|
||||
|
||||
|
||||
# covers both unique index and non-unique index
|
||||
@pytest.fixture
|
||||
def df(vals, cols):
|
||||
return DataFrame(vals, columns=cols)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def a(df):
|
||||
return pd.concat([df, df])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def b(df, cols):
|
||||
return df.drop_duplicates(subset=cols[:-1])
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
|
||||
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
|
||||
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
|
||||
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
|
||||
# GH7724, GH2646
|
||||
|
||||
frame = request.getfixturevalue(frame_fixture)
|
||||
if lexsort_depth == 0:
|
||||
df = frame.copy(deep=False)
|
||||
else:
|
||||
df = frame.sort_values(by=cols[:lexsort_depth])
|
||||
|
||||
mi = df.set_index(cols[:-1])
|
||||
assert not mi.index._lexsort_depth < lexsort_depth
|
||||
for key in keys:
|
||||
mask = np.ones(len(df), dtype=bool)
|
||||
|
||||
# test for all partials of this key
|
||||
for i, k in enumerate(key):
|
||||
mask &= df.iloc[:, i] == k
|
||||
|
||||
if not mask.any():
|
||||
assert key[: i + 1] not in mi.index
|
||||
continue
|
||||
|
||||
assert key[: i + 1] in mi.index
|
||||
right = df[mask].copy(deep=False)
|
||||
|
||||
if i + 1 != len(key): # partial key
|
||||
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
|
||||
assert return_value is None
|
||||
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
|
||||
|
||||
else: # full key
|
||||
return_value = right.set_index(cols[:-1], inplace=True)
|
||||
assert return_value is None
|
||||
if len(right) == 1: # single hit
|
||||
right = Series(
|
||||
right["jolia"].values, name=right.index[0], index=["jolia"]
|
||||
)
|
||||
tm.assert_series_equal(mi.loc[key[: i + 1]], right)
|
||||
else: # multi hit
|
||||
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
|
@ -0,0 +1,992 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
IndexingError,
|
||||
PerformanceWarning,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def single_level_multiindex():
|
||||
"""single level MultiIndex"""
|
||||
return MultiIndex(
|
||||
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_random_data_integer_multi_index():
|
||||
levels = [[0, 1], [0, 1, 2]]
|
||||
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
|
||||
index = MultiIndex(levels=levels, codes=codes)
|
||||
return DataFrame(np.random.default_rng(2).standard_normal((6, 2)), index=index)
|
||||
|
||||
|
||||
class TestMultiIndexLoc:
|
||||
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
frame.loc[("bar", "two"), "B"] = 5
|
||||
assert frame.loc[("bar", "two"), "B"] == 5
|
||||
|
||||
# with integer labels
|
||||
df = frame.copy()
|
||||
df.columns = list(range(3))
|
||||
df.loc[("bar", "two"), 1] = 7
|
||||
assert df.loc[("bar", "two"), 1] == 7
|
||||
|
||||
def test_loc_getitem_general(self, any_real_numpy_dtype):
|
||||
# GH#2817
|
||||
dtype = any_real_numpy_dtype
|
||||
data = {
|
||||
"amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
|
||||
"col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
|
||||
"num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12},
|
||||
}
|
||||
df = DataFrame(data)
|
||||
df = df.astype({"col": dtype, "num": dtype})
|
||||
df = df.set_index(keys=["col", "num"])
|
||||
key = 4.0, 12
|
||||
|
||||
# emits a PerformanceWarning, ok
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
|
||||
|
||||
# this is ok
|
||||
return_value = df.sort_index(inplace=True)
|
||||
assert return_value is None
|
||||
res = df.loc[key]
|
||||
|
||||
# col has float dtype, result should be float64 Index
|
||||
col_arr = np.array([4.0] * 3, dtype=dtype)
|
||||
year_arr = np.array([12] * 3, dtype=dtype)
|
||||
index = MultiIndex.from_arrays([col_arr, year_arr], names=["col", "num"])
|
||||
expected = DataFrame({"amount": [222, 333, 444]}, index=index)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_loc_getitem_multiindex_missing_label_raises(self):
|
||||
# GH#21593
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^2$"):
|
||||
df.loc[2]
|
||||
|
||||
def test_loc_getitem_list_of_tuples_with_multiindex(
|
||||
self, multiindex_year_month_day_dataframe_random_data
|
||||
):
|
||||
ser = multiindex_year_month_day_dataframe_random_data["A"]
|
||||
expected = ser.reindex(ser.index[49:51])
|
||||
result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_series(self):
|
||||
# GH14730
|
||||
# passing a series as a key with a MultiIndex
|
||||
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
|
||||
x = Series(index=index, data=range(9), dtype=np.float64)
|
||||
y = Series([1, 3])
|
||||
expected = Series(
|
||||
data=[0, 1, 2, 6, 7, 8],
|
||||
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[y]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = x.loc[[1, 3]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH15424
|
||||
y1 = Series([1, 3], index=[1, 2])
|
||||
result = x.loc[y1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
empty = Series(data=[], dtype=np.float64)
|
||||
expected = Series(
|
||||
[],
|
||||
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[empty]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_array(self):
|
||||
# GH15434
|
||||
# passing an array as a key with a MultiIndex
|
||||
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
|
||||
x = Series(index=index, data=range(9), dtype=np.float64)
|
||||
y = np.array([1, 3])
|
||||
expected = Series(
|
||||
data=[0, 1, 2, 6, 7, 8],
|
||||
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[y]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# empty array:
|
||||
empty = np.array([])
|
||||
expected = Series(
|
||||
[],
|
||||
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
|
||||
dtype="float64",
|
||||
)
|
||||
result = x.loc[empty]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# 0-dim array (scalar):
|
||||
scalar = np.int64(1)
|
||||
expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
|
||||
result = x.loc[scalar]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_labels(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[["i", "i", "j"], ["A", "A", "B"]],
|
||||
index=[["i", "i", "j"], ["X", "X", "Y"]],
|
||||
)
|
||||
|
||||
# the first 2 rows
|
||||
expected = df.iloc[[0, 1]].droplevel(0)
|
||||
result = df.loc["i"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# 2nd (last) column
|
||||
expected = df.iloc[:, [2]].droplevel(0, axis=1)
|
||||
result = df.loc[:, "j"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# bottom right corner
|
||||
expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
|
||||
result = df.loc["j"].loc[:, "j"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with a tuple
|
||||
expected = df.iloc[[0, 1]]
|
||||
result = df.loc[("i", "X")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_ints(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
expected = df.iloc[[0, 1]].droplevel(0)
|
||||
result = df.loc[4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_missing_label_raises(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^2$"):
|
||||
df.loc[2]
|
||||
|
||||
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
|
||||
def test_loc_multiindex_list_missing_label(self, key, pos):
|
||||
# GH 27148 - lists with missing labels _do_ raise
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match="not in index"):
|
||||
df.loc[key]
|
||||
|
||||
def test_loc_multiindex_too_many_dims_raises(self):
|
||||
# GH 14885
|
||||
s = Series(
|
||||
range(8),
|
||||
index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
|
||||
s.loc["a", "b"]
|
||||
with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
|
||||
s.loc["a", "d", "g"]
|
||||
with pytest.raises(IndexingError, match="Too many indexers"):
|
||||
s.loc["a", "d", "g", "j"]
|
||||
|
||||
def test_loc_multiindex_indexer_none(self):
|
||||
# GH6788
|
||||
# multi-index indexer is None (meaning take all)
|
||||
attributes = ["Attribute" + str(i) for i in range(1)]
|
||||
attribute_values = ["Value" + str(i) for i in range(5)]
|
||||
|
||||
index = MultiIndex.from_product([attributes, attribute_values])
|
||||
df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5
|
||||
df = DataFrame(df, columns=index)
|
||||
result = df[attributes]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
# GH 7349
|
||||
# loc with a multi-index seems to be doing fallback
|
||||
df = DataFrame(
|
||||
np.arange(12).reshape(-1, 1),
|
||||
index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
|
||||
)
|
||||
|
||||
expected = df.loc[([1, 2],), :]
|
||||
result = df.loc[[1, 2]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_incomplete(self):
|
||||
# GH 7399
|
||||
# incomplete indexers
|
||||
s = Series(
|
||||
np.arange(15, dtype="int64"),
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
|
||||
)
|
||||
expected = s.loc[:, "a":"c"]
|
||||
|
||||
result = s.loc[0:4, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[:4, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[0:, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 7400
|
||||
# multiindexer getitem with list of indexers skips wrong element
|
||||
s = Series(
|
||||
np.arange(15, dtype="int64"),
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
|
||||
)
|
||||
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
|
||||
result = s.loc[2:4:2, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_get_loc_single_level(self, single_level_multiindex):
|
||||
single_level = single_level_multiindex
|
||||
s = Series(
|
||||
np.random.default_rng(2).standard_normal(len(single_level)),
|
||||
index=single_level,
|
||||
)
|
||||
for k in single_level.values:
|
||||
s[k]
|
||||
|
||||
def test_loc_getitem_int_slice(self):
|
||||
# GH 3053
|
||||
# loc should treat integer slices like label slices
|
||||
|
||||
index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
|
||||
result = df.loc[6:8, :]
|
||||
expected = df
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
|
||||
result = df.loc[20:30, :]
|
||||
expected = df.iloc[2:]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# doc examples
|
||||
result = df.loc[10, :]
|
||||
expected = df.iloc[0:2]
|
||||
expected.index = ["a", "b"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[:, 10]
|
||||
expected = df[10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
|
||||
)
|
||||
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
|
||||
# GH #19686
|
||||
# .loc should work with nested indexers which can be
|
||||
# any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices
|
||||
|
||||
def convert_nested_indexer(indexer_type, keys):
|
||||
if indexer_type == np.ndarray:
|
||||
return np.array(keys)
|
||||
if indexer_type == slice:
|
||||
return slice(*keys)
|
||||
return indexer_type(keys)
|
||||
|
||||
a = [10, 20, 30]
|
||||
b = [1, 2, 3]
|
||||
index = MultiIndex.from_product([a, b])
|
||||
df = DataFrame(
|
||||
np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
|
||||
)
|
||||
|
||||
keys = ([10, 20], [2, 3])
|
||||
types = (indexer_type_1, indexer_type_2)
|
||||
|
||||
# check indexers with all the combinations of nested objects
|
||||
# of all the valid types
|
||||
indexer = tuple(
|
||||
convert_nested_indexer(indexer_type, k)
|
||||
for indexer_type, k in zip(types, keys)
|
||||
)
|
||||
if indexer_type_1 is set or indexer_type_2 is set:
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
df.loc[indexer, "Data"]
|
||||
|
||||
return
|
||||
else:
|
||||
result = df.loc[indexer, "Data"]
|
||||
expected = Series(
|
||||
[1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
|
||||
)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
|
||||
# GH#37711
|
||||
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
|
||||
obj = frame_or_series([1, 2], index=mi)
|
||||
obj.loc[("a",)] = 0
|
||||
expected = frame_or_series([0, 2], index=mi)
|
||||
tm.assert_equal(obj, expected)
|
||||
|
||||
@pytest.mark.parametrize("indexer", [("a",), ("a")])
|
||||
def test_multiindex_one_dimensional_tuple_columns(self, indexer):
|
||||
# GH#37711
|
||||
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
|
||||
obj = DataFrame([1, 2], index=mi)
|
||||
obj.loc[indexer, :] = 0
|
||||
expected = DataFrame([0, 2], index=mi)
|
||||
tm.assert_frame_equal(obj, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
|
||||
)
|
||||
def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
|
||||
# GH#39147
|
||||
mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
|
||||
df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"])
|
||||
df.loc[indexer, ["c", "d"]] = 1.0
|
||||
expected = DataFrame(
|
||||
[[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]],
|
||||
index=mi,
|
||||
columns=["a", "b", "c", "d"],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_sorted_multiindex_after_union(self):
|
||||
# GH#44752
|
||||
midx = MultiIndex.from_product(
|
||||
[pd.date_range("20110101", periods=2), Index(["a", "b"])]
|
||||
)
|
||||
ser1 = Series(1, index=midx)
|
||||
ser2 = Series(1, index=midx[:2])
|
||||
df = pd.concat([ser1, ser2], axis=1)
|
||||
expected = df.copy()
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame({0: ser1, 1: ser2})
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_no_second_level_index(self):
|
||||
# GH#43599
|
||||
df = DataFrame(
|
||||
index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]),
|
||||
columns=["Val"],
|
||||
)
|
||||
res = df.loc[np.s_[:, "c", :]]
|
||||
expected = DataFrame(
|
||||
index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"]
|
||||
)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_loc_multi_index_key_error(self):
|
||||
# GH 51892
|
||||
df = DataFrame(
|
||||
{
|
||||
(1, 2): ["a", "b", "c"],
|
||||
(1, 3): ["d", "e", "f"],
|
||||
(2, 2): ["g", "h", "i"],
|
||||
(2, 4): ["j", "k", "l"],
|
||||
}
|
||||
)
|
||||
with pytest.raises(KeyError, match=r"(1, 4)"):
|
||||
df.loc[0, (1, 4)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, pos",
|
||||
[
|
||||
([], []), # empty ok
|
||||
(["A"], slice(3)),
|
||||
(["A", "D"], []), # "D" isn't present -> raise
|
||||
(["D", "E"], []), # no values found -> raise
|
||||
(["D"], []), # same, with single item list: GH 27148
|
||||
(pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
|
||||
(pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
|
||||
],
|
||||
)
|
||||
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
|
||||
# GH 7866
|
||||
# multi-index slicing with missing indexers
|
||||
idx = MultiIndex.from_product(
|
||||
[["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
|
||||
)
|
||||
ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
|
||||
expected = ser.iloc[pos]
|
||||
|
||||
if expected.size == 0 and indexer != []:
|
||||
with pytest.raises(KeyError, match=str(indexer)):
|
||||
ser.loc[indexer]
|
||||
elif indexer == (slice(None), ["foo", "bah"]):
|
||||
# "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
|
||||
with pytest.raises(KeyError, match="'bah'"):
|
||||
ser.loc[indexer]
|
||||
else:
|
||||
result = ser.loc[indexer]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
|
||||
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
|
||||
# GH 8737
|
||||
# empty indexer
|
||||
multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((5, 6)),
|
||||
index=range(5),
|
||||
columns=multi_index,
|
||||
)
|
||||
df = df.sort_index(level=0, axis=1)
|
||||
|
||||
expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
|
||||
result = df.loc[:, columns_indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
|
||||
# regression from < 0.14.0
|
||||
# GH 7914
|
||||
df = DataFrame(
|
||||
[[np.mean, np.median], ["mean", "median"]],
|
||||
columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]),
|
||||
index=["function", "name"],
|
||||
)
|
||||
result = df.loc["function", ("functs", "mean")]
|
||||
expected = np.mean
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
|
||||
# GH 671
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": np.arange(10),
|
||||
"b": np.arange(10),
|
||||
"c": np.random.default_rng(2).standard_normal(10),
|
||||
"d": np.random.default_rng(2).standard_normal(10),
|
||||
}
|
||||
).set_index(["a", "b"])
|
||||
expected = df.loc[0, 0]
|
||||
result = df.loc[(0, 0), :]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
|
||||
df = frame_random_data_integer_multi_index
|
||||
result = df.loc[1]
|
||||
expected = df[-3:]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
|
||||
df = frame_random_data_integer_multi_index
|
||||
with pytest.raises(KeyError, match=r"^3$"):
|
||||
df.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
|
||||
# test setup - check key not in dataframe
|
||||
with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
|
||||
df.loc[("bar", "three"), "B"]
|
||||
|
||||
# in theory should be inserting in a sorted space????
|
||||
df.loc[("bar", "three"), "B"] = 0
|
||||
expected = 0
|
||||
result = df.sort_index().loc[("bar", "three"), "B"]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_loc_setitem_single_column_slice():
|
||||
# case from https://github.com/pandas-dev/pandas/issues/27841
|
||||
df = DataFrame(
|
||||
"string",
|
||||
index=list("abcd"),
|
||||
columns=MultiIndex.from_product([["Main"], ("another", "one")]),
|
||||
)
|
||||
df["labels"] = "a"
|
||||
df.loc[:, "labels"] = df.index
|
||||
tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index))
|
||||
|
||||
# test with non-object block
|
||||
df = DataFrame(
|
||||
np.nan,
|
||||
index=range(4),
|
||||
columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]),
|
||||
)
|
||||
expected = df.copy()
|
||||
df.loc[:, "B"] = np.arange(4)
|
||||
expected.iloc[:, 2] = np.arange(4)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_nan_multiindex(using_infer_string):
|
||||
# GH 5286
|
||||
tups = [
|
||||
("Good Things", "C", np.nan),
|
||||
("Good Things", "R", np.nan),
|
||||
("Bad Things", "C", np.nan),
|
||||
("Bad Things", "T", np.nan),
|
||||
("Okay Things", "N", "B"),
|
||||
("Okay Things", "N", "D"),
|
||||
("Okay Things", "B", np.nan),
|
||||
("Okay Things", "D", np.nan),
|
||||
]
|
||||
df = DataFrame(
|
||||
np.ones((8, 4)),
|
||||
columns=Index(["d1", "d2", "d3", "d4"]),
|
||||
index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]),
|
||||
)
|
||||
result = df.loc["Good Things"].loc["C"]
|
||||
expected = DataFrame(
|
||||
np.ones((1, 4)),
|
||||
index=Index(
|
||||
[np.nan],
|
||||
dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
|
||||
name="u3",
|
||||
),
|
||||
columns=Index(["d1", "d2", "d3", "d4"]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_period_string_indexing():
|
||||
# GH 9892
|
||||
a = pd.period_range("2013Q1", "2013Q4", freq="Q")
|
||||
i = (1111, 2222, 3333)
|
||||
idx = MultiIndex.from_product((a, i), names=("Period", "CVR"))
|
||||
df = DataFrame(
|
||||
index=idx,
|
||||
columns=(
|
||||
"OMS",
|
||||
"OMK",
|
||||
"RES",
|
||||
"DRIFT_IND",
|
||||
"OEVRIG_IND",
|
||||
"FIN_IND",
|
||||
"VARE_UD",
|
||||
"LOEN_UD",
|
||||
"FIN_UD",
|
||||
),
|
||||
)
|
||||
result = df.loc[("2013Q1", 1111), "OMS"]
|
||||
|
||||
alt = df.loc[(a[0], 1111), "OMS"]
|
||||
assert np.isnan(alt)
|
||||
|
||||
# Because the resolution of the string matches, it is an exact lookup,
|
||||
# not a slice
|
||||
assert np.isnan(result)
|
||||
|
||||
alt = df.loc[("2013Q1", 1111), "OMS"]
|
||||
assert np.isnan(alt)
|
||||
|
||||
|
||||
def test_loc_datetime_mask_slicing():
|
||||
# GH 16699
|
||||
dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
|
||||
m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
|
||||
df = DataFrame(
|
||||
data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
|
||||
)
|
||||
result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"]
|
||||
expected = Series(
|
||||
[3],
|
||||
name="C1",
|
||||
index=MultiIndex.from_tuples(
|
||||
[(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
|
||||
names=["Idx1", "Idx2"],
|
||||
),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_datetime_series_tuple_slicing():
|
||||
# https://github.com/pandas-dev/pandas/issues/35858
|
||||
date = pd.Timestamp("2000")
|
||||
ser = Series(
|
||||
1,
|
||||
index=MultiIndex.from_tuples([("a", date)], names=["a", "b"]),
|
||||
name="c",
|
||||
)
|
||||
result = ser.loc[:, [date]]
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
def test_loc_with_mi_indexer():
|
||||
# https://github.com/pandas-dev/pandas/issues/35351
|
||||
df = DataFrame(
|
||||
data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
|
||||
index=MultiIndex.from_tuples(
|
||||
[(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"]
|
||||
),
|
||||
columns=["author", "price"],
|
||||
)
|
||||
idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"])
|
||||
result = df.loc[idx, :]
|
||||
expected = DataFrame(
|
||||
[["a", 1], ["b", 1], ["c", 2]],
|
||||
index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=["index", "date"]),
|
||||
columns=["author", "price"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_mi_with_level1_named_0():
|
||||
# GH#37194
|
||||
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
|
||||
|
||||
ser = Series(range(3), index=dti)
|
||||
df = ser.to_frame()
|
||||
df[1] = dti
|
||||
|
||||
df2 = df.set_index(0, append=True)
|
||||
assert df2.index.names == (None, 0)
|
||||
df2.index.get_loc(dti[0]) # smoke test
|
||||
|
||||
result = df2.loc[dti[0]]
|
||||
expected = df2.iloc[[0]].droplevel(None)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
ser2 = df2[1]
|
||||
assert ser2.index.names == (None, 0)
|
||||
|
||||
result = ser2.loc[dti[0]]
|
||||
expected = ser2.iloc[[0]].droplevel(None)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_str_slice():
|
||||
# GH#15928
|
||||
df = DataFrame(
|
||||
[
|
||||
["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
|
||||
["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
|
||||
["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
|
||||
["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
|
||||
["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
|
||||
["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
|
||||
],
|
||||
columns="time,ticker,bid,ask".split(","),
|
||||
)
|
||||
df2 = df.set_index(["ticker", "time"]).sort_index()
|
||||
|
||||
res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0)
|
||||
expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :]
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
|
||||
def test_3levels_leading_period_index():
|
||||
# GH#24091
|
||||
pi = pd.PeriodIndex(
|
||||
["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
|
||||
name="datetime",
|
||||
freq="D",
|
||||
)
|
||||
lev2 = ["A", "A", "Z", "W"]
|
||||
lev3 = ["B", "C", "Q", "F"]
|
||||
mi = MultiIndex.from_arrays([pi, lev2, lev3])
|
||||
|
||||
ser = Series(range(4), index=mi, dtype=np.float64)
|
||||
result = ser.loc[(pi[0], "A", "B")]
|
||||
assert result == 0.0
|
||||
|
||||
|
||||
class TestKeyErrorsWithMultiIndex:
|
||||
def test_missing_keys_raises_keyerror(self):
|
||||
# GH#27420 KeyError, not TypeError
|
||||
df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
|
||||
df2 = df.set_index(["A", "B"])
|
||||
|
||||
with pytest.raises(KeyError, match="1"):
|
||||
df2.loc[(1, 6)]
|
||||
|
||||
def test_missing_key_raises_keyerror2(self):
|
||||
# GH#21168 KeyError, not "IndexingError: Too many indexers"
|
||||
ser = Series(-1, index=MultiIndex.from_product([[0, 1]] * 2))
|
||||
|
||||
with pytest.raises(KeyError, match=r"\(0, 3\)"):
|
||||
ser.loc[0, 3]
|
||||
|
||||
def test_missing_key_combination(self):
|
||||
# GH: 19556
|
||||
mi = MultiIndex.from_arrays(
|
||||
[
|
||||
np.array(["a", "a", "b", "b"]),
|
||||
np.array(["1", "2", "2", "3"]),
|
||||
np.array(["c", "d", "c", "d"]),
|
||||
],
|
||||
names=["one", "two", "three"],
|
||||
)
|
||||
df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi)
|
||||
msg = r"\('b', '1', slice\(None, None, None\)\)"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df.loc[("b", "1", slice(None)), :]
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df.index.get_locs(("b", "1", slice(None)))
|
||||
with pytest.raises(KeyError, match=r"\('b', '1'\)"):
|
||||
df.loc[("b", "1"), :]
|
||||
|
||||
|
||||
def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
|
||||
df = multiindex_year_month_day_dataframe_random_data
|
||||
ser = df["A"]
|
||||
result = ser[2000, 5]
|
||||
expected = df.loc[2000, 5]["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_with_nan():
|
||||
# GH: 27104
|
||||
df = DataFrame(
|
||||
{"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]}
|
||||
).set_index(["ind1", "ind2"])
|
||||
result = df.loc[["a"]]
|
||||
expected = DataFrame(
|
||||
{"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc["a"]
|
||||
expected = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_non_found_tuple():
|
||||
# GH: 25236
|
||||
df = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"]).set_index(
|
||||
["a", "b", "c"]
|
||||
)
|
||||
with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
|
||||
df.loc[(2.0, 2.0, 3.0)]
|
||||
|
||||
|
||||
def test_get_loc_datetime_index():
|
||||
# GH#24263
|
||||
index = pd.date_range("2001-01-01", periods=100)
|
||||
mi = MultiIndex.from_arrays([index])
|
||||
# Check if get_loc matches for Index and MultiIndex
|
||||
assert mi.get_loc("2001-01") == slice(0, 31, None)
|
||||
assert index.get_loc("2001-01") == slice(0, 31, None)
|
||||
|
||||
loc = mi[::2].get_loc("2001-01")
|
||||
expected = index[::2].get_loc("2001-01")
|
||||
assert loc == expected
|
||||
|
||||
loc = mi.repeat(2).get_loc("2001-01")
|
||||
expected = index.repeat(2).get_loc("2001-01")
|
||||
assert loc == expected
|
||||
|
||||
loc = mi.append(mi).get_loc("2001-01")
|
||||
expected = index.append(index).get_loc("2001-01")
|
||||
# TODO: standardize return type for MultiIndex.get_loc
|
||||
tm.assert_numpy_array_equal(loc.nonzero()[0], expected)
|
||||
|
||||
|
||||
def test_loc_setitem_indexer_differently_ordered():
|
||||
# GH#34603
|
||||
mi = MultiIndex.from_product([["a", "b"], [0, 1]])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)
|
||||
|
||||
indexer = ("a", [1, 0])
|
||||
df.loc[indexer, :] = np.array([[9, 10], [11, 12]])
|
||||
expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_index_differently_ordered_slice_none():
|
||||
# GH#31330
|
||||
df = DataFrame(
|
||||
[[1, 2], [3, 4], [5, 6], [7, 8]],
|
||||
index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
|
||||
columns=["a", "b"],
|
||||
)
|
||||
result = df.loc[(slice(None), [2, 1]), :]
|
||||
expected = DataFrame(
|
||||
[[3, 4], [7, 8], [1, 2], [5, 6]],
|
||||
index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
|
||||
columns=["a", "b"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
|
||||
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
|
||||
# GH#40978
|
||||
df = DataFrame(
|
||||
[1] * 8,
|
||||
index=MultiIndex.from_tuples(
|
||||
[(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
|
||||
),
|
||||
columns=["a"],
|
||||
)
|
||||
result = df.loc[(slice(None), indexer), :]
|
||||
expected = DataFrame(
|
||||
[1] * 8,
|
||||
index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
|
||||
columns=["a"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[df.index.isin(indexer, level=1), :]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
def test_loc_getitem_drops_levels_for_one_row_dataframe():
|
||||
# GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
|
||||
mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])
|
||||
df = DataFrame({"d": [0]}, index=mi)
|
||||
expected = df.droplevel([0, 2])
|
||||
result = df.loc["x", :, "z"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
ser = Series([0], index=mi)
|
||||
result = ser.loc["x", :, "z"]
|
||||
expected = Series([0], index=Index(["y"], name="b"))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_columns_loc_list_label_order():
|
||||
# GH 10710
|
||||
cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
|
||||
df = DataFrame(np.zeros((5, 6)), columns=cols)
|
||||
result = df.loc[:, ["B", "A"]]
|
||||
expected = DataFrame(
|
||||
np.zeros((5, 4)),
|
||||
columns=MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_partial_indexing_list_raises():
|
||||
# GH 13501
|
||||
frame = DataFrame(
|
||||
np.arange(12).reshape((4, 3)),
|
||||
index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
|
||||
columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
|
||||
)
|
||||
frame.index.names = ["key1", "key2"]
|
||||
frame.columns.names = ["state", "color"]
|
||||
with pytest.raises(KeyError, match="\\[2\\] not in index"):
|
||||
frame.loc[["b", 2], "Colorado"]
|
||||
|
||||
|
||||
def test_mi_indexing_list_nonexistent_raises():
|
||||
# GH 15452
|
||||
s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]]))
|
||||
with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
|
||||
s.loc[["not", "found"]]
|
||||
|
||||
|
||||
def test_mi_add_cell_missing_row_non_unique():
|
||||
# GH 16018
|
||||
result = DataFrame(
|
||||
[[1, 2, 5, 6], [3, 4, 7, 8]],
|
||||
index=["a", "a"],
|
||||
columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
|
||||
)
|
||||
result.loc["c"] = -1
|
||||
result.loc["c", (1, "A")] = 3
|
||||
result.loc["d", (1, "A")] = 3
|
||||
expected = DataFrame(
|
||||
[
|
||||
[1.0, 2.0, 5.0, 6.0],
|
||||
[3.0, 4.0, 7.0, 8.0],
|
||||
[3.0, -1.0, -1, -1],
|
||||
[3.0, np.nan, np.nan, np.nan],
|
||||
],
|
||||
index=["a", "a", "c", "d"],
|
||||
columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_get_scalar_casting_to_float():
|
||||
# GH#41369
|
||||
df = DataFrame(
|
||||
{"a": 1.0, "b": 2}, index=MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
|
||||
)
|
||||
result = df.loc[(3, 4), "b"]
|
||||
assert result == 2
|
||||
assert isinstance(result, np.int64)
|
||||
result = df.loc[[(3, 4)], "b"].iloc[0]
|
||||
assert result == 2
|
||||
assert isinstance(result, np.int64)
|
||||
|
||||
|
||||
def test_loc_empty_single_selector_with_names():
|
||||
# GH 19517
|
||||
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
|
||||
s2 = Series(index=idx, dtype=np.float64)
|
||||
result = s2.loc["a"]
|
||||
expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_keyerror_rightmost_key_missing():
|
||||
# GH 20951
|
||||
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [100, 100, 200, 200, 300, 300],
|
||||
"B": [10, 10, 20, 21, 31, 33],
|
||||
"C": range(6),
|
||||
}
|
||||
)
|
||||
df = df.set_index(["A", "B"])
|
||||
with pytest.raises(KeyError, match="^1$"):
|
||||
df.loc[(100, 1)]
|
||||
|
||||
|
||||
def test_multindex_series_loc_with_tuple_label():
|
||||
# GH#43908
|
||||
mi = MultiIndex.from_tuples([(1, 2), (3, (4, 5))])
|
||||
ser = Series([1, 2], index=mi)
|
||||
result = ser.loc[(3, (4, 5))]
|
||||
assert result == 2
|
@ -0,0 +1,235 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.index as libindex
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.boolean import BooleanDtype
|
||||
|
||||
|
||||
class TestMultiIndexBasic:
|
||||
def test_multiindex_perf_warn(self):
|
||||
df = DataFrame(
|
||||
{
|
||||
"jim": [0, 0, 1, 1],
|
||||
"joe": ["x", "x", "z", "y"],
|
||||
"jolie": np.random.default_rng(2).random(4),
|
||||
}
|
||||
).set_index(["jim", "joe"])
|
||||
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
df.loc[(1, "z")]
|
||||
|
||||
df = df.iloc[[2, 1, 3, 0]]
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
df.loc[(0,)]
|
||||
|
||||
@pytest.mark.parametrize("offset", [-5, 5])
|
||||
def test_indexing_over_hashtable_size_cutoff(self, monkeypatch, offset):
|
||||
size_cutoff = 20
|
||||
n = size_cutoff + offset
|
||||
|
||||
with monkeypatch.context():
|
||||
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
|
||||
s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))
|
||||
|
||||
# hai it works!
|
||||
assert s[("a", 5)] == 5
|
||||
assert s[("a", 6)] == 6
|
||||
assert s[("a", 7)] == 7
|
||||
|
||||
def test_multi_nan_indexing(self):
|
||||
# GH 3588
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": ["R1", "R2", np.nan, "R4"],
|
||||
"b": ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20],
|
||||
}
|
||||
)
|
||||
result = df.set_index(["a", "b"], drop=False)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"a": ["R1", "R2", np.nan, "R4"],
|
||||
"b": ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20],
|
||||
},
|
||||
index=[
|
||||
Index(["R1", "R2", np.nan, "R4"], name="a"),
|
||||
Index(["C1", "C2", "C3", "C4"], name="b"),
|
||||
],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_exclusive_nat_column_indexing(self):
|
||||
# GH 38025
|
||||
# test multi indexing when one column exclusively contains NaT values
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
|
||||
"b": ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20],
|
||||
}
|
||||
)
|
||||
df = df.set_index(["a", "b"])
|
||||
expected = DataFrame(
|
||||
{
|
||||
"c": [10, 15, np.nan, 20],
|
||||
},
|
||||
index=[
|
||||
Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
|
||||
Index(["C1", "C2", "C3", "C4"], name="b"),
|
||||
],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_nested_tuples_duplicates(self):
|
||||
# GH#30892
|
||||
|
||||
dti = pd.to_datetime(["20190101", "20190101", "20190102"])
|
||||
idx = Index(["a", "a", "c"])
|
||||
mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])
|
||||
|
||||
df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
|
||||
|
||||
expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)
|
||||
|
||||
df2 = df.copy(deep=True)
|
||||
df2.loc[(dti[0], "a"), "c2"] = 1.0
|
||||
tm.assert_frame_equal(df2, expected)
|
||||
|
||||
df3 = df.copy(deep=True)
|
||||
df3.loc[[(dti[0], "a")], "c2"] = 1.0
|
||||
tm.assert_frame_equal(df3, expected)
|
||||
|
||||
def test_multiindex_with_datatime_level_preserves_freq(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/35563
|
||||
idx = Index(range(2), name="A")
|
||||
dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
|
||||
mi = MultiIndex.from_product([idx, dti])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((14, 2)), index=mi)
|
||||
result = df.loc[0].index
|
||||
tm.assert_index_equal(result, dti)
|
||||
assert result.freq == dti.freq
|
||||
|
||||
def test_multiindex_complex(self):
|
||||
# GH#42145
|
||||
complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
|
||||
non_complex_data = [3, 4, 5]
|
||||
result = DataFrame(
|
||||
{
|
||||
"x": complex_data,
|
||||
"y": non_complex_data,
|
||||
"z": non_complex_data,
|
||||
}
|
||||
)
|
||||
result.set_index(["x", "y"], inplace=True)
|
||||
expected = DataFrame(
|
||||
{"z": non_complex_data},
|
||||
index=MultiIndex.from_arrays(
|
||||
[complex_data, non_complex_data],
|
||||
names=("x", "y"),
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rename_multiindex_with_duplicates(self):
|
||||
# GH 38015
|
||||
mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
|
||||
df = DataFrame(index=mi)
|
||||
df = df.rename(index={"A": "Apple"}, level=0)
|
||||
|
||||
mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
|
||||
expected = DataFrame(index=mi2)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_series_align_multiindex_with_nan_overlap_only(self):
|
||||
# GH 38439
|
||||
mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
|
||||
mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]])
|
||||
ser1 = Series([1, 2], index=mi1)
|
||||
ser2 = Series([1, 2], index=mi2)
|
||||
result1, result2 = ser1.align(ser2)
|
||||
|
||||
mi = MultiIndex.from_arrays([[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]])
|
||||
expected1 = Series([1.0, np.nan, 2.0], index=mi)
|
||||
expected2 = Series([np.nan, 2.0, 1.0], index=mi)
|
||||
|
||||
tm.assert_series_equal(result1, expected1)
|
||||
tm.assert_series_equal(result2, expected2)
|
||||
|
||||
def test_series_align_multiindex_with_nan(self):
|
||||
# GH 38439
|
||||
mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
|
||||
mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]])
|
||||
ser1 = Series([1, 2], index=mi1)
|
||||
ser2 = Series([1, 2], index=mi2)
|
||||
result1, result2 = ser1.align(ser2)
|
||||
|
||||
mi = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
|
||||
expected1 = Series([1, 2], index=mi)
|
||||
expected2 = Series([2, 1], index=mi)
|
||||
|
||||
tm.assert_series_equal(result1, expected1)
|
||||
tm.assert_series_equal(result2, expected2)
|
||||
|
||||
def test_nunique_smoke(self):
|
||||
# GH 34019
|
||||
n = DataFrame([[1, 2], [1, 2]]).set_index([0, 1]).index.nunique()
|
||||
assert n == 1
|
||||
|
||||
def test_multiindex_repeated_keys(self):
|
||||
# GH19414
|
||||
tm.assert_series_equal(
|
||||
Series([1, 2], MultiIndex.from_arrays([["a", "b"]])).loc[
|
||||
["a", "a", "b", "b"]
|
||||
],
|
||||
Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])),
|
||||
)
|
||||
|
||||
def test_multiindex_with_na_missing_key(self):
|
||||
# GH46173
|
||||
df = DataFrame.from_dict(
|
||||
{
|
||||
("foo",): [1, 2, 3],
|
||||
("bar",): [5, 6, 7],
|
||||
(None,): [8, 9, 0],
|
||||
}
|
||||
)
|
||||
with pytest.raises(KeyError, match="missing_key"):
|
||||
df[[("missing_key",)]]
|
||||
|
||||
def test_multiindex_dtype_preservation(self):
|
||||
# GH51261
|
||||
columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
|
||||
df = DataFrame(["value"], columns=columns).astype("category")
|
||||
df_no_multiindex = df["A"]
|
||||
assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype)
|
||||
|
||||
# geopandas 1763 analogue
|
||||
df = DataFrame(
|
||||
[[1, 0], [0, 1]],
|
||||
columns=[
|
||||
["foo", "foo"],
|
||||
["location", "location"],
|
||||
["x", "y"],
|
||||
],
|
||||
).assign(bools=Series([True, False], dtype="boolean"))
|
||||
assert isinstance(df["bools"].dtype, BooleanDtype)
|
||||
|
||||
def test_multiindex_from_tuples_with_nan(self):
|
||||
# GH#23578
|
||||
result = MultiIndex.from_tuples([("a", "b", "c"), np.nan, ("d", "", "")])
|
||||
expected = MultiIndex.from_tuples(
|
||||
[("a", "b", "c"), (np.nan, np.nan, np.nan), ("d", "", "")]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,269 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexPartial:
|
||||
def test_getitem_partial_int(self):
|
||||
# GH 12416
|
||||
# with single item
|
||||
l1 = [10, 20]
|
||||
l2 = ["a", "b"]
|
||||
df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2]))
|
||||
expected = DataFrame(index=range(2), columns=l2)
|
||||
result = df[20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with list
|
||||
expected = DataFrame(
|
||||
index=range(2), columns=MultiIndex.from_product([l1[1:], l2])
|
||||
)
|
||||
result = df[[20]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# missing item:
|
||||
with pytest.raises(KeyError, match="1"):
|
||||
df[1]
|
||||
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
|
||||
df[[1]]
|
||||
|
||||
def test_series_slice_partial(self):
|
||||
pass
|
||||
|
||||
def test_xs_partial(
|
||||
self,
|
||||
multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
):
|
||||
frame = multiindex_dataframe_random_data
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = frame.xs("foo")
|
||||
result2 = frame.loc["foo"]
|
||||
expected = frame.T["foo"].T
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result, result2)
|
||||
|
||||
result = ymd.xs((2000, 4))
|
||||
expected = ymd.loc[2000, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# ex from #1796
|
||||
index = MultiIndex(
|
||||
levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
|
||||
codes=[
|
||||
[0, 0, 0, 0, 1, 1, 1, 1],
|
||||
[0, 0, 1, 1, 0, 0, 1, 1],
|
||||
[0, 1, 0, 1, 0, 1, 0, 1],
|
||||
],
|
||||
)
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((8, 4)),
|
||||
index=index,
|
||||
columns=list("abcd"),
|
||||
)
|
||||
|
||||
result = df.xs(("foo", "one"))
|
||||
expected = df.loc["foo", "one"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
ymd = ymd.T
|
||||
result = ymd[2000, 2]
|
||||
|
||||
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
|
||||
expected.columns = expected.columns.droplevel(0).droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fancy_slice_partial(
|
||||
self,
|
||||
multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
):
|
||||
frame = multiindex_dataframe_random_data
|
||||
result = frame.loc["bar":"baz"]
|
||||
expected = frame[3:7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[(2000, 2):(2000, 4)]
|
||||
lev = ymd.index.codes[1]
|
||||
expected = ymd[(lev >= 1) & (lev <= 3)]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial_column_select(self):
|
||||
idx = MultiIndex(
|
||||
codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
|
||||
levels=[["a", "b"], ["x", "y"], ["p", "q"]],
|
||||
)
|
||||
df = DataFrame(np.random.default_rng(2).random((3, 2)), index=idx)
|
||||
|
||||
result = df.loc[("a", "y"), :]
|
||||
expected = df.loc[("a", "y")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[("a", "y"), [1, 0]]
|
||||
expected = df.loc[("a", "y")][[1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
|
||||
df.loc[("a", "foo"), :]
|
||||
|
||||
# TODO(ArrayManager) rewrite test to not use .values
|
||||
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
|
||||
@td.skip_array_manager_invalid_test
|
||||
def test_partial_set(
|
||||
self,
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
using_copy_on_write,
|
||||
warn_copy_on_write,
|
||||
):
|
||||
# GH #397
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd.copy()
|
||||
exp = ymd.copy()
|
||||
df.loc[2000, 4] = 0
|
||||
exp.iloc[65:85] = 0
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"].loc[2000, 4] = 1
|
||||
df.loc[(2000, 4), "A"] = 1
|
||||
else:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"].loc[2000, 4] = 1
|
||||
exp.iloc[65:85, 0] = 1
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df.loc[2000] = 5
|
||||
exp.iloc[:100] = 5
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
# this works...for now
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"].iloc[14] = 5
|
||||
if using_copy_on_write:
|
||||
assert df["A"].iloc[14] == exp["A"].iloc[14]
|
||||
else:
|
||||
assert df["A"].iloc[14] == 5
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, float])
|
||||
def test_getitem_intkey_leading_level(
|
||||
self, multiindex_year_month_day_dataframe_random_data, dtype
|
||||
):
|
||||
# GH#33355 dont fall-back to positional when leading level is int
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
levels = ymd.index.levels
|
||||
ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
|
||||
ser = ymd["A"]
|
||||
mi = ser.index
|
||||
assert isinstance(mi, MultiIndex)
|
||||
if dtype is int:
|
||||
assert mi.levels[0].dtype == np.dtype(int)
|
||||
else:
|
||||
assert mi.levels[0].dtype == np.float64
|
||||
|
||||
assert 14 not in mi.levels[0]
|
||||
assert not mi.levels[0]._should_fallback_to_positional
|
||||
assert not mi._should_fallback_to_positional
|
||||
|
||||
with pytest.raises(KeyError, match="14"):
|
||||
ser[14]
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc[["foo", "bar"]] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc["foo":"bar"] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame["A"].copy()
|
||||
result = frame["A"].copy()
|
||||
result.loc[["foo", "bar"]] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = frame["A"].copy()
|
||||
result = frame["A"].copy()
|
||||
result.loc["foo":"bar"] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, exp_idx, exp_values",
|
||||
[
|
||||
(
|
||||
slice("2019-2", None),
|
||||
DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
|
||||
[2, 3],
|
||||
),
|
||||
(
|
||||
slice(None, "2019-2"),
|
||||
date_range("2019", periods=2, freq="MS"),
|
||||
[0, 1, 2, 3],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
|
||||
# GH: 25165
|
||||
date_idx = date_range("2019", periods=2, freq="MS")
|
||||
df = DataFrame(
|
||||
list(range(4)),
|
||||
index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
|
||||
)
|
||||
expected = DataFrame(
|
||||
exp_values,
|
||||
index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
|
||||
)
|
||||
result = df[indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis=0)[indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[indexer, :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df2 = df.swaplevel(0, 1).sort_index()
|
||||
expected = expected.swaplevel(0, 1).sort_index()
|
||||
|
||||
result = df2.loc[:, indexer, :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_partial_both_axis():
|
||||
# gh-12660
|
||||
iterables = [["a", "b"], [2, 1]]
|
||||
columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
|
||||
rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((4, 4)), index=rows, columns=columns
|
||||
)
|
||||
expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1)
|
||||
result = df.loc["a", "b"]
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,589 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SettingWithCopyError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_equal(a, b):
|
||||
assert a == b
|
||||
|
||||
|
||||
class TestMultiIndexSetItem:
|
||||
def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
|
||||
target.loc[indexers] = value
|
||||
result = target.loc[indexers]
|
||||
if expected is None:
|
||||
expected = value
|
||||
compare_fn(result, expected)
|
||||
|
||||
def test_setitem_multiindex(self):
|
||||
# GH#7190
|
||||
cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
|
||||
index = MultiIndex.from_product(
|
||||
[np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
|
||||
)
|
||||
t, n = 0, 2
|
||||
|
||||
df = DataFrame(
|
||||
np.nan,
|
||||
columns=cols,
|
||||
index=index,
|
||||
)
|
||||
self.check(target=df, indexers=((t, n), "X"), value=0)
|
||||
|
||||
df = DataFrame(-999, columns=cols, index=index)
|
||||
self.check(target=df, indexers=((t, n), "X"), value=1)
|
||||
|
||||
df = DataFrame(columns=cols, index=index)
|
||||
self.check(target=df, indexers=((t, n), "X"), value=2)
|
||||
|
||||
# gh-7218: assigning with 0-dim arrays
|
||||
df = DataFrame(-999, columns=cols, index=index)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=((t, n), "X"),
|
||||
value=np.array(3),
|
||||
expected=3,
|
||||
)
|
||||
|
||||
def test_setitem_multiindex2(self):
|
||||
# GH#5206
|
||||
df = DataFrame(
|
||||
np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
|
||||
)
|
||||
df["F"] = 99
|
||||
row_selection = df["A"] % 2 == 0
|
||||
col_selection = ["B", "C"]
|
||||
df.loc[row_selection, col_selection] = df["F"]
|
||||
output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
|
||||
tm.assert_frame_equal(df.loc[row_selection, col_selection], output)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(row_selection, col_selection),
|
||||
value=df["F"],
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
expected=output,
|
||||
)
|
||||
|
||||
def test_setitem_multiindex3(self):
|
||||
# GH#11372
|
||||
idx = MultiIndex.from_product(
|
||||
[["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
|
||||
)
|
||||
cols = MultiIndex.from_product(
|
||||
[["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
|
||||
)
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).random((12, 4)), index=idx, columns=cols
|
||||
)
|
||||
|
||||
subidx = MultiIndex.from_arrays(
|
||||
[["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")]
|
||||
)
|
||||
subcols = MultiIndex.from_arrays(
|
||||
[["foo", "foo"], date_range("2016-01-01", "2016-02-01", freq="MS")]
|
||||
)
|
||||
|
||||
vals = DataFrame(
|
||||
np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols
|
||||
)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(subidx, subcols),
|
||||
value=vals,
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
)
|
||||
# set all columns
|
||||
vals = DataFrame(
|
||||
np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols
|
||||
)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(subidx, slice(None, None, None)),
|
||||
value=vals,
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
)
|
||||
# identity
|
||||
copy = df.copy()
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(df.index, df.columns),
|
||||
value=df,
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
expected=copy,
|
||||
)
|
||||
|
||||
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
|
||||
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
|
||||
@td.skip_array_manager_not_yet_implemented
|
||||
def test_multiindex_setitem(self):
|
||||
# GH 3738
|
||||
# setting with a multi-index right hand side
|
||||
arrays = [
|
||||
np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
|
||||
np.array(["one", "two", "one", "one", "two", "one"]),
|
||||
np.arange(0, 6, 1),
|
||||
]
|
||||
|
||||
df_orig = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((6, 3)),
|
||||
index=arrays,
|
||||
columns=["A", "B", "C"],
|
||||
).sort_index()
|
||||
|
||||
expected = df_orig.loc[["bar"]] * 2
|
||||
df = df_orig.copy()
|
||||
df.loc[["bar"]] *= 2
|
||||
tm.assert_frame_equal(df.loc[["bar"]], expected)
|
||||
|
||||
# raise because these have differing levels
|
||||
msg = "cannot align on a multi-index with out specifying the join levels"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.loc["bar"] *= 2
|
||||
|
||||
def test_multiindex_setitem2(self):
|
||||
# from SO
|
||||
# https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
|
||||
df_orig = DataFrame.from_dict(
|
||||
{
|
||||
"price": {
|
||||
("DE", "Coal", "Stock"): 2,
|
||||
("DE", "Gas", "Stock"): 4,
|
||||
("DE", "Elec", "Demand"): 1,
|
||||
("FR", "Gas", "Stock"): 5,
|
||||
("FR", "Solar", "SupIm"): 0,
|
||||
("FR", "Wind", "SupIm"): 0,
|
||||
}
|
||||
}
|
||||
)
|
||||
df_orig.index = MultiIndex.from_tuples(
|
||||
df_orig.index, names=["Sit", "Com", "Type"]
|
||||
)
|
||||
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 1, 3]] *= 2
|
||||
|
||||
idx = pd.IndexSlice
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], :] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], "price"] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_assignment(self):
|
||||
# GH3777 part 2
|
||||
|
||||
# mixed dtype
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
|
||||
columns=list("abc"),
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
df["d"] = np.nan
|
||||
arr = np.array([0.0, 1.0])
|
||||
|
||||
df.loc[4, "d"] = arr
|
||||
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
|
||||
|
||||
def test_multiindex_assignment_single_dtype(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# GH3777 part 2b
|
||||
# single dtype
|
||||
arr = np.array([0.0, 1.0])
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
|
||||
columns=list("abc"),
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
dtype=np.int64,
|
||||
)
|
||||
view = df["c"].iloc[:2].values
|
||||
|
||||
# arr can be losslessly cast to int, so this setitem is inplace
|
||||
# INFO(CoW-warn) this does not warn because we directly took .values
|
||||
# above, so no reference to a pandas object is alive for `view`
|
||||
df.loc[4, "c"] = arr
|
||||
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
|
||||
result = df.loc[4, "c"]
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# extra check for inplace-ness
|
||||
if not using_copy_on_write:
|
||||
tm.assert_numpy_array_equal(view, exp.values)
|
||||
|
||||
# arr + 0.5 cannot be cast losslessly to int, so we upcast
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="item of incompatible dtype"
|
||||
):
|
||||
df.loc[4, "c"] = arr + 0.5
|
||||
result = df.loc[4, "c"]
|
||||
exp = exp + 0.5
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# scalar ok
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
df.loc[4, "c"] = 10
|
||||
exp = Series(10, index=[8, 10], name="c", dtype="float64")
|
||||
tm.assert_series_equal(df.loc[4, "c"], exp)
|
||||
|
||||
# invalid assignments
|
||||
msg = "Must have equal len keys and value when setting with an iterable"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[4, "c"] = [0, 1, 2, 3]
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[4, "c"] = [0]
|
||||
|
||||
# But with a length-1 listlike column indexer this behaves like
|
||||
# `df.loc[4, "c"] = 0
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
df.loc[4, ["c"]] = [0]
|
||||
assert (df.loc[4, "c"] == 0).all()
|
||||
|
||||
def test_groupby_example(self):
|
||||
# groupby example
|
||||
NUM_ROWS = 100
|
||||
NUM_COLS = 10
|
||||
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
|
||||
index_cols = col_names[:5]
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)),
|
||||
dtype=np.int64,
|
||||
columns=col_names,
|
||||
)
|
||||
df = df.set_index(index_cols).sort_index()
|
||||
grp = df.groupby(level=index_cols[:4])
|
||||
df["new_col"] = np.nan
|
||||
|
||||
# we are actually operating on a copy here
|
||||
# but in this case, that's ok
|
||||
for name, df2 in grp:
|
||||
new_vals = np.arange(df2.shape[0])
|
||||
df.loc[name, "new_col"] = new_vals
|
||||
|
||||
def test_series_setitem(
|
||||
self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write
|
||||
):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd["A"]
|
||||
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
s[2000, 3] = np.nan
|
||||
assert isna(s.values[42:65]).all()
|
||||
assert notna(s.values[:42]).all()
|
||||
assert notna(s.values[65:]).all()
|
||||
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
s[2000, 3, 10] = np.nan
|
||||
assert isna(s.iloc[49])
|
||||
|
||||
with pytest.raises(KeyError, match="49"):
|
||||
# GH#33355 dont fall-back to positional when leading level is int
|
||||
s[49]
|
||||
|
||||
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T.copy()
|
||||
values = df.values.copy()
|
||||
|
||||
result = df[df > 0]
|
||||
expected = df.where(df > 0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df[df > 0] = 5
|
||||
values[values > 0] = 5
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
df[df == 5] = 0
|
||||
values[values == 5] = 0
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
# a df that needs alignment first
|
||||
df[df[:-1] < 0] = 2
|
||||
np.putmask(values[:-1], values[:-1] < 0, 2)
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
with pytest.raises(TypeError, match="boolean values only"):
|
||||
df[df * 0] = 2
|
||||
|
||||
def test_frame_getitem_setitem_multislice(self):
|
||||
levels = [["t1", "t2"], ["a", "b", "c"]]
|
||||
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
|
||||
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
|
||||
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
|
||||
|
||||
result = df.loc[:, "value"]
|
||||
tm.assert_series_equal(df["value"], result)
|
||||
|
||||
result = df.loc[df.index[1:3], "value"]
|
||||
tm.assert_series_equal(df["value"][1:3], result)
|
||||
|
||||
result = df.loc[:, :]
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
result = df
|
||||
df.loc[:, "value"] = 10
|
||||
result["value"] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
df.loc[:, :] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
def test_frame_setitem_multi_column(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=[["a", "a", "b", "b"], [0, 1, 0, 1]],
|
||||
)
|
||||
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"]
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
# set with ndarray
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"].values
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
def test_frame_setitem_multi_column2(self):
|
||||
# ---------------------------------------
|
||||
# GH#1803
|
||||
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
|
||||
df = DataFrame(index=[1, 3, 5], columns=columns)
|
||||
|
||||
# Works, but adds a column instead of updating the two existing ones
|
||||
df["A"] = 0.0 # Doesn't work
|
||||
assert (df["A"].values == 0).all()
|
||||
|
||||
# it broadcasts
|
||||
df["B", "1"] = [1, 2, 3]
|
||||
df["A"] = df["B", "1"]
|
||||
|
||||
sliced_a1 = df["A", "1"]
|
||||
sliced_a2 = df["A", "2"]
|
||||
sliced_b1 = df["B", "1"]
|
||||
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
|
||||
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
|
||||
assert sliced_a1.name == ("A", "1")
|
||||
assert sliced_a2.name == ("A", "2")
|
||||
assert sliced_b1.name == ("B", "1")
|
||||
|
||||
def test_loc_getitem_tuple_plus_columns(
|
||||
self, multiindex_year_month_day_dataframe_random_data
|
||||
):
|
||||
# GH #1013
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd[:5]
|
||||
|
||||
result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
|
||||
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
|
||||
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
|
||||
)
|
||||
|
||||
obj = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((len(index), 4)),
|
||||
index=index,
|
||||
columns=["a", "b", "c", "d"],
|
||||
)
|
||||
obj = tm.get_obj(obj, frame_or_series)
|
||||
|
||||
res = obj.loc[1:2]
|
||||
exp = obj.reindex(obj.index[2:])
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
obj.loc[1:2] = 7
|
||||
assert (obj.loc[1:2] == 7).values.all()
|
||||
|
||||
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
dft = frame.T
|
||||
s = dft["foo", "two"]
|
||||
dft["foo", "two"] = s > s.median()
|
||||
tm.assert_series_equal(dft["foo", "two"], s > s.median())
|
||||
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
|
||||
|
||||
reindexed = dft.reindex(columns=[("foo", "two")])
|
||||
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
|
||||
|
||||
    def test_set_column_scalar_with_loc(
        self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
    ):
        frame = multiindex_dataframe_random_data
        subset = frame.index[[1, 4, 5]]

        frame.loc[subset] = 99
        assert (frame.loc[subset].values == 99).all()

        frame_original = frame.copy()
        col = frame["B"]
        with tm.assert_cow_warning(warn_copy_on_write):
            col[subset] = 97
        if using_copy_on_write:
            # chained setitem doesn't work with CoW
            tm.assert_frame_equal(frame, frame_original)
        else:
            assert (frame.loc[subset, "B"] == 97).all()

    def test_nonunique_assignment_1750(self):
        df = DataFrame(
            [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
        )

        df = df.set_index(["A", "B"])
        mi = MultiIndex.from_tuples([(1, 1)])

        df.loc[mi, "C"] = "_"

        assert (df.xs((1, 1))["C"] == "_").all()

    def test_astype_assignment_with_dups(self):
        # GH 4686
        # assignment with dups that has a dtype change
        cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
        df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
        index = df.index.copy()

        df["A"] = df["A"].astype(np.float64)
        tm.assert_index_equal(df.index, index)

    def test_setitem_nonmonotonic(self):
        # https://github.com/pandas-dev/pandas/issues/31449
        index = MultiIndex.from_tuples(
            [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
        )
        df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
        df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
        expected = DataFrame({"e": [99, 1, 100]}, index=index)
        tm.assert_frame_equal(df, expected)


class TestSetitemWithExpansionMultiIndex:
    def test_setitem_new_column_mixed_depth(self):
        arrays = [
            ["a", "top", "top", "routine1", "routine1", "routine2"],
            ["", "OD", "OD", "result1", "result2", "result1"],
            ["", "wx", "wy", "", "", ""],
        ]

        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)

        result = df.copy()
        expected = df.copy()
        result["b"] = [1, 2, 3, 4]
        expected["b", "", ""] = [1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

    def test_setitem_new_column_all_na(self):
        # GH#1534
        mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
        df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
        s = Series({(1, 1): 1, (1, 2): 2})
        df["new"] = s
        assert df["new"].isna().all()

    def test_setitem_enlargement_keep_index_names(self):
        # GH#53053
        mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"])
        df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"])
        df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)]
        mi_expected = MultiIndex.from_tuples(
            [(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"]
        )
        expected = DataFrame(
            data=[[10, 20, 30], [10, 20, 30]],
            index=mi_expected,
            columns=["A", "B", "C"],
        )
        tm.assert_frame_equal(df, expected)


@td.skip_array_manager_invalid_test  # df["foo"] select multiple columns -> .values
# is not a view
def test_frame_setitem_view_direct(
    multiindex_dataframe_random_data, using_copy_on_write
):
    # this works because we are modifying the underlying array
    # really a no-no
    df = multiindex_dataframe_random_data.T
    if using_copy_on_write:
        with pytest.raises(ValueError, match="read-only"):
            df["foo"].values[:] = 0
        assert (df["foo"].values != 0).all()
    else:
        df["foo"].values[:] = 0
        assert (df["foo"].values == 0).all()


def test_frame_setitem_copy_raises(
    multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    # will raise/warn as it's a chained assignment
    df = multiindex_dataframe_random_data.T
    if using_copy_on_write or warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["foo"]["one"] = 2
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["foo"]["one"] = 2


def test_frame_setitem_copy_no_write(
    multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    frame = multiindex_dataframe_random_data.T
    expected = frame
    df = frame.copy()
    if using_copy_on_write or warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["foo"]["one"] = 2
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["foo"]["one"] = 2

    result = df
    tm.assert_frame_equal(result, expected)


def test_frame_setitem_partial_multiindex():
    # GH 54875
    df = DataFrame(
        {
            "a": [1, 2, 3],
            "b": [3, 4, 5],
            "c": 6,
            "d": 7,
        }
    ).set_index(["a", "b", "c"])
    ser = Series(8, index=df.index.droplevel("c"))
    result = df.copy()
    result["d"] = ser
    expected = df.copy()
    expected["d"] = 8
    tm.assert_frame_equal(result, expected)
@ -0,0 +1,796 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
|
||||
|
||||
class TestMultiIndexSlicers:
|
||||
def test_per_axis_per_level_getitem(self):
|
||||
# GH6134
|
||||
# example test case
|
||||
ix = MultiIndex.from_product(
|
||||
[_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)
|
||||
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C2", "C3")
|
||||
]
|
||||
]
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# test multi-index slicing with per axis and per index controls
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
|
||||
)
|
||||
df = df.sort_index(axis=0).sort_index(axis=1)
|
||||
|
||||
# identity
|
||||
result = df.loc[(slice(None), slice(None)), :]
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = df.loc[:, (slice(None), slice(None))]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
# index
|
||||
result = df.loc[(slice(None), [1]), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), 1), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# columns
|
||||
result = df.loc[:, (slice(None), ["foo"])]
|
||||
expected = df.iloc[:, [1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# both
|
||||
result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
|
||||
expected = df.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc["A", "a"]
|
||||
expected = DataFrame(
|
||||
{"bar": [1, 5, 9], "foo": [0, 4, 8]},
|
||||
index=Index([1, 2, 3], name="two"),
|
||||
columns=Index(["bar", "foo"], name="lvl1"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), [1, 2]), :]
|
||||
expected = df.iloc[[0, 1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# multi-level series
|
||||
s = Series(np.arange(len(ix.to_numpy())), index=ix)
|
||||
result = s.loc["A1":"A3", :, ["C1", "C3"]]
|
||||
expected = s.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in s.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# boolean indexers
|
||||
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
|
||||
expected = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
msg = (
|
||||
"cannot index with a boolean indexer "
|
||||
"that is not the same length as the index"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), np.array([True, False])), :]
|
||||
|
||||
with pytest.raises(KeyError, match=r"\[1\] not in index"):
|
||||
# slice(None) is on the index, [1] is on the columns, but 1 is
|
||||
# not in the columns, so we raise
|
||||
# This used to treat [1] as positional GH#16396
|
||||
df.loc[slice(None), [1]]
|
||||
|
||||
# not lexsorted
|
||||
assert df.index._lexsort_depth == 2
|
||||
df = df.sort_index(level=1, axis=0)
|
||||
assert df.index._lexsort_depth == 0
|
||||
|
||||
msg = (
|
||||
"MultiIndex slicing requires the index to be "
|
||||
r"lexsorted: slicing on levels \[1\], lexsort depth 0"
|
||||
)
|
||||
with pytest.raises(UnsortedIndexError, match=msg):
|
||||
df.loc[(slice(None), slice("bar")), :]
|
||||
|
||||
# GH 16734: not sorted, but no real slicing
|
||||
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
|
||||
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
|
||||
|
||||
def test_multiindex_slicers_non_unique(self):
|
||||
# GH 7106
|
||||
# non-unique mi index support
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 3],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# this is equivalent of an xs expression
|
||||
result = df.xs(1, level=2, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 2],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
assert not result.index.is_unique
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH12896
|
||||
# numpy-implementation dependent bug
|
||||
ints = [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
14,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
200000,
|
||||
200000,
|
||||
]
|
||||
n = len(ints)
|
||||
idx = MultiIndex.from_arrays([["a"] * n, ints])
|
||||
result = Series([1] * n, index=idx)
|
||||
result = result.sort_index()
|
||||
result = result.loc[(slice(None), slice(100000))]
|
||||
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_datetimelike(self):
|
||||
# GH 7429
|
||||
# buggy/inconsistent behavior when slicing with datetime-like
|
||||
dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)]
|
||||
freq = [1, 2]
|
||||
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
|
||||
index=index,
|
||||
columns=list("ABCD"),
|
||||
)
|
||||
|
||||
# multi-axis slicing
|
||||
idx = pd.IndexSlice
|
||||
expected = df.iloc[[0, 2, 4], [0, 1]]
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
slice(1, 1),
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
idx[
|
||||
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
|
||||
],
|
||||
idx[1:1],
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
1,
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with strings
|
||||
result = df.loc[
|
||||
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_edges(self):
|
||||
# GH 8132
|
||||
# various edge cases
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
|
||||
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
|
||||
"DATE": [
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
"2013-10-01",
|
||||
"2013-07-09",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
],
|
||||
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
|
||||
}
|
||||
)
|
||||
|
||||
df["DATE"] = pd.to_datetime(df["DATE"])
|
||||
df1 = df.set_index(["A", "B", "DATE"])
|
||||
df1 = df1.sort_index()
|
||||
|
||||
# A1 - Get all values under "A0" and "A1"
|
||||
result = df1.loc[(slice("A1")), :]
|
||||
expected = df1.iloc[0:10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A2 - Get all values from the start to "A2"
|
||||
result = df1.loc[(slice("A2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A3 - Get all values under "B1" or "B2"
|
||||
result = df1.loc[(slice(None), slice("B1", "B2")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A4 - Get all values between 2013-07-02 and 2013-07-09
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B1 - Get all values in B0 that are also under A0, A1 and A2
|
||||
result = df1.loc[(slice("A2"), slice("B0")), :]
|
||||
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
|
||||
# the As)
|
||||
result = df1.loc[(slice(None), slice("B2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B3 - Get all values from B1 to B2 and up to 2013-08-06
|
||||
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B4 - Same as A4 but the start of the date slice is not a key.
|
||||
# shows indexing on a partial selection slice
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_doc_examples(self):
|
||||
# test index maker
|
||||
idx = pd.IndexSlice
|
||||
|
||||
# from indexing.rst / advanced
|
||||
index = MultiIndex.from_product(
|
||||
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
df = DataFrame(
|
||||
np.arange(len(index) * len(columns), dtype="int64").reshape(
|
||||
(len(index), len(columns))
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[idx[:, :, ["C1", "C3"]], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# not sorted
|
||||
msg = (
|
||||
"MultiIndex slicing requires the index to be lexsorted: "
|
||||
r"slicing on levels \[1\], lexsort depth 1"
|
||||
)
|
||||
with pytest.raises(UnsortedIndexError, match=msg):
|
||||
df.loc["A1", ("a", slice("foo"))]
|
||||
|
||||
# GH 16734: not sorted, but no real slicing
|
||||
tm.assert_frame_equal(
|
||||
df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]]
|
||||
)
|
||||
|
||||
df = df.sort_index(axis=1)
|
||||
|
||||
# slicing
|
||||
df.loc["A1", (slice(None), "foo")]
|
||||
df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]
|
||||
|
||||
# setitem
|
||||
df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
|
||||
|
||||
def test_loc_axis_arguments(self):
|
||||
index = MultiIndex.from_product(
|
||||
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
df = (
|
||||
DataFrame(
|
||||
np.arange(len(index) * len(columns), dtype="int64").reshape(
|
||||
(len(index), len(columns))
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
.sort_index()
|
||||
.sort_index(axis=1)
|
||||
)
|
||||
|
||||
# axis 0
|
||||
result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis="index")[:, :, ["C1", "C3"]]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis 1
|
||||
result = df.loc(axis=1)[:, "foo"]
|
||||
expected = df.loc[:, (slice(None), "foo")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis="columns")[:, "foo"]
|
||||
expected = df.loc[:, (slice(None), "foo")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# invalid axis
|
||||
for i in [-1, 2, "foo"]:
|
||||
msg = f"No axis named {i} for object type DataFrame"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc(axis=i)[:, :, ["C1", "C3"]]
|
||||
|
||||
def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1":"a2"]
|
||||
expected = df.iloc[:, :-3]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1"]
|
||||
expected = df.iloc[:, :3]
|
||||
expected.columns = ["b1", "b2", "b3"]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_ax_single_level_indexer_simple_df(self):
|
||||
# GH29519
|
||||
# test single level indexing on single index column data frame
|
||||
df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
|
||||
result = df.loc(axis=1)["a"]
|
||||
expected = Series(np.array([0, 3, 6]), name="a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_setitem(self):
|
||||
# test index maker
|
||||
idx = pd.IndexSlice
|
||||
|
||||
# test multi-index slicing with per axis and per index controls
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
|
||||
df_orig = DataFrame(
|
||||
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
|
||||
)
|
||||
df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
|
||||
|
||||
# identity
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), slice(None)), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc(axis=0)[:, :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[:, (slice(None), slice(None))] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# index
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), [1]), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc(axis=0)[:, 1] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# columns
|
||||
df = df_orig.copy()
|
||||
df.loc[:, (slice(None), ["foo"])] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# both
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, 1], idx[:, ["foo"]]] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["A", "a"] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[0:3, 0:2] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# setting with a list-like
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[[100, 100], [100, 100]], dtype="int64"
|
||||
)
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# not enough values
|
||||
df = df_orig.copy()
|
||||
|
||||
msg = "setting an array element with a sequence."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[[100], [100, 100]], dtype="int64"
|
||||
)
|
||||
|
||||
msg = "Must have equal len keys and value when setting with an iterable"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[100, 100, 100, 100], dtype="int64"
|
||||
)
|
||||
|
||||
# with an alignable rhs
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = (
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5
|
||||
)
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[
|
||||
(slice(None), 1), (slice(None), ["foo"])
|
||||
]
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy()
|
||||
rhs.loc[:, ("c", "bah")] = 10
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_label_slicing_with_negative_step(self):
|
||||
ser = Series(
|
||||
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
|
||||
)
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
|
||||
tm.assert_indexing_slices_equivalent(
|
||||
ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
|
||||
)
|
||||
|
||||
def test_multiindex_slice_first_level(self):
|
||||
# GH 12697
|
||||
freq = ["a", "b", "c", "d"]
|
||||
idx = MultiIndex.from_product([freq, range(500)])
|
||||
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
|
||||
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
|
||||
result = df_slice.loc["a"]
|
||||
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df_slice.loc["d"]
|
||||
expected = DataFrame(
|
||||
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd["A"]
|
||||
result = s[5:]
|
||||
expected = s.reindex(s.index[5:])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = ymd["A"].copy()
|
||||
exp = ymd["A"].copy()
|
||||
s[5:] = 0
|
||||
exp.iloc[5:] = 0
|
||||
tm.assert_numpy_array_equal(s.values, exp.values)
|
||||
|
||||
result = ymd[5:]
|
||||
expected = ymd.reindex(s.index[5:])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, loc, iloc",
|
||||
[
|
||||
# dtype = int, step = -1
|
||||
("int", slice(None, None, -1), slice(None, None, -1)),
|
||||
("int", slice(3, None, -1), slice(3, None, -1)),
|
||||
("int", slice(None, 1, -1), slice(None, 0, -1)),
|
||||
("int", slice(3, 1, -1), slice(3, 0, -1)),
|
||||
# dtype = int, step = -2
|
||||
("int", slice(None, None, -2), slice(None, None, -2)),
|
||||
("int", slice(3, None, -2), slice(3, None, -2)),
|
||||
("int", slice(None, 1, -2), slice(None, 0, -2)),
|
||||
("int", slice(3, 1, -2), slice(3, 0, -2)),
|
||||
# dtype = str, step = -1
|
||||
("str", slice(None, None, -1), slice(None, None, -1)),
|
||||
("str", slice("d", None, -1), slice(3, None, -1)),
|
||||
("str", slice(None, "b", -1), slice(None, 0, -1)),
|
||||
("str", slice("d", "b", -1), slice(3, 0, -1)),
|
||||
# dtype = str, step = -2
|
||||
("str", slice(None, None, -2), slice(None, None, -2)),
|
||||
("str", slice("d", None, -2), slice(3, None, -2)),
|
||||
("str", slice(None, "b", -2), slice(None, 0, -2)),
|
||||
("str", slice("d", "b", -2), slice(3, 0, -2)),
|
||||
],
|
||||
)
|
||||
def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
|
||||
# GH#38071
|
||||
labels = {
|
||||
"str": list("abcde"),
|
||||
"int": range(5),
|
||||
}[dtype]
|
||||
|
||||
mi = MultiIndex.from_arrays([labels] * 2)
|
||||
df = DataFrame(1.0, index=mi, columns=["A"])
|
||||
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
expected = df.iloc[iloc, :]
|
||||
result_get_loc = df.loc[SLC[loc], :]
|
||||
result_get_locs_level_0 = df.loc[SLC[loc, :], :]
|
||||
result_get_locs_level_1 = df.loc[SLC[:, loc], :]
|
||||
|
||||
tm.assert_frame_equal(result_get_loc, expected)
|
||||
tm.assert_frame_equal(result_get_locs_level_0, expected)
|
||||
tm.assert_frame_equal(result_get_locs_level_1, expected)
|
@ -0,0 +1,153 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted:
|
||||
def test_getitem_multilevel_index_tuple_not_sorted(self):
|
||||
index_columns = list("abc")
|
||||
df = DataFrame(
|
||||
[[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"]
|
||||
)
|
||||
df = df.set_index(index_columns)
|
||||
query_index = df.index[:1]
|
||||
rs = df.loc[query_index, "data"]
|
||||
|
||||
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"])
|
||||
xp = Series(["x"], index=xp_idx, name="data")
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.sort_index(level=1).T
|
||||
|
||||
# buglet with int typechecking
|
||||
result = df.iloc[:, : np.int32(3)]
|
||||
expected = df.reindex(columns=df.columns[:3])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("key", [None, lambda x: x])
|
||||
def test_frame_getitem_not_sorted2(self, key):
|
||||
# GH 13431
|
||||
df = DataFrame(
|
||||
{
|
||||
"col1": ["b", "d", "b", "a"],
|
||||
"col2": [3, 1, 1, 2],
|
||||
"data": ["one", "two", "three", "four"],
|
||||
}
|
||||
)
|
||||
|
||||
df2 = df.set_index(["col1", "col2"])
|
||||
df2_original = df2.copy()
|
||||
|
||||
df2.index = df2.index.set_levels(["b", "d", "a"], level="col1")
|
||||
df2.index = df2.index.set_codes([0, 1, 0, 2], level="col1")
|
||||
assert not df2.index.is_monotonic_increasing
|
||||
|
||||
assert df2_original.index.equals(df2.index)
|
||||
expected = df2.sort_index(key=key)
|
||||
assert expected.index.is_monotonic_increasing
|
||||
|
||||
result = df2.sort_index(level=0, key=key)
|
||||
assert result.index.is_monotonic_increasing
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_sort_values_key(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
index = index.sort_values( # sort by third letter
|
||||
key=lambda x: x.map(lambda entry: entry[2])
|
||||
)
|
||||
result = DataFrame(range(8), index=index)
|
||||
|
||||
arrays = [
|
||||
["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
expected = DataFrame(range(8), index=index)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_argsort_with_na(self):
|
||||
# GH48495
|
||||
arrays = [
|
||||
array([2, NA, 1], dtype="Int64"),
|
||||
array([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
result = index.argsort()
|
||||
expected = np.array([2, 0, 1], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_sort_values_with_na(self):
|
||||
# GH48495
|
||||
arrays = [
|
||||
array([2, NA, 1], dtype="Int64"),
|
||||
array([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
index = index.sort_values()
|
||||
result = DataFrame(range(3), index=index)
|
||||
|
||||
arrays = [
|
||||
array([1, 2, NA], dtype="Int64"),
|
||||
array([3, 1, 2], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
expected = DataFrame(range(3), index=index)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T
|
||||
df["foo", "four"] = "foo"
|
||||
|
||||
arrays = [np.array(x) for x in zip(*df.columns.values)]
|
||||
|
||||
result = df["foo"]
|
||||
result2 = df.loc[:, "foo"]
|
||||
expected = df.reindex(columns=df.columns[arrays[0] == "foo"])
|
||||
expected.columns = expected.columns.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
df = df.T
|
||||
result = df.xs("foo")
|
||||
result2 = df.loc["foo"]
|
||||
expected = df.reindex(df.index[arrays[0] == "foo"])
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
def test_series_getitem_not_sorted(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
s = Series(np.random.default_rng(2).standard_normal(8), index=index)
|
||||
|
||||
arrays = [np.array(x) for x in zip(*index.values)]
|
||||
|
||||
result = s["qux"]
|
||||
result2 = s.loc["qux"]
|
||||
expected = s[arrays[0] == "qux"]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_series_equal(result2, expected)
|
@ -0,0 +1,257 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timezone,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_at_timezone():
|
||||
# https://github.com/pandas-dev/pandas/issues/33544
|
||||
result = DataFrame({"foo": [datetime(2000, 1, 1)]})
|
||||
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
|
||||
result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
|
||||
expected = DataFrame(
|
||||
{"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_selection_methods_of_assigned_col():
|
||||
# GH 29282
|
||||
df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
df2 = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0])
|
||||
df["c"] = df2["c"]
|
||||
df.at[1, "c"] = 11
|
||||
result = df
|
||||
expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [9, 11, 7]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.at[1, "c"]
|
||||
assert result == 11
|
||||
|
||||
result = df["c"]
|
||||
expected = Series([9, 11, 7], name="c")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df[["c"]]
|
||||
expected = DataFrame({"c": [9, 11, 7]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestAtSetItem:
|
||||
def test_at_setitem_item_cache_cleared(self):
|
||||
# GH#22372 Note the multi-step construction is necessary to trigger
|
||||
# the original bug. pandas/issues/22372#issuecomment-413345309
|
||||
df = DataFrame(index=[0])
|
||||
df["x"] = 1
|
||||
df["cost"] = 2
|
||||
|
||||
# accessing df["cost"] adds "cost" to the _item_cache
|
||||
df["cost"]
|
||||
|
||||
# This loc[[0]] lookup used to call _consolidate_inplace at the
|
||||
# BlockManager level, which failed to clear the _item_cache
|
||||
df.loc[[0]]
|
||||
|
||||
df.at[0, "x"] = 4
|
||||
df.at[0, "cost"] = 789
|
||||
|
||||
expected = DataFrame(
|
||||
{"x": [4], "cost": 789},
|
||||
index=[0],
|
||||
columns=Index(["x", "cost"], dtype=object),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# And in particular, check that the _item_cache has updated correctly.
|
||||
tm.assert_series_equal(df["cost"], expected["cost"])
|
||||
|
||||
def test_at_setitem_mixed_index_assignment(self):
|
||||
# GH#19860
|
||||
ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
|
||||
ser.at["a"] = 11
|
||||
assert ser.iat[0] == 11
|
||||
ser.at[1] = 22
|
||||
assert ser.iat[3] == 22
|
||||
|
||||
def test_at_setitem_categorical_missing(self):
|
||||
df = DataFrame(
|
||||
index=range(3), columns=range(3), dtype=CategoricalDtype(["foo", "bar"])
|
||||
)
|
||||
df.at[1, 1] = "foo"
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
[np.nan, np.nan, np.nan],
|
||||
[np.nan, "foo", np.nan],
|
||||
[np.nan, np.nan, np.nan],
|
||||
],
|
||||
dtype=CategoricalDtype(["foo", "bar"]),
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_at_setitem_multiindex(self):
|
||||
df = DataFrame(
|
||||
np.zeros((3, 2), dtype="int64"),
|
||||
columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
|
||||
)
|
||||
df.at[0, "a"] = 10
|
||||
expected = DataFrame(
|
||||
[[10, 10], [0, 0], [0, 0]],
|
||||
columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize("row", (Timestamp("2019-01-01"), "2019-01-01"))
|
||||
def test_at_datetime_index(self, row):
|
||||
# Set float64 dtype to avoid upcast when setting .5
|
||||
df = DataFrame(
|
||||
data=[[1] * 2], index=DatetimeIndex(data=["2019-01-01", "2019-01-02"])
|
||||
).astype({0: "float64"})
|
||||
expected = DataFrame(
|
||||
data=[[0.5, 1], [1.0, 1]],
|
||||
index=DatetimeIndex(data=["2019-01-01", "2019-01-02"]),
|
||||
)
|
||||
|
||||
df.at[row, 0] = 0.5
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestAtSetItemWithExpansion:
|
||||
def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture):
|
||||
# GH#25506
|
||||
ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture)
|
||||
result = Series(ts)
|
||||
result.at[1] = ts
|
||||
expected = Series([ts, ts])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestAtWithDuplicates:
|
||||
def test_at_with_duplicate_axes_requires_scalar_lookup(self):
|
||||
# GH#33041 check that falling back to loc doesn't allow non-scalar
|
||||
# args to slip in
|
||||
|
||||
arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2)
|
||||
df = DataFrame(arr, columns=["A", "A"])
|
||||
|
||||
msg = "Invalid call for scalar access"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.at[[1, 2]]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.at[1, ["A"]]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.at[:, "A"]
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.at[[1, 2]] = 1
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.at[1, ["A"]] = 1
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.at[:, "A"] = 1
|
||||
|
||||
|
||||
class TestAtErrors:
|
||||
# TODO: De-duplicate/parametrize
|
||||
# test_at_series_raises_key_error2, test_at_frame_raises_key_error2
|
||||
|
||||
def test_at_series_raises_key_error(self, indexer_al):
|
||||
# GH#31724 .at should match .loc
|
||||
|
||||
ser = Series([1, 2, 3], index=[3, 2, 1])
|
||||
result = indexer_al(ser)[1]
|
||||
assert result == 3
|
||||
|
||||
with pytest.raises(KeyError, match="a"):
|
||||
indexer_al(ser)["a"]
|
||||
|
||||
def test_at_frame_raises_key_error(self, indexer_al):
|
||||
# GH#31724 .at should match .loc
|
||||
|
||||
df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
|
||||
|
||||
result = indexer_al(df)[1, 0]
|
||||
assert result == 3
|
||||
|
||||
with pytest.raises(KeyError, match="a"):
|
||||
indexer_al(df)["a", 0]
|
||||
|
||||
with pytest.raises(KeyError, match="a"):
|
||||
indexer_al(df)[1, "a"]
|
||||
|
||||
def test_at_series_raises_key_error2(self, indexer_al):
|
||||
# at should not fallback
|
||||
# GH#7814
|
||||
# GH#31724 .at should match .loc
|
||||
ser = Series([1, 2, 3], index=list("abc"))
|
||||
result = indexer_al(ser)["a"]
|
||||
assert result == 1
|
||||
|
||||
with pytest.raises(KeyError, match="^0$"):
|
||||
indexer_al(ser)[0]
|
||||
|
||||
def test_at_frame_raises_key_error2(self, indexer_al):
|
||||
# GH#31724 .at should match .loc
|
||||
df = DataFrame({"A": [1, 2, 3]}, index=list("abc"))
|
||||
result = indexer_al(df)["a", "A"]
|
||||
assert result == 1
|
||||
|
||||
with pytest.raises(KeyError, match="^0$"):
|
||||
indexer_al(df)["a", 0]
|
||||
|
||||
def test_at_frame_multiple_columns(self):
|
||||
# GH#48296 - at shouldn't modify multiple columns
|
||||
df = DataFrame({"a": [1, 2], "b": [3, 4]})
|
||||
new_row = [6, 7]
|
||||
with pytest.raises(
|
||||
InvalidIndexError,
|
||||
match=f"You can only assign a scalar value not a \\{type(new_row)}",
|
||||
):
|
||||
df.at[5] = new_row
|
||||
|
||||
def test_at_getitem_mixed_index_no_fallback(self):
|
||||
# GH#19860
|
||||
ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
|
||||
with pytest.raises(KeyError, match="^0$"):
|
||||
ser.at[0]
|
||||
with pytest.raises(KeyError, match="^4$"):
|
||||
ser.at[4]
|
||||
|
||||
def test_at_categorical_integers(self):
|
||||
# CategoricalIndex with integer categories that don't happen to match
|
||||
# the Categorical's codes
|
||||
ci = CategoricalIndex([3, 4])
|
||||
|
||||
arr = np.arange(4).reshape(2, 2)
|
||||
frame = DataFrame(arr, index=ci)
|
||||
|
||||
for df in [frame, frame.T]:
|
||||
for key in [0, 1]:
|
||||
with pytest.raises(KeyError, match=str(key)):
|
||||
df.at[key, key]
|
||||
|
||||
def test_at_applied_for_rows(self):
|
||||
# GH#48729 .at should raise InvalidIndexError when assigning rows
|
||||
df = DataFrame(index=["a"], columns=["col1", "col2"])
|
||||
new_row = [123, 15]
|
||||
with pytest.raises(
|
||||
InvalidIndexError,
|
||||
match=f"You can only assign a scalar value not a \\{type(new_row)}",
|
||||
):
|
||||
df.at["a"] = new_row
|
@ -0,0 +1,573 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df():
|
||||
return DataFrame(
|
||||
{
|
||||
"A": np.arange(6, dtype="int64"),
|
||||
},
|
||||
index=CategoricalIndex(
|
||||
list("aabbca"), dtype=CategoricalDtype(list("cab")), name="B"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df2():
|
||||
return DataFrame(
|
||||
{
|
||||
"A": np.arange(6, dtype="int64"),
|
||||
},
|
||||
index=CategoricalIndex(
|
||||
list("aabbca"), dtype=CategoricalDtype(list("cabe")), name="B"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class TestCategoricalIndex:
|
||||
def test_loc_scalar(self, df):
|
||||
dtype = CategoricalDtype(list("cab"))
|
||||
result = df.loc["a"]
|
||||
bidx = Series(list("aaa"), name="B").astype(dtype)
|
||||
assert bidx.dtype == dtype
|
||||
|
||||
expected = DataFrame({"A": [0, 1, 5]}, index=Index(bidx))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = df.copy()
|
||||
df.loc["a"] = 20
|
||||
bidx2 = Series(list("aabbca"), name="B").astype(dtype)
|
||||
assert bidx2.dtype == dtype
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": [20, 20, 2, 3, 4, 20],
|
||||
},
|
||||
index=Index(bidx2),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# value not in the categories
|
||||
with pytest.raises(KeyError, match=r"^'d'$"):
|
||||
df.loc["d"]
|
||||
|
||||
df2 = df.copy()
|
||||
expected = df2.copy()
|
||||
expected.index = expected.index.astype(object)
|
||||
expected.loc["d"] = 10
|
||||
df2.loc["d"] = 10
|
||||
tm.assert_frame_equal(df2, expected)
|
||||
|
||||
def test_loc_setitem_with_expansion_non_category(self, df):
|
||||
# Setting-with-expansion with a new key "d" that is not among the categories
|
||||
df.loc["a"] = 20
|
||||
|
||||
# Setting a new row on an existing column
|
||||
df3 = df.copy()
|
||||
df3.loc["d", "A"] = 10
|
||||
bidx3 = Index(list("aabbcad"), name="B")
|
||||
expected3 = DataFrame(
|
||||
{
|
||||
"A": [20, 20, 2, 3, 4, 20, 10.0],
|
||||
},
|
||||
index=Index(bidx3),
|
||||
)
|
||||
tm.assert_frame_equal(df3, expected3)
|
||||
|
||||
# Setting a new row _and_ new column
|
||||
df4 = df.copy()
|
||||
df4.loc["d", "C"] = 10
|
||||
expected3 = DataFrame(
|
||||
{
|
||||
"A": [20, 20, 2, 3, 4, 20, np.nan],
|
||||
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
|
||||
},
|
||||
index=Index(bidx3),
|
||||
)
|
||||
tm.assert_frame_equal(df4, expected3)
|
||||
|
||||
def test_loc_getitem_scalar_non_category(self, df):
|
||||
with pytest.raises(KeyError, match="^1$"):
|
||||
df.loc[1]
|
||||
|
||||
def test_slicing(self):
|
||||
cat = Series(Categorical([1, 2, 3, 4]))
|
||||
reverse = cat[::-1]
|
||||
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(reverse.__array__(), exp)
|
||||
|
||||
df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
|
||||
df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||||
|
||||
expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
|
||||
result = df.iloc[10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
{"value": np.arange(11, 21).astype("int64")},
|
||||
index=np.arange(10, 20).astype("int64"),
|
||||
)
|
||||
expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||||
result = df.iloc[10:20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
|
||||
result = df.loc[8]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_slicing_and_getting_ops(self):
|
||||
# systematically test the slicing operations:
|
||||
# for all slicing ops:
|
||||
# - returning a dataframe
|
||||
# - returning a column
|
||||
# - returning a row
|
||||
# - returning a single value
|
||||
|
||||
cats = Categorical(
|
||||
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 3, 4, 5, 6, 7]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
# the expected values
|
||||
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
|
||||
idx2 = Index(["j", "k"])
|
||||
values2 = [3, 4]
|
||||
|
||||
# 2:4,: | "j":"k",:
|
||||
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
|
||||
|
||||
# :,"cats" | :,0
|
||||
exp_col = Series(cats, index=idx, name="cats")
|
||||
|
||||
# "j",: | 2,:
|
||||
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")
|
||||
|
||||
# "j","cats | 2,0
|
||||
exp_val = "b"
|
||||
|
||||
# iloc
|
||||
# frame
|
||||
res_df = df.iloc[2:4, :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
# row
|
||||
res_row = df.iloc[2, :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert isinstance(res_col.dtype, CategoricalDtype)
|
||||
|
||||
# single value
|
||||
res_val = df.iloc[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# loc
|
||||
# frame
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert isinstance(res_col.dtype, CategoricalDtype)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", df.columns[0]]
|
||||
assert res_val == exp_val
|
||||
|
||||
# iat
|
||||
res_val = df.iat[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# at
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# fancy indexing
|
||||
exp_fancy = df.iloc[[2]]
|
||||
|
||||
res_fancy = df[df["cats"] == "b"]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
res_fancy = df[df["values"] == 3]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
|
||||
# get_value
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# i : int, slice, or sequence of integers
|
||||
res_row = df.iloc[2]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
res_df = df.iloc[slice(2, 4)]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
res_df = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert isinstance(res_col.dtype, CategoricalDtype)
|
||||
|
||||
res_df = df.iloc[:, slice(0, 2)]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
res_df = df.iloc[:, [0, 1]]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
def test_slicing_doc_examples(self):
|
||||
# GH 7918
|
||||
cats = Categorical(
|
||||
["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 2, 2, 3, 4, 5]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
result = df.iloc[2:4, :]
|
||||
expected = DataFrame(
|
||||
{
|
||||
"cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
|
||||
"values": [2, 2],
|
||||
},
|
||||
index=["j", "k"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.iloc[2:4, :].dtypes
|
||||
expected = Series(["category", "int64"], ["cats", "values"], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", "cats"]
|
||||
expected = Series(
|
||||
Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
|
||||
index=["h", "i", "j"],
|
||||
name="cats",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", df.columns[0:1]]
|
||||
expected = DataFrame(
|
||||
{"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
|
||||
index=["h", "i", "j"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_listlike_labels(self, df):
|
||||
# list of labels
|
||||
result = df.loc[["c", "a"]]
|
||||
expected = df.iloc[[4, 0, 1, 5]]
|
||||
tm.assert_frame_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_loc_getitem_listlike_unused_category(self, df2):
|
||||
# GH#37901 a label that is in index.categories but not in index
|
||||
# listlike containing an element in the categories but not in the values
|
||||
with pytest.raises(KeyError, match=re.escape("['e'] not in index")):
|
||||
df2.loc[["a", "b", "e"]]
|
||||
|
||||
def test_loc_getitem_label_unused_category(self, df2):
|
||||
# element in the categories but not in the values
|
||||
with pytest.raises(KeyError, match=r"^'e'$"):
|
||||
df2.loc["e"]
|
||||
|
||||
def test_loc_getitem_non_category(self, df2):
|
||||
# not all labels in the categories
|
||||
with pytest.raises(KeyError, match=re.escape("['d'] not in index")):
|
||||
df2.loc[["a", "d"]]
|
||||
|
||||
def test_loc_setitem_expansion_label_unused_category(self, df2):
|
||||
# assigning with a label that is in the categories but not in the index
|
||||
df = df2.copy()
|
||||
df.loc["e"] = 20
|
||||
result = df.loc[["a", "b", "e"]]
|
||||
exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
|
||||
expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_listlike_dtypes(self):
|
||||
# GH 11586
|
||||
|
||||
# unique categories and codes
|
||||
index = CategoricalIndex(["a", "b", "c"])
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)
|
||||
|
||||
# unique slice
|
||||
res = df.loc[["a", "b"]]
|
||||
exp_index = CategoricalIndex(["a", "b"], categories=index.categories)
|
||||
exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
# duplicated slice
|
||||
res = df.loc[["a", "a", "b"]]
|
||||
|
||||
exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories)
|
||||
exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
|
||||
df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_listlike_dtypes_duplicated_categories_and_codes(self):
|
||||
# duplicated categories and codes
|
||||
index = CategoricalIndex(["a", "b", "a"])
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)
|
||||
|
||||
# unique slice
|
||||
res = df.loc[["a", "b"]]
|
||||
exp = DataFrame(
|
||||
{"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"])
|
||||
)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
# duplicated slice
|
||||
res = df.loc[["a", "a", "b"]]
|
||||
exp = DataFrame(
|
||||
{"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]},
|
||||
index=CategoricalIndex(["a", "a", "a", "a", "b"]),
|
||||
)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
|
||||
df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_listlike_dtypes_unused_category(self):
|
||||
# contains unused category
|
||||
index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
|
||||
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)
|
||||
|
||||
res = df.loc[["a", "b"]]
|
||||
exp = DataFrame(
|
||||
{"A": [1, 3, 2], "B": [5, 7, 6]},
|
||||
index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")),
|
||||
)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
# duplicated slice
|
||||
res = df.loc[["a", "a", "b"]]
|
||||
exp = DataFrame(
|
||||
{"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]},
|
||||
index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")),
|
||||
)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
|
||||
df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_getitem_listlike_unused_category_raises_keyerror(self):
|
||||
# key that is an *unused* category raises
|
||||
index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
|
||||
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)
|
||||
|
||||
with pytest.raises(KeyError, match="e"):
|
||||
# For comparison, check the scalar behavior
|
||||
df.loc["e"]
|
||||
|
||||
with pytest.raises(KeyError, match=re.escape("['e'] not in index")):
|
||||
df.loc[["a", "e"]]
|
||||
|
||||
def test_ix_categorical_index(self):
|
||||
# GH 12531
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
index=list("ABC"),
|
||||
columns=list("XYZ"),
|
||||
)
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
expect = Series(df.loc["A", :], index=cdf.columns, name="A")
|
||||
tm.assert_series_equal(cdf.loc["A", :], expect)
|
||||
|
||||
expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
|
||||
tm.assert_series_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
|
||||
expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
|
||||
tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
|
||||
expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
|
||||
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
|
||||
)
|
||||
def test_ix_categorical_index_non_unique(self, infer_string):
|
||||
# non-unique
|
||||
with option_context("future.infer_string", infer_string):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
index=list("ABA"),
|
||||
columns=list("XYX"),
|
||||
)
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
exp_index = CategoricalIndex(list("AA"), categories=["A", "B"])
|
||||
expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index)
|
||||
tm.assert_frame_equal(cdf.loc["A", :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"])
|
||||
expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns)
|
||||
tm.assert_frame_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
expect = DataFrame(
|
||||
df.loc[["A", "B"], :],
|
||||
columns=cdf.columns,
|
||||
index=CategoricalIndex(list("AAB")),
|
||||
)
|
||||
tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
expect = DataFrame(
|
||||
df.loc[:, ["X", "Y"]],
|
||||
index=cdf.index,
|
||||
columns=CategoricalIndex(list("XXY")),
|
||||
)
|
||||
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
def test_loc_slice(self, df):
|
||||
# GH9748
|
||||
msg = (
|
||||
"cannot do slice indexing on CategoricalIndex with these "
|
||||
r"indexers \[1\] of type int"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.loc[1:5]
|
||||
|
||||
result = df.loc["b":"c"]
|
||||
expected = df.iloc[[2, 3, 4]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_and_at_with_categorical_index(self):
|
||||
# GH 20629
|
||||
df = DataFrame(
|
||||
[[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"])
|
||||
)
|
||||
|
||||
s = df[0]
|
||||
assert s.loc["A"] == 1
|
||||
assert s.at["A"] == 1
|
||||
|
||||
assert df.loc["B", 1] == 4
|
||||
assert df.at["B", 1] == 4
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx_values",
|
||||
[
|
||||
# python types
|
||||
[1, 2, 3],
|
||||
[-1, -2, -3],
|
||||
[1.5, 2.5, 3.5],
|
||||
[-1.5, -2.5, -3.5],
|
||||
# numpy int/uint
|
||||
*(np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_NUMPY_DTYPES),
|
||||
# numpy floats
|
||||
*(np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in tm.FLOAT_NUMPY_DTYPES),
|
||||
# numpy object
|
||||
np.array([1, "b", 3.5], dtype=object),
|
||||
# pandas scalars
|
||||
[Interval(1, 4), Interval(4, 6), Interval(6, 9)],
|
||||
[Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)],
|
||||
[Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")],
|
||||
# pandas Integer arrays
|
||||
*(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
|
||||
# other pandas arrays
|
||||
pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array,
|
||||
pd.date_range("2019-01-01", periods=3).array,
|
||||
pd.timedelta_range(start="1d", periods=3).array,
|
||||
],
|
||||
)
|
||||
def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
|
||||
# GH-17569
|
||||
cat_idx = CategoricalIndex(idx_values, ordered=ordered)
|
||||
df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx)
|
||||
sl = slice(idx_values[0], idx_values[1])
|
||||
|
||||
# scalar selection
|
||||
result = df.loc[idx_values[0]]
|
||||
expected = Series(["foo"], index=["A"], name=idx_values[0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# list selection
|
||||
result = df.loc[idx_values[:2]]
|
||||
expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# slice selection
|
||||
result = df.loc[sl]
|
||||
expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# scalar assignment
|
||||
result = df.copy()
|
||||
result.loc[idx_values[0]] = "qux"
|
||||
expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# list assignment
|
||||
result = df.copy()
|
||||
result.loc[idx_values[:2], "A"] = ["qux", "qux2"]
|
||||
expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# slice assignment
|
||||
result = df.copy()
|
||||
result.loc[sl, "A"] = ["qux", "qux2"]
|
||||
expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
    def test_getitem_categorical_with_nan(self):
        # GH#41933
        ci = CategoricalIndex(["A", "B", np.nan])

        ser = Series(range(3), index=ci)

        assert ser[np.nan] == 2
        assert ser.loc[np.nan] == 2

        df = DataFrame(ser)
        assert df.loc[np.nan, 0] == 2
        assert df.loc[np.nan][0] == 2
@ -0,0 +1,647 @@
from string import ascii_letters

import numpy as np
import pytest

from pandas.errors import (
    SettingWithCopyError,
    SettingWithCopyWarning,
)
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    Timestamp,
    date_range,
    option_context,
)
import pandas._testing as tm

msg = "A value is trying to be set on a copy of a slice from a DataFrame"


def random_text(nobs=100):
    # Construct a DataFrame where each row is a random slice from 'letters'
    idxs = np.random.default_rng(2).integers(len(ascii_letters), size=(nobs, 2))
    idxs.sort(axis=1)
    strings = [ascii_letters[x[0] : x[1]] for x in idxs]

    return DataFrame(strings, columns=["letters"])
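
# random_text is only a data generator for the caching/chained-assignment tests
# below: e.g. random_text(5) returns a 5-row DataFrame with a single object-dtype
# "letters" column holding random substrings of ascii_letters.
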
class TestCaching:
|
||||
def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
|
||||
# this is chained assignment, but will 'work'
|
||||
with option_context("chained_assignment", None):
|
||||
# #3970
|
||||
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
|
||||
|
||||
# Creates a second float block
|
||||
df["cc"] = 0.0
|
||||
|
||||
# caches a reference to the 'bb' series
|
||||
df["bb"]
|
||||
|
||||
# Assignment to wrong series
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["bb"].iloc[0] = 0.17
|
||||
df._clear_item_cache()
|
||||
if not using_copy_on_write:
|
||||
tm.assert_almost_equal(df["bb"][0], 0.17)
|
||||
            else:
                # under copy-on-write (and previously ArrayManager), the parent
                # is not mutated by the chained assignment
                tm.assert_almost_equal(df["bb"][0], 2.2)

@pytest.mark.parametrize("do_ref", [True, False])
|
||||
def test_setitem_cache_updating(self, do_ref):
|
||||
# GH 5424
|
||||
cont = ["one", "two", "three", "four", "five", "six", "seven"]
|
||||
|
||||
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})
|
||||
|
||||
# ref the cache
|
||||
if do_ref:
|
||||
df.loc[0, "c"]
|
||||
|
||||
# set it
|
||||
df.loc[7, "c"] = 1
|
||||
|
||||
assert df.loc[0, "c"] == 0.0
|
||||
assert df.loc[7, "c"] == 1.0
|
||||
|
||||
def test_setitem_cache_updating_slices(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# GH 7084
|
||||
# not updating cache on series setting with slices
|
||||
expected = DataFrame(
|
||||
{"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
|
||||
)
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})
|
||||
|
||||
# loop through df to update out
|
||||
six = Timestamp("5/7/2014")
|
||||
eix = Timestamp("5/9/2014")
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
# try via a chain indexing
|
||||
# this actually works
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
out_original = out.copy()
|
||||
for ix, row in df.iterrows():
|
||||
v = out[row["C"]][six:eix] + row["D"]
|
||||
with tm.raises_chained_assignment_error(
|
||||
(ix == 0) or warn_copy_on_write or using_copy_on_write
|
||||
):
|
||||
out[row["C"]][six:eix] = v
|
||||
|
||||
if not using_copy_on_write:
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
else:
|
||||
tm.assert_frame_equal(out, out_original)
|
||||
tm.assert_series_equal(out["A"], out_original["A"])
|
||||
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row["C"]] += row["D"]
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
def test_altering_series_clears_parent_cache(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# GH #33675
|
||||
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
|
||||
ser = df["A"]
|
||||
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
assert "A" not in df._item_cache
|
||||
else:
|
||||
assert "A" in df._item_cache
|
||||
|
||||
# Adding a new entry to ser swaps in a new array, so "A" needs to
|
||||
# be removed from df._item_cache
|
||||
ser["c"] = 5
|
||||
assert len(ser) == 3
|
||||
assert "A" not in df._item_cache
|
||||
assert df["A"] is not ser
|
||||
assert len(df["A"]) == 2
|
||||
|
||||
|
||||
class TestChaining:
|
||||
def test_setitem_chained_setfault(self, using_copy_on_write):
|
||||
# GH6026
|
||||
data = ["right", "left", "left", "left", "right", "left", "timeout"]
|
||||
mdata = ["right", "left", "left", "left", "right", "left", "none"]
|
||||
|
||||
df = DataFrame({"response": np.array(data)})
|
||||
mask = df.response == "timeout"
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.response[mask] = "none"
|
||||
if using_copy_on_write:
|
||||
tm.assert_frame_equal(df, DataFrame({"response": data}))
|
||||
else:
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
|
||||
|
||||
recarray = np.rec.fromarrays([data], names=["response"])
|
||||
df = DataFrame(recarray)
|
||||
mask = df.response == "timeout"
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.response[mask] = "none"
|
||||
if using_copy_on_write:
|
||||
tm.assert_frame_equal(df, DataFrame({"response": data}))
|
||||
else:
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
|
||||
|
||||
df = DataFrame({"response": data, "response1": data})
|
||||
df_original = df.copy()
|
||||
mask = df.response == "timeout"
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.response[mask] = "none"
|
||||
if using_copy_on_write:
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
else:
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))
|
||||
|
||||
# GH 6056
|
||||
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
|
||||
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"].iloc[0] = np.nan
|
||||
if using_copy_on_write:
|
||||
expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
|
||||
else:
|
||||
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.A.iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment(self, using_copy_on_write):
|
||||
with option_context("chained_assignment", "raise"):
|
||||
# work with the chain
|
||||
expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
|
||||
df = DataFrame(
|
||||
np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64"
|
||||
)
|
||||
df_original = df.copy()
|
||||
assert df._is_copy is None
|
||||
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = -5
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][1] = -6
|
||||
if using_copy_on_write:
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
else:
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_raises(
|
||||
self, using_array_manager, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# test with the chaining
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": Series(range(2), dtype="int64"),
|
||||
"B": np.array(np.arange(2, 4), dtype=np.float64),
|
||||
}
|
||||
)
|
||||
df_original = df.copy()
|
||||
assert df._is_copy is None
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = -5
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][1] = -6
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
elif warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = -5
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][1] = np.nan
|
||||
elif not using_array_manager:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = -5
|
||||
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][1] = np.nan
|
||||
|
||||
assert df["A"]._is_copy is None
|
||||
else:
|
||||
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
|
||||
# a mixed dataframe
|
||||
df["A"][0] = -5
|
||||
df["A"][1] = -6
|
||||
expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_fails(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# Using a copy (the chain), fails
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": Series(range(2), dtype="int64"),
|
||||
"B": np.array(np.arange(2, 4), dtype=np.float64),
|
||||
}
|
||||
)
|
||||
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.loc[0]["A"] = -5
|
||||
else:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df.loc[0]["A"] = -5
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_doc_example(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# Doc example
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": ["one", "one", "two", "three", "two", "one", "six"],
|
||||
"c": Series(range(7), dtype="int64"),
|
||||
}
|
||||
)
|
||||
assert df._is_copy is None
|
||||
|
||||
indexer = df.a.str.startswith("o")
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df[indexer]["c"] = 42
|
||||
else:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df[indexer]["c"] = 42
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_object_dtype(
|
||||
self, using_array_manager, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
|
||||
df = DataFrame(
|
||||
{"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
|
||||
)
|
||||
df_original = df.copy()
|
||||
|
||||
if not using_copy_on_write and not warn_copy_on_write:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df.loc[0]["A"] = 111
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = 111
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
elif warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = 111
|
||||
tm.assert_frame_equal(df, expected)
|
||||
elif not using_array_manager:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["A"][0] = 111
|
||||
|
||||
df.loc[0, "A"] = 111
|
||||
tm.assert_frame_equal(df, expected)
|
||||
else:
|
||||
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
|
||||
# a mixed dataframe
|
||||
df["A"][0] = 111
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
    def test_detect_chained_assignment_is_copy_pickle(self):
        # gh-5475: make sure that is_copy is picked up on reconstruction
        df = DataFrame({"A": [1, 2]})
|
||||
assert df._is_copy is None
|
||||
|
||||
with tm.ensure_clean("__tmp__pickle") as path:
|
||||
df.to_pickle(path)
|
||||
df2 = pd.read_pickle(path)
|
||||
df2["B"] = df2["A"]
|
||||
df2["B"] = df2["A"]
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_setting_entire_column(self):
|
||||
# gh-5597: a spurious raise as we are setting the entire column here
|
||||
|
||||
df = random_text(100000)
|
||||
|
||||
# Always a copy
|
||||
x = df.iloc[[0, 1, 2]]
|
||||
assert x._is_copy is not None
|
||||
|
||||
x = df.iloc[[0, 1, 2, 4]]
|
||||
assert x._is_copy is not None
|
||||
|
||||
# Explicitly copy
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
df = df.loc[indexer].copy()
|
||||
|
||||
assert df._is_copy is None
|
||||
df["letters"] = df["letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_implicit_take(self):
|
||||
# Implicitly take
|
||||
df = random_text(100000)
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
df = df.loc[indexer]
|
||||
|
||||
assert df._is_copy is not None
|
||||
df["letters"] = df["letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_implicit_take2(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
pytest.skip("_is_copy is not always set for CoW")
|
||||
# Implicitly take 2
|
||||
df = random_text(100000)
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
|
||||
df = df.loc[indexer]
|
||||
assert df._is_copy is not None
|
||||
df.loc[:, "letters"] = df["letters"].apply(str.lower)
|
||||
|
||||
# with the enforcement of #45333 in 2.0, the .loc[:, letters] setting
|
||||
# is inplace, so df._is_copy remains non-None.
|
||||
assert df._is_copy is not None
|
||||
|
||||
df["letters"] = df["letters"].apply(str.lower)
|
||||
assert df._is_copy is None
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_str(self):
|
||||
df = random_text(100000)
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_is_copy(self):
|
||||
# an identical take, so no copy
|
||||
df = DataFrame({"a": [1]}).dropna()
|
||||
assert df._is_copy is None
|
||||
df["a"] += 1
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_sorting(self):
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
|
||||
ser = df.iloc[:, 0].sort_values()
|
||||
|
||||
tm.assert_series_equal(ser, df.iloc[:, 0].sort_values())
|
||||
tm.assert_series_equal(ser, df[0].sort_values())
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_false_positives(self):
|
||||
# see gh-6025: false positives
|
||||
df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
|
||||
str(df)
|
||||
|
||||
df["column1"] = df["column1"] + "b"
|
||||
str(df)
|
||||
|
||||
df = df[df["column2"] != 8]
|
||||
str(df)
|
||||
|
||||
df["column1"] = df["column1"] + "c"
|
||||
str(df)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_undefined_column(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# from SO:
|
||||
# https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
|
||||
df = DataFrame(np.arange(0, 9), columns=["count"])
|
||||
df["group"] = "b"
|
||||
df_original = df.copy()
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.iloc[0:5]["group"] = "a"
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
elif warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.iloc[0:5]["group"] = "a"
|
||||
else:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.iloc[0:5]["group"] = "a"
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_changing_dtype(
|
||||
self, using_array_manager, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# Mixed type setting but same dtype & changing dtype
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": date_range("20130101", periods=5),
|
||||
"B": np.random.default_rng(2).standard_normal(5),
|
||||
"C": np.arange(5, dtype="int64"),
|
||||
"D": ["a", "b", "c", "d", "e"],
|
||||
}
|
||||
)
|
||||
df_original = df.copy()
|
||||
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.loc[2]["D"] = "foo"
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.loc[2]["C"] = "foo"
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)):
|
||||
df["C"][2] = "foo"
|
||||
if using_copy_on_write:
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
else:
|
||||
assert df.loc[2, "C"] == "foo"
|
||||
else:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df.loc[2]["D"] = "foo"
|
||||
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df.loc[2]["C"] = "foo"
|
||||
|
||||
if not using_array_manager:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["C"][2] = "foo"
|
||||
else:
|
||||
# INFO(ArrayManager) for ArrayManager it doesn't matter if it's
|
||||
# changing the dtype or not
|
||||
df["C"][2] = "foo"
|
||||
assert df.loc[2, "C"] == "foo"
|
||||
|
||||
def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write):
|
||||
# operating on a copy
|
||||
df = DataFrame(
|
||||
{"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
|
||||
)
|
||||
df_original = df.copy()
|
||||
mask = pd.isna(df.c)
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df[["c"]][mask] = df[["b"]][mask]
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
elif warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df[["c"]][mask] = df[["b"]][mask]
|
||||
else:
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df[["c"]][mask] = df[["b"]][mask]
|
||||
|
||||
def test_setting_with_copy_bug_no_warning(self):
|
||||
# invalid warning as we are returning a new object
|
||||
# GH 8730
|
||||
df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])})
|
||||
df2 = df1[["x"]]
|
||||
|
||||
# this should not raise
|
||||
df2["y"] = ["g", "h", "i"]
|
||||
|
||||
def test_detect_chained_assignment_warnings_errors(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df.loc[0]["A"] = 111
|
||||
return
|
||||
|
||||
with option_context("chained_assignment", "warn"):
|
||||
with tm.assert_produces_warning(SettingWithCopyWarning):
|
||||
df.loc[0]["A"] = 111
|
||||
|
||||
with option_context("chained_assignment", "raise"):
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
df.loc[0]["A"] = 111
|
||||
|
||||
@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
|
||||
def test_detect_chained_assignment_warning_stacklevel(
|
||||
self, rhs, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# GH#42570
|
||||
df = DataFrame(np.arange(25).reshape(5, 5))
|
||||
df_original = df.copy()
|
||||
chained = df.loc[:3]
|
||||
with option_context("chained_assignment", "warn"):
|
||||
if not using_copy_on_write and not warn_copy_on_write:
|
||||
with tm.assert_produces_warning(SettingWithCopyWarning) as t:
|
||||
chained[2] = rhs
|
||||
assert t[0].filename == __file__
|
||||
else:
|
||||
# INFO(CoW) no warning, and original dataframe not changed
|
||||
chained[2] = rhs
|
||||
tm.assert_frame_equal(df, df_original)
|
||||
|
||||
# TODO(ArrayManager) fast_xs with array-like scalars is not yet working
|
||||
@td.skip_array_manager_not_yet_implemented
|
||||
def test_chained_getitem_with_lists(self):
|
||||
# GH6394
|
||||
# Regression in chained getitem indexing with embedded list-like from
|
||||
# 0.12
|
||||
|
||||
df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
|
||||
expected = df["A"].iloc[2]
|
||||
result = df.loc[2, "A"]
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
result2 = df.iloc[2]["A"]
|
||||
tm.assert_numpy_array_equal(result2, expected)
|
||||
result3 = df["A"].loc[2]
|
||||
tm.assert_numpy_array_equal(result3, expected)
|
||||
result4 = df["A"].iloc[2]
|
||||
tm.assert_numpy_array_equal(result4, expected)
|
||||
|
||||
def test_cache_updating(self):
|
||||
# GH 4939, make sure to update the cache on setitem
|
||||
|
||||
df = DataFrame(
|
||||
np.zeros((10, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
)
|
||||
df["A"] # cache series
|
||||
df.loc["Hello Friend"] = df.iloc[0]
|
||||
assert "Hello Friend" in df["A"].index
|
||||
assert "Hello Friend" in df["B"].index
|
||||
|
||||
def test_cache_updating2(self, using_copy_on_write):
|
||||
# 10264
|
||||
df = DataFrame(
|
||||
np.zeros((5, 5), dtype="int64"),
|
||||
columns=["a", "b", "c", "d", "e"],
|
||||
index=range(5),
|
||||
)
|
||||
df["f"] = 0
|
||||
df_orig = df.copy()
|
||||
if using_copy_on_write:
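            # under Copy-on-Write, Series.values returns a read-only view, so
            # writing to it in place raises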
|
||||
with pytest.raises(ValueError, match="read-only"):
|
||||
df.f.values[3] = 1
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
return
|
||||
|
||||
df.f.values[3] = 1
|
||||
|
||||
df.f.values[3] = 2
|
||||
expected = DataFrame(
|
||||
np.zeros((5, 6), dtype="int64"),
|
||||
columns=["a", "b", "c", "d", "e", "f"],
|
||||
index=range(5),
|
||||
)
|
||||
expected.at[3, "f"] = 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
expected = Series([0, 0, 0, 2, 0], name="f")
|
||||
tm.assert_series_equal(df.f, expected)
|
||||
|
||||
def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
|
||||
# GH#3970
|
||||
with option_context("chained_assignment", None):
|
||||
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
|
||||
df["cc"] = 0.0
|
||||
|
||||
ck = [True] * len(df)
|
||||
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["bb"].iloc[0] = 0.13
|
||||
|
||||
# GH#3970 this lookup used to break the chained setting to 0.15
|
||||
df.iloc[ck]
|
||||
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["bb"].iloc[0] = 0.15
|
||||
|
||||
if not using_copy_on_write:
|
||||
assert df["bb"].iloc[0] == 0.15
|
||||
else:
|
||||
assert df["bb"].iloc[0] == 2.2
|
||||
|
||||
def test_getitem_loc_assignment_slice_state(self):
|
||||
# GH 13569
|
||||
df = DataFrame({"a": [10, 20, 30]})
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["a"].loc[4] = 40
|
||||
tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]}))
|
||||
tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a"))
|
@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.indexers import check_array_indexer
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, expected",
|
||||
[
|
||||
# integer
|
||||
([1, 2], np.array([1, 2], dtype=np.intp)),
|
||||
(np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)),
|
||||
(pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)),
|
||||
(pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)),
|
||||
# boolean
|
||||
([True, False, True], np.array([True, False, True], dtype=np.bool_)),
|
||||
(np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)),
|
||||
(
|
||||
pd.array([True, False, True], dtype="boolean"),
|
||||
np.array([True, False, True], dtype=np.bool_),
|
||||
),
|
||||
# other
|
||||
([], np.array([], dtype=np.intp)),
|
||||
],
|
||||
)
|
||||
def test_valid_input(indexer, expected):
|
||||
arr = np.array([1, 2, 3])
|
||||
result = check_array_indexer(arr, indexer)
|
||||
tm.assert_numpy_array_equal(result, expected)
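

# Illustrative summary of the cases above: check_array_indexer validates a
# list-like indexer against the target array and returns a normalized ndarray,
# e.g.
#   check_array_indexer(np.array([1, 2, 3]), [True, False, True])
# returns array([True, False, True]) with dtype bool.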
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")]
|
||||
)
|
||||
def test_boolean_na_returns_indexer(indexer):
|
||||
# https://github.com/pandas-dev/pandas/issues/31503
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
result = check_array_indexer(arr, indexer)
|
||||
expected = np.array([True, False, False], dtype=bool)
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer",
|
||||
[
|
||||
[True, False],
|
||||
pd.array([True, False], dtype="boolean"),
|
||||
np.array([True, False], dtype=np.bool_),
|
||||
],
|
||||
)
|
||||
def test_bool_raise_length(indexer):
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
msg = "Boolean index has wrong length"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
check_array_indexer(arr, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")]
|
||||
)
|
||||
def test_int_raise_missing_values(indexer):
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
msg = "Cannot index with an integer indexer containing NA values"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
check_array_indexer(arr, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer",
|
||||
[
|
||||
[0.0, 1.0],
|
||||
np.array([1.0, 2.0], dtype="float64"),
|
||||
np.array([True, False], dtype=object),
|
||||
pd.Index([True, False], dtype=object),
|
||||
],
|
||||
)
|
||||
def test_raise_invalid_array_dtypes(indexer):
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
msg = "arrays used as indices must be of integer or boolean type"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
check_array_indexer(arr, indexer)
|
||||
|
||||
|
||||
def test_raise_nullable_string_dtype(nullable_string_dtype):
|
||||
indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
msg = "arrays used as indices must be of integer or boolean type"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
check_array_indexer(arr, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
|
||||
def test_pass_through_non_array_likes(indexer):
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
result = check_array_indexer(arr, indexer)
|
||||
assert result == indexer
|
@ -0,0 +1,940 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.compat import (
|
||||
IS64,
|
||||
is_platform_windows,
|
||||
)
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
###############################################################
|
||||
# Index / Series common tests which may trigger dtype coercions
|
||||
###############################################################
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="class")
|
||||
def check_comprehensiveness(request):
|
||||
# Iterate over combination of dtype, method and klass
|
||||
# and ensure that each are contained within a collected test
|
||||
cls = request.cls
|
||||
combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
|
||||
|
||||
def has_test(combo):
|
||||
klass, dtype, method = combo
|
||||
cls_funcs = request.node.session.items
|
||||
return any(
|
||||
klass in x.name and dtype in x.name and method in x.name for x in cls_funcs
|
||||
)
|
||||
|
||||
opts = request.config.option
|
||||
if opts.lf or opts.keyword:
|
||||
# If we are running with "last-failed" or -k foo, we expect to only
|
||||
# run a subset of tests.
|
||||
yield
|
||||
|
||||
else:
|
||||
for combo in combos:
|
||||
if not has_test(combo):
|
||||
raise AssertionError(
|
||||
f"test method is not defined: {cls.__name__}, {combo}"
|
||||
)
|
||||
|
||||
yield
|
||||
|
||||
|
||||
class CoercionBase:
|
||||
klasses = ["index", "series"]
|
||||
dtypes = [
|
||||
"object",
|
||||
"int64",
|
||||
"float64",
|
||||
"complex128",
|
||||
"bool",
|
||||
"datetime64",
|
||||
"datetime64tz",
|
||||
"timedelta64",
|
||||
"period",
|
||||
]
|
||||
|
||||
@property
|
||||
def method(self):
|
||||
raise NotImplementedError(self)
|
||||
|
||||
|
||||
class TestSetitemCoercion(CoercionBase):
|
||||
method = "setitem"
|
||||
|
||||
# disable comprehensiveness tests, as most of these have been moved to
|
||||
# tests.series.indexing.test_setitem in SetitemCastingEquivalents subclasses.
|
||||
klasses: list[str] = []
|
||||
|
||||
    def test_setitem_series_no_coercion_from_values_list(self):
        # GH35865 - int cast to str when internally calling np.array(ser.values)
        ser = pd.Series(["a", 1])
|
||||
ser[:] = list(ser.values)
|
||||
|
||||
expected = pd.Series(["a", 1])
|
||||
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def _assert_setitem_index_conversion(
|
||||
self, original_series, loc_key, expected_index, expected_dtype
|
||||
):
|
||||
"""test index's coercion triggered by assign key"""
|
||||
temp = original_series.copy()
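        # e.g. setting temp.loc[5] = 5 on an int64 index appends 5 and keeps int64,
        # while a float key like 1.1 coerces the index to float64 (see the
        # parametrized cases in the tests below)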
|
||||
# GH#33469 pre-2.0 with int loc_key and temp.index.dtype == np.float64
|
||||
# `temp[loc_key] = 5` treated loc_key as positional
|
||||
temp[loc_key] = 5
|
||||
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
|
||||
tm.assert_series_equal(temp, exp)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.index.dtype == expected_dtype
|
||||
|
||||
temp = original_series.copy()
|
||||
temp.loc[loc_key] = 5
|
||||
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
|
||||
tm.assert_series_equal(temp, exp)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.index.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
|
||||
)
|
||||
def test_setitem_index_object(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object))
|
||||
assert obj.index.dtype == object
|
||||
|
||||
if exp_dtype is IndexError:
|
||||
temp = obj.copy()
|
||||
warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
|
||||
msg = "index 5 is out of bounds for axis 0 with size 4"
|
||||
with pytest.raises(exp_dtype, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
|
||||
temp[5] = 5
|
||||
else:
|
||||
exp_index = pd.Index(list("abcd") + [val], dtype=object)
|
||||
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)]
|
||||
)
|
||||
def test_setitem_index_int64(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4])
|
||||
assert obj.index.dtype == np.int64
|
||||
|
||||
exp_index = pd.Index([0, 1, 2, 3, val])
|
||||
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val,exp_dtype", [(5, np.float64), (5.1, np.float64), ("x", object)]
|
||||
)
|
||||
def test_setitem_index_float64(self, val, exp_dtype, request):
|
||||
obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
|
||||
assert obj.index.dtype == np.float64
|
||||
|
||||
exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
|
||||
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_series_period(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_index_complex128(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_index_bool(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_index_datetime64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_index_datetime64tz(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_index_timedelta64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_setitem_index_period(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class TestInsertIndexCoercion(CoercionBase):
|
||||
klasses = ["index"]
|
||||
method = "insert"
|
||||
|
||||
def _assert_insert_conversion(self, original, value, expected, expected_dtype):
|
||||
"""test coercion triggered by insert"""
|
||||
target = original.copy()
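        # Index.insert returns a new Index with `value` at position 1; the result
        # dtype shows whether the insert upcast the original dtype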
|
||||
res = target.insert(1, value)
|
||||
tm.assert_index_equal(res, expected)
|
||||
assert res.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"insert, coerced_val, coerced_dtype",
|
||||
[
|
||||
(1, 1, object),
|
||||
(1.1, 1.1, object),
|
||||
(False, False, object),
|
||||
("x", "x", object),
|
||||
],
|
||||
)
|
||||
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
|
||||
obj = pd.Index(list("abcd"), dtype=object)
|
||||
assert obj.dtype == object
|
||||
|
||||
exp = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object)
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"insert, coerced_val, coerced_dtype",
|
||||
[
|
||||
(1, 1, None),
|
||||
(1.1, 1.1, np.float64),
|
||||
(False, False, object), # GH#36319
|
||||
("x", "x", object),
|
||||
],
|
||||
)
|
||||
def test_insert_int_index(
|
||||
self, any_int_numpy_dtype, insert, coerced_val, coerced_dtype
|
||||
):
|
||||
dtype = any_int_numpy_dtype
|
||||
obj = pd.Index([1, 2, 3, 4], dtype=dtype)
|
||||
coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
|
||||
|
||||
exp = pd.Index([1, coerced_val, 2, 3, 4], dtype=coerced_dtype)
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"insert, coerced_val, coerced_dtype",
|
||||
[
|
||||
(1, 1.0, None),
|
||||
# When float_numpy_dtype=float32, this is not the case
|
||||
# see the correction below
|
||||
(1.1, 1.1, np.float64),
|
||||
(False, False, object), # GH#36319
|
||||
("x", "x", object),
|
||||
],
|
||||
)
|
||||
def test_insert_float_index(
|
||||
self, float_numpy_dtype, insert, coerced_val, coerced_dtype
|
||||
):
|
||||
dtype = float_numpy_dtype
|
||||
obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
|
||||
coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
|
||||
|
||||
if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
|
||||
# Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
|
||||
# the expected dtype will be float32 if the original dtype was float32
|
||||
coerced_dtype = np.float32
|
||||
exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,exp_dtype",
|
||||
[
|
||||
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
|
||||
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
|
||||
],
|
||||
ids=["datetime64", "datetime64tz"],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"insert_value",
|
||||
[pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
|
||||
)
|
||||
def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
|
||||
obj = pd.DatetimeIndex(
|
||||
["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz
|
||||
).as_unit("ns")
|
||||
assert obj.dtype == exp_dtype
|
||||
|
||||
exp = pd.DatetimeIndex(
|
||||
["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"],
|
||||
tz=fill_val.tz,
|
||||
).as_unit("ns")
|
||||
self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
|
||||
|
||||
if fill_val.tz:
|
||||
# mismatched tzawareness
|
||||
ts = pd.Timestamp("2012-01-01")
|
||||
result = obj.insert(1, ts)
|
||||
expected = obj.astype(object).insert(1, ts)
|
||||
assert expected.dtype == object
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
|
||||
result = obj.insert(1, ts)
|
||||
# once deprecation is enforced:
|
||||
expected = obj.insert(1, ts.tz_convert(obj.dtype.tz))
|
||||
assert expected.dtype == obj.dtype
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
else:
|
||||
# mismatched tzawareness
|
||||
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
|
||||
result = obj.insert(1, ts)
|
||||
expected = obj.astype(object).insert(1, ts)
|
||||
assert expected.dtype == object
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
item = 1
|
||||
result = obj.insert(1, item)
|
||||
expected = obj.astype(object).insert(1, item)
|
||||
assert expected[1] == item
|
||||
assert expected.dtype == object
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_insert_index_timedelta64(self):
|
||||
obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
|
||||
assert obj.dtype == "timedelta64[ns]"
|
||||
|
||||
# timedelta64 + timedelta64 => timedelta64
|
||||
exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
|
||||
self._assert_insert_conversion(
|
||||
obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]"
|
||||
)
|
||||
|
||||
for item in [pd.Timestamp("2012-01-01"), 1]:
|
||||
result = obj.insert(1, item)
|
||||
expected = obj.astype(object).insert(1, item)
|
||||
assert expected.dtype == object
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"insert, coerced_val, coerced_dtype",
|
||||
[
|
||||
(pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
|
||||
(pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
|
||||
(1, 1, object),
|
||||
("x", "x", object),
|
||||
],
|
||||
)
|
||||
def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
|
||||
obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")
|
||||
assert obj.dtype == "period[M]"
|
||||
|
||||
data = [
|
||||
pd.Period("2011-01", freq="M"),
|
||||
coerced_val,
|
||||
pd.Period("2011-02", freq="M"),
|
||||
pd.Period("2011-03", freq="M"),
|
||||
pd.Period("2011-04", freq="M"),
|
||||
]
|
||||
if isinstance(insert, pd.Period):
|
||||
exp = pd.PeriodIndex(data, freq="M")
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
# string that can be parsed to appropriate PeriodDtype
|
||||
self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype)
|
||||
|
||||
else:
|
||||
result = obj.insert(0, insert)
|
||||
expected = obj.astype(object).insert(0, insert)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
|
||||
# casts that string to Period[M], not clear that is desirable
|
||||
if not isinstance(insert, pd.Timestamp):
|
||||
# non-castable string
|
||||
result = obj.insert(0, str(insert))
|
||||
expected = obj.astype(object).insert(0, str(insert))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_insert_index_complex128(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_insert_index_bool(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class TestWhereCoercion(CoercionBase):
|
||||
method = "where"
|
||||
_cond = np.array([True, False, True, False])
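    # positions where _cond is False (indices 1 and 3) are the ones replaced by
    # the fill value in .where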
|
||||
|
||||
def _assert_where_conversion(
|
||||
self, original, cond, values, expected, expected_dtype
|
||||
):
|
||||
"""test coercion triggered by where"""
|
||||
target = original.copy()
|
||||
res = target.where(cond, values)
|
||||
tm.assert_equal(res, expected)
|
||||
assert res.dtype == expected_dtype
|
||||
|
||||
def _construct_exp(self, obj, klass, fill_val, exp_dtype):
|
||||
if fill_val is True:
|
||||
values = klass([True, False, True, True])
|
||||
elif isinstance(fill_val, (datetime, np.datetime64)):
|
||||
values = pd.date_range(fill_val, periods=4)
|
||||
else:
|
||||
values = klass(x * fill_val for x in [5, 6, 7, 8])
|
||||
|
||||
exp = klass([obj[0], values[1], obj[2], values[3]], dtype=exp_dtype)
|
||||
return values, exp
|
||||
|
||||
def _run_test(self, obj, fill_val, klass, exp_dtype):
|
||||
cond = klass(self._cond)
|
||||
|
||||
exp = klass([obj[0], fill_val, obj[2], fill_val], dtype=exp_dtype)
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
values, exp = self._construct_exp(obj, klass, fill_val, exp_dtype)
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,exp_dtype",
|
||||
[(1, object), (1.1, object), (1 + 1j, object), (True, object)],
|
||||
)
|
||||
def test_where_object(self, index_or_series, fill_val, exp_dtype):
|
||||
klass = index_or_series
|
||||
obj = klass(list("abcd"), dtype=object)
|
||||
assert obj.dtype == object
|
||||
self._run_test(obj, fill_val, klass, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,exp_dtype",
|
||||
[(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
|
||||
)
|
||||
def test_where_int64(self, index_or_series, fill_val, exp_dtype, request):
|
||||
klass = index_or_series
|
||||
|
||||
obj = klass([1, 2, 3, 4])
|
||||
assert obj.dtype == np.int64
|
||||
self._run_test(obj, fill_val, klass, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val, exp_dtype",
|
||||
[(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
|
||||
)
|
||||
def test_where_float64(self, index_or_series, fill_val, exp_dtype, request):
|
||||
klass = index_or_series
|
||||
|
||||
obj = klass([1.1, 2.2, 3.3, 4.4])
|
||||
assert obj.dtype == np.float64
|
||||
self._run_test(obj, fill_val, klass, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,exp_dtype",
|
||||
[
|
||||
(1, np.complex128),
|
||||
(1.1, np.complex128),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, object),
|
||||
],
|
||||
)
|
||||
def test_where_complex128(self, index_or_series, fill_val, exp_dtype):
|
||||
klass = index_or_series
|
||||
obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=np.complex128)
|
||||
assert obj.dtype == np.complex128
|
||||
self._run_test(obj, fill_val, klass, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,exp_dtype",
|
||||
[(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)],
|
||||
)
|
||||
def test_where_series_bool(self, index_or_series, fill_val, exp_dtype):
|
||||
klass = index_or_series
|
||||
|
||||
obj = klass([True, False, True, False])
|
||||
assert obj.dtype == np.bool_
|
||||
self._run_test(obj, fill_val, klass, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,exp_dtype",
|
||||
[
|
||||
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
|
||||
(pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
|
||||
],
|
||||
ids=["datetime64", "datetime64tz"],
|
||||
)
|
||||
def test_where_datetime64(self, index_or_series, fill_val, exp_dtype):
|
||||
klass = index_or_series
|
||||
|
||||
obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None))
|
||||
assert obj.dtype == "datetime64[ns]"
|
||||
|
||||
fv = fill_val
|
||||
# do the check with each of the available datetime scalars
|
||||
if exp_dtype == "datetime64[ns]":
|
||||
for scalar in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
|
||||
self._run_test(obj, scalar, klass, exp_dtype)
|
||||
else:
|
||||
for scalar in [fv, fv.to_pydatetime()]:
|
||||
self._run_test(obj, fill_val, klass, exp_dtype)
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_where_index_complex128(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_where_index_bool(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_where_series_timedelta64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_where_series_period(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")]
|
||||
)
|
||||
def test_where_index_timedelta64(self, value):
|
||||
tdi = pd.timedelta_range("1 Day", periods=4)
|
||||
cond = np.array([True, False, False, True])
|
||||
|
||||
expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"])
|
||||
result = tdi.where(cond, value)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# wrong-dtyped NaT
|
||||
dtnat = np.datetime64("NaT", "ns")
|
||||
expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object)
|
||||
assert expected[1] is dtnat
|
||||
|
||||
result = tdi.where(cond, dtnat)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_where_index_period(self):
|
||||
dti = pd.date_range("2016-01-01", periods=3, freq="QS")
|
||||
pi = dti.to_period("Q")
|
||||
|
||||
cond = np.array([False, True, False])
|
||||
|
||||
# Passing a valid scalar
|
||||
value = pi[-1] + pi.freq * 10
|
||||
expected = pd.PeriodIndex([value, pi[1], value])
|
||||
result = pi.where(cond, value)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# Case passing ndarray[object] of Periods
|
||||
other = np.asarray(pi + pi.freq * 10, dtype=object)
|
||||
result = pi.where(cond, other)
|
||||
expected = pd.PeriodIndex([other[0], pi[1], other[2]])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# Passing a mismatched scalar -> casts to object
|
||||
td = pd.Timedelta(days=4)
|
||||
expected = pd.Index([td, pi[1], td], dtype=object)
|
||||
result = pi.where(cond, td)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
per = pd.Period("2020-04-21", "D")
|
||||
expected = pd.Index([per, pi[1], per], dtype=object)
|
||||
result = pi.where(cond, per)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestFillnaSeriesCoercion(CoercionBase):
|
||||
# not indexing, but place here for consistency
|
||||
|
||||
method = "fillna"
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_has_comprehensive_tests(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
|
||||
"""test coercion triggered by fillna"""
|
||||
target = original.copy()
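        # fillna returns a new object here (no inplace); the result dtype shows
        # whether filling NaN/NaT with `value` upcast the original dtype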
|
||||
res = target.fillna(value)
|
||||
tm.assert_equal(res, expected)
|
||||
assert res.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val, fill_dtype",
|
||||
[(1, object), (1.1, object), (1 + 1j, object), (True, object)],
|
||||
)
|
||||
def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
|
||||
klass = index_or_series
|
||||
obj = klass(["a", np.nan, "c", "d"], dtype=object)
|
||||
assert obj.dtype == object
|
||||
|
||||
exp = klass(["a", fill_val, "c", "d"], dtype=object)
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,fill_dtype",
|
||||
[(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
|
||||
)
|
||||
def test_fillna_float64(self, index_or_series, fill_val, fill_dtype):
|
||||
klass = index_or_series
|
||||
obj = klass([1.1, np.nan, 3.3, 4.4])
|
||||
assert obj.dtype == np.float64
|
||||
|
||||
exp = klass([1.1, fill_val, 3.3, 4.4])
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,fill_dtype",
|
||||
[
|
||||
(1, np.complex128),
|
||||
(1.1, np.complex128),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, object),
|
||||
],
|
||||
)
|
||||
def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype):
|
||||
klass = index_or_series
|
||||
obj = klass([1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128)
|
||||
assert obj.dtype == np.complex128
|
||||
|
||||
exp = klass([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,fill_dtype",
|
||||
[
|
||||
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
|
||||
(pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
|
||||
(1, object),
|
||||
("x", object),
|
||||
],
|
||||
ids=["datetime64", "datetime64tz", "object", "object"],
|
||||
)
|
||||
def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype):
|
||||
klass = index_or_series
|
||||
obj = klass(
|
||||
[
|
||||
pd.Timestamp("2011-01-01"),
|
||||
pd.NaT,
|
||||
pd.Timestamp("2011-01-03"),
|
||||
pd.Timestamp("2011-01-04"),
|
||||
]
|
||||
)
|
||||
assert obj.dtype == "datetime64[ns]"
|
||||
|
||||
exp = klass(
|
||||
[
|
||||
pd.Timestamp("2011-01-01"),
|
||||
fill_val,
|
||||
pd.Timestamp("2011-01-03"),
|
||||
pd.Timestamp("2011-01-04"),
|
||||
]
|
||||
)
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val,fill_dtype",
|
||||
[
|
||||
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
|
||||
(pd.Timestamp("2012-01-01"), object),
|
||||
# pre-2.0 with a mismatched tz we would get object result
|
||||
(pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"),
|
||||
(1, object),
|
||||
("x", object),
|
||||
],
|
||||
)
|
||||
def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
|
||||
klass = index_or_series
|
||||
tz = "US/Eastern"
|
||||
|
||||
obj = klass(
|
||||
[
|
||||
pd.Timestamp("2011-01-01", tz=tz),
|
||||
pd.NaT,
|
||||
pd.Timestamp("2011-01-03", tz=tz),
|
||||
pd.Timestamp("2011-01-04", tz=tz),
|
||||
]
|
||||
)
|
||||
assert obj.dtype == "datetime64[ns, US/Eastern]"
|
||||
|
||||
if getattr(fill_val, "tz", None) is None:
|
||||
fv = fill_val
|
||||
else:
|
||||
fv = fill_val.tz_convert(tz)
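# with a tz-aware fill value whose tz differs from the index, the fill value
# is converted into the index's tz (tz_convert above), so the expected result
# below is built from the converted timestamp rather than an object upcast
# (the pre-2.0 behavior noted in the parametrization comment)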
|
||||
exp = klass(
|
||||
[
|
||||
pd.Timestamp("2011-01-01", tz=tz),
|
||||
fv,
|
||||
pd.Timestamp("2011-01-03", tz=tz),
|
||||
pd.Timestamp("2011-01-04", tz=tz),
|
||||
]
|
||||
)
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val",
|
||||
[
|
||||
1,
|
||||
1.1,
|
||||
1 + 1j,
|
||||
True,
|
||||
pd.Interval(1, 2, closed="left"),
|
||||
pd.Timestamp("2012-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2012-01-01"),
|
||||
pd.Timedelta(days=1),
|
||||
pd.Period("2016-01-01", "D"),
|
||||
],
|
||||
)
|
||||
def test_fillna_interval(self, index_or_series, fill_val):
|
||||
ii = pd.interval_range(1.0, 5.0, closed="right").insert(1, np.nan)
|
||||
assert isinstance(ii.dtype, pd.IntervalDtype)
|
||||
obj = index_or_series(ii)
|
||||
|
||||
exp = index_or_series([ii[0], fill_val, ii[2], ii[3], ii[4]], dtype=object)
|
||||
|
||||
fill_dtype = object
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_series_int64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_index_int64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_series_bool(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_index_bool(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_series_timedelta64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fill_val",
|
||||
[
|
||||
1,
|
||||
1.1,
|
||||
1 + 1j,
|
||||
True,
|
||||
pd.Interval(1, 2, closed="left"),
|
||||
pd.Timestamp("2012-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2012-01-01"),
|
||||
pd.Timedelta(days=1),
|
||||
pd.Period("2016-01-01", "W"),
|
||||
],
|
||||
)
|
||||
def test_fillna_series_period(self, index_or_series, fill_val):
|
||||
pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
|
||||
assert isinstance(pi.dtype, pd.PeriodDtype)
|
||||
obj = index_or_series(pi)
|
||||
|
||||
exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object)
|
||||
|
||||
fill_dtype = object
|
||||
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_index_timedelta64(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_fillna_index_period(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class TestReplaceSeriesCoercion(CoercionBase):
|
||||
klasses = ["series"]
|
||||
method = "replace"
|
||||
|
||||
rep: dict[str, list] = {}
|
||||
rep["object"] = ["a", "b"]
|
||||
rep["int64"] = [4, 5]
|
||||
rep["float64"] = [1.1, 2.2]
|
||||
rep["complex128"] = [1 + 1j, 2 + 2j]
|
||||
rep["bool"] = [True, False]
|
||||
rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")]
|
||||
|
||||
for tz in ["UTC", "US/Eastern"]:
|
||||
# to test tz => different tz replacement
|
||||
key = f"datetime64[ns, {tz}]"
|
||||
rep[key] = [
|
||||
pd.Timestamp("2011-01-01", tz=tz),
|
||||
pd.Timestamp("2011-01-03", tz=tz),
|
||||
]
|
||||
|
||||
rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
|
||||
|
||||
@pytest.fixture(params=["dict", "series"])
|
||||
def how(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
"object",
|
||||
"int64",
|
||||
"float64",
|
||||
"complex128",
|
||||
"bool",
|
||||
"datetime64[ns]",
|
||||
"datetime64[ns, UTC]",
|
||||
"datetime64[ns, US/Eastern]",
|
||||
"timedelta64[ns]",
|
||||
]
|
||||
)
|
||||
def from_key(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
"object",
|
||||
"int64",
|
||||
"float64",
|
||||
"complex128",
|
||||
"bool",
|
||||
"datetime64[ns]",
|
||||
"datetime64[ns, UTC]",
|
||||
"datetime64[ns, US/Eastern]",
|
||||
"timedelta64[ns]",
|
||||
],
|
||||
ids=[
|
||||
"object",
|
||||
"int64",
|
||||
"float64",
|
||||
"complex128",
|
||||
"bool",
|
||||
"datetime64",
|
||||
"datetime64tz",
|
||||
"datetime64tz",
|
||||
"timedelta64",
|
||||
],
|
||||
)
|
||||
def to_key(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def replacer(self, how, from_key, to_key):
|
||||
"""
|
||||
Object we will pass to `Series.replace`
|
||||
"""
|
||||
if how == "dict":
|
||||
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
|
||||
elif how == "series":
|
||||
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
|
||||
else:
|
||||
raise ValueError
|
||||
return replacer
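# e.g. with how="dict", from_key="int64", to_key="float64" the replacer built
# above is {4: 1.1, 5: 2.2}, mapping each original value to its replacement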
|
||||
|
||||
# Expected needs adjustment for the infer string option, seems to work as expected
|
||||
@pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is too complex")
|
||||
def test_replace_series(self, how, to_key, from_key, replacer):
|
||||
index = pd.Index([3, 4], name="xxx")
|
||||
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
|
||||
assert obj.dtype == from_key
|
||||
|
||||
if from_key.startswith("datetime") and to_key.startswith("datetime"):
|
||||
# tested below
|
||||
return
|
||||
elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
|
||||
# tested below
|
||||
return
|
||||
|
||||
if (from_key == "float64" and to_key in ("int64")) or (
|
||||
from_key == "complex128" and to_key in ("int64", "float64")
|
||||
):
|
||||
if not IS64 or is_platform_windows():
|
||||
pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}")
|
||||
|
||||
# Expected: do not downcast by replacement
|
||||
exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key)
|
||||
|
||||
else:
|
||||
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
|
||||
assert exp.dtype == to_key
|
||||
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
warn = FutureWarning
|
||||
if (
|
||||
exp.dtype == obj.dtype
|
||||
or exp.dtype == object
|
||||
or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc")
|
||||
):
|
||||
warn = None
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = obj.replace(replacer)
|
||||
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"to_key",
|
||||
["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"],
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
|
||||
)
|
||||
def test_replace_series_datetime_tz(
|
||||
self, how, to_key, from_key, replacer, using_infer_string
|
||||
):
|
||||
index = pd.Index([3, 4], name="xyz")
|
||||
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
|
||||
assert obj.dtype == from_key
|
||||
|
||||
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
|
||||
if using_infer_string and to_key == "object":
|
||||
assert exp.dtype == "string"
|
||||
else:
|
||||
assert exp.dtype == to_key
|
||||
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
warn = FutureWarning if exp.dtype != object else None
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = obj.replace(replacer)
|
||||
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"to_key",
|
||||
["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"from_key",
|
||||
["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
|
||||
indirect=True,
|
||||
)
|
||||
def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
|
||||
index = pd.Index([3, 4], name="xyz")
|
||||
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
|
||||
assert obj.dtype == from_key
|
||||
|
||||
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
|
||||
warn = FutureWarning
|
||||
if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
|
||||
exp.dtype, pd.DatetimeTZDtype
|
||||
):
|
||||
# with mismatched tzs, we retain the original dtype as of 2.0
|
||||
exp = exp.astype(obj.dtype)
|
||||
warn = None
|
||||
else:
|
||||
assert exp.dtype == to_key
|
||||
if to_key == from_key:
|
||||
warn = None
|
||||
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = obj.replace(replacer)
|
||||
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
@pytest.mark.xfail(reason="Test not implemented")
|
||||
def test_replace_series_period(self):
|
||||
raise NotImplementedError
|
@ -0,0 +1,191 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
|
||||
def test_get_loc_naive_dti_aware_str_deprecated(self):
|
||||
# GH#46903
|
||||
ts = Timestamp("20130101")._value
|
||||
dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)])
|
||||
ser = Series(range(100), index=dti)
|
||||
|
||||
key = "2013-01-01 00:00:00.000000050+0000"
|
||||
msg = re.escape(repr(key))
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
ser[key]
|
||||
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
dti.get_loc(key)
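# a tz-aware string key cannot match entries of this tz-naive DatetimeIndex,
# so both the Series lookup and DatetimeIndex.get_loc raise KeyError with the
# repr of the key in the message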
|
||||
|
||||
def test_indexing_with_datetime_tz(self):
|
||||
# GH#8260
|
||||
# support datetime64 with tz
|
||||
|
||||
idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
|
||||
dr = date_range("20130110", periods=3)
|
||||
df = DataFrame({"A": idx, "B": dr})
|
||||
df["C"] = idx
|
||||
df.iloc[1, 1] = pd.NaT
|
||||
df.iloc[1, 2] = pd.NaT
|
||||
|
||||
expected = Series(
|
||||
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
|
||||
index=list("ABC"),
|
||||
dtype="object",
|
||||
name=1,
|
||||
)
|
||||
|
||||
# indexing
|
||||
result = df.iloc[1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
result = df.loc[1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_indexing_fast_xs(self):
|
||||
# indexing - fast_xs
|
||||
df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")})
|
||||
result = df.iloc[5]
|
||||
expected = Series(
|
||||
[Timestamp("2014-01-06 00:00:00+0000", tz="UTC")],
|
||||
index=["a"],
|
||||
name=5,
|
||||
dtype="M8[ns, UTC]",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc[5]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# indexing - boolean
|
||||
result = df[df.a > df.a[3]]
|
||||
expected = df.iloc[4:]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_consistency_with_tz_aware_scalar(self):
|
||||
# xref gh-12938
|
||||
# various ways of indexing the same tz-aware scalar
|
||||
df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame()
|
||||
|
||||
df = pd.concat([df, df]).reset_index(drop=True)
|
||||
expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels")
|
||||
|
||||
result = df[0][0]
|
||||
assert result == expected
|
||||
|
||||
result = df.iloc[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.loc[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.iat[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.at[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df[0].loc[0]
|
||||
assert result == expected
|
||||
|
||||
result = df[0].at[0]
|
||||
assert result == expected
|
||||
|
||||
def test_indexing_with_datetimeindex_tz(self, indexer_sl):
|
||||
# GH 12050
|
||||
# indexing on a series with a datetimeindex with tz
|
||||
index = date_range("2015-01-01", periods=2, tz="utc")
|
||||
|
||||
ser = Series(range(2), index=index, dtype="int64")
|
||||
|
||||
# list-like indexing
|
||||
|
||||
for sel in (index, list(index)):
|
||||
# getitem
|
||||
result = indexer_sl(ser)[sel]
|
||||
expected = ser.copy()
|
||||
if sel is not index:
|
||||
expected.index = expected.index._with_freq(None)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# setitem
|
||||
result = ser.copy()
|
||||
indexer_sl(result)[sel] = 1
|
||||
expected = Series(1, index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# single element indexing
|
||||
|
||||
# getitem
|
||||
assert indexer_sl(ser)[index[1]] == 1
|
||||
|
||||
# setitem
|
||||
result = ser.copy()
|
||||
indexer_sl(result)[index[1]] = 5
|
||||
expected = Series([0, 5], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_nanosecond_getitem_setitem_with_tz(self):
|
||||
# GH 11679
|
||||
data = ["2016-06-28 08:30:00.123456789"]
|
||||
index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]")
|
||||
df = DataFrame({"a": [10]}, index=index)
|
||||
result = df.loc[df.index[0]]
|
||||
expected = Series(10, index=["a"], name=df.index[0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.copy()
|
||||
result.loc[df.index[0], "a"] = -1
|
||||
expected = DataFrame(-1, index=index, columns=["a"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_str_slice_millisecond_resolution(self, frame_or_series):
|
||||
# GH#33589
|
||||
|
||||
keys = [
|
||||
"2017-10-25T16:25:04.151",
|
||||
"2017-10-25T16:25:04.252",
|
||||
"2017-10-25T16:50:05.237",
|
||||
"2017-10-25T16:50:05.238",
|
||||
]
|
||||
obj = frame_or_series(
|
||||
[1, 2, 3, 4],
|
||||
index=[Timestamp(x) for x in keys],
|
||||
)
|
||||
result = obj[keys[1] : keys[2]]
|
||||
expected = frame_or_series(
|
||||
[2, 3],
|
||||
index=[
|
||||
Timestamp(keys[1]),
|
||||
Timestamp(keys[2]),
|
||||
],
|
||||
)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_getitem_pyarrow_index(self, frame_or_series):
|
||||
# GH 53644
|
||||
pytest.importorskip("pyarrow")
|
||||
obj = frame_or_series(
|
||||
range(5),
|
||||
index=date_range("2020", freq="D", periods=5).astype(
|
||||
"timestamp[us][pyarrow]"
|
||||
),
|
||||
)
|
||||
result = obj.loc[obj.index[:-3]]
|
||||
expected = frame_or_series(
|
||||
range(2),
|
||||
index=date_range("2020", freq="D", periods=2).astype(
|
||||
"timestamp[us][pyarrow]"
|
||||
),
|
||||
)
|
||||
tm.assert_equal(result, expected)
|
@ -0,0 +1,689 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
RangeIndex,
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def gen_obj(klass, index):
|
||||
if klass is Series:
|
||||
obj = Series(np.arange(len(index)), index=index)
|
||||
else:
|
||||
obj = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((len(index), len(index))),
|
||||
index=index,
|
||||
columns=index,
|
||||
)
|
||||
return obj
|
||||
|
||||
|
||||
class TestFloatIndexers:
|
||||
def check(self, result, original, indexer, getitem):
|
||||
"""
|
||||
comparator for results
|
||||
we need to take care if we are indexing on a
|
||||
Series or a frame
|
||||
"""
|
||||
if isinstance(original, Series):
|
||||
expected = original.iloc[indexer]
|
||||
elif getitem:
|
||||
expected = original.iloc[:, indexer]
|
||||
else:
|
||||
expected = original.iloc[indexer]
|
||||
|
||||
tm.assert_almost_equal(result, expected)
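# for a DataFrame, getitem=True corresponds to plain obj[key] access, which
# selects along the columns, hence the expected value uses .iloc[:, indexer]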
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
Index(list("abcde")),
|
||||
Index(list("abcde"), dtype="category"),
|
||||
date_range("2020-01-01", periods=5),
|
||||
timedelta_range("1 day", periods=5),
|
||||
period_range("2020-01-01", periods=5),
|
||||
],
|
||||
)
|
||||
def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl):
|
||||
# GH 4892
|
||||
# float_indexers should raise exceptions
|
||||
# on appropriate Index types & accessors
|
||||
|
||||
s = gen_obj(frame_or_series, index)
|
||||
|
||||
# getting
|
||||
with pytest.raises(KeyError, match="^3.0$"):
|
||||
indexer_sl(s)[3.0]
|
||||
|
||||
# contains
|
||||
assert 3.0 not in s
|
||||
|
||||
s2 = s.copy()
|
||||
indexer_sl(s2)[3.0] = 10
|
||||
|
||||
if indexer_sl is tm.setitem:
|
||||
assert 3.0 in s2.axes[-1]
|
||||
elif indexer_sl is tm.loc:
|
||||
assert 3.0 in s2.axes[0]
|
||||
else:
|
||||
assert 3.0 not in s2.axes[0]
|
||||
assert 3.0 not in s2.axes[-1]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
Index(list("abcde")),
|
||||
Index(list("abcde"), dtype="category"),
|
||||
date_range("2020-01-01", periods=5),
|
||||
timedelta_range("1 day", periods=5),
|
||||
period_range("2020-01-01", periods=5),
|
||||
],
|
||||
)
|
||||
def test_scalar_non_numeric_series_fallback(self, index):
|
||||
# falls back to positional selection, series only
|
||||
s = Series(np.arange(len(index)), index=index)
|
||||
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s[3]
|
||||
with pytest.raises(KeyError, match="^3.0$"):
|
||||
s[3.0]
|
||||
|
||||
def test_scalar_with_mixed(self, indexer_sl):
|
||||
s2 = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s3 = Series([1, 2, 3], index=["a", "b", 1.5])
|
||||
|
||||
# lookup in a pure string index with an invalid indexer
|
||||
|
||||
with pytest.raises(KeyError, match="^1.0$"):
|
||||
indexer_sl(s2)[1.0]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^1\.0$"):
|
||||
indexer_sl(s2)[1.0]
|
||||
|
||||
result = indexer_sl(s2)["b"]
|
||||
expected = 2
|
||||
assert result == expected
|
||||
|
||||
# mixed index so we have label
|
||||
# indexing
|
||||
with pytest.raises(KeyError, match="^1.0$"):
|
||||
indexer_sl(s3)[1.0]
|
||||
|
||||
if indexer_sl is not tm.loc:
|
||||
# __getitem__ falls back to positional
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = s3[1]
|
||||
expected = 2
|
||||
assert result == expected
|
||||
|
||||
with pytest.raises(KeyError, match=r"^1\.0$"):
|
||||
indexer_sl(s3)[1.0]
|
||||
|
||||
result = indexer_sl(s3)[1.5]
|
||||
expected = 3
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
|
||||
)
|
||||
def test_scalar_integer(self, index, frame_or_series, indexer_sl):
|
||||
getitem = indexer_sl is not tm.loc
|
||||
|
||||
# test how scalar float indexers work on int indexes
|
||||
|
||||
# integer index
|
||||
i = index
|
||||
obj = gen_obj(frame_or_series, i)
|
||||
|
||||
# coerce to equal int
|
||||
|
||||
result = indexer_sl(obj)[3.0]
|
||||
self.check(result, obj, 3, getitem)
|
||||
|
||||
if isinstance(obj, Series):
|
||||
|
||||
def compare(x, y):
|
||||
assert x == y
|
||||
|
||||
expected = 100
|
||||
else:
|
||||
compare = tm.assert_series_equal
|
||||
if getitem:
|
||||
expected = Series(100, index=range(len(obj)), name=3)
|
||||
else:
|
||||
expected = Series(100.0, index=range(len(obj)), name=3)
|
||||
|
||||
s2 = obj.copy()
|
||||
indexer_sl(s2)[3.0] = 100
|
||||
|
||||
result = indexer_sl(s2)[3.0]
|
||||
compare(result, expected)
|
||||
|
||||
result = indexer_sl(s2)[3]
|
||||
compare(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
|
||||
)
|
||||
def test_scalar_integer_contains_float(self, index, frame_or_series):
|
||||
# contains
|
||||
# integer index
|
||||
obj = gen_obj(frame_or_series, index)
|
||||
|
||||
# coerce to equal int
|
||||
assert 3.0 in obj
|
||||
|
||||
def test_scalar_float(self, frame_or_series):
|
||||
# scalar float indexers work on a float index
|
||||
index = Index(np.arange(5.0))
|
||||
s = gen_obj(frame_or_series, index)
|
||||
|
||||
# assert all operations except for iloc are ok
|
||||
indexer = index[3]
|
||||
for idxr in [tm.loc, tm.setitem]:
|
||||
getitem = idxr is not tm.loc
|
||||
|
||||
# getting
|
||||
result = idxr(s)[indexer]
|
||||
self.check(result, s, 3, getitem)
|
||||
|
||||
# setting
|
||||
s2 = s.copy()
|
||||
|
||||
result = idxr(s2)[indexer]
|
||||
self.check(result, s, 3, getitem)
|
||||
|
||||
# random float is a KeyError
|
||||
with pytest.raises(KeyError, match=r"^3\.5$"):
|
||||
idxr(s)[3.5]
|
||||
|
||||
# contains
|
||||
assert 3.0 in s
|
||||
|
||||
# iloc succeeds with an integer
|
||||
expected = s.iloc[3]
|
||||
s2 = s.copy()
|
||||
|
||||
s2.iloc[3] = expected
|
||||
result = s2.iloc[3]
|
||||
self.check(result, s, 3, False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
Index(list("abcde"), dtype=object),
|
||||
date_range("2020-01-01", periods=5),
|
||||
timedelta_range("1 day", periods=5),
|
||||
period_range("2020-01-01", periods=5),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
|
||||
def test_slice_non_numeric(self, index, idx, frame_or_series, indexer_sli):
|
||||
# GH 4892
|
||||
# float_indexers should raise exceptions
|
||||
# on appropriate Index types & accessors
|
||||
|
||||
s = gen_obj(frame_or_series, index)
|
||||
|
||||
# getitem
|
||||
if indexer_sli is tm.iloc:
|
||||
msg = (
|
||||
"cannot do positional indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
|
||||
"type float"
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers "
|
||||
r"\[(3|4)(\.0)?\] "
|
||||
r"of type (float|int)"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
indexer_sli(s)[idx]
|
||||
|
||||
# setitem
|
||||
if indexer_sli is tm.iloc:
|
||||
# otherwise we keep the same message as above
|
||||
msg = "slice indices must be integers or None or have an __index__ method"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
indexer_sli(s)[idx] = 0
|
||||
|
||||
def test_slice_integer(self):
|
||||
# same as above, but for Integer based indexes
|
||||
# these coerce to a like integer
|
||||
# oob indicates if we are out of bounds
|
||||
# of positional indexing
|
||||
for index, oob in [
|
||||
(Index(np.arange(5, dtype=np.int64)), False),
|
||||
(RangeIndex(5), False),
|
||||
(Index(np.arange(5, dtype=np.int64) + 10), True),
|
||||
]:
|
||||
# s is an in-range index
|
||||
s = Series(range(5), index=index)
|
||||
|
||||
# getitem
|
||||
for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
|
||||
result = s.loc[idx]
|
||||
|
||||
# these are all label indexing
|
||||
# except getitem which is positional
|
||||
# empty
|
||||
if oob:
|
||||
indexer = slice(0, 0)
|
||||
else:
|
||||
indexer = slice(3, 5)
|
||||
self.check(result, s, indexer, False)
|
||||
|
||||
# getitem out-of-bounds
|
||||
for idx in [slice(-6, 6), slice(-6.0, 6.0)]:
|
||||
result = s.loc[idx]
|
||||
|
||||
# these are all label indexing
|
||||
# except getitem which is positional
|
||||
# empty
|
||||
if oob:
|
||||
indexer = slice(0, 0)
|
||||
else:
|
||||
indexer = slice(-6, 6)
|
||||
self.check(result, s, indexer, False)
|
||||
|
||||
# positional indexing
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[-6\.0\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[slice(-6.0, 6.0)]
|
||||
|
||||
# getitem odd floats
|
||||
for idx, res1 in [
|
||||
(slice(2.5, 4), slice(3, 5)),
|
||||
(slice(2, 3.5), slice(2, 4)),
|
||||
(slice(2.5, 3.5), slice(3, 4)),
|
||||
]:
|
||||
result = s.loc[idx]
|
||||
if oob:
|
||||
res = slice(0, 0)
|
||||
else:
|
||||
res = res1
|
||||
|
||||
self.check(result, s, res, False)
|
||||
|
||||
# positional indexing
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
|
||||
def test_integer_positional_indexing(self, idx):
|
||||
"""make sure that we are raising on positional indexing
|
||||
w.r.t. an integer index
|
||||
"""
|
||||
s = Series(range(2, 6), index=range(2, 6))
|
||||
|
||||
result = s[2:4]
|
||||
expected = s.iloc[2:4]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
klass = RangeIndex
|
||||
msg = (
|
||||
"cannot do (slice|positional) indexing "
|
||||
rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[idx]
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s.iloc[idx]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
|
||||
)
|
||||
def test_slice_integer_frame_getitem(self, index):
|
||||
# similar to above, but on the getitem dim (of a DataFrame)
|
||||
s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)
|
||||
|
||||
# getitem
|
||||
for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
|
||||
result = s.loc[idx]
|
||||
indexer = slice(0, 2)
|
||||
self.check(result, s, indexer, False)
|
||||
|
||||
# positional indexing
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[idx]
|
||||
|
||||
# getitem out-of-bounds
|
||||
for idx in [slice(-10, 10), slice(-10.0, 10.0)]:
|
||||
result = s.loc[idx]
|
||||
self.check(result, s, slice(-10, 10), True)
|
||||
|
||||
# positional indexing
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[-10\.0\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[slice(-10.0, 10.0)]
|
||||
|
||||
# getitem odd floats
|
||||
for idx, res in [
|
||||
(slice(0.5, 1), slice(1, 2)),
|
||||
(slice(0, 0.5), slice(0, 1)),
|
||||
(slice(0.5, 1.5), slice(1, 2)),
|
||||
]:
|
||||
result = s.loc[idx]
|
||||
self.check(result, s, res, False)
|
||||
|
||||
# positional indexing
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[0\.5\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
|
||||
@pytest.mark.parametrize(
|
||||
"index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
|
||||
)
|
||||
def test_float_slice_getitem_with_integer_index_raises(self, idx, index):
|
||||
# similar to above, but on the getitem dim (of a DataFrame)
|
||||
s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)
|
||||
|
||||
# setitem
|
||||
sc = s.copy()
|
||||
sc.loc[idx] = 0
|
||||
result = sc.loc[idx].values.ravel()
|
||||
assert (result == 0).all()
|
||||
|
||||
# positional indexing
|
||||
msg = (
|
||||
"cannot do slice indexing "
|
||||
rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
|
||||
"type float"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[idx] = 0
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
|
||||
def test_slice_float(self, idx, frame_or_series, indexer_sl):
|
||||
# same as above, but for floats
|
||||
index = Index(np.arange(5.0)) + 0.1
|
||||
s = gen_obj(frame_or_series, index)
|
||||
|
||||
expected = s.iloc[3:4]
|
||||
|
||||
# getitem
|
||||
result = indexer_sl(s)[idx]
|
||||
assert isinstance(result, type(s))
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# setitem
|
||||
s2 = s.copy()
|
||||
indexer_sl(s2)[idx] = 0
|
||||
result = indexer_sl(s2)[idx].values.ravel()
|
||||
assert (result == 0).all()
|
||||
|
||||
def test_floating_index_doc_example(self):
|
||||
index = Index([1.5, 2, 3, 4.5, 5])
|
||||
s = Series(range(5), index=index)
|
||||
assert s[3] == 2
|
||||
assert s.loc[3] == 2
|
||||
assert s.iloc[3] == 3
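# on this float-dtype index, s[3] and s.loc[3] are label-based lookups
# (the label 3.0 holds the value 2), while s.iloc[3] is purely positional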
|
||||
|
||||
def test_floating_misc(self, indexer_sl):
|
||||
# related 236
|
||||
# scalar/slicing of a float index
|
||||
s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
|
||||
|
||||
# label based slicing
|
||||
result = indexer_sl(s)[1.0:3.0]
|
||||
expected = Series(1, index=[2.5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# exact indexing when found
|
||||
|
||||
result = indexer_sl(s)[5.0]
|
||||
assert result == 2
|
||||
|
||||
result = indexer_sl(s)[5]
|
||||
assert result == 2
|
||||
|
||||
# value not found (and no fallback at all)
|
||||
|
||||
# scalar integers
|
||||
with pytest.raises(KeyError, match=r"^4$"):
|
||||
indexer_sl(s)[4]
|
||||
|
||||
# fancy floats/integers create the correct entry (as nan)
|
||||
# fancy tests
|
||||
expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64))
|
||||
for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
|
||||
tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)
|
||||
|
||||
expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
|
||||
for fancy_idx in [[5, 0], np.array([5, 0])]:
|
||||
tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)
|
||||
|
||||
warn = FutureWarning if indexer_sl is tm.setitem else None
|
||||
msg = r"The behavior of obj\[i:j\] with a float-dtype index"
|
||||
|
||||
# all should return the same as we are slicing 'the same'
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result1 = indexer_sl(s)[2:5]
|
||||
result2 = indexer_sl(s)[2.0:5.0]
|
||||
result3 = indexer_sl(s)[2.0:5]
|
||||
result4 = indexer_sl(s)[2.1:5]
|
||||
tm.assert_series_equal(result1, result2)
|
||||
tm.assert_series_equal(result1, result3)
|
||||
tm.assert_series_equal(result1, result4)
|
||||
|
||||
expected = Series([1, 2], index=[2.5, 5.0])
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = indexer_sl(s)[2:5]
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# list selection
|
||||
result1 = indexer_sl(s)[[0.0, 5, 10]]
|
||||
result2 = s.iloc[[0, 2, 4]]
|
||||
tm.assert_series_equal(result1, result2)
|
||||
|
||||
with pytest.raises(KeyError, match="not in index"):
|
||||
indexer_sl(s)[[1.6, 5, 10]]
|
||||
|
||||
with pytest.raises(KeyError, match="not in index"):
|
||||
indexer_sl(s)[[0, 1, 2]]
|
||||
|
||||
result = indexer_sl(s)[[2.5, 5]]
|
||||
tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))
|
||||
|
||||
result = indexer_sl(s)[[2.5]]
|
||||
tm.assert_series_equal(result, Series([1], index=[2.5]))
|
||||
|
||||
def test_floatindex_slicing_bug(self, float_numpy_dtype):
|
||||
# GH 5557, related to slicing a float index
|
||||
dtype = float_numpy_dtype
|
||||
ser = {
|
||||
256: 2321.0,
|
||||
1: 78.0,
|
||||
2: 2716.0,
|
||||
3: 0.0,
|
||||
4: 369.0,
|
||||
5: 0.0,
|
||||
6: 269.0,
|
||||
7: 0.0,
|
||||
8: 0.0,
|
||||
9: 0.0,
|
||||
10: 3536.0,
|
||||
11: 0.0,
|
||||
12: 24.0,
|
||||
13: 0.0,
|
||||
14: 931.0,
|
||||
15: 0.0,
|
||||
16: 101.0,
|
||||
17: 78.0,
|
||||
18: 9643.0,
|
||||
19: 0.0,
|
||||
20: 0.0,
|
||||
21: 0.0,
|
||||
22: 63761.0,
|
||||
23: 0.0,
|
||||
24: 446.0,
|
||||
25: 0.0,
|
||||
26: 34773.0,
|
||||
27: 0.0,
|
||||
28: 729.0,
|
||||
29: 78.0,
|
||||
30: 0.0,
|
||||
31: 0.0,
|
||||
32: 3374.0,
|
||||
33: 0.0,
|
||||
34: 1391.0,
|
||||
35: 0.0,
|
||||
36: 361.0,
|
||||
37: 0.0,
|
||||
38: 61808.0,
|
||||
39: 0.0,
|
||||
40: 0.0,
|
||||
41: 0.0,
|
||||
42: 6677.0,
|
||||
43: 0.0,
|
||||
44: 802.0,
|
||||
45: 0.0,
|
||||
46: 2691.0,
|
||||
47: 0.0,
|
||||
48: 3582.0,
|
||||
49: 0.0,
|
||||
50: 734.0,
|
||||
51: 0.0,
|
||||
52: 627.0,
|
||||
53: 70.0,
|
||||
54: 2584.0,
|
||||
55: 0.0,
|
||||
56: 324.0,
|
||||
57: 0.0,
|
||||
58: 605.0,
|
||||
59: 0.0,
|
||||
60: 0.0,
|
||||
61: 0.0,
|
||||
62: 3989.0,
|
||||
63: 10.0,
|
||||
64: 42.0,
|
||||
65: 0.0,
|
||||
66: 904.0,
|
||||
67: 0.0,
|
||||
68: 88.0,
|
||||
69: 70.0,
|
||||
70: 8172.0,
|
||||
71: 0.0,
|
||||
72: 0.0,
|
||||
73: 0.0,
|
||||
74: 64902.0,
|
||||
75: 0.0,
|
||||
76: 347.0,
|
||||
77: 0.0,
|
||||
78: 36605.0,
|
||||
79: 0.0,
|
||||
80: 379.0,
|
||||
81: 70.0,
|
||||
82: 0.0,
|
||||
83: 0.0,
|
||||
84: 3001.0,
|
||||
85: 0.0,
|
||||
86: 1630.0,
|
||||
87: 7.0,
|
||||
88: 364.0,
|
||||
89: 0.0,
|
||||
90: 67404.0,
|
||||
91: 9.0,
|
||||
92: 0.0,
|
||||
93: 0.0,
|
||||
94: 7685.0,
|
||||
95: 0.0,
|
||||
96: 1017.0,
|
||||
97: 0.0,
|
||||
98: 2831.0,
|
||||
99: 0.0,
|
||||
100: 2963.0,
|
||||
101: 0.0,
|
||||
102: 854.0,
|
||||
103: 0.0,
|
||||
104: 0.0,
|
||||
105: 0.0,
|
||||
106: 0.0,
|
||||
107: 0.0,
|
||||
108: 0.0,
|
||||
109: 0.0,
|
||||
110: 0.0,
|
||||
111: 0.0,
|
||||
112: 0.0,
|
||||
113: 0.0,
|
||||
114: 0.0,
|
||||
115: 0.0,
|
||||
116: 0.0,
|
||||
117: 0.0,
|
||||
118: 0.0,
|
||||
119: 0.0,
|
||||
120: 0.0,
|
||||
121: 0.0,
|
||||
122: 0.0,
|
||||
123: 0.0,
|
||||
124: 0.0,
|
||||
125: 0.0,
|
||||
126: 67744.0,
|
||||
127: 22.0,
|
||||
128: 264.0,
|
||||
129: 0.0,
|
||||
260: 197.0,
|
||||
268: 0.0,
|
||||
265: 0.0,
|
||||
269: 0.0,
|
||||
261: 0.0,
|
||||
266: 1198.0,
|
||||
267: 0.0,
|
||||
262: 2629.0,
|
||||
258: 775.0,
|
||||
257: 0.0,
|
||||
263: 0.0,
|
||||
259: 0.0,
|
||||
264: 163.0,
|
||||
250: 10326.0,
|
||||
251: 0.0,
|
||||
252: 1228.0,
|
||||
253: 0.0,
|
||||
254: 2769.0,
|
||||
255: 0.0,
|
||||
}
|
||||
|
||||
# smoke test for the repr
|
||||
s = Series(ser, dtype=dtype)
|
||||
result = s.value_counts()
|
||||
assert result.index.dtype == dtype
|
||||
str(result)
|
@ -0,0 +1,53 @@
import numpy as np

from pandas import (
    DataFrame,
    Series,
    period_range,
)
import pandas._testing as tm


def test_iat(float_frame):
    for i, row in enumerate(float_frame.index):
        for j, col in enumerate(float_frame.columns):
            result = float_frame.iat[i, j]
            expected = float_frame.at[row, col]
            assert result == expected


def test_iat_duplicate_columns():
    # https://github.com/pandas-dev/pandas/issues/11754
    df = DataFrame([[1, 2]], columns=["x", "x"])
    assert df.iat[0, 0] == 1


def test_iat_getitem_series_with_period_index():
    # GH#4390, iat incorrectly indexing
    index = period_range("1/1/2001", periods=10)
    ser = Series(np.random.default_rng(2).standard_normal(10), index=index)
    expected = ser[index[0]]
    result = ser.iat[0]
    assert expected == result


def test_iat_setitem_item_cache_cleared(
    indexer_ial, using_copy_on_write, warn_copy_on_write
):
    # GH#45684
    data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)}
    df = DataFrame(data).copy()
    ser = df["y"]

    # previously this iat setting would split the block and fail to clear
    # the item_cache.
    with tm.assert_cow_warning(warn_copy_on_write):
        indexer_ial(df)[7, 0] = 9999

    with tm.assert_cow_warning(warn_copy_on_write):
        indexer_ial(df)[7, 1] = 1234

    assert df.iat[7, 1] == 1234
    if not using_copy_on_write:
        assert ser.iloc[-1] == 1234
        assert df.iloc[-1, -1] == 1234
venv/lib/python3.12/site-packages/pandas/tests/indexing/test_iloc.py (new file, 1478 lines; diff suppressed because it is too large)
@ -0,0 +1,61 @@
# Tests aimed at pandas.core.indexers
import numpy as np
import pytest

from pandas.core.indexers import (
    is_scalar_indexer,
    length_of_indexer,
    validate_indices,
)


def test_length_of_indexer():
    arr = np.zeros(4, dtype=bool)
    arr[0] = 1
    result = length_of_indexer(arr)
    assert result == 1


def test_is_scalar_indexer():
    indexer = (0, 1)
    assert is_scalar_indexer(indexer, 2)
    assert not is_scalar_indexer(indexer[0], 2)

    indexer = (np.array([2]), 1)
    assert not is_scalar_indexer(indexer, 2)

    indexer = (np.array([2]), np.array([3]))
    assert not is_scalar_indexer(indexer, 2)

    indexer = (np.array([2]), np.array([3, 4]))
    assert not is_scalar_indexer(indexer, 2)

    assert not is_scalar_indexer(slice(None), 1)

    indexer = 0
    assert is_scalar_indexer(indexer, 1)

    indexer = (0,)
    assert is_scalar_indexer(indexer, 1)


class TestValidateIndices:
    def test_validate_indices_ok(self):
        indices = np.asarray([0, 1])
        validate_indices(indices, 2)
        validate_indices(indices[:0], 0)
        validate_indices(np.array([-1, -1]), 0)

    def test_validate_indices_low(self):
        indices = np.asarray([0, -2])
        with pytest.raises(ValueError, match="'indices' contains"):
            validate_indices(indices, 2)

    def test_validate_indices_high(self):
        indices = np.asarray([0, 1, 2])
        with pytest.raises(IndexError, match="indices are out"):
            validate_indices(indices, 2)

    def test_validate_indices_empty(self):
        with pytest.raises(IndexError, match="indices are out"):
            validate_indices(np.array([0, 1]), 0)
File diff suppressed because it is too large
venv/lib/python3.12/site-packages/pandas/tests/indexing/test_loc.py (new file, 3366 lines; diff suppressed because it is too large)
@ -0,0 +1,75 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, dtype",
|
||||
[
|
||||
([], "object"),
|
||||
([1, 2, 3], "int64"),
|
||||
([1.0, 2.0, 3.0], "float64"),
|
||||
(["a", "b", "c"], "object"),
|
||||
(["a", "b", "c"], "string"),
|
||||
([1, 2, 3], "datetime64[ns]"),
|
||||
([1, 2, 3], "datetime64[ns, CET]"),
|
||||
([1, 2, 3], "timedelta64[ns]"),
|
||||
(["2000", "2001", "2002"], "Period[D]"),
|
||||
([1, 0, 3], "Sparse"),
|
||||
([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"mask", [[True, False, False], [True, True, True], [False, False, False]]
|
||||
)
|
||||
@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series])
|
||||
@pytest.mark.parametrize("frame", [True, False])
|
||||
def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
|
||||
# In case len(values) < 3
|
||||
index = ["a", "b", "c"][: len(values)]
|
||||
mask = mask[: len(values)]
|
||||
|
||||
obj = pd.Series(values, dtype=dtype, index=index)
|
||||
if frame:
|
||||
if len(values) == 0:
|
||||
# Otherwise obj is an empty DataFrame with shape (0, 1)
|
||||
obj = pd.DataFrame(dtype=dtype, index=index)
|
||||
else:
|
||||
obj = obj.to_frame()
|
||||
|
||||
if indexer_class is pd.array:
|
||||
mask = pd.array(mask, dtype="boolean")
|
||||
elif indexer_class is pd.Series:
|
||||
mask = pd.Series(mask, index=obj.index, dtype="boolean")
|
||||
else:
|
||||
mask = indexer_class(mask)
|
||||
|
||||
expected = obj[mask]
|
||||
|
||||
result = obj[mask]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
if indexer_class is pd.Series:
|
||||
msg = "iLocation based boolean indexing cannot use an indexable as a mask"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
result = obj.iloc[mask]
|
||||
tm.assert_equal(result, expected)
|
||||
else:
|
||||
result = obj.iloc[mask]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = obj.loc[mask]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_na_treated_as_false(frame_or_series, indexer_sli):
|
||||
# https://github.com/pandas-dev/pandas/issues/31503
|
||||
obj = frame_or_series([1, 2, 3])
|
||||
|
||||
mask = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
result = indexer_sli(obj)[mask]
|
||||
expected = indexer_sli(obj)[mask.fillna(False)]
|
||||
|
||||
tm.assert_equal(result, expected)
|
@ -0,0 +1,702 @@
|
||||
"""
|
||||
test setting *parts* of objects both positionally and label based
|
||||
|
||||
TODO: these should be split among the indexer tests
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Period,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestEmptyFrameSetitemExpansion:
|
||||
def test_empty_frame_setitem_index_name_retained(self):
|
||||
# GH#31368 empty frame has non-None index.name -> retained
|
||||
df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
|
||||
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||||
|
||||
df["series"] = series
|
||||
expected = DataFrame(
|
||||
{"series": [1.23] * 4},
|
||||
index=pd.RangeIndex(4, name="df_index"),
|
||||
columns=Index(["series"], dtype=object),
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_empty_frame_setitem_index_name_inherited(self):
|
||||
# GH#36527 empty frame has None index.name -> not retained
|
||||
df = DataFrame()
|
||||
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||||
df["series"] = series
|
||||
expected = DataFrame(
|
||||
{"series": [1.23] * 4},
|
||||
index=pd.RangeIndex(4, name="series_index"),
|
||||
columns=Index(["series"], dtype=object),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_zerolen_series_columns_align(self):
|
||||
# columns will align
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[0] = Series(1, index=range(4))
|
||||
expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# columns will align
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[0] = Series(1, index=["B"])
|
||||
|
||||
exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_loc_setitem_zerolen_list_length_must_match_columns(self):
|
||||
# list-like must conform
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
|
||||
msg = "cannot set a row with mismatched columns"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[0] = [1, 2, 3]
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[3] = [6, 7] # length matches len(df.columns) --> OK!
|
||||
|
||||
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64)
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_partial_set_empty_frame(self):
|
||||
# partially set with an empty object
|
||||
# frame
|
||||
df = DataFrame()
|
||||
|
||||
msg = "cannot set a frame with no defined columns"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[1] = 1
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[1] = Series([1], index=["foo"])
|
||||
|
||||
msg = "cannot set a frame with no defined index and a scalar"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[:, 1] = 1
|
||||
|
||||
def test_partial_set_empty_frame2(self):
|
||||
# these work as they don't really change
|
||||
# anything but the index
|
||||
# GH#5632
|
||||
expected = DataFrame(
|
||||
columns=Index(["foo"], dtype=object), index=Index([], dtype="object")
|
||||
)
|
||||
|
||||
df = DataFrame(index=Index([], dtype="object"))
|
||||
df["foo"] = Series([], dtype="object")
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([]))
|
||||
df["foo"] = Series(df.index)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([]))
|
||||
df["foo"] = df.index
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame3(self):
|
||||
expected = DataFrame(
|
||||
columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
|
||||
)
|
||||
expected["foo"] = expected["foo"].astype("float64")
|
||||
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = []
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = Series(np.arange(len(df)), dtype="float64")
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame4(self):
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = range(len(df))
|
||||
|
||||
expected = DataFrame(
|
||||
columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
|
||||
)
|
||||
# range is int-dtype-like, so we get int64 dtype
|
||||
expected["foo"] = expected["foo"].astype("int64")
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame5(self):
|
||||
df = DataFrame()
|
||||
tm.assert_index_equal(df.columns, pd.RangeIndex(0))
|
||||
df2 = DataFrame()
|
||||
df2[1] = Series([1], index=["foo"])
|
||||
df.loc[:, 1] = Series([1], index=["foo"])
|
||||
tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
|
||||
tm.assert_frame_equal(df, df2)
|
||||
|
||||
def test_partial_set_empty_frame_no_index(self):
|
||||
# no index to start
|
||||
expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df[0] = Series(1, index=range(4))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[:, 0] = Series(1, index=range(4))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_row(self):
|
||||
# GH#5720, GH#5744
|
||||
# don't create rows when empty
|
||||
expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
|
||||
expected["A"] = expected["A"].astype("int64")
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
expected["New"] = expected["New"].astype("float64")
|
||||
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||||
y = df[df.A > 5]
|
||||
y["New"] = np.nan
|
||||
tm.assert_frame_equal(y, expected)
|
||||
|
||||
expected = DataFrame(columns=["a", "b", "c c", "d"])
|
||||
expected["d"] = expected["d"].astype("int64")
|
||||
df = DataFrame(columns=["a", "b", "c c"])
|
||||
df["d"] = 3
|
||||
tm.assert_frame_equal(df, expected)
|
||||
tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
|
||||
|
||||
# reindex columns is ok
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||||
y = df[df.A > 5]
|
||||
result = y.reindex(columns=["A", "B", "C"])
|
||||
expected = DataFrame(columns=["A", "B", "C"])
|
||||
expected["A"] = expected["A"].astype("int64")
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
expected["C"] = expected["C"].astype("float64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_set_series(self):
|
||||
# GH#5756
|
||||
# setting with empty Series
|
||||
df = DataFrame(Series(dtype=object))
|
||||
expected = DataFrame({0: Series(dtype=object)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(Series(name="foo", dtype=object))
|
||||
expected = DataFrame({"foo": Series(dtype=object)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_copy_assignment(self):
|
||||
# GH#5932
|
||||
# copy on empty with assignment fails
|
||||
df = DataFrame(index=[0])
|
||||
df = df.copy()
|
||||
df["a"] = 0
|
||||
expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
|
||||
# GH#6171
|
||||
# consistency on empty frames
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df["x"] = [1, 2]
|
||||
expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
|
||||
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df["x"] = ["1", "2"]
|
||||
expected = DataFrame(
|
||||
{
|
||||
"x": Series(
|
||||
["1", "2"],
|
||||
dtype=object if not using_infer_string else "string[pyarrow_numpy]",
|
||||
),
|
||||
"y": Series([np.nan, np.nan], dtype=object),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df.loc[0, "x"] = 1
|
||||
expected = DataFrame({"x": [1], "y": [np.nan]})
|
||||
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
|
||||
|
||||
class TestPartialSetting:
|
||||
def test_partial_setting(self):
|
||||
# GH2578, allow ix and friends to partially set
|
||||
|
||||
# series
|
||||
s_orig = Series([1, 2, 3])
|
||||
|
||||
s = s_orig.copy()
|
||||
s[5] = 5
|
||||
expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
s = s_orig.copy()
|
||||
s.loc[5] = 5
|
||||
expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
s = s_orig.copy()
|
||||
s[5] = 5.0
|
||||
expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
s = s_orig.copy()
|
||||
s.loc[5] = 5.0
|
||||
expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# iloc/iat raise
|
||||
s = s_orig.copy()
|
||||
|
||||
msg = "iloc cannot enlarge its target object"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
s.iloc[3] = 5.0
|
||||
|
||||
msg = "index 3 is out of bounds for axis 0 with size 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
s.iat[3] = 5.0
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
|
||||
def test_partial_setting_frame(self, using_array_manager):
|
||||
df_orig = DataFrame(
|
||||
np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
|
||||
)
|
||||
|
||||
# iloc/iat raise
|
||||
df = df_orig.copy()
|
||||
|
||||
msg = "iloc cannot enlarge its target object"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.iloc[4, 2] = 5.0
|
||||
|
||||
msg = "index 2 is out of bounds for axis 0 with size 2"
|
||||
if using_array_manager:
|
||||
msg = "list index out of range"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.iat[4, 2] = 5.0
|
||||
|
||||
# row setting where it exists
|
||||
expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
|
||||
df = df_orig.copy()
|
||||
df.iloc[1] = df.iloc[2]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
|
||||
df = df_orig.copy()
|
||||
df.loc[1] = df.loc[2]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# like 2578, partial setting with dtype preservation
|
||||
expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
|
||||
df = df_orig.copy()
|
||||
df.loc[3] = df.loc[2]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# single dtype frame, overwrite
|
||||
expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
|
||||
df = df_orig.copy()
|
||||
df.loc[:, "B"] = df.loc[:, "A"]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# mixed dtype frame, overwrite
|
||||
expected = DataFrame({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})
|
||||
df = df_orig.copy()
|
||||
df["B"] = df["B"].astype(np.float64)
|
||||
# as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
|
||||
# setting inplace
|
||||
df.loc[:, "B"] = df.loc[:, "A"]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# single dtype frame, partial setting
|
||||
expected = df_orig.copy()
|
||||
expected["C"] = df["A"]
|
||||
df = df_orig.copy()
|
||||
df.loc[:, "C"] = df.loc[:, "A"]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# mixed frame, partial setting
|
||||
expected = df_orig.copy()
|
||||
expected["C"] = df["A"]
|
||||
df = df_orig.copy()
|
||||
df.loc[:, "C"] = df.loc[:, "A"]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting2(self):
|
||||
# GH 8473
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df_orig = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((8, 4)),
|
||||
index=dates,
|
||||
columns=["A", "B", "C", "D"],
|
||||
)
|
||||
|
||||
expected = pd.concat(
|
||||
[df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True
|
        )
        df = df_orig.copy()
        df.loc[dates[-1] + dates.freq, "A"] = 7
        tm.assert_frame_equal(df, expected)
        df = df_orig.copy()
        df.at[dates[-1] + dates.freq, "A"] = 7
        tm.assert_frame_equal(df, expected)

        exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq)
        expected = pd.concat([df_orig, exp_other], axis=1)

        df = df_orig.copy()
        df.loc[dates[-1] + dates.freq, 0] = 7
        tm.assert_frame_equal(df, expected)
        df = df_orig.copy()
        df.at[dates[-1] + dates.freq, 0] = 7
        tm.assert_frame_equal(df, expected)

    def test_partial_setting_mixed_dtype(self):
        # in a mixed dtype environment, try to preserve dtypes
        # by appending
        df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])

        s = df.loc[1].copy()
        s.name = 2
        expected = pd.concat([df, DataFrame(s).T.infer_objects()])

        df.loc[2] = df.loc[1]
        tm.assert_frame_equal(df, expected)

    def test_series_partial_set(self):
        # partial set with new index
        # Regression from GH4825
        ser = Series([0.1, 0.2], index=[1, 2])

        # loc equiv to .reindex
        expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
        with pytest.raises(KeyError, match=r"not in index"):
            ser.loc[[3, 2, 3]]

        result = ser.reindex([3, 2, 3])
        tm.assert_series_equal(result, expected, check_index_type=True)

        expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[3, 2, 3, "x"]]

        result = ser.reindex([3, 2, 3, "x"])
        tm.assert_series_equal(result, expected, check_index_type=True)

        expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
        result = ser.loc[[2, 2, 1]]
        tm.assert_series_equal(result, expected, check_index_type=True)

        expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[2, 2, "x", 1]]

        result = ser.reindex([2, 2, "x", 1])
        tm.assert_series_equal(result, expected, check_index_type=True)

        # raises as nothing is in the index
        msg = (
            rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}'\)\] "
            r"are in the \[index\]\""
        )
        with pytest.raises(KeyError, match=msg):
            ser.loc[[3, 3, 3]]

        expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[2, 2, 3]]

        result = ser.reindex([2, 2, 3])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
        expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[3, 4, 4]]

        result = s.reindex([3, 4, 4])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
        expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[5, 3, 3]]

        result = s.reindex([5, 3, 3])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
        expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[5, 4, 4]]

        result = s.reindex([5, 4, 4])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
        expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[7, 2, 2]]

        result = s.reindex([7, 2, 2])
        tm.assert_series_equal(result, expected, check_index_type=True)

        s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
        expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
        with pytest.raises(KeyError, match="not in index"):
            s.loc[[4, 5, 5]]

        result = s.reindex([4, 5, 5])
        tm.assert_series_equal(result, expected, check_index_type=True)

        # iloc
        expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
        result = ser.iloc[[1, 1, 0, 0]]
        tm.assert_series_equal(result, expected, check_index_type=True)

    def test_series_partial_set_with_name(self):
        # GH 11497

        idx = Index([1, 2], dtype="int64", name="idx")
        ser = Series([0.1, 0.2], index=idx, name="s")

        # loc
        with pytest.raises(KeyError, match=r"\[3\] not in index"):
            ser.loc[[3, 2, 3]]

        with pytest.raises(KeyError, match=r"not in index"):
            ser.loc[[3, 2, 3, "x"]]

        exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
        expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
        result = ser.loc[[2, 2, 1]]
        tm.assert_series_equal(result, expected, check_index_type=True)

        with pytest.raises(KeyError, match=r"\['x'\] not in index"):
            ser.loc[[2, 2, "x", 1]]

        # raises as nothing is in the index
        msg = (
            rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}', "
            r"name='idx'\)\] are in the \[index\]\""
        )
        with pytest.raises(KeyError, match=msg):
            ser.loc[[3, 3, 3]]

        with pytest.raises(KeyError, match="not in index"):
            ser.loc[[2, 2, 3]]

        idx = Index([1, 2, 3], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]

        idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]

        idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]

        idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]

        idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]

        # iloc
        exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
        expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
        result = ser.iloc[[1, 1, 0, 0]]
        tm.assert_series_equal(result, expected, check_index_type=True)

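    # Setting a row with a numeric key on a frame indexed by dates enlarges the
    # frame; the test below checks that the enlarged index falls back to object
    # dtype rather than staying datetime-like.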
    @pytest.mark.parametrize("key", [100, 100.0])
    def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
        # GH#4940 inserting non-strings
        orig = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df = orig.copy()

        df.loc[key, :] = df.iloc[0]
        ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name)
        ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0)
        expected = DataFrame(ex_data, index=ex_index, columns=orig.columns)

        tm.assert_frame_equal(df, expected)

    def test_partial_set_invalid(self):
        # GH 4940
        # allow only setting of 'valid' values

        orig = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )

        # allow object conversion here
        df = orig.copy()
        df.loc["a", :] = df.iloc[0]
        ser = Series(df.iloc[0], name="a")
        exp = pd.concat([orig, DataFrame(ser).T.infer_objects()])
        tm.assert_frame_equal(df, exp)
        tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
        assert df.index.dtype == "object"

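    # The next three parametrized tests exercise .loc with lists of strings on
    # Period/Datetime/Timedelta indexes: strings parseable as the index's own
    # type are matched, partially missing labels raise KeyError, and strings of
    # a mismatched type raise with a "None of [...]" message.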
    @pytest.mark.parametrize(
        "idx,labels,expected_idx",
        [
            (
                period_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-08", "2000-01-12"],
                [
                    Period("2000-01-04", freq="D"),
                    Period("2000-01-08", freq="D"),
                    Period("2000-01-12", freq="D"),
                ],
            ),
            (
                date_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-08", "2000-01-12"],
                [
                    Timestamp("2000-01-04"),
                    Timestamp("2000-01-08"),
                    Timestamp("2000-01-12"),
                ],
            ),
            (
                pd.timedelta_range(start="1 day", periods=20),
                ["4D", "8D", "12D"],
                [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
            ),
        ],
    )
    def test_loc_with_list_of_strings_representing_datetimes(
        self, idx, labels, expected_idx, frame_or_series
    ):
        # GH 11278
        obj = frame_or_series(range(20), index=idx)

        expected_value = [3, 7, 11]
        expected = frame_or_series(expected_value, expected_idx)

        tm.assert_equal(expected, obj.loc[labels])
        if frame_or_series is Series:
            tm.assert_series_equal(expected, obj[labels])

    @pytest.mark.parametrize(
        "idx,labels",
        [
            (
                period_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-30"],
            ),
            (
                date_range(start="2000", periods=20, freq="D"),
                ["2000-01-04", "2000-01-30"],
            ),
            (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
        ],
    )
    def test_loc_with_list_of_strings_representing_datetimes_missing_value(
        self, idx, labels
    ):
        # GH 11278
        ser = Series(range(20), index=idx)
        df = DataFrame(range(20), index=idx)
        msg = r"not in index"

        with pytest.raises(KeyError, match=msg):
            ser.loc[labels]
        with pytest.raises(KeyError, match=msg):
            ser[labels]
        with pytest.raises(KeyError, match=msg):
            df.loc[labels]

    @pytest.mark.parametrize(
        "idx,labels,msg",
        [
            (
                period_range(start="2000", periods=20, freq="D"),
                Index(["4D", "8D"], dtype=object),
                (
                    r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                    r"are in the \[index\]"
                ),
            ),
            (
                date_range(start="2000", periods=20, freq="D"),
                Index(["4D", "8D"], dtype=object),
                (
                    r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                    r"are in the \[index\]"
                ),
            ),
            (
                pd.timedelta_range(start="1 day", periods=20),
                Index(["2000-01-04", "2000-01-08"], dtype=object),
                (
                    r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                    r"dtype='object'\)\] are in the \[index\]"
                ),
            ),
        ],
    )
    def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
        self, idx, labels, msg
    ):
        # GH 11278
        ser = Series(range(20), index=idx)
        df = DataFrame(range(20), index=idx)

        with pytest.raises(KeyError, match=msg):
            ser.loc[labels]
        with pytest.raises(KeyError, match=msg):
            ser[labels]
        with pytest.raises(KeyError, match=msg):
            df.loc[labels]


class TestStringSlicing:
    def test_slice_irregular_datetime_index_with_nan(self):
        # GH36953
        index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
        df = DataFrame(range(len(index)), index=index)
        expected = DataFrame(range(len(index[:3])), index=index[:3])
        with pytest.raises(KeyError, match="non-existing keys is not allowed"):
            # Upper bound is not in index (which is unordered)
            # GH53983
            # GH37819
            df["2012-01-01":"2012-01-04"]
        # Need this precision for right bound since the right slice
        # bound is "rounded" up to the largest timepoint smaller than
        # the next "resolution"-step of the provided point.
        # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns
        result = df["2012-01-01":"2012-01-03 00:00:00.000000000"]
        tm.assert_frame_equal(result, expected)
@ -0,0 +1,303 @@
""" test scalar indexing, including at and iat """
from datetime import (
    datetime,
    timedelta,
)
import itertools

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    Timedelta,
    Timestamp,
    date_range,
)
import pandas._testing as tm


def generate_indices(f, values=False):
    """
    Generate indexers for the object ``f``.

    If ``values`` is True, use positional ranges over each axis (as needed
    for iat/iloc); if False, use the axis labels themselves (for at/loc).
    """
    axes = f.axes
    if values:
        axes = (list(range(len(ax))) for ax in axes)

    return itertools.product(*axes)


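# The series_*/frame_* fixtures requested via request.getfixturevalue below
# (e.g. series_ints, frame_labels) are assumed to be defined elsewhere in this
# test suite's fixtures.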
class TestScalar:
    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints"])
    def test_iat_set_ints(self, kind, col, request):
        f = request.getfixturevalue(f"{kind}_{col}")
        indices = generate_indices(f, True)
        for i in indices:
            f.iat[i] = 1
            expected = f.values[i]
            tm.assert_almost_equal(expected, 1)

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["labels", "ts", "floats"])
    def test_iat_set_other(self, kind, col, request):
        f = request.getfixturevalue(f"{kind}_{col}")
        msg = "iAt based indexing can only have integer indexers"
        with pytest.raises(ValueError, match=msg):
            idx = next(generate_indices(f, False))
            f.iat[idx] = 1

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"])
    def test_at_set_ints_other(self, kind, col, request):
        f = request.getfixturevalue(f"{kind}_{col}")
        indices = generate_indices(f, False)
        for i in indices:
            f.at[i] = 1
            expected = f.loc[i]
            tm.assert_almost_equal(expected, 1)


class TestAtAndiAT:
    # at and iat tests that don't need Base class

    def test_float_index_at_iat(self):
        ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3])
        for el, item in ser.items():
            assert ser.at[el] == item
        for i in range(len(ser)):
            assert ser.iat[i] == i + 1

    def test_at_iat_coercion(self):
        # as timestamp is not a tuple!
        dates = date_range("1/1/2000", periods=8)
        df = DataFrame(
            np.random.default_rng(2).standard_normal((8, 4)),
            index=dates,
            columns=["A", "B", "C", "D"],
        )
        s = df["A"]

        result = s.at[dates[5]]
        xp = s.values[5]
        assert result == xp

    @pytest.mark.parametrize(
        "ser, expected",
        [
            [
                Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]"),
                Timestamp("2014-02-02"),
            ],
            [
                Series(["1 days", "2 days"], dtype="timedelta64[ns]"),
                Timedelta("2 days"),
            ],
        ],
    )
    def test_iloc_iat_coercion_datelike(self, indexer_ial, ser, expected):
        # GH 7729
        # make sure we are boxing the returns
        result = indexer_ial(ser)[1]
        assert result == expected

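    # iloc/iat are purely positional, so the duplicate index labels below do not
    # change which element position 2 refers to.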
    def test_imethods_with_dups(self):
        # GH6493
        # iat/iloc with dups

        s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64")
        result = s.iloc[2]
        assert result == 2
        result = s.iat[2]
        assert result == 2

        msg = "index 10 is out of bounds for axis 0 with size 5"
        with pytest.raises(IndexError, match=msg):
            s.iat[10]
        msg = "index -10 is out of bounds for axis 0 with size 5"
        with pytest.raises(IndexError, match=msg):
            s.iat[-10]

        result = s.iloc[[2, 3]]
        expected = Series([2, 3], [2, 2], dtype="int64")
        tm.assert_series_equal(result, expected)

        df = s.to_frame()
        result = df.iloc[2]
        expected = Series(2, index=[0], name=2)
        tm.assert_series_equal(result, expected)

        result = df.iat[2, 0]
        assert result == 2

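    # With duplicated column labels, .at on a single row returns a Series of all
    # matching entries rather than a single scalar, as checked below.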
    def test_frame_at_with_duplicate_axes(self):
        # GH#33041
        arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2)
        df = DataFrame(arr, columns=["A", "A"])

        result = df.at[0, "A"]
        expected = df.iloc[0].copy()

        tm.assert_series_equal(result, expected)

        result = df.T.at["A", 0]
        tm.assert_series_equal(result, expected)

        # setter
        df.at[1, "A"] = 2
        expected = Series([2.0, 2.0], index=["A", "A"], name=1)
        tm.assert_series_equal(df.iloc[1], expected)

    def test_at_getitem_dt64tz_values(self):
        # gh-15822
        df = DataFrame(
            {
                "name": ["John", "Anderson"],
                "date": [
                    Timestamp(2017, 3, 13, 13, 32, 56),
                    Timestamp(2017, 2, 16, 12, 10, 3),
                ],
            }
        )
        df["date"] = df["date"].dt.tz_localize("Asia/Shanghai")

        expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai")

        result = df.loc[0, "date"]
        assert result == expected

        result = df.at[0, "date"]
        assert result == expected

    def test_mixed_index_at_iat_loc_iloc_series(self):
        # GH 19860
        s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
        for el, item in s.items():
            assert s.at[el] == s.loc[el] == item
        for i in range(len(s)):
            assert s.iat[i] == s.iloc[i] == i + 1

        with pytest.raises(KeyError, match="^4$"):
            s.at[4]
        with pytest.raises(KeyError, match="^4$"):
            s.loc[4]

    def test_mixed_index_at_iat_loc_iloc_dataframe(self):
        # GH 19860
        df = DataFrame(
            [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2]
        )
        for rowIdx, row in df.iterrows():
            for el, item in row.items():
                assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item

        for row in range(2):
            for i in range(5):
                assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i

        with pytest.raises(KeyError, match="^3$"):
            df.at[0, 3]
        with pytest.raises(KeyError, match="^3$"):
            df.loc[0, 3]

    def test_iat_setter_incompatible_assignment(self):
        # GH 23236
        result = DataFrame({"a": [0.0, 1.0], "b": [4, 5]})
        result.iat[0, 0] = None
        expected = DataFrame({"a": [None, 1], "b": [4, 5]})
        tm.assert_frame_equal(result, expected)


def test_iat_dont_wrap_object_datetimelike():
    # GH#32809 .iat calls go through DataFrame._get_value, should not
    # call maybe_box_datetimelike
    dti = date_range("2016-01-01", periods=3)
    tdi = dti - dti
    ser = Series(dti.to_pydatetime(), dtype=object)
    ser2 = Series(tdi.to_pytimedelta(), dtype=object)
    df = DataFrame({"A": ser, "B": ser2})
    assert (df.dtypes == object).all()

    for result in [df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0]]:
        assert result is ser[0]
        assert isinstance(result, datetime)
        assert not isinstance(result, Timestamp)

    for result in [df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1]]:
        assert result is ser2[1]
        assert isinstance(result, timedelta)
        assert not isinstance(result, Timedelta)


def test_at_with_tuple_index_get():
    # GH 26989
    # DataFrame.at getter works with Index of tuples
    df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)])
    assert df.index.nlevels == 1
    assert df.at[(1, 2), "a"] == 1

    # Series.at getter works with Index of tuples
    series = df["a"]
    assert series.index.nlevels == 1
    assert series.at[(1, 2)] == 1


@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_at_with_tuple_index_set():
    # GH 26989
    # DataFrame.at setter works with Index of tuples
    df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)])
    assert df.index.nlevels == 1
    df.at[(1, 2), "a"] = 2
    assert df.at[(1, 2), "a"] == 2

    # Series.at setter works with Index of tuples
    series = df["a"]
    assert series.index.nlevels == 1
    series.at[1, 2] = 3
    assert series.at[1, 2] == 3


class TestMultiIndexScalar:
    def test_multiindex_at_get(self):
        # GH 26989
        # DataFrame.at and DataFrame.loc getter works with MultiIndex
        df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
        assert df.index.nlevels == 2
        assert df.at[(1, 3), "a"] == 1
        assert df.loc[(1, 3), "a"] == 1

        # Series.at and Series.loc getter works with MultiIndex
        series = df["a"]
        assert series.index.nlevels == 2
        assert series.at[1, 3] == 1
        assert series.loc[1, 3] == 1

    @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
    def test_multiindex_at_set(self):
        # GH 26989
        # DataFrame.at and DataFrame.loc setter works with MultiIndex
        df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
        assert df.index.nlevels == 2
        df.at[(1, 3), "a"] = 3
        assert df.at[(1, 3), "a"] == 3
        df.loc[(1, 3), "a"] = 4
        assert df.loc[(1, 3), "a"] == 4

        # Series.at and Series.loc setter works with MultiIndex
        series = df["a"]
        assert series.index.nlevels == 2
        series.at[1, 3] = 5
        assert series.at[1, 3] == 5
        series.loc[1, 3] = 6
        assert series.loc[1, 3] == 6

    def test_multiindex_at_get_one_level(self):
        # GH#38053
        s2 = Series((0, 1), index=[[False, True]])
        result = s2.at[False]
        assert result == 0