venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,87 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
from pandas.errors import SettingWithCopyError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):
    """Check how a chained in-place ``fillna`` is reported in each CoW mode.

    The same chained statement is executed in all three branches; only the
    expected error / warning context differs.
    """
    # Inplace ops, originally from:
    # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
    column_data = [[12, 23], [123, None], [1234, 2345], [12345, 23456]]
    events = dict(zip(tuples, column_data))
    multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=multiind)

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            zed["eyes"]["right"].fillna(value=555, inplace=True)
        return

    if warn_copy_on_write:
        with tm.assert_produces_warning(None):
            zed["eyes"]["right"].fillna(value=555, inplace=True)
        return

    msg = "A value is trying to be set on a copy of a slice from a DataFrame"
    with pytest.raises(SettingWithCopyError, match=msg), tm.assert_produces_warning(
        None
    ):
        zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
    """Chained and direct ``.loc`` setting on a MultiIndex-ed frame (GH 5216)."""
    # 5216
    # make sure that we don't try to set a dead cache
    values = np.random.default_rng(2).random((10, 3))
    df = DataFrame(values, columns=["x", "y", "z"])
    df_original = df.copy()

    pairs = []
    for outer in range(5):
        for inner in range(2):
            pairs.append((outer, inner))
    df.index = MultiIndex.from_tuples(pairs)

    # setting via chained assignment
    # but actually works, since everything is a view
    with tm.raises_chained_assignment_error():
        df.loc[0]["z"].iloc[0] = 1.0

    observed = df.loc[(0, 0), "z"]
    if using_copy_on_write:
        # the chained write must not have reached the parent frame
        assert observed == df_original.loc[0, "z"]
    else:
        assert observed == 1

    # correct setting
    df.loc[(0, 0), "z"] = 2
    assert df.loc[(0, 0), "z"] == 2
|
||||
|
||||
|
||||
def test_indexer_caching(monkeypatch):
    """Boolean-mask setitem on a MultiIndex at the patched size cutoff (GH5727)."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    cutoff = 20
    with monkeypatch.context():
        monkeypatch.setattr(libindex, "_SIZE_CUTOFF", cutoff)
        level_arrays = [np.arange(cutoff), np.arange(cutoff)]
        index = MultiIndex.from_arrays(level_arrays)
        ser = Series(np.zeros(cutoff), index=index)

        # setitem
        ser[ser == 0] = 1

    tm.assert_series_equal(ser, Series(np.ones(cutoff), index=index))
|
@ -0,0 +1,50 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Period,
|
||||
Series,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
    """A Period level can be addressed with a Period or a datetime key."""
    # GH4861, using datetime in period of multiindex raises exception
    labels = Index(["a", "a", "a", "b", "b"])
    periods = period_range("2012-01", periods=len(labels), freq="M")
    values = np.random.default_rng(2).standard_normal(len(labels))
    ser = Series(values, [labels, periods])

    first_value = ser.iloc[0]

    # try Period as index, then datetime as index
    for second_level_key in (Period("2012-01"), datetime(2012, 1, 1)):
        assert ser.loc["a", second_level_key] == first_value
|
||||
|
||||
|
||||
def test_multiindex_datetime_columns():
    """Empty frames with datetime MultiIndex columns compare equal (GH35015)."""
    # GH35015, using datetime as column indices raises exception
    level_names = ["a", "b"]

    columns_from_tuples = MultiIndex.from_tuples(
        [(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=level_names
    )
    columns_from_arrays = MultiIndex.from_arrays(
        [[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=level_names
    )

    df = DataFrame([], columns=columns_from_tuples)
    expected_df = DataFrame([], columns=columns_from_arrays)

    tm.assert_frame_equal(df, expected_df)
|
@ -0,0 +1,410 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "access_method",
    [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level returns a Series indexed by level "A"."""
    # GH 6018
    # series regression getitem with a multi-index
    index_tuples = [(0, 0), (1, 1), (2, 1)]
    ser = Series(
        [1, 2, 3], index=MultiIndex.from_tuples(index_tuples, names=["A", "B"])
    )
    expected.index.name = "A"

    tm.assert_series_equal(access_method(ser, level1_value), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level labels raise KeyError; present ones select rows."""
    # GH 5725: the 'A' happens to be a valid Timestamp, so it did not raise
    # the appropriate error (only in PY3 of course!)
    tag_codes = [0, 0, 0, 1, 2, 2, 2, 2, 2, 2]
    day_codes = [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]
    index = MultiIndex(
        levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
        codes=[tag_codes, day_codes],
        names=["tag", "day"],
    )
    arr = np.random.default_rng(2).standard_normal((len(index), 1))
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    if level0_value != "A":
        with pytest.raises(KeyError, match=r"^'A'$"):
            df.val["A"]

    with pytest.raises(KeyError, match=r"^'X'$"):
        df.val["X"]

    # the first three rows carry the parametrized first-level label
    expected = Series(
        arr.ravel()[:3], name="val", index=Index([26, 37, 57], name="day")
    )
    tm.assert_series_equal(df.val[level0_value], expected)
|
||||
|
||||
|
||||
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
    """A partial (year, month) key returns the rows with both levels dropped."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    expected = ser.reindex(ser.index[42:65])
    without_year = expected.index.droplevel(0)
    expected.index = without_year.droplevel(0)

    tm.assert_series_equal(indexer_sl(ser)[2000, 3], expected)
|
||||
|
||||
|
||||
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer_sl
):
    """A full three-level key returns the same value as the positional lookup."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    by_position = ser.iloc[49]

    assert indexer_sl(ser)[2000, 3, 10] == by_position
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid lookup raises the parametrized error type and message."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(ser)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data,
):
    """Indexing with a generator of booleans matches indexing with a mask."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    positive_flags = (value > 0 for value in ser)
    from_generator = ser[positive_flags]
    from_mask = ser[ser > 0]

    tm.assert_series_equal(from_generator, from_mask)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """A full column tuple selects the first column of the transposed frame."""
    transposed = multiindex_dataframe_random_data.T

    first_column = transposed.values[:, 0]
    selected = transposed["foo", "one"].values

    tm.assert_almost_equal(selected, first_column)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Unknown column keys raise KeyError whose message is the key itself."""
    transposed = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(transposed)
|
||||
|
||||
|
||||
def test_tuple_string_column_names():
    """Select a tuple label and a string label from flattened columns."""
    # GH#50372
    column_tuples = [("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")]
    rows = [range(4), range(1, 5), range(2, 6)]
    df = DataFrame(rows, columns=MultiIndex.from_tuples(column_tuples))
    df["single_index"] = 0

    # flatten the MultiIndex columns into a plain Index of tuples
    df_flat = df.copy()
    df_flat.columns = df_flat.columns.to_flat_index()
    df_flat["new_single_index"] = 0

    result = df_flat[[("a", "aa"), "new_single_index"]]

    expected_columns = Index([("a", "aa"), "new_single_index"])
    expected = DataFrame([[0, 0], [1, 0], [2, 0]], columns=expected_columns)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
    """Top-level selection when the middle column level is an empty string."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    level1 = ["level1 item1", "level1 item2"]
    level2 = ["", "level2 item2"]
    level3 = ["level3 item1", "level3 item2"]
    df.columns = [level1, level2, level3]

    result = df["level1 item1"]

    expected = DataFrame(
        [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """Selecting a top-level column label drops that level from the result."""
    transposed = multiindex_dataframe_random_data.T

    expected = transposed.reindex(columns=transposed.columns[expected_slice])
    expected.columns = expected.columns.droplevel(0)

    tm.assert_frame_equal(indexer(transposed), expected)
|
||||
|
||||
|
||||
def test_frame_mixed_depth_get():
    """Partial keys on columns padded with "" match the fully padded keys."""
    level0 = ["a", "top", "top", "routine1", "routine1", "routine2"]
    level1 = ["", "OD", "OD", "result1", "result2", "result1"]
    level2 = ["", "wx", "wy", "", "", ""]

    columns = MultiIndex.from_tuples(sorted(zip(level0, level1, level2)))
    df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=columns)

    # single label versus the fully padded tuple
    by_label = df["a"]
    by_padded_tuple = df["a", "", ""].rename("a")
    tm.assert_series_equal(by_label, by_padded_tuple)

    # two-level key versus the fully padded tuple
    by_pair = df["routine1", "result1"]
    by_padded_triple = df["routine1", "result1", ""].rename(("routine1", "result1"))
    tm.assert_series_equal(by_pair, by_padded_triple)
|
||||
|
||||
|
||||
def test_frame_getitem_nan_multiindex(nulls_fixture):
    """Slice with ``.loc`` on a MultiIndex whose second level is all null."""
    # GH#29751
    # loc on a multiindex containing nan values
    n = nulls_fixture  # for code readability
    cols = ["a", "b", "c"]

    def build(rows):
        # frame indexed by ("a", "b") with an int64 "c" column
        frame = DataFrame(rows, columns=cols).set_index(["a", "b"])
        frame["c"] = frame["c"].astype("int64")
        return frame

    df = build([[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]])

    # open start
    idx = (21, n)
    result = df.loc[:idx]
    tm.assert_frame_equal(result, build([[11, n, 13], [21, n, 23]]))

    # open end
    result = df.loc[idx:]
    tm.assert_frame_equal(result, build([[21, n, 23], [31, n, 33], [41, n, 43]]))

    # both bounds given
    idx1, idx2 = (21, n), (31, n)
    result = df.loc[idx1:idx2]
    tm.assert_frame_equal(result, build([[21, n, 23], [31, n, 33]]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected",
    [
        (
            (["b"], ["bar", np.nan]),
            DataFrame(
                [[2, 3], [5, 6]],
                columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                dtype="int64",
            ),
        ),
        (
            ["a", "b"],
            DataFrame(
                [[1, 2, 3], [4, 5, 6]],
                columns=MultiIndex.from_tuples(
                    [("a", "foo"), ("b", "bar"), ("b", np.nan)]
                ),
                dtype="int64",
            ),
        ),
        (
            ["b"],
            DataFrame(
                [[2, 3], [5, 6]],
                columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                dtype="int64",
            ),
        ),
        (
            (["b"], ["bar"]),
            DataFrame(
                [[2], [5]],
                columns=MultiIndex.from_tuples([("b", "bar")]),
                dtype="int64",
            ),
        ),
        (
            (["b"], [np.nan]),
            DataFrame(
                [[3], [6]],
                columns=MultiIndex(
                    codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
                ),
                dtype="int64",
            ),
        ),
        (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
    ],
)
def test_frame_getitem_nan_cols_multiindex(
    indexer,
    expected,
    nulls_fixture,
):
    """Column selection with ``.loc`` when a column level contains a null."""
    # Slicing MultiIndex including levels with nan values, for more information
    # see GH#25154
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
    )
    df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=columns, dtype="int64")

    tm.assert_equal(df.loc[:, indexer], expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
    rows = [
        ["a", "d", "e", "c", "f", "b"],
        [1, 4, 5, 3, 6, 2],
        [1, 4, 5, 3, 6, 2],
    ]
    # the codes repeat ("A", "B2"), so the columns contain a duplicate entry
    columns = MultiIndex(
        levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=["main", "sub"],
    )
    return DataFrame(rows, index=["h1", "h3", "h5"], columns=columns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """A full column tuple on the duplicate-column frame returns a DataFrame."""
    # GH 4145
    row_labels = Index(["h1", "h3", "h5"])
    selected_column = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
    # build the expectation sideways, then transpose into column orientation
    expected = DataFrame(
        [["a", 1, 1]], index=selected_column, columns=row_labels
    ).T

    tm.assert_frame_equal(indexer(dataframe_with_duplicate_index), expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """Chained selection of a unique sub-column returns a Series."""
    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    result = dataframe_with_duplicate_index["A"]["A1"]

    expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Chained selection of a duplicated sub-column returns a DataFrame."""
    # selecting a non_unique from the 2nd level
    result = dataframe_with_duplicate_index["A"]["B2"]

    sideways = DataFrame(
        [["d", 4, 4], ["e", 5, 5]],
        index=Index(["B2", "B2"], name="sub"),
        columns=["h1", "h3", "h5"],
    )
    tm.assert_frame_equal(result, sideways.T)
|
||||
|
||||
|
||||
def test_frame_mi_empty_slice():
    """Selecting no columns keeps the MultiIndex levels and has empty codes."""
    # GH 15454
    columns = MultiIndex.from_product([[1], [2]])
    df = DataFrame(0, index=range(2), columns=columns)

    result = df[[]]

    empty_columns = MultiIndex(levels=[[1], [2]], codes=[[], []])
    expected = DataFrame(index=[0, 1], columns=empty_columns)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_empty_multiindex():
    """``.loc`` with an empty MultiIndex selects nothing and sets nothing."""
    # GH#36936
    index = MultiIndex.from_arrays(
        [["a", "a", "b", "a"], ["a", "a", "b", "b"]], names=("idx1", "idx2")
    )
    df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])

    # loc on empty multiindex == loc with False mask
    empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index
    selected = df.loc[empty_multiindex, :]
    all_false = [False] * len(df.index)
    tm.assert_frame_equal(selected, df.loc[all_false, :])

    # replacing value with loc on empty multiindex
    df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5
    untouched = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
    tm.assert_frame_equal(df, untouched)
|
@ -0,0 +1,171 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Fixture returning a simple 3 x 3 dataframe whose columns and rows
    are both two-level MultiIndexes, filled with seeded random data.
    """
    values = np.random.default_rng(2).standard_normal((3, 3))
    column_levels = [[2, 2, 4], [6, 8, 10]]
    row_levels = [[4, 4, 8], [8, 10, 12]]
    return DataFrame(values, columns=column_levels, index=row_levels)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        (
            lambda df: df.iloc[0],
            lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
        ),
        (
            lambda df: df.iloc[2],
            lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
        ),
        (
            lambda df: df.iloc[:, 2],
            lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
        ),
    ],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """A single positional row or column comes back as a tuple-named Series."""
    frame = simple_multiindex_dataframe
    raw_values = frame.values

    selected = indexer(frame)
    # ``expected`` is a factory that builds the Series from the raw values
    wanted = expected(raw_values)

    tm.assert_series_equal(selected, wanted)
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """The first two positional rows equal the level-preserving ``xs`` on 4."""
    frame = simple_multiindex_dataframe

    by_position = frame.iloc[[0, 1]]
    by_label = frame.xs(4, drop_level=False)

    tm.assert_frame_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """A two-integer ``iloc`` lookup equals the same cell of ``.values``."""
    frame = simple_multiindex_dataframe
    raw_values = frame.values

    cell = frame.iloc[2, 2]

    assert cell == raw_values[2, 2]
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
    """A positional list of rows equals the level-preserving ``xs`` on "b"."""
    # GH 5528
    outer = ["a", "a", "b", "b"]
    inner = ["x", "y", "x", "y"]
    index = MultiIndex.from_tuples(zip(outer, inner))
    df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=index)

    by_position = df.iloc[[2, 3]]
    by_label = df.xs("b", drop_level=False)

    tm.assert_frame_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
    """Scalar ``iloc`` works on a frame with duplicate MultiIndex labels."""
    # this is basically regular indexing
    arr = np.random.default_rng(2).standard_normal((4, 3))
    column_levels = [["i", "i", "j"], ["A", "A", "B"]]
    row_levels = [["i", "i", "j", "k"], ["X", "X", "Y", "Y"]]
    df = DataFrame(arr, columns=column_levels, index=row_levels)

    assert df.iloc[2, 2] == arr[2, 2]
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """``iloc`` with a slice and plain slicing return the same rows."""
    frame = multiindex_dataframe_random_data

    via_iloc = frame.iloc[:4]
    via_getitem = frame[:4]

    tm.assert_frame_equal(via_iloc, via_getitem)
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Assigning through an ``iloc`` slice zeroes only the first four rows."""
    frame = multiindex_dataframe_random_data
    frame.iloc[:4] = 0

    head, tail = frame.values[:4], frame.values[4:]
    assert (head == 0).all()
    assert (tail != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
    """A positional column lookup matches the label lookup (GH 1678)."""
    # GH 1678
    column_tuples = [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
    row_tuples = [("a", 1), ("a", 2), ("b", 1), ("b", 2)]

    df = DataFrame(
        np.arange(12).reshape((4, 3)),
        index=MultiIndex.from_tuples(row_tuples),
        columns=MultiIndex.from_tuples(column_tuples),
    )

    by_position = df.iloc[:, 1]
    by_label = df.loc[:, ("Ohio", "Red")]
    tm.assert_series_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
    """Cell-by-cell ``iloc`` reads rebuild the data under a duplicated index."""
    # GH 13797
    data = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]
    # ("CC", "B") appears twice in the row index
    index = MultiIndex.from_tuples(
        [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    )

    expected = DataFrame(data)
    df = DataFrame(data, index=index)

    cells = []
    for row in range(5):
        cells.append([df.iloc[row, col] for col in range(2)])
    result = DataFrame(cells)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, indexes, values, expected_k",
    [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
    ],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """In-place ``iloc`` additions on an integer-MultiIndex Series (GH17148)."""
    # GH17148
    df = DataFrame(data=data, columns=["i", "j", "k"]).set_index(["i", "j"])

    series = df.k.copy()
    for position, increment in zip(indexes, values):
        series.iloc[position] += increment

    # overwrite "k" so that df.k carries the expected totals
    df["k"] = expected_k
    tm.assert_series_equal(series, df.k)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
    """The third positional row equals ``xs`` on the third index label."""
    frame = multiindex_dataframe_random_data

    by_position = frame.iloc[2]
    by_label = frame.xs(frame.index[2])

    tm.assert_series_equal(by_position, by_label)
|
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def m():
    """Size of each random draw made by the ``keys`` fixture."""
    return 5
|
||||
|
||||
|
||||
@pytest.fixture
def n():
    """Number of rows generated by the ``vals`` fixture."""
    return 100
|
||||
|
||||
|
||||
@pytest.fixture
def cols():
    """Column names of the test frame built by the ``df`` fixture."""
    return ["jim", "joe", "jolie", "joline", "jolia"]
|
||||
|
||||
|
||||
@pytest.fixture
def vals(n):
    """Return ``n`` row tuples built from five seeded random columns."""
    dates = pd.date_range("20141009", periods=10).tolist()
    columns = [
        np.random.default_rng(2).integers(0, 10, n),
        np.random.default_rng(2).choice(list("abcdefghij"), n),
        np.random.default_rng(2).choice(dates, n),
        np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n),
        np.random.default_rng(2).standard_normal(n),
    ]
    # transpose the column arrays into one tuple per row
    return [tuple(row) for row in zip(*columns)]
|
||||
|
||||
|
||||
@pytest.fixture
def keys(n, m, vals):
    """Lookup keys: ``m`` random four-part keys plus keys cut from ``vals``."""
    # bunch of keys for testing
    dates = pd.date_range("20141009", periods=11).tolist()
    parts = [
        np.random.default_rng(2).integers(0, 11, m),
        np.random.default_rng(2).choice(list("abcdefghijk"), m),
        np.random.default_rng(2).choice(dates, m),
        np.random.default_rng(2).choice(list("ZYXWVUTSRQP"), m),
    ]
    random_keys = [tuple(row) for row in zip(*parts)]

    # every (n // m)-th row of ``vals`` with its last element removed
    keys_from_rows = [row[:-1] for row in vals[:: n // m]]
    return random_keys + keys_from_rows
|
||||
|
||||
|
||||
# covers both unique index and non-unique index
|
||||
@pytest.fixture
def df(vals, cols):
    """Frame built from the ``vals`` rows with the ``cols`` column names."""
    return DataFrame(vals, columns=cols)
|
||||
|
||||
|
||||
@pytest.fixture
def a(df):
    """The ``df`` frame concatenated with itself, so every row appears twice."""
    doubled = [df, df]
    return pd.concat(doubled)
|
||||
|
||||
|
||||
@pytest.fixture
def b(df, cols):
    """The ``df`` frame de-duplicated on every column except the last."""
    key_columns = cols[:-1]
    return df.drop_duplicates(subset=key_columns)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
    """Compare ``.loc`` on every key prefix with a boolean-mask selection.

    The frame is sorted on the first ``lexsort_depth`` columns (or left
    as-is for depth 0) before the first four columns become the index.
    """
    # GH7724, GH2646
    source = request.getfixturevalue(frame_fixture)
    if lexsort_depth == 0:
        df = source.copy(deep=False)
    else:
        df = source.sort_values(by=cols[:lexsort_depth])

    mi = df.set_index(cols[:-1])
    assert mi.index._lexsort_depth >= lexsort_depth

    for key in keys:
        mask = np.ones(len(df), dtype=bool)

        # test for all partials of this key
        for pos, label in enumerate(key):
            partial = key[: pos + 1]
            mask &= df.iloc[:, pos] == label

            if not mask.any():
                assert partial not in mi.index
                continue

            assert partial in mi.index
            right = df[mask].copy(deep=False)

            if pos + 1 == len(key):  # full key
                assert right.set_index(cols[:-1], inplace=True) is None
                if len(right) == 1:  # single hit
                    right = Series(
                        right["jolia"].values, name=right.index[0], index=["jolia"]
                    )
                    tm.assert_series_equal(mi.loc[partial], right)
                else:  # multi hit
                    tm.assert_frame_equal(mi.loc[partial], right)
            else:  # partial key
                # drop the matched columns and index by the remaining key columns
                assert right.drop(cols[: pos + 1], axis=1, inplace=True) is None
                assert right.set_index(cols[pos + 1 : -1], inplace=True) is None
                tm.assert_frame_equal(mi.loc[partial], right)
|
@ -0,0 +1,992 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
IndexingError,
|
||||
PerformanceWarning,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def single_level_multiindex():
    """single level MultiIndex"""
    labels = ["foo", "bar", "baz", "qux"]
    return MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])
|
||||
|
||||
|
||||
@pytest.fixture
def frame_random_data_integer_multi_index():
    """6 x 2 frame of seeded random data on a two-level integer MultiIndex."""
    index = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
    )
    values = np.random.default_rng(2).standard_normal((6, 2))
    return DataFrame(values, index=index)
|
||||
|
||||
|
||||
class TestMultiIndexLoc:
|
||||
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
    """Scalar ``.loc`` assignment with string and integer column labels."""
    row_key = ("bar", "two")

    frame = multiindex_dataframe_random_data
    frame.loc[row_key, "B"] = 5
    assert frame.loc[row_key, "B"] == 5

    # with integer labels
    relabelled = frame.copy()
    relabelled.columns = list(range(3))
    relabelled.loc[row_key, 1] = 7
    assert relabelled.loc[row_key, 1] == 7
|
||||
|
||||
def test_loc_getitem_general(self, any_real_numpy_dtype):
    """Tuple lookup on a two-level index before and after sorting (GH#2817)."""
    # GH#2817
    dtype = any_real_numpy_dtype
    data = {
        "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
        "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
        "num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12},
    }
    df = DataFrame(data).astype({"col": dtype, "num": dtype})
    df = df.set_index(keys=["col", "num"])
    key = (4.0, 12)

    # emits a PerformanceWarning, ok
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_frame_equal(df.loc[key], df.iloc[2:])

    # this is ok
    assert df.sort_index(inplace=True) is None
    res = df.loc[key]

    # both index levels are built with the parametrized dtype
    level_arrays = [
        np.array([4.0] * 3, dtype=dtype),
        np.array([12] * 3, dtype=dtype),
    ]
    index = MultiIndex.from_arrays(level_arrays, names=["col", "num"])
    expected = DataFrame({"amount": [222, 333, 444]}, index=index)
    tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_loc_getitem_multiindex_missing_label_raises(self):
    """A label absent from the first row level raises KeyError."""
    # GH#21593
    values = np.random.default_rng(2).standard_normal((3, 3))
    df = DataFrame(
        values,
        columns=[[2, 2, 4], [6, 8, 10]],
        index=[[4, 4, 8], [8, 10, 12]],
    )

    # 2 is used as a column label here, but not as a row label
    with pytest.raises(KeyError, match=r"^2$"):
        df.loc[2]
|
||||
|
||||
def test_loc_getitem_list_of_tuples_with_multiindex(
    self, multiindex_year_month_day_dataframe_random_data
):
    """A list of full-key tuples selects the same rows as a reindex."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    wanted = ser.reindex(ser.index[49:51])
    selected = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]

    tm.assert_series_equal(selected, wanted)
|
||||
|
||||
def test_loc_getitem_series(self):
|
||||
# GH14730
|
||||
# passing a series as a key with a MultiIndex
|
||||
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
|
||||
x = Series(index=index, data=range(9), dtype=np.float64)
|
||||
y = Series([1, 3])
|
||||
expected = Series(
|
||||
data=[0, 1, 2, 6, 7, 8],
|
||||
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[y]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = x.loc[[1, 3]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH15424
|
||||
y1 = Series([1, 3], index=[1, 2])
|
||||
result = x.loc[y1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
empty = Series(data=[], dtype=np.float64)
|
||||
expected = Series(
|
||||
[],
|
||||
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[empty]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_array(self):
|
||||
# GH15434
|
||||
# passing an array as a key with a MultiIndex
|
||||
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
|
||||
x = Series(index=index, data=range(9), dtype=np.float64)
|
||||
y = np.array([1, 3])
|
||||
expected = Series(
|
||||
data=[0, 1, 2, 6, 7, 8],
|
||||
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[y]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# empty array:
|
||||
empty = np.array([])
|
||||
expected = Series(
|
||||
[],
|
||||
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
|
||||
dtype="float64",
|
||||
)
|
||||
result = x.loc[empty]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# 0-dim array (scalar):
|
||||
scalar = np.int64(1)
|
||||
expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
|
||||
result = x.loc[scalar]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_labels(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[["i", "i", "j"], ["A", "A", "B"]],
|
||||
index=[["i", "i", "j"], ["X", "X", "Y"]],
|
||||
)
|
||||
|
||||
# the first 2 rows
|
||||
expected = df.iloc[[0, 1]].droplevel(0)
|
||||
result = df.loc["i"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# 2nd (last) column
|
||||
expected = df.iloc[:, [2]].droplevel(0, axis=1)
|
||||
result = df.loc[:, "j"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# bottom right corner
|
||||
expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
|
||||
result = df.loc["j"].loc[:, "j"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with a tuple
|
||||
expected = df.iloc[[0, 1]]
|
||||
result = df.loc[("i", "X")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_ints(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
expected = df.iloc[[0, 1]].droplevel(0)
|
||||
result = df.loc[4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_missing_label_raises(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^2$"):
|
||||
df.loc[2]
|
||||
|
||||
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
|
||||
def test_loc_multiindex_list_missing_label(self, key, pos):
|
||||
# GH 27148 - lists with missing labels _do_ raise
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match="not in index"):
|
||||
df.loc[key]
|
||||
|
||||
def test_loc_multiindex_too_many_dims_raises(self):
|
||||
# GH 14885
|
||||
s = Series(
|
||||
range(8),
|
||||
index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
|
||||
s.loc["a", "b"]
|
||||
with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
|
||||
s.loc["a", "d", "g"]
|
||||
with pytest.raises(IndexingError, match="Too many indexers"):
|
||||
s.loc["a", "d", "g", "j"]
|
||||
|
||||
def test_loc_multiindex_indexer_none(self):
|
||||
# GH6788
|
||||
# multi-index indexer is None (meaning take all)
|
||||
attributes = ["Attribute" + str(i) for i in range(1)]
|
||||
attribute_values = ["Value" + str(i) for i in range(5)]
|
||||
|
||||
index = MultiIndex.from_product([attributes, attribute_values])
|
||||
df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5
|
||||
df = DataFrame(df, columns=index)
|
||||
result = df[attributes]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
# GH 7349
|
||||
# loc with a multi-index seems to be doing fallback
|
||||
df = DataFrame(
|
||||
np.arange(12).reshape(-1, 1),
|
||||
index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
|
||||
)
|
||||
|
||||
expected = df.loc[([1, 2],), :]
|
||||
result = df.loc[[1, 2]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_incomplete(self):
|
||||
# GH 7399
|
||||
# incomplete indexers
|
||||
s = Series(
|
||||
np.arange(15, dtype="int64"),
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
|
||||
)
|
||||
expected = s.loc[:, "a":"c"]
|
||||
|
||||
result = s.loc[0:4, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[:4, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[0:, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 7400
|
||||
# multiindexer getitem with list of indexers skips wrong element
|
||||
s = Series(
|
||||
np.arange(15, dtype="int64"),
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
|
||||
)
|
||||
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
|
||||
result = s.loc[2:4:2, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_get_loc_single_level(self, single_level_multiindex):
|
||||
single_level = single_level_multiindex
|
||||
s = Series(
|
||||
np.random.default_rng(2).standard_normal(len(single_level)),
|
||||
index=single_level,
|
||||
)
|
||||
for k in single_level.values:
|
||||
s[k]
|
||||
|
||||
def test_loc_getitem_int_slice(self):
|
||||
# GH 3053
|
||||
# loc should treat integer slices like label slices
|
||||
|
||||
index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
|
||||
result = df.loc[6:8, :]
|
||||
expected = df
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
|
||||
result = df.loc[20:30, :]
|
||||
expected = df.iloc[2:]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# doc examples
|
||||
result = df.loc[10, :]
|
||||
expected = df.iloc[0:2]
|
||||
expected.index = ["a", "b"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[:, 10]
|
||||
expected = df[10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
|
||||
)
|
||||
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
|
||||
# GH #19686
|
||||
# .loc should work with nested indexers which can be
|
||||
# any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices
|
||||
|
||||
def convert_nested_indexer(indexer_type, keys):
|
||||
if indexer_type == np.ndarray:
|
||||
return np.array(keys)
|
||||
if indexer_type == slice:
|
||||
return slice(*keys)
|
||||
return indexer_type(keys)
|
||||
|
||||
a = [10, 20, 30]
|
||||
b = [1, 2, 3]
|
||||
index = MultiIndex.from_product([a, b])
|
||||
df = DataFrame(
|
||||
np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
|
||||
)
|
||||
|
||||
keys = ([10, 20], [2, 3])
|
||||
types = (indexer_type_1, indexer_type_2)
|
||||
|
||||
# check indexers with all the combinations of nested objects
|
||||
# of all the valid types
|
||||
indexer = tuple(
|
||||
convert_nested_indexer(indexer_type, k)
|
||||
for indexer_type, k in zip(types, keys)
|
||||
)
|
||||
if indexer_type_1 is set or indexer_type_2 is set:
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
df.loc[indexer, "Data"]
|
||||
|
||||
return
|
||||
else:
|
||||
result = df.loc[indexer, "Data"]
|
||||
expected = Series(
|
||||
[1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
|
||||
)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
|
||||
# GH#37711
|
||||
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
|
||||
obj = frame_or_series([1, 2], index=mi)
|
||||
obj.loc[("a",)] = 0
|
||||
expected = frame_or_series([0, 2], index=mi)
|
||||
tm.assert_equal(obj, expected)
|
||||
|
||||
@pytest.mark.parametrize("indexer", [("a",), ("a")])
|
||||
def test_multiindex_one_dimensional_tuple_columns(self, indexer):
|
||||
# GH#37711
|
||||
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
|
||||
obj = DataFrame([1, 2], index=mi)
|
||||
obj.loc[indexer, :] = 0
|
||||
expected = DataFrame([0, 2], index=mi)
|
||||
tm.assert_frame_equal(obj, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
|
||||
)
|
||||
def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
|
||||
# GH#39147
|
||||
mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
|
||||
df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"])
|
||||
df.loc[indexer, ["c", "d"]] = 1.0
|
||||
expected = DataFrame(
|
||||
[[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]],
|
||||
index=mi,
|
||||
columns=["a", "b", "c", "d"],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_sorted_multiindex_after_union(self):
|
||||
# GH#44752
|
||||
midx = MultiIndex.from_product(
|
||||
[pd.date_range("20110101", periods=2), Index(["a", "b"])]
|
||||
)
|
||||
ser1 = Series(1, index=midx)
|
||||
ser2 = Series(1, index=midx[:2])
|
||||
df = pd.concat([ser1, ser2], axis=1)
|
||||
expected = df.copy()
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame({0: ser1, 1: ser2})
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_no_second_level_index(self):
|
||||
# GH#43599
|
||||
df = DataFrame(
|
||||
index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]),
|
||||
columns=["Val"],
|
||||
)
|
||||
res = df.loc[np.s_[:, "c", :]]
|
||||
expected = DataFrame(
|
||||
index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"]
|
||||
)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_loc_multi_index_key_error(self):
|
||||
# GH 51892
|
||||
df = DataFrame(
|
||||
{
|
||||
(1, 2): ["a", "b", "c"],
|
||||
(1, 3): ["d", "e", "f"],
|
||||
(2, 2): ["g", "h", "i"],
|
||||
(2, 4): ["j", "k", "l"],
|
||||
}
|
||||
)
|
||||
with pytest.raises(KeyError, match=r"(1, 4)"):
|
||||
df.loc[0, (1, 4)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, pos",
    [
        ([], []),  # empty ok
        (["A"], slice(3)),
        (["A", "D"], []),  # "D" isn't present -> raise
        (["D", "E"], []),  # no values found -> raise
        (["D"], []),  # same, with single item list: GH 27148
        (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
        (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
    ],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
    """List indexers with missing labels raise; otherwise rows at ``pos`` come back."""
    # GH 7866
    # multi-index slicing with missing indexers
    levels = [["A", "B", "C"], ["foo", "bar", "baz"]]
    idx = MultiIndex.from_product(levels, names=["one", "two"])
    ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
    expected = ser.iloc[pos]

    if expected.size == 0 and indexer != []:
        # non-empty indexer with an empty expectation marks a raising case
        with pytest.raises(KeyError, match=str(indexer)):
            ser.loc[indexer]
        return

    if indexer == (slice(None), ["foo", "bah"]):
        # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
        with pytest.raises(KeyError, match="'bah'"):
            ser.loc[indexer]
        return

    result = ser.loc[indexer]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """A column indexer with an empty list on any level selects no columns."""
    # GH 8737
    # empty indexer
    multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
    values = np.random.default_rng(2).standard_normal((5, 6))
    df = DataFrame(values, index=range(5), columns=multi_index)
    df = df.sort_index(level=0, axis=1)

    no_columns = multi_index.reindex([])[0]
    expected = DataFrame(index=range(5), columns=no_columns)
    result = df.loc[:, columns_indexer]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """A function object stored in a cell comes back unchanged from ``.loc``."""
    # regression from < 0.14.0
    # GH 7914
    columns = MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")])
    df = DataFrame(
        [[np.mean, np.median], ["mean", "median"]],
        columns=columns,
        index=["function", "name"],
    )
    result = df.loc["function", ("functs", "mean")]
    assert result == np.mean
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
    """``df.loc[(0, 0), :]`` returns the same Series as ``df.loc[0, 0]``."""
    # GH 671
    data = {
        "a": np.arange(10),
        "b": np.arange(10),
        "c": np.random.default_rng(2).standard_normal(10),
        "d": np.random.default_rng(2).standard_normal(10),
    }
    df = DataFrame(data).set_index(["a", "b"])

    result = df.loc[(0, 0), :]
    expected = df.loc[0, 0]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
|
||||
df = frame_random_data_integer_multi_index
|
||||
result = df.loc[1]
|
||||
expected = df[-3:]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
|
||||
df = frame_random_data_integer_multi_index
|
||||
with pytest.raises(KeyError, match=r"^3$"):
|
||||
df.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
|
||||
# test setup - check key not in dataframe
|
||||
with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
|
||||
df.loc[("bar", "three"), "B"]
|
||||
|
||||
# in theory should be inserting in a sorted space????
|
||||
df.loc[("bar", "three"), "B"] = 0
|
||||
expected = 0
|
||||
result = df.sort_index().loc[("bar", "three"), "B"]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_loc_setitem_single_column_slice():
    """``df.loc[:, label] = values`` fills the column(s) under a level-0 label."""
    # case from https://github.com/pandas-dev/pandas/issues/27841
    columns = MultiIndex.from_product([["Main"], ("another", "one")])
    df = DataFrame("string", index=list("abcd"), columns=columns)
    df["labels"] = "a"
    df.loc[:, "labels"] = df.index
    tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index))

    # test with non-object block
    columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
    df = DataFrame(np.nan, index=range(4), columns=columns)
    expected = df.copy()
    df.loc[:, "B"] = np.arange(4)
    # ("B", "1") is the third column, so the positional write must agree
    expected.iloc[:, 2] = np.arange(4)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_nan_multiindex(using_infer_string):
|
||||
# GH 5286
|
||||
tups = [
|
||||
("Good Things", "C", np.nan),
|
||||
("Good Things", "R", np.nan),
|
||||
("Bad Things", "C", np.nan),
|
||||
("Bad Things", "T", np.nan),
|
||||
("Okay Things", "N", "B"),
|
||||
("Okay Things", "N", "D"),
|
||||
("Okay Things", "B", np.nan),
|
||||
("Okay Things", "D", np.nan),
|
||||
]
|
||||
df = DataFrame(
|
||||
np.ones((8, 4)),
|
||||
columns=Index(["d1", "d2", "d3", "d4"]),
|
||||
index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]),
|
||||
)
|
||||
result = df.loc["Good Things"].loc["C"]
|
||||
expected = DataFrame(
|
||||
np.ones((1, 4)),
|
||||
index=Index(
|
||||
[np.nan],
|
||||
dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
|
||||
name="u3",
|
||||
),
|
||||
columns=Index(["d1", "d2", "d3", "d4"]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_period_string_indexing():
    """A period string at matching resolution is an exact scalar lookup."""
    # GH 9892
    a = pd.period_range("2013Q1", "2013Q4", freq="Q")
    i = (1111, 2222, 3333)
    idx = MultiIndex.from_product((a, i), names=("Period", "CVR"))
    column_names = (
        "OMS",
        "OMK",
        "RES",
        "DRIFT_IND",
        "OEVRIG_IND",
        "FIN_IND",
        "VARE_UD",
        "LOEN_UD",
        "FIN_UD",
    )
    df = DataFrame(index=idx, columns=column_names)
    result = df.loc[("2013Q1", 1111), "OMS"]

    # lookup with the Period object itself
    alt = df.loc[(a[0], 1111), "OMS"]
    assert np.isnan(alt)

    # Because the resolution of the string matches, it is an exact lookup,
    # not a slice
    assert np.isnan(result)

    alt = df.loc[("2013Q1", 1111), "OMS"]
    assert np.isnan(alt)
|
||||
|
||||
|
||||
def test_loc_datetime_mask_slicing():
    """A scalar on level 0 combines with a boolean mask built from level 1."""
    # GH 16699
    dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
    m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
    df = DataFrame(
        data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
    )

    later_than_first = df.index.get_level_values(1) > "2017-05-04"
    result = df.loc[(dt_idx[0], later_than_first), "C1"]

    expected_index = MultiIndex.from_tuples(
        [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
        names=["Idx1", "Idx2"],
    )
    expected = Series([3], name="C1", index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_datetime_series_tuple_slicing():
    """``ser.loc[:, [timestamp]]`` on a one-row Series returns the Series itself."""
    # https://github.com/pandas-dev/pandas/issues/35858
    date = pd.Timestamp("2000")
    mi = MultiIndex.from_tuples([("a", date)], names=["a", "b"])
    ser = Series(1, index=mi, name="c")
    tm.assert_series_equal(ser.loc[:, [date]], ser)
|
||||
|
||||
|
||||
def test_loc_with_mi_indexer():
    """A MultiIndex used as the row key returns every row matching its tuples."""
    # https://github.com/pandas-dev/pandas/issues/35351
    level_names = ["index", "date"]
    df = DataFrame(
        data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples(
            [(0, 1), (1, 0), (1, 1), (1, 1)], names=level_names
        ),
        columns=["author", "price"],
    )
    idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=level_names)
    result = df.loc[idx, :]

    # (1, 1) occurs twice in the frame, so it shows up twice in the result
    expected = DataFrame(
        [["a", 1], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=level_names),
        columns=["author", "price"],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_mi_with_level1_named_0():
    """Level-0 lookup works when the second index level is literally named ``0``."""
    # GH#37194
    dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")

    df = Series(range(3), index=dti).to_frame()
    df[1] = dti

    df2 = df.set_index(0, append=True)
    assert df2.index.names == (None, 0)
    df2.index.get_loc(dti[0])  # smoke test

    first_stamp = dti[0]
    frame_result = df2.loc[first_stamp]
    frame_expected = df2.iloc[[0]].droplevel(None)
    tm.assert_frame_equal(frame_result, frame_expected)

    ser2 = df2[1]
    assert ser2.index.names == (None, 0)

    series_result = ser2.loc[first_stamp]
    series_expected = ser2.iloc[[0]].droplevel(None)
    tm.assert_series_equal(series_result, series_expected)
|
||||
|
||||
|
||||
def test_getitem_str_slice():
    """A string slice inside a tuple key matches the chained two-step lookup."""
    # GH#15928
    rows = [
        ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
        ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
        ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
        ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
        ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
        ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
    ]
    df = DataFrame(rows, columns=["time", "ticker", "bid", "ask"])
    df2 = df.set_index(["ticker", "time"]).sort_index()

    upto = slice("2016-05-25 13:30:00")
    res = df2.loc[("AAPL", upto), :].droplevel(0)
    expected = df2.loc["AAPL"].loc[upto, :]
    tm.assert_frame_equal(res, expected)
|
||||
|
||||
|
||||
def test_3levels_leading_period_index():
    """A full three-part key with a leading Period returns the scalar value."""
    # GH#24091
    stamps = ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"]
    pi = pd.PeriodIndex(stamps, name="datetime", freq="D")
    lev2 = ["A", "A", "Z", "W"]
    lev3 = ["B", "C", "Q", "F"]
    mi = MultiIndex.from_arrays([pi, lev2, lev3])

    ser = Series(range(4), index=mi, dtype=np.float64)
    first_key = (pi[0], "A", "B")
    assert ser.loc[first_key] == 0.0
|
||||
|
||||
|
||||
class TestKeyErrorsWithMultiIndex:
    """Missing keys on a MultiIndex must surface as ``KeyError``."""

    def test_missing_keys_raises_keyerror(self):
        """A tuple with a missing second-level value raises KeyError."""
        # GH#27420 KeyError, not TypeError
        base = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
        indexed = base.set_index(["A", "B"])

        with pytest.raises(KeyError, match="1"):
            indexed.loc[(1, 6)]

    def test_missing_key_raises_keyerror2(self):
        """Two keys on a two-level Series raise KeyError when one is missing."""
        # GH#21168 KeyError, not "IndexingError: Too many indexers"
        mi = MultiIndex.from_product([[0, 1]] * 2)
        ser = Series(-1, index=mi)

        with pytest.raises(KeyError, match=r"\(0, 3\)"):
            ser.loc[0, 3]

    def test_missing_key_combination(self):
        """Valid labels in a combination that never occurs raise KeyError."""
        # GH: 19556
        arrays = [
            np.array(["a", "a", "b", "b"]),
            np.array(["1", "2", "2", "3"]),
            np.array(["c", "d", "c", "d"]),
        ]
        mi = MultiIndex.from_arrays(arrays, names=["one", "two", "three"])
        df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi)

        # ("b", "1") never occurs together in the index
        absent = ("b", "1", slice(None))
        msg = r"\('b', '1', slice\(None, None, None\)\)"
        with pytest.raises(KeyError, match=msg):
            df.loc[absent, :]
        with pytest.raises(KeyError, match=msg):
            df.index.get_locs(absent)
        with pytest.raises(KeyError, match=r"\('b', '1'\)"):
            df.loc[("b", "1"), :]
|
||||
|
||||
|
||||
def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
|
||||
df = multiindex_year_month_day_dataframe_random_data
|
||||
ser = df["A"]
|
||||
result = ser[2000, 5]
|
||||
expected = df.loc[2000, 5]["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_with_nan():
    """Label lookups work when another entry of the same level is NaN."""
    # GH: 27104
    raw = {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]}
    df = DataFrame(raw).set_index(["ind1", "ind2"])

    # list key keeps both levels
    result = df.loc[["a"]]
    expected = DataFrame(
        {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
    )
    tm.assert_frame_equal(result, expected)

    # scalar key drops the first level
    result = df.loc["a"]
    expected = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_non_found_tuple():
    """A full-depth tuple that is not in the index raises KeyError naming it."""
    # GH: 25236
    flat = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
    df = flat.set_index(["a", "b", "c"])
    with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
        df.loc[(2.0, 2.0, 3.0)]
|
||||
|
||||
|
||||
def test_get_loc_datetime_index():
    """``get_loc`` with a partial date string agrees between Index and MultiIndex."""
    # GH#24263
    index = pd.date_range("2001-01-01", periods=100)
    mi = MultiIndex.from_arrays([index])
    january = slice(0, 31, None)

    # Check if get_loc matches for Index and MultiIndex
    assert mi.get_loc("2001-01") == january
    assert index.get_loc("2001-01") == january

    # every other day
    loc = mi[::2].get_loc("2001-01")
    expected = index[::2].get_loc("2001-01")
    assert loc == expected

    # each day repeated
    loc = mi.repeat(2).get_loc("2001-01")
    expected = index.repeat(2).get_loc("2001-01")
    assert loc == expected

    # index appended to itself
    loc = mi.append(mi).get_loc("2001-01")
    expected = index.append(index).get_loc("2001-01")
    # TODO: standardize return type for MultiIndex.get_loc
    tm.assert_numpy_array_equal(loc.nonzero()[0], expected)
|
||||
|
||||
|
||||
def test_loc_setitem_indexer_differently_ordered():
    """Values are assigned in the order of the key list, not the index order."""
    # GH#34603
    mi = MultiIndex.from_product([["a", "b"], [0, 1]])
    df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)

    # key lists level 1 as [1, 0], so the first new row lands on ("a", 1)
    new_rows = np.array([[9, 10], [11, 12]])
    df.loc[("a", [1, 0]), :] = new_rows

    expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_index_differently_ordered_slice_none():
    """With ``slice(None)`` on level 0, rows follow the order of the level-1 list."""
    # GH#31330
    df = DataFrame(
        [[1, 2], [3, 4], [5, 6], [7, 8]],
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=["a", "b"],
    )
    result = df.loc[(slice(None), [2, 1]), :]

    # all rows with level-1 value 2 first, then those with value 1
    expected = DataFrame(
        [[3, 4], [7, 8], [1, 2], [5, 6]],
        index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
        columns=["a", "b"],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
    """Repeated labels in the level-1 list do not duplicate the selected rows."""
    # GH#40978
    row_tuples = [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
    df = DataFrame(
        [1] * 8,
        index=MultiIndex.from_tuples(row_tuples),
        columns=["a"],
    )
    result = df.loc[(slice(None), indexer), :]
    expected = DataFrame(
        [1] * 8,
        index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
        columns=["a"],
    )
    tm.assert_frame_equal(result, expected)

    # a boolean mask built from the same labels keeps the original row order
    mask = df.index.isin(indexer, level=1)
    result = df.loc[mask, :]
    tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
def test_loc_getitem_drops_levels_for_one_row_dataframe():
    """Scalar-indexed levels are dropped even when only one row exists."""
    # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
    mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])

    df = DataFrame({"d": [0]}, index=mi)
    frame_expected = df.droplevel([0, 2])
    tm.assert_frame_equal(df.loc["x", :, "z"], frame_expected)

    ser = Series([0], index=mi)
    series_expected = Series([0], index=Index(["y"], name="b"))
    tm.assert_series_equal(ser.loc["x", :, "z"], series_expected)
|
||||
|
||||
|
||||
def test_mi_columns_loc_list_label_order():
    """Column groups come back in the order given by the key list."""
    # GH 10710
    cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
    df = DataFrame(np.zeros((5, 6)), columns=cols)
    result = df.loc[:, ["B", "A"]]

    reordered = MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)])
    expected = DataFrame(np.zeros((5, 4)), columns=reordered)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_partial_indexing_list_raises():
    """A list key is read as level-0 labels, so a level-1 value in it is missing.

    ``["b", 2]`` mixes a level-0 label with a level-1 label; the lookup is
    expected to fail with ``KeyError`` reporting ``[2] not in index``.
    """
    # GH 13501
    frame = DataFrame(
        np.arange(12).reshape((4, 3)),
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
    )
    frame.index.names = ["key1", "key2"]
    frame.columns.names = ["state", "color"]
    # raw string: same pattern as before, without doubled backslashes
    with pytest.raises(KeyError, match=r"\[2\] not in index"):
        frame.loc[["b", 2], "Colorado"]
|
||||
|
||||
|
||||
def test_mi_indexing_list_nonexistent_raises():
    """A list of labels that are all absent raises KeyError listing them."""
    # GH 15452
    s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]]))
    # raw string: same pattern as before, without doubled backslashes
    with pytest.raises(KeyError, match=r"\['not' 'found'\] not in index"):
        s.loc[["not", "found"]]
|
||||
|
||||
|
||||
def test_mi_add_cell_missing_row_non_unique():
    """Rows and single cells can be added to a frame with a non-unique row index."""
    # GH 16018
    columns = MultiIndex.from_product([[1, 2], ["A", "B"]])
    result = DataFrame(
        [[1, 2, 5, 6], [3, 4, 7, 8]],
        index=["a", "a"],
        columns=columns,
    )
    result.loc["c"] = -1  # new full row
    result.loc["c", (1, "A")] = 3  # overwrite one cell of that row
    result.loc["d", (1, "A")] = 3  # new row created from a single cell

    expected_rows = [
        [1.0, 2.0, 5.0, 6.0],
        [3.0, 4.0, 7.0, 8.0],
        [3.0, -1.0, -1, -1],
        [3.0, np.nan, np.nan, np.nan],
    ]
    expected = DataFrame(
        expected_rows,
        index=["a", "a", "c", "d"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_get_scalar_casting_to_float():
    """An integer cell stays ``np.int64`` although another column holds floats."""
    # GH#41369
    mi = MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
    df = DataFrame({"a": 1.0, "b": 2}, index=mi)

    # scalar lookup
    scalar = df.loc[(3, 4), "b"]
    assert scalar == 2
    assert isinstance(scalar, np.int64)

    # list-of-tuples lookup followed by positional access
    via_list = df.loc[[(3, 4)], "b"].iloc[0]
    assert via_list == 2
    assert isinstance(via_list, np.int64)
|
||||
|
||||
|
||||
def test_loc_empty_single_selector_with_names():
    """Level-0 lookup keeps the remaining level's name when names are ints."""
    # GH 19517
    idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
    s2 = Series(index=idx, dtype=np.float64)
    remaining = Index(["A", "B"], name=0)
    expected = Series([np.nan, np.nan], index=remaining)
    tm.assert_series_equal(s2.loc["a"], expected)
|
||||
|
||||
|
||||
def test_loc_keyerror_rightmost_key_missing():
    """When only the last key of a tuple is missing, KeyError names that key."""
    # GH 20951
    columns = {
        "A": [100, 100, 200, 200, 300, 300],
        "B": [10, 10, 20, 21, 31, 33],
        "C": range(6),
    }
    df = DataFrame(columns).set_index(["A", "B"])

    # 100 exists on level "A"; 1 does not exist on level "B"
    with pytest.raises(KeyError, match="^1$"):
        df.loc[(100, 1)]
|
||||
|
||||
|
||||
def test_multindex_series_loc_with_tuple_label():
    """A level value that is itself a tuple can be used inside the key."""
    # GH#43908
    nested_key = (3, (4, 5))
    mi = MultiIndex.from_tuples([(1, 2), nested_key])
    ser = Series([1, 2], index=mi)
    assert ser.loc[nested_key] == 2
|
@ -0,0 +1,235 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.index as libindex
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.boolean import BooleanDtype
|
||||
|
||||
|
||||
class TestMultiIndexBasic:
|
||||
def test_multiindex_perf_warn(self):
|
||||
df = DataFrame(
|
||||
{
|
||||
"jim": [0, 0, 1, 1],
|
||||
"joe": ["x", "x", "z", "y"],
|
||||
"jolie": np.random.default_rng(2).random(4),
|
||||
}
|
||||
).set_index(["jim", "joe"])
|
||||
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
df.loc[(1, "z")]
|
||||
|
||||
df = df.iloc[[2, 1, 3, 0]]
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
df.loc[(0,)]
|
||||
|
||||
@pytest.mark.parametrize("offset", [-5, 5])
def test_indexing_over_hashtable_size_cutoff(self, monkeypatch, offset):
    """Tuple lookups work for sizes just below and just above ``_SIZE_CUTOFF``."""
    size_cutoff = 20
    n = size_cutoff + offset

    with monkeypatch.context():
        # patch the engine cutoff so a small Series lands on either side of it
        monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        mi = MultiIndex.from_arrays((["a"] * n, np.arange(n)))
        s = Series(np.arange(n), mi)

        # hai it works!
        assert s[("a", 5)] == 5
        assert s[("a", 6)] == 6
        assert s[("a", 7)] == 7
|
||||
|
||||
def test_multi_nan_indexing(self):
|
||||
# GH 3588
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": ["R1", "R2", np.nan, "R4"],
|
||||
"b": ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20],
|
||||
}
|
||||
)
|
||||
result = df.set_index(["a", "b"], drop=False)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"a": ["R1", "R2", np.nan, "R4"],
|
||||
"b": ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20],
|
||||
},
|
||||
index=[
|
||||
Index(["R1", "R2", np.nan, "R4"], name="a"),
|
||||
Index(["C1", "C2", "C3", "C4"], name="b"),
|
||||
],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_exclusive_nat_column_indexing(self):
|
||||
# GH 38025
|
||||
# test multi indexing when one column exclusively contains NaT values
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
|
||||
"b": ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20],
|
||||
}
|
||||
)
|
||||
df = df.set_index(["a", "b"])
|
||||
expected = DataFrame(
|
||||
{
|
||||
"c": [10, 15, np.nan, 20],
|
||||
},
|
||||
index=[
|
||||
Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
|
||||
Index(["C1", "C2", "C3", "C4"], name="b"),
|
||||
],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_nested_tuples_duplicates(self):
|
||||
# GH#30892
|
||||
|
||||
dti = pd.to_datetime(["20190101", "20190101", "20190102"])
|
||||
idx = Index(["a", "a", "c"])
|
||||
mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])
|
||||
|
||||
df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
|
||||
|
||||
expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)
|
||||
|
||||
df2 = df.copy(deep=True)
|
||||
df2.loc[(dti[0], "a"), "c2"] = 1.0
|
||||
tm.assert_frame_equal(df2, expected)
|
||||
|
||||
df3 = df.copy(deep=True)
|
||||
df3.loc[[(dti[0], "a")], "c2"] = 1.0
|
||||
tm.assert_frame_equal(df3, expected)
|
||||
|
||||
def test_multiindex_with_datatime_level_preserves_freq(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/35563
|
||||
idx = Index(range(2), name="A")
|
||||
dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
|
||||
mi = MultiIndex.from_product([idx, dti])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((14, 2)), index=mi)
|
||||
result = df.loc[0].index
|
||||
tm.assert_index_equal(result, dti)
|
||||
assert result.freq == dti.freq
|
||||
|
||||
def test_multiindex_complex(self):
|
||||
# GH#42145
|
||||
complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
|
||||
non_complex_data = [3, 4, 5]
|
||||
result = DataFrame(
|
||||
{
|
||||
"x": complex_data,
|
||||
"y": non_complex_data,
|
||||
"z": non_complex_data,
|
||||
}
|
||||
)
|
||||
result.set_index(["x", "y"], inplace=True)
|
||||
expected = DataFrame(
|
||||
{"z": non_complex_data},
|
||||
index=MultiIndex.from_arrays(
|
||||
[complex_data, non_complex_data],
|
||||
names=("x", "y"),
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rename_multiindex_with_duplicates(self):
|
||||
# GH 38015
|
||||
mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
|
||||
df = DataFrame(index=mi)
|
||||
df = df.rename(index={"A": "Apple"}, level=0)
|
||||
|
||||
mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
|
||||
expected = DataFrame(index=mi2)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_series_align_multiindex_with_nan_overlap_only(self):
|
||||
# GH 38439
|
||||
mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
|
||||
mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]])
|
||||
ser1 = Series([1, 2], index=mi1)
|
||||
ser2 = Series([1, 2], index=mi2)
|
||||
result1, result2 = ser1.align(ser2)
|
||||
|
||||
mi = MultiIndex.from_arrays([[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]])
|
||||
expected1 = Series([1.0, np.nan, 2.0], index=mi)
|
||||
expected2 = Series([np.nan, 2.0, 1.0], index=mi)
|
||||
|
||||
tm.assert_series_equal(result1, expected1)
|
||||
tm.assert_series_equal(result2, expected2)
|
||||
|
||||
def test_series_align_multiindex_with_nan(self):
|
||||
# GH 38439
|
||||
mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
|
||||
mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]])
|
||||
ser1 = Series([1, 2], index=mi1)
|
||||
ser2 = Series([1, 2], index=mi2)
|
||||
result1, result2 = ser1.align(ser2)
|
||||
|
||||
mi = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
|
||||
expected1 = Series([1, 2], index=mi)
|
||||
expected2 = Series([2, 1], index=mi)
|
||||
|
||||
tm.assert_series_equal(result1, expected1)
|
||||
tm.assert_series_equal(result2, expected2)
|
||||
|
||||
def test_nunique_smoke(self):
|
||||
# GH 34019
|
||||
n = DataFrame([[1, 2], [1, 2]]).set_index([0, 1]).index.nunique()
|
||||
assert n == 1
|
||||
|
||||
def test_multiindex_repeated_keys(self):
|
||||
# GH19414
|
||||
tm.assert_series_equal(
|
||||
Series([1, 2], MultiIndex.from_arrays([["a", "b"]])).loc[
|
||||
["a", "a", "b", "b"]
|
||||
],
|
||||
Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])),
|
||||
)
|
||||
|
||||
def test_multiindex_with_na_missing_key(self):
    """A missing key raises KeyError naming it when the columns include None."""
    # GH46173
    data = {
        ("foo",): [1, 2, 3],
        ("bar",): [5, 6, 7],
        (None,): [8, 9, 0],
    }
    frame = DataFrame.from_dict(data)

    with pytest.raises(KeyError, match="missing_key"):
        frame[[("missing_key",)]]
|
||||
|
||||
def test_multiindex_dtype_preservation(self):
|
||||
# GH51261
|
||||
columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
|
||||
df = DataFrame(["value"], columns=columns).astype("category")
|
||||
df_no_multiindex = df["A"]
|
||||
assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype)
|
||||
|
||||
# geopandas 1763 analogue
|
||||
df = DataFrame(
|
||||
[[1, 0], [0, 1]],
|
||||
columns=[
|
||||
["foo", "foo"],
|
||||
["location", "location"],
|
||||
["x", "y"],
|
||||
],
|
||||
).assign(bools=Series([True, False], dtype="boolean"))
|
||||
assert isinstance(df["bools"].dtype, BooleanDtype)
|
||||
|
||||
def test_multiindex_from_tuples_with_nan(self):
|
||||
# GH#23578
|
||||
result = MultiIndex.from_tuples([("a", "b", "c"), np.nan, ("d", "", "")])
|
||||
expected = MultiIndex.from_tuples(
|
||||
[("a", "b", "c"), (np.nan, np.nan, np.nan), ("d", "", "")]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,269 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexPartial:
|
||||
def test_getitem_partial_int(self):
    """Column selection by an integer first-level label, scalar and list forms."""
    # GH 12416
    outer = [10, 20]
    inner = ["a", "b"]
    frame = DataFrame(index=range(2), columns=MultiIndex.from_product([outer, inner]))

    # with single item
    expected = DataFrame(index=range(2), columns=inner)
    tm.assert_frame_equal(frame[20], expected)

    # with list
    expected = DataFrame(
        index=range(2), columns=MultiIndex.from_product([outer[1:], inner])
    )
    tm.assert_frame_equal(frame[[20]], expected)

    # missing item:
    with pytest.raises(KeyError, match="1"):
        frame[1]
    with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
        frame[[1]]
|
||||
|
||||
def test_series_slice_partial(self):
|
||||
pass
|
||||
|
||||
def test_xs_partial(
    self,
    multiindex_dataframe_random_data,
    multiindex_year_month_day_dataframe_random_data,
):
    """``xs`` with a partial key agrees with the equivalent ``.loc`` selection."""
    frame = multiindex_dataframe_random_data
    ymd = multiindex_year_month_day_dataframe_random_data

    via_xs = frame.xs("foo")
    via_loc = frame.loc["foo"]
    via_transpose = frame.T["foo"].T
    tm.assert_frame_equal(via_xs, via_transpose)
    tm.assert_frame_equal(via_xs, via_loc)

    tm.assert_frame_equal(ymd.xs((2000, 4)), ymd.loc[2000, 4])

    # ex from #1796
    levels = [["foo", "bar"], ["one", "two"], [-1, 1]]
    codes = [
        [0, 0, 0, 0, 1, 1, 1, 1],
        [0, 0, 1, 1, 0, 0, 1, 1],
        [0, 1, 0, 1, 0, 1, 0, 1],
    ]
    df = DataFrame(
        np.random.default_rng(2).standard_normal((8, 4)),
        index=MultiIndex(levels=levels, codes=codes),
        columns=list("abcd"),
    )

    tm.assert_frame_equal(df.xs(("foo", "one")), df.loc["foo", "one"])
|
||||
|
||||
def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
    """A two-level column key on the transposed frame drops both matched levels."""
    transposed = multiindex_year_month_day_dataframe_random_data.T
    result = transposed[2000, 2]

    # Build the expectation from the columns whose second-level code is 1.
    second_level_codes = transposed.columns.codes[1]
    expected = transposed.reindex(
        columns=transposed.columns[second_level_codes == 1]
    )
    expected.columns = expected.columns.droplevel(0).droplevel(0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fancy_slice_partial(
    self,
    multiindex_dataframe_random_data,
    multiindex_year_month_day_dataframe_random_data,
):
    """Label slices with partial keys match positional and boolean selections."""
    frame = multiindex_dataframe_random_data
    tm.assert_frame_equal(frame.loc["bar":"baz"], frame[3:7])

    ymd = multiindex_year_month_day_dataframe_random_data
    sliced = ymd.loc[(2000, 2):(2000, 4)]
    second_level_codes = ymd.index.codes[1]
    in_range = (second_level_codes >= 1) & (second_level_codes <= 3)
    tm.assert_frame_equal(sliced, ymd[in_range])
|
||||
|
||||
def test_getitem_partial_column_select(self):
    """Partial row key combined with a column selector in ``.loc``."""
    index = MultiIndex(
        codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
        levels=[["a", "b"], ["x", "y"], ["p", "q"]],
    )
    df = DataFrame(np.random.default_rng(2).random((3, 2)), index=index)
    key = ("a", "y")

    # Full column slice is the same as giving no column selector.
    tm.assert_frame_equal(df.loc[key, :], df.loc[key])

    # A column list is the same as selecting the columns afterwards.
    tm.assert_frame_equal(df.loc[key, [1, 0]], df.loc[key][[1, 0]])

    with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
        df.loc[("a", "foo"), :]
|
||||
|
||||
# TODO(ArrayManager) rewrite test to not use .values
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_partial_set(
    self,
    multiindex_year_month_day_dataframe_random_data,
    using_copy_on_write,
    warn_copy_on_write,
):
    # Setting through partial MultiIndex keys with .loc, checked against the
    # same positional writes made with .iloc on a second copy.
    # NOTE(review): `warn_copy_on_write` is requested but never read in this body.
    # GH #397
    ymd = multiindex_year_month_day_dataframe_random_data
    df = ymd.copy()
    exp = ymd.copy()
    df.loc[2000, 4] = 0
    exp.iloc[65:85] = 0
    tm.assert_frame_equal(df, exp)

    # Both branches attempt the same chained assignment; only the
    # copy-on-write branch then repeats the write through a single .loc call.
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["A"].loc[2000, 4] = 1
        df.loc[(2000, 4), "A"] = 1
    else:
        with tm.raises_chained_assignment_error():
            df["A"].loc[2000, 4] = 1
    exp.iloc[65:85, 0] = 1
    tm.assert_frame_equal(df, exp)

    df.loc[2000] = 5
    exp.iloc[:100] = 5
    tm.assert_frame_equal(df, exp)

    # this works...for now
    with tm.raises_chained_assignment_error():
        df["A"].iloc[14] = 5
    if using_copy_on_write:
        # Under copy-on-write the chained write is expected to leave df unchanged.
        assert df["A"].iloc[14] == exp["A"].iloc[14]
    else:
        assert df["A"].iloc[14] == 5
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, float])
def test_getitem_intkey_leading_level(
    self, multiindex_year_month_day_dataframe_random_data, dtype
):
    """An integer key absent from the leading level raises KeyError."""
    # GH#33355 dont fall-back to positional when leading level is int
    ymd = multiindex_year_month_day_dataframe_random_data
    old_levels = ymd.index.levels
    ymd.index = ymd.index.set_levels(
        [old_levels[0].astype(dtype)] + old_levels[1:]
    )
    ser = ymd["A"]
    mi = ser.index
    assert isinstance(mi, MultiIndex)

    expected_dtype = np.dtype(int) if dtype is int else np.float64
    assert mi.levels[0].dtype == expected_dtype

    assert 14 not in mi.levels[0]
    assert not mi.levels[0]._should_fallback_to_positional
    assert not mi._should_fallback_to_positional

    with pytest.raises(KeyError, match="14"):
        ser[14]
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
    """Setting several first-level labels at once equals setting them one by one."""
    frame = multiindex_dataframe_random_data

    # Same scenario for the whole frame and for a single column, each with a
    # list key and then a label slice.
    cases = (
        (lambda: frame.copy(), tm.assert_frame_equal),
        (lambda: frame["A"].copy(), tm.assert_series_equal),
    )
    for fresh_copy, compare in cases:
        for key in (["foo", "bar"], slice("foo", "bar")):
            expected = fresh_copy()
            result = fresh_copy()
            result.loc[key] = 0
            expected.loc["foo"] = 0
            expected.loc["bar"] = 0
            compare(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, exp_idx, exp_values",
    [
        (
            slice("2019-2", None),
            DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
            [2, 3],
        ),
        (
            slice(None, "2019-2"),
            date_range("2019", periods=2, freq="MS"),
            [0, 1, 2, 3],
        ),
    ],
)
def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
    """Partial date-string slices on a datetime level, via several access paths."""
    # GH: 25165
    dates = date_range("2019", periods=2, freq="MS")
    df = DataFrame(
        list(range(4)),
        index=MultiIndex.from_product([dates, [0, 1]], names=["x", "y"]),
    )
    expected = DataFrame(
        exp_values,
        index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
    )

    # Each accessor is evaluated lazily, in the original order.
    accessors = (
        lambda obj: obj[indexer],
        lambda obj: obj.loc[indexer],
        lambda obj: obj.loc(axis=0)[indexer],
        lambda obj: obj.loc[indexer, :],
    )
    for select in accessors:
        tm.assert_frame_equal(select(df), expected)

    # Same slice applied to the second level after swapping levels.
    swapped = df.swaplevel(0, 1).sort_index()
    expected = expected.swaplevel(0, 1).sort_index()
    tm.assert_frame_equal(swapped.loc[:, indexer, :], expected)
|
||||
|
||||
|
||||
def test_loc_getitem_partial_both_axis():
    """``.loc`` with one partial key per axis drops the matched level on both."""
    # gh-12660
    iterables = [["a", "b"], [2, 1]]
    row_index = MultiIndex.from_product(iterables, names=["row1", "row2"])
    col_index = MultiIndex.from_product(iterables, names=["col1", "col2"])
    df = DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=row_index,
        columns=col_index,
    )

    block = df.iloc[:2, 2:]
    expected = block.droplevel("row1").droplevel("col1", axis=1)
    result = df.loc["a", "b"]
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,589 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SettingWithCopyError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_equal(a, b):
    # Plain equality assertion; the default comparison used by
    # TestMultiIndexSetItem.check.
    assert a == b
|
||||
|
||||
|
||||
class TestMultiIndexSetItem:
|
||||
def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
    """Set ``value`` at ``target.loc[indexers]`` and compare what reads back.

    The read-back result is compared with ``expected``, or with ``value``
    when ``expected`` is None, using ``compare_fn(result, expected)``.
    """
    target.loc[indexers] = value
    read_back = target.loc[indexers]
    wanted = value if expected is None else expected
    compare_fn(read_back, wanted)
|
||||
|
||||
def test_setitem_multiindex(self):
    """Scalar set/get round trip at one cell for differently initialised frames."""
    # GH#7190
    cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
    index = MultiIndex.from_product(
        [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
    )
    t, n = 0, 2
    cell = ((t, n), "X")

    nan_filled = DataFrame(np.nan, columns=cols, index=index)
    self.check(target=nan_filled, indexers=cell, value=0)

    int_filled = DataFrame(-999, columns=cols, index=index)
    self.check(target=int_filled, indexers=cell, value=1)

    unfilled = DataFrame(columns=cols, index=index)
    self.check(target=unfilled, indexers=cell, value=2)

    # gh-7218: assigning with 0-dim arrays
    int_filled = DataFrame(-999, columns=cols, index=index)
    self.check(
        target=int_filled,
        indexers=cell,
        value=np.array(3),
        expected=3,
    )
|
||||
|
||||
def test_setitem_multiindex2(self):
    """Assign a column into a boolean-row by column-list selection."""
    # GH#5206
    df = DataFrame(
        np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
    )
    df["F"] = 99
    even_rows = df["A"] % 2 == 0
    target_cols = ["B", "C"]

    df.loc[even_rows, target_cols] = df["F"]
    expected = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
    tm.assert_frame_equal(df.loc[even_rows, target_cols], expected)

    # Repeat the same assignment through the shared helper.
    self.check(
        target=df,
        indexers=(even_rows, target_cols),
        value=df["F"],
        compare_fn=tm.assert_frame_equal,
        expected=expected,
    )
|
||||
|
||||
def test_setitem_multiindex3(self):
    """Set frame-valued blocks using MultiIndex objects as row and column keys."""
    # GH#11372
    col_dates = date_range("2016-01-01", "2016-02-01", freq="MS")
    idx = MultiIndex.from_product(
        [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
    )
    cols = MultiIndex.from_product([["foo", "bar"], col_dates])

    df = DataFrame(
        np.random.default_rng(2).random((12, 4)), index=idx, columns=cols
    )

    subidx = MultiIndex.from_arrays(
        [["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")]
    )
    subcols = MultiIndex.from_arrays([["foo", "foo"], col_dates])

    block = DataFrame(
        np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols
    )
    self.check(
        target=df,
        indexers=(subidx, subcols),
        value=block,
        compare_fn=tm.assert_frame_equal,
    )

    # set all columns
    wide_block = DataFrame(
        np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols
    )
    self.check(
        target=df,
        indexers=(subidx, slice(None)),
        value=wide_block,
        compare_fn=tm.assert_frame_equal,
    )

    # identity
    snapshot = df.copy()
    self.check(
        target=df,
        indexers=(df.index, df.columns),
        value=df,
        compare_fn=tm.assert_frame_equal,
        expected=snapshot,
    )
|
||||
|
||||
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
@td.skip_array_manager_not_yet_implemented
def test_multiindex_setitem(self):
    """In-place multiply with a list key works; a scalar key raises TypeError."""
    # GH 3738
    # setting with a multi-index right hand side
    level_arrays = [
        np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
        np.array(["one", "two", "one", "one", "two", "one"]),
        np.arange(0, 6, 1),
    ]
    original = DataFrame(
        np.random.default_rng(2).standard_normal((6, 3)),
        index=level_arrays,
        columns=["A", "B", "C"],
    ).sort_index()

    expected = original.loc[["bar"]] * 2
    df = original.copy()
    df.loc[["bar"]] *= 2
    tm.assert_frame_equal(df.loc[["bar"]], expected)

    # raise because these have differing levels
    msg = "cannot align on a multi-index with out specifying the join levels"
    with pytest.raises(TypeError, match=msg):
        df.loc["bar"] *= 2
|
||||
|
||||
def test_multiindex_setitem2(self):
|
||||
# from SO
|
||||
# https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
|
||||
df_orig = DataFrame.from_dict(
|
||||
{
|
||||
"price": {
|
||||
("DE", "Coal", "Stock"): 2,
|
||||
("DE", "Gas", "Stock"): 4,
|
||||
("DE", "Elec", "Demand"): 1,
|
||||
("FR", "Gas", "Stock"): 5,
|
||||
("FR", "Solar", "SupIm"): 0,
|
||||
("FR", "Wind", "SupIm"): 0,
|
||||
}
|
||||
}
|
||||
)
|
||||
df_orig.index = MultiIndex.from_tuples(
|
||||
df_orig.index, names=["Sit", "Com", "Type"]
|
||||
)
|
||||
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 1, 3]] *= 2
|
||||
|
||||
idx = pd.IndexSlice
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], :] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], "price"] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_assignment(self):
|
||||
# GH3777 part 2
|
||||
|
||||
# mixed dtype
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
|
||||
columns=list("abc"),
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
df["d"] = np.nan
|
||||
arr = np.array([0.0, 1.0])
|
||||
|
||||
df.loc[4, "d"] = arr
|
||||
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
|
||||
|
||||
def test_multiindex_assignment_single_dtype(
    self, using_copy_on_write, warn_copy_on_write
):
    # Assigning to one column at a partial key of an all-int64 frame:
    # castable values, non-castable values, scalars and wrong-length lists.
    # GH3777 part 2b
    # single dtype
    arr = np.array([0.0, 1.0])

    df = DataFrame(
        np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
        columns=list("abc"),
        index=[[4, 4, 8], [8, 10, 12]],
        dtype=np.int64,
    )
    # Raw array for the first two rows of "c"; compared below against the
    # assigned values in the non-copy-on-write case.
    view = df["c"].iloc[:2].values

    # arr can be losslessly cast to int, so this setitem is inplace
    # INFO(CoW-warn) this does not warn because we directly took .values
    # above, so no reference to a pandas object is alive for `view`
    df.loc[4, "c"] = arr
    exp = Series(arr, index=[8, 10], name="c", dtype="int64")
    result = df.loc[4, "c"]
    tm.assert_series_equal(result, exp)

    # extra check for inplace-ness
    if not using_copy_on_write:
        tm.assert_numpy_array_equal(view, exp.values)

    # arr + 0.5 cannot be cast losslessly to int, so we upcast
    with tm.assert_produces_warning(
        FutureWarning, match="item of incompatible dtype"
    ):
        df.loc[4, "c"] = arr + 0.5
    result = df.loc[4, "c"]
    exp = exp + 0.5
    tm.assert_series_equal(result, exp)

    # scalar ok
    with tm.assert_cow_warning(warn_copy_on_write):
        df.loc[4, "c"] = 10
    exp = Series(10, index=[8, 10], name="c", dtype="float64")
    tm.assert_series_equal(df.loc[4, "c"], exp)

    # invalid assignments
    msg = "Must have equal len keys and value when setting with an iterable"
    with pytest.raises(ValueError, match=msg):
        df.loc[4, "c"] = [0, 1, 2, 3]

    with pytest.raises(ValueError, match=msg):
        df.loc[4, "c"] = [0]

    # But with a length-1 listlike column indexer this behaves like
    # `df.loc[4, "c"] = 0`
    with tm.assert_cow_warning(warn_copy_on_write):
        df.loc[4, ["c"]] = [0]
    assert (df.loc[4, "c"] == 0).all()
|
||||
|
||||
def test_groupby_example(self):
|
||||
# groupby example
|
||||
NUM_ROWS = 100
|
||||
NUM_COLS = 10
|
||||
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
|
||||
index_cols = col_names[:5]
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)),
|
||||
dtype=np.int64,
|
||||
columns=col_names,
|
||||
)
|
||||
df = df.set_index(index_cols).sort_index()
|
||||
grp = df.groupby(level=index_cols[:4])
|
||||
df["new_col"] = np.nan
|
||||
|
||||
# we are actually operating on a copy here
|
||||
# but in this case, that's ok
|
||||
for name, df2 in grp:
|
||||
new_vals = np.arange(df2.shape[0])
|
||||
df.loc[name, "new_col"] = new_vals
|
||||
|
||||
def test_series_setitem(
    self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write
):
    # Setting NaN on a Series through partial and full tuple keys, then
    # checking which positions became NaN.
    ymd = multiindex_year_month_day_dataframe_random_data
    s = ymd["A"]

    # Partial key: positions 42:65 become NaN and everything else is untouched.
    with tm.assert_cow_warning(warn_copy_on_write):
        s[2000, 3] = np.nan
    assert isna(s.values[42:65]).all()
    assert notna(s.values[:42]).all()
    assert notna(s.values[65:]).all()

    # Full three-level key: position 49 is NaN afterwards.
    with tm.assert_cow_warning(warn_copy_on_write):
        s[2000, 3, 10] = np.nan
    assert isna(s.iloc[49])

    with pytest.raises(KeyError, match="49"):
        # GH#33355 dont fall-back to positional when leading level is int
        s[49]
|
||||
|
||||
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
    """Boolean-frame getitem/setitem mirrors the same masking on a raw array."""
    frame = multiindex_dataframe_random_data
    df = frame.T.copy()
    raw = df.values.copy()

    positive = df > 0
    result = df[positive]
    expected = df.where(positive)
    tm.assert_frame_equal(result, expected)

    df[df > 0] = 5
    raw[raw > 0] = 5
    tm.assert_almost_equal(df.values, raw)

    df[df == 5] = 0
    raw[raw == 5] = 0
    tm.assert_almost_equal(df.values, raw)

    # a df that needs alignment first
    df[df[:-1] < 0] = 2
    head = raw[:-1]
    np.putmask(head, head < 0, 2)
    tm.assert_almost_equal(df.values, raw)

    with pytest.raises(TypeError, match="boolean values only"):
        df[df * 0] = 2
|
||||
|
||||
def test_frame_getitem_setitem_multislice(self):
|
||||
levels = [["t1", "t2"], ["a", "b", "c"]]
|
||||
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
|
||||
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
|
||||
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
|
||||
|
||||
result = df.loc[:, "value"]
|
||||
tm.assert_series_equal(df["value"], result)
|
||||
|
||||
result = df.loc[df.index[1:3], "value"]
|
||||
tm.assert_series_equal(df["value"][1:3], result)
|
||||
|
||||
result = df.loc[:, :]
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
result = df
|
||||
df.loc[:, "value"] = 10
|
||||
result["value"] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
df.loc[:, :] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
def test_frame_setitem_multi_column(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=[["a", "a", "b", "b"], [0, 1, 0, 1]],
|
||||
)
|
||||
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"]
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
# set with ndarray
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"].values
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
def test_frame_setitem_multi_column2(self):
|
||||
# ---------------------------------------
|
||||
# GH#1803
|
||||
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
|
||||
df = DataFrame(index=[1, 3, 5], columns=columns)
|
||||
|
||||
# Works, but adds a column instead of updating the two existing ones
|
||||
df["A"] = 0.0 # Doesn't work
|
||||
assert (df["A"].values == 0).all()
|
||||
|
||||
# it broadcasts
|
||||
df["B", "1"] = [1, 2, 3]
|
||||
df["A"] = df["B", "1"]
|
||||
|
||||
sliced_a1 = df["A", "1"]
|
||||
sliced_a2 = df["A", "2"]
|
||||
sliced_b1 = df["B", "1"]
|
||||
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
|
||||
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
|
||||
assert sliced_a1.name == ("A", "1")
|
||||
assert sliced_a2.name == ("A", "2")
|
||||
assert sliced_b1.name == ("B", "1")
|
||||
|
||||
def test_loc_getitem_tuple_plus_columns(
    self, multiindex_year_month_day_dataframe_random_data
):
    """A full tuple key plus a column list equals selecting the columns afterwards."""
    # GH #1013
    head = multiindex_year_month_day_dataframe_random_data[:5]
    wanted_cols = ["A", "B", "C"]

    result = head.loc[(2000, 1, 6), wanted_cols]
    expected = head.loc[2000, 1, 6][wanted_cols]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
    """Integer label slices on the first level, for both getting and setting."""
    index = MultiIndex(
        levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
    )
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(index), 4)),
        index=index,
        columns=["a", "b", "c", "d"],
    )
    obj = tm.get_obj(frame, frame_or_series)

    sliced = obj.loc[1:2]
    reindexed = obj.reindex(obj.index[2:])
    tm.assert_equal(sliced, reindexed)

    obj.loc[1:2] = 7
    assert (obj.loc[1:2] == 7).values.all()
|
||||
|
||||
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
    """Replace a column with a boolean Series and read it back two ways."""
    transposed = multiindex_dataframe_random_data.T
    key = ("foo", "two")

    s = transposed[key]
    above_median = s > s.median()
    transposed[key] = above_median
    tm.assert_series_equal(transposed[key], above_median)

    reindexed = transposed.reindex(columns=[key])
    tm.assert_series_equal(reindexed[key], above_median)
|
||||
|
||||
def test_set_column_scalar_with_loc(
    self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    # Scalar assignment to a subset of MultiIndex rows: first through
    # frame.loc, then through a column taken from the frame.
    frame = multiindex_dataframe_random_data
    subset = frame.index[[1, 4, 5]]

    frame.loc[subset] = 99
    assert (frame.loc[subset].values == 99).all()

    # Snapshot used to check the frame is unchanged in the copy-on-write branch.
    frame_original = frame.copy()
    col = frame["B"]
    with tm.assert_cow_warning(warn_copy_on_write):
        col[subset] = 97
    if using_copy_on_write:
        # chained setitem doesn't work with CoW
        tm.assert_frame_equal(frame, frame_original)
    else:
        assert (frame.loc[subset, "B"] == 97).all()
|
||||
|
||||
def test_nonunique_assignment_1750(self):
|
||||
df = DataFrame(
|
||||
[[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
|
||||
)
|
||||
|
||||
df = df.set_index(["A", "B"])
|
||||
mi = MultiIndex.from_tuples([(1, 1)])
|
||||
|
||||
df.loc[mi, "C"] = "_"
|
||||
|
||||
assert (df.xs((1, 1))["C"] == "_").all()
|
||||
|
||||
def test_astype_assignment_with_dups(self):
|
||||
# GH 4686
|
||||
# assignment with dups that has a dtype change
|
||||
cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
|
||||
df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
|
||||
index = df.index.copy()
|
||||
|
||||
df["A"] = df["A"].astype(np.float64)
|
||||
tm.assert_index_equal(df.index, index)
|
||||
|
||||
def test_setitem_nonmonotonic(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/31449
|
||||
index = MultiIndex.from_tuples(
|
||||
[("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
|
||||
)
|
||||
df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
|
||||
df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
|
||||
expected = DataFrame({"e": [99, 1, 100]}, index=index)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestSetitemWithExpansionMultiIndex:
|
||||
def test_setitem_new_column_mixed_depth(self):
|
||||
arrays = [
|
||||
["a", "top", "top", "routine1", "routine1", "routine2"],
|
||||
["", "OD", "OD", "result1", "result2", "result1"],
|
||||
["", "wx", "wy", "", "", ""],
|
||||
]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
|
||||
|
||||
result = df.copy()
|
||||
expected = df.copy()
|
||||
result["b"] = [1, 2, 3, 4]
|
||||
expected["b", "", ""] = [1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_setitem_new_column_all_na(self):
|
||||
# GH#1534
|
||||
mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
|
||||
s = Series({(1, 1): 1, (1, 2): 2})
|
||||
df["new"] = s
|
||||
assert df["new"].isna().all()
|
||||
|
||||
def test_setitem_enlargement_keep_index_names(self):
|
||||
# GH#53053
|
||||
mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"])
|
||||
df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"])
|
||||
df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)]
|
||||
mi_expected = MultiIndex.from_tuples(
|
||||
[(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"]
|
||||
)
|
||||
expected = DataFrame(
|
||||
data=[[10, 20, 30], [10, 20, 30]],
|
||||
index=mi_expected,
|
||||
columns=["A", "B", "C"],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values
|
||||
# is not a view
|
||||
def test_frame_setitem_view_direct(
|
||||
multiindex_dataframe_random_data, using_copy_on_write
|
||||
):
|
||||
# this works because we are modifying the underlying array
|
||||
# really a no-no
|
||||
df = multiindex_dataframe_random_data.T
|
||||
if using_copy_on_write:
|
||||
with pytest.raises(ValueError, match="read-only"):
|
||||
df["foo"].values[:] = 0
|
||||
assert (df["foo"].values != 0).all()
|
||||
else:
|
||||
df["foo"].values[:] = 0
|
||||
assert (df["foo"].values == 0).all()
|
||||
|
||||
|
||||
def test_frame_setitem_copy_raises(
|
||||
multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# will raise/warn as its chained assignment
|
||||
df = multiindex_dataframe_random_data.T
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["foo"]["one"] = 2
|
||||
else:
|
||||
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["foo"]["one"] = 2
|
||||
|
||||
|
||||
def test_frame_setitem_copy_no_write(
|
||||
multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
frame = multiindex_dataframe_random_data.T
|
||||
expected = frame
|
||||
df = frame.copy()
|
||||
if using_copy_on_write or warn_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["foo"]["one"] = 2
|
||||
else:
|
||||
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
||||
with pytest.raises(SettingWithCopyError, match=msg):
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["foo"]["one"] = 2
|
||||
|
||||
result = df
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_setitem_partial_multiindex():
    """Assign a Series indexed by only some of the frame's index levels."""
    # GH 54875
    data = {"a": [1, 2, 3], "b": [3, 4, 5], "c": 6, "d": 7}
    df = DataFrame(data).set_index(["a", "b", "c"])

    # The Series carries levels "a" and "b" only.
    partial_index = df.index.droplevel("c")
    ser = Series(8, index=partial_index)

    result = df.copy()
    result["d"] = ser

    expected = df.copy()
    expected["d"] = 8

    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,796 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
|
||||
|
||||
class TestMultiIndexSlicers:
|
||||
def test_per_axis_per_level_getitem(self):
|
||||
# GH6134
|
||||
# example test case
|
||||
ix = MultiIndex.from_product(
|
||||
[_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)
|
||||
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C2", "C3")
|
||||
]
|
||||
]
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# test multi-index slicing with per axis and per index controls
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
|
||||
)
|
||||
df = df.sort_index(axis=0).sort_index(axis=1)
|
||||
|
||||
# identity
|
||||
result = df.loc[(slice(None), slice(None)), :]
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = df.loc[:, (slice(None), slice(None))]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
# index
|
||||
result = df.loc[(slice(None), [1]), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), 1), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# columns
|
||||
result = df.loc[:, (slice(None), ["foo"])]
|
||||
expected = df.iloc[:, [1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# both
|
||||
result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
|
||||
expected = df.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc["A", "a"]
|
||||
expected = DataFrame(
|
||||
{"bar": [1, 5, 9], "foo": [0, 4, 8]},
|
||||
index=Index([1, 2, 3], name="two"),
|
||||
columns=Index(["bar", "foo"], name="lvl1"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), [1, 2]), :]
|
||||
expected = df.iloc[[0, 1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# multi-level series
|
||||
s = Series(np.arange(len(ix.to_numpy())), index=ix)
|
||||
result = s.loc["A1":"A3", :, ["C1", "C3"]]
|
||||
expected = s.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in s.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# boolean indexers
|
||||
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
|
||||
expected = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
msg = (
|
||||
"cannot index with a boolean indexer "
|
||||
"that is not the same length as the index"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), np.array([True, False])), :]
|
||||
|
||||
with pytest.raises(KeyError, match=r"\[1\] not in index"):
|
||||
# slice(None) is on the index, [1] is on the columns, but 1 is
|
||||
# not in the columns, so we raise
|
||||
# This used to treat [1] as positional GH#16396
|
||||
df.loc[slice(None), [1]]
|
||||
|
||||
# not lexsorted
|
||||
assert df.index._lexsort_depth == 2
|
||||
df = df.sort_index(level=1, axis=0)
|
||||
assert df.index._lexsort_depth == 0
|
||||
|
||||
msg = (
|
||||
"MultiIndex slicing requires the index to be "
|
||||
r"lexsorted: slicing on levels \[1\], lexsort depth 0"
|
||||
)
|
||||
with pytest.raises(UnsortedIndexError, match=msg):
|
||||
df.loc[(slice(None), slice("bar")), :]
|
||||
|
||||
# GH 16734: not sorted, but no real slicing
|
||||
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
|
||||
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
|
||||
|
||||
def test_multiindex_slicers_non_unique(self):
|
||||
# GH 7106
|
||||
# non-unique mi index support
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 3],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# this is equivalent of an xs expression
|
||||
result = df.xs(1, level=2, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 2],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
assert not result.index.is_unique
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH12896
|
||||
# numpy-implementation dependent bug
|
||||
ints = [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
14,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
200000,
|
||||
200000,
|
||||
]
|
||||
n = len(ints)
|
||||
idx = MultiIndex.from_arrays([["a"] * n, ints])
|
||||
result = Series([1] * n, index=idx)
|
||||
result = result.sort_index()
|
||||
result = result.loc[(slice(None), slice(100000))]
|
||||
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_datetimelike(self):
|
||||
# GH 7429
|
||||
# buggy/inconsistent behavior when slicing with datetime-like
|
||||
dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)]
|
||||
freq = [1, 2]
|
||||
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
|
||||
index=index,
|
||||
columns=list("ABCD"),
|
||||
)
|
||||
|
||||
# multi-axis slicing
|
||||
idx = pd.IndexSlice
|
||||
expected = df.iloc[[0, 2, 4], [0, 1]]
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
slice(1, 1),
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
idx[
|
||||
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
|
||||
],
|
||||
idx[1:1],
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
1,
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with strings
|
||||
result = df.loc[
|
||||
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_edges(self):
|
||||
# GH 8132
|
||||
# various edge cases
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
|
||||
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
|
||||
"DATE": [
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
"2013-10-01",
|
||||
"2013-07-09",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
],
|
||||
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
|
||||
}
|
||||
)
|
||||
|
||||
df["DATE"] = pd.to_datetime(df["DATE"])
|
||||
df1 = df.set_index(["A", "B", "DATE"])
|
||||
df1 = df1.sort_index()
|
||||
|
||||
# A1 - Get all values under "A0" and "A1"
|
||||
result = df1.loc[(slice("A1")), :]
|
||||
expected = df1.iloc[0:10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A2 - Get all values from the start to "A2"
|
||||
result = df1.loc[(slice("A2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A3 - Get all values under "B1" or "B2"
|
||||
result = df1.loc[(slice(None), slice("B1", "B2")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A4 - Get all values between 2013-07-02 and 2013-07-09
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B1 - Get all values in B0 that are also under A0, A1 and A2
|
||||
result = df1.loc[(slice("A2"), slice("B0")), :]
|
||||
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
|
||||
# the As)
|
||||
result = df1.loc[(slice(None), slice("B2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B3 - Get all values from B1 to B2 and up to 2013-08-06
|
||||
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B4 - Same as A4 but the start of the date slice is not a key.
|
||||
# shows indexing on a partial selection slice
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_doc_examples(self):
|
||||
# test index maker
|
||||
idx = pd.IndexSlice
|
||||
|
||||
# from indexing.rst / advanced
|
||||
index = MultiIndex.from_product(
|
||||
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
df = DataFrame(
|
||||
np.arange(len(index) * len(columns), dtype="int64").reshape(
|
||||
(len(index), len(columns))
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[idx[:, :, ["C1", "C3"]], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# not sorted
|
||||
msg = (
|
||||
"MultiIndex slicing requires the index to be lexsorted: "
|
||||
r"slicing on levels \[1\], lexsort depth 1"
|
||||
)
|
||||
with pytest.raises(UnsortedIndexError, match=msg):
|
||||
df.loc["A1", ("a", slice("foo"))]
|
||||
|
||||
# GH 16734: not sorted, but no real slicing
|
||||
tm.assert_frame_equal(
|
||||
df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]]
|
||||
)
|
||||
|
||||
df = df.sort_index(axis=1)
|
||||
|
||||
# slicing
|
||||
df.loc["A1", (slice(None), "foo")]
|
||||
df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]
|
||||
|
||||
# setitem
|
||||
df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
|
||||
|
||||
def test_loc_axis_arguments(self):
|
||||
index = MultiIndex.from_product(
|
||||
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
df = (
|
||||
DataFrame(
|
||||
np.arange(len(index) * len(columns), dtype="int64").reshape(
|
||||
(len(index), len(columns))
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
.sort_index()
|
||||
.sort_index(axis=1)
|
||||
)
|
||||
|
||||
# axis 0
|
||||
result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis="index")[:, :, ["C1", "C3"]]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis 1
|
||||
result = df.loc(axis=1)[:, "foo"]
|
||||
expected = df.loc[:, (slice(None), "foo")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis="columns")[:, "foo"]
|
||||
expected = df.loc[:, (slice(None), "foo")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# invalid axis
|
||||
for i in [-1, 2, "foo"]:
|
||||
msg = f"No axis named {i} for object type DataFrame"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc(axis=i)[:, :, ["C1", "C3"]]
|
||||
|
||||
def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1":"a2"]
|
||||
expected = df.iloc[:, :-3]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1"]
|
||||
expected = df.iloc[:, :3]
|
||||
expected.columns = ["b1", "b2", "b3"]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_ax_single_level_indexer_simple_df(self):
|
||||
# GH29519
|
||||
# test single level indexing on single index column data frame
|
||||
df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
|
||||
result = df.loc(axis=1)["a"]
|
||||
expected = Series(np.array([0, 3, 6]), name="a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_setitem(self):
|
||||
# test index maker
|
||||
idx = pd.IndexSlice
|
||||
|
||||
# test multi-index slicing with per axis and per index controls
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
|
||||
df_orig = DataFrame(
|
||||
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
|
||||
)
|
||||
df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
|
||||
|
||||
# identity
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), slice(None)), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc(axis=0)[:, :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[:, (slice(None), slice(None))] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# index
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), [1]), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc(axis=0)[:, 1] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# columns
|
||||
df = df_orig.copy()
|
||||
df.loc[:, (slice(None), ["foo"])] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# both
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, 1], idx[:, ["foo"]]] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["A", "a"] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[0:3, 0:2] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# setting with a list-like
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[[100, 100], [100, 100]], dtype="int64"
|
||||
)
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# not enough values
|
||||
df = df_orig.copy()
|
||||
|
||||
msg = "setting an array element with a sequence."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[[100], [100, 100]], dtype="int64"
|
||||
)
|
||||
|
||||
msg = "Must have equal len keys and value when setting with an iterable"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[100, 100, 100, 100], dtype="int64"
|
||||
)
|
||||
|
||||
# with an alignable rhs
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = (
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5
|
||||
)
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[
|
||||
(slice(None), 1), (slice(None), ["foo"])
|
||||
]
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy()
|
||||
rhs.loc[:, ("c", "bah")] = 10
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_label_slicing_with_negative_step(self):
|
||||
ser = Series(
|
||||
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
|
||||
)
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
|
||||
tm.assert_indexing_slices_equivalent(
|
||||
ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
|
||||
)
|
||||
|
||||
def test_multiindex_slice_first_level(self):
|
||||
# GH 12697
|
||||
freq = ["a", "b", "c", "d"]
|
||||
idx = MultiIndex.from_product([freq, range(500)])
|
||||
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
|
||||
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
|
||||
result = df_slice.loc["a"]
|
||||
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df_slice.loc["d"]
|
||||
expected = DataFrame(
|
||||
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd["A"]
|
||||
result = s[5:]
|
||||
expected = s.reindex(s.index[5:])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = ymd["A"].copy()
|
||||
exp = ymd["A"].copy()
|
||||
s[5:] = 0
|
||||
exp.iloc[5:] = 0
|
||||
tm.assert_numpy_array_equal(s.values, exp.values)
|
||||
|
||||
result = ymd[5:]
|
||||
expected = ymd.reindex(s.index[5:])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, loc, iloc",
|
||||
[
|
||||
# dtype = int, step = -1
|
||||
("int", slice(None, None, -1), slice(None, None, -1)),
|
||||
("int", slice(3, None, -1), slice(3, None, -1)),
|
||||
("int", slice(None, 1, -1), slice(None, 0, -1)),
|
||||
("int", slice(3, 1, -1), slice(3, 0, -1)),
|
||||
# dtype = int, step = -2
|
||||
("int", slice(None, None, -2), slice(None, None, -2)),
|
||||
("int", slice(3, None, -2), slice(3, None, -2)),
|
||||
("int", slice(None, 1, -2), slice(None, 0, -2)),
|
||||
("int", slice(3, 1, -2), slice(3, 0, -2)),
|
||||
# dtype = str, step = -1
|
||||
("str", slice(None, None, -1), slice(None, None, -1)),
|
||||
("str", slice("d", None, -1), slice(3, None, -1)),
|
||||
("str", slice(None, "b", -1), slice(None, 0, -1)),
|
||||
("str", slice("d", "b", -1), slice(3, 0, -1)),
|
||||
# dtype = str, step = -2
|
||||
("str", slice(None, None, -2), slice(None, None, -2)),
|
||||
("str", slice("d", None, -2), slice(3, None, -2)),
|
||||
("str", slice(None, "b", -2), slice(None, 0, -2)),
|
||||
("str", slice("d", "b", -2), slice(3, 0, -2)),
|
||||
],
|
||||
)
|
||||
def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
|
||||
# GH#38071
|
||||
labels = {
|
||||
"str": list("abcde"),
|
||||
"int": range(5),
|
||||
}[dtype]
|
||||
|
||||
mi = MultiIndex.from_arrays([labels] * 2)
|
||||
df = DataFrame(1.0, index=mi, columns=["A"])
|
||||
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
expected = df.iloc[iloc, :]
|
||||
result_get_loc = df.loc[SLC[loc], :]
|
||||
result_get_locs_level_0 = df.loc[SLC[loc, :], :]
|
||||
result_get_locs_level_1 = df.loc[SLC[:, loc], :]
|
||||
|
||||
tm.assert_frame_equal(result_get_loc, expected)
|
||||
tm.assert_frame_equal(result_get_locs_level_0, expected)
|
||||
tm.assert_frame_equal(result_get_locs_level_1, expected)
|
@ -0,0 +1,153 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted:
|
||||
def test_getitem_multilevel_index_tuple_not_sorted(self):
|
||||
index_columns = list("abc")
|
||||
df = DataFrame(
|
||||
[[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"]
|
||||
)
|
||||
df = df.set_index(index_columns)
|
||||
query_index = df.index[:1]
|
||||
rs = df.loc[query_index, "data"]
|
||||
|
||||
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"])
|
||||
xp = Series(["x"], index=xp_idx, name="data")
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.sort_index(level=1).T
|
||||
|
||||
# buglet with int typechecking
|
||||
result = df.iloc[:, : np.int32(3)]
|
||||
expected = df.reindex(columns=df.columns[:3])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("key", [None, lambda x: x])
|
||||
def test_frame_getitem_not_sorted2(self, key):
|
||||
# 13431
|
||||
df = DataFrame(
|
||||
{
|
||||
"col1": ["b", "d", "b", "a"],
|
||||
"col2": [3, 1, 1, 2],
|
||||
"data": ["one", "two", "three", "four"],
|
||||
}
|
||||
)
|
||||
|
||||
df2 = df.set_index(["col1", "col2"])
|
||||
df2_original = df2.copy()
|
||||
|
||||
df2.index = df2.index.set_levels(["b", "d", "a"], level="col1")
|
||||
df2.index = df2.index.set_codes([0, 1, 0, 2], level="col1")
|
||||
assert not df2.index.is_monotonic_increasing
|
||||
|
||||
assert df2_original.index.equals(df2.index)
|
||||
expected = df2.sort_index(key=key)
|
||||
assert expected.index.is_monotonic_increasing
|
||||
|
||||
result = df2.sort_index(level=0, key=key)
|
||||
assert result.index.is_monotonic_increasing
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_sort_values_key(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
index = index.sort_values( # sort by third letter
|
||||
key=lambda x: x.map(lambda entry: entry[2])
|
||||
)
|
||||
result = DataFrame(range(8), index=index)
|
||||
|
||||
arrays = [
|
||||
["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
expected = DataFrame(range(8), index=index)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_argsort_with_na(self):
|
||||
# GH48495
|
||||
arrays = [
|
||||
array([2, NA, 1], dtype="Int64"),
|
||||
array([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
result = index.argsort()
|
||||
expected = np.array([2, 0, 1], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_sort_values_with_na(self):
|
||||
# GH48495
|
||||
arrays = [
|
||||
array([2, NA, 1], dtype="Int64"),
|
||||
array([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
index = index.sort_values()
|
||||
result = DataFrame(range(3), index=index)
|
||||
|
||||
arrays = [
|
||||
array([1, 2, NA], dtype="Int64"),
|
||||
array([3, 1, 2], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
expected = DataFrame(range(3), index=index)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T
|
||||
df["foo", "four"] = "foo"
|
||||
|
||||
arrays = [np.array(x) for x in zip(*df.columns.values)]
|
||||
|
||||
result = df["foo"]
|
||||
result2 = df.loc[:, "foo"]
|
||||
expected = df.reindex(columns=df.columns[arrays[0] == "foo"])
|
||||
expected.columns = expected.columns.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
df = df.T
|
||||
result = df.xs("foo")
|
||||
result2 = df.loc["foo"]
|
||||
expected = df.reindex(df.index[arrays[0] == "foo"])
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
def test_series_getitem_not_sorted(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
s = Series(np.random.default_rng(2).standard_normal(8), index=index)
|
||||
|
||||
arrays = [np.array(x) for x in zip(*index.values)]
|
||||
|
||||
result = s["qux"]
|
||||
result2 = s.loc["qux"]
|
||||
expected = s[arrays[0] == "qux"]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_series_equal(result2, expected)
|
Reference in New Issue
Block a user