forked from Alsan/Post_finder
venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,27 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
# Note: identical to the "multi" entry in the top-level "index" fixture
|
||||
@pytest.fixture
def idx():
    """Return the two-level MultiIndex used to test general MultiIndex behaviour.

    The index has six entries, levels named "first" and "second", and is
    built with ``verify_integrity=False``.
    """
    level_values = [
        Index(["foo", "bar", "baz", "qux"]),
        Index(["one", "two"]),
    ]
    level_codes = [
        np.array([0, 0, 1, 2, 3, 3]),
        np.array([0, 1, 0, 1, 0, 1]),
    ]
    return MultiIndex(
        levels=level_values,
        codes=level_codes,
        names=["first", "second"],
        verify_integrity=False,
    )
|
@ -0,0 +1,263 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_infer_objects(idx):
|
||||
with pytest.raises(NotImplementedError, match="to_frame"):
|
||||
idx.infer_objects()
|
||||
|
||||
|
||||
def test_shift(idx):
|
||||
# GH8083 test the base class for shift
|
||||
msg = (
|
||||
"This method is only implemented for DatetimeIndex, PeriodIndex and "
|
||||
"TimedeltaIndex; Got type MultiIndex"
|
||||
)
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.shift(1)
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.shift(1, 2)
|
||||
|
||||
|
||||
def test_groupby(idx):
|
||||
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
|
||||
labels = idx.tolist()
|
||||
exp = {1: labels[:3], 2: labels[3:]}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
# GH5620
|
||||
groups = idx.groupby(idx)
|
||||
exp = {key: [key] for key in idx}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
|
||||
def test_truncate_multiindex():
    """Check MultiIndex.truncate trims the first level and keeps level names.

    Covers ``before`` only, ``after`` only, both bounds together, and the
    ValueError raised when ``after`` is smaller than ``before``.
    """
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # The first level holds the integers 0..3, so the label dropped by
    # before=1 is 0; checking for a string label here would always pass.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|
||||
|
||||
|
||||
# TODO: reshape
|
||||
|
||||
|
||||
def test_reorder_levels(idx):
|
||||
# this blows up
|
||||
with pytest.raises(IndexError, match="^Too many levels"):
|
||||
idx.reorder_levels([2, 1, 0])
|
||||
|
||||
|
||||
def test_numpy_repeat():
    """np.repeat on a MultiIndex repeats entries and rejects the axis argument."""
    reps = 2
    names = np.array(["foo", "bar"])
    numbers = [1, 2, 3]

    m = MultiIndex.from_product([numbers, names], names=names)
    repeated = np.repeat(m, reps)
    expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
    tm.assert_index_equal(repeated, expected)

    with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
        np.repeat(m, reps, axis=1)
|
||||
|
||||
|
||||
def test_append_mixed_dtypes():
|
||||
# GH 13660
|
||||
dti = date_range("2011-01-01", freq="ME", periods=3)
|
||||
dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
|
||||
pi = period_range("2011-01", freq="M", periods=3)
|
||||
|
||||
mi = MultiIndex.from_arrays(
|
||||
[[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
|
||||
)
|
||||
assert mi.nlevels == 6
|
||||
|
||||
res = mi.append(mi)
|
||||
exp = MultiIndex.from_arrays(
|
||||
[
|
||||
[1, 2, 3, 1, 2, 3],
|
||||
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
|
||||
["a", "b", "c", "a", "b", "c"],
|
||||
dti.append(dti),
|
||||
dti_tz.append(dti_tz),
|
||||
pi.append(pi),
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
other = MultiIndex.from_arrays(
|
||||
[
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
]
|
||||
)
|
||||
|
||||
res = mi.append(other)
|
||||
exp = MultiIndex.from_arrays(
|
||||
[
|
||||
[1, 2, 3, "x", "y", "z"],
|
||||
[1.1, np.nan, 3.3, "x", "y", "z"],
|
||||
["a", "b", "c", "x", "y", "z"],
|
||||
dti.append(Index(["x", "y", "z"])),
|
||||
dti_tz.append(Index(["x", "y", "z"])),
|
||||
pi.append(Index(["x", "y", "z"])),
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
|
||||
def test_iter(idx):
|
||||
result = list(idx)
|
||||
expected = [
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_sub(idx):
|
||||
first = idx
|
||||
|
||||
# - now raises (previously was set op difference)
|
||||
msg = "cannot perform __sub__ with this index type: MultiIndex"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first - idx[-3:]
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx[-3:] - first
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx[-3:] - first.tolist()
|
||||
msg = "cannot perform __rsub__ with this index type: MultiIndex"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first.tolist() - idx[-3:]
|
||||
|
||||
|
||||
def test_map(idx):
|
||||
# callable
|
||||
index = idx
|
||||
|
||||
result = index.map(lambda x: x)
|
||||
tm.assert_index_equal(result, index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"mapper",
|
||||
[
|
||||
lambda values, idx: {i: e for e, i in zip(values, idx)},
|
||||
lambda values, idx: pd.Series(values, idx),
|
||||
],
|
||||
)
|
||||
def test_map_dictlike(idx, mapper):
|
||||
identity = mapper(idx.values, idx)
|
||||
|
||||
# we don't infer to uint64 dtype for a dict
|
||||
if idx.dtype == np.uint64 and isinstance(identity, dict):
|
||||
expected = idx.astype("int64")
|
||||
else:
|
||||
expected = idx
|
||||
|
||||
result = idx.map(identity)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# empty mappable
|
||||
expected = Index([np.nan] * len(idx))
|
||||
result = idx.map(mapper(expected, idx))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
np.exp,
|
||||
np.exp2,
|
||||
np.expm1,
|
||||
np.log,
|
||||
np.log2,
|
||||
np.log10,
|
||||
np.log1p,
|
||||
np.sqrt,
|
||||
np.sin,
|
||||
np.cos,
|
||||
np.tan,
|
||||
np.arcsin,
|
||||
np.arccos,
|
||||
np.arctan,
|
||||
np.sinh,
|
||||
np.cosh,
|
||||
np.tanh,
|
||||
np.arcsinh,
|
||||
np.arccosh,
|
||||
np.arctanh,
|
||||
np.deg2rad,
|
||||
np.rad2deg,
|
||||
],
|
||||
ids=lambda func: func.__name__,
|
||||
)
|
||||
def test_numpy_ufuncs(idx, func):
|
||||
# test ufuncs of numpy. see:
|
||||
# https://numpy.org/doc/stable/reference/ufuncs.html
|
||||
|
||||
expected_exception = TypeError
|
||||
msg = (
|
||||
"loop of ufunc does not support argument 0 of type tuple which "
|
||||
f"has no callable {func.__name__} method"
|
||||
)
|
||||
with pytest.raises(expected_exception, match=msg):
|
||||
func(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[np.isfinite, np.isinf, np.isnan, np.signbit],
|
||||
ids=lambda func: func.__name__,
|
||||
)
|
||||
def test_numpy_type_funcs(idx, func):
|
||||
msg = (
|
||||
f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
|
||||
"could not be safely coerced to any supported types according to "
|
||||
"the casting rule ''safe''"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
func(idx)
|
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype(idx):
|
||||
expected = idx.copy()
|
||||
actual = idx.astype("O")
|
||||
tm.assert_copy(actual.levels, expected.levels)
|
||||
tm.assert_copy(actual.codes, expected.codes)
|
||||
assert actual.names == list(expected.names)
|
||||
|
||||
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
|
||||
idx.astype(np.dtype(int))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_astype_category(idx, ordered):
|
||||
# GH 18630
|
||||
msg = "> 1 ndim Categorical are not supported at this time"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.astype(CategoricalDtype(ordered=ordered))
|
||||
|
||||
if ordered is False:
|
||||
# dtype='category' defaults to ordered=False, so only test once
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.astype("category")
|
@ -0,0 +1,122 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_numeric_compat(idx):
|
||||
with pytest.raises(TypeError, match="cannot perform __mul__"):
|
||||
idx * 1
|
||||
|
||||
with pytest.raises(TypeError, match="cannot perform __rmul__"):
|
||||
1 * idx
|
||||
|
||||
div_err = "cannot perform __truediv__"
|
||||
with pytest.raises(TypeError, match=div_err):
|
||||
idx / 1
|
||||
|
||||
div_err = div_err.replace(" __", " __r")
|
||||
with pytest.raises(TypeError, match=div_err):
|
||||
1 / idx
|
||||
|
||||
with pytest.raises(TypeError, match="cannot perform __floordiv__"):
|
||||
idx // 1
|
||||
|
||||
with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
|
||||
1 // idx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
|
||||
def test_logical_compat(idx, method):
|
||||
msg = f"cannot perform {method}"
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(idx, method)()
|
||||
|
||||
|
||||
def test_inplace_mutation_resets_values():
    """Check ``_values`` caching around set_levels and set_codes.

    The original index keeps its cached values, while the index returned by
    the setter starts with an empty cache and computes its own values.
    """
    shared_codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
    levels = [["a", "b", "c"], [4]]
    levels2 = [[1, 2, 3], ["a"]]

    mi1 = MultiIndex(levels=levels, codes=shared_codes)
    mi2 = MultiIndex(levels=levels2, codes=shared_codes)

    # instantiating MultiIndex should not access/cache _.values
    for fresh in (mi1, mi2):
        assert "_values" not in fresh._cache

    snapshot1 = mi1.values.copy()
    snapshot2 = mi2.values.copy()

    # accessing .values should cache ._values
    cached = mi1._cache["_values"]
    assert mi1._values is cached
    assert mi1.values is cached
    assert isinstance(cached, np.ndarray)

    # Make sure level setting works
    relevelled_values = mi1.set_levels(levels2).values
    tm.assert_almost_equal(snapshot2, relevelled_values)

    # Doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], snapshot1)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, snapshot1)

    # Make sure label setting works too
    zero_codes = [[0] * 6, [0] * 6]
    wanted = np.empty((6,), dtype=object)
    wanted[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert wanted.shape == (6,)

    recoded = mi2.set_codes(zero_codes)
    assert "_values" not in recoded._cache
    recoded_values = recoded.values
    assert "_values" in recoded._cache

    # Shouldn't change cache
    tm.assert_almost_equal(mi2._cache["_values"], snapshot2)

    # Should have correct values
    tm.assert_almost_equal(wanted, recoded_values)
|
||||
|
||||
|
||||
def test_boxable_categorical_values():
    """Datetime categoricals come out of ``.values`` boxed as Timestamps."""
    letters = ["a", "b", "c"]
    hours = [
        pd.Timestamp("2012-01-01 00:00:00"),
        pd.Timestamp("2012-01-01 01:00:00"),
        pd.Timestamp("2012-01-01 02:00:00"),
    ]
    cat = pd.Categorical(pd.date_range("2012-01-01", periods=3, freq="h"))

    # MultiIndex: every (letter, timestamp) pair, letters varying slowest.
    product_values = MultiIndex.from_product([letters, cat]).values
    boxed_pairs = pd.Series(
        [(letter, stamp) for letter in letters for stamp in hours]
    ).values
    tm.assert_numpy_array_equal(product_values, boxed_pairs)

    # DataFrame: the categorical column and its ndarray form box the same way.
    frame_values = pd.DataFrame(
        {"a": letters, "b": cat, "c": np.array(cat)}
    ).values
    boxed_frame = pd.DataFrame(
        {"a": letters, "b": list(hours), "c": list(hours)}
    ).values
    tm.assert_numpy_array_equal(frame_values, boxed_frame)
|
@ -0,0 +1,860 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_constructor_single_level():
    """A one-level MultiIndex stays a MultiIndex and names its level."""
    labels = ["foo", "bar", "baz", "qux"]
    result = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])

    assert isinstance(result, MultiIndex)
    tm.assert_index_equal(result.levels[0], Index(labels, name="first"))
    assert result.names == ["first"]
|
||||
|
||||
|
||||
def test_constructor_no_levels():
    """Empty or partially specified levels/codes are rejected."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # Passing only one of the two arguments is a TypeError.
    both_required = "Must pass both levels and codes"
    for partial_kwargs in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match=both_required):
            MultiIndex(**partial_kwargs)
|
||||
|
||||
|
||||
def test_constructor_nonhashable_names():
    """Unhashable level names are rejected by the constructor and the renamers."""
    # GH 20527
    msg = r"MultiIndex\.name must be a hashable type"
    levels = [[1, 2], ["one", "two"]]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]

    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=levels, codes=codes, names=(["foo"], ["bar"]))

    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    renamed = [["fooo"], ["barr"]]

    # With .rename() and then with .set_names()
    for renamer in (mi.rename, mi.set_names):
        with pytest.raises(TypeError, match=msg):
            renamer(names=renamed)
|
||||
|
||||
|
||||
def test_constructor_mismatched_codes_levels(idx):
|
||||
codes = [np.array([1]), np.array([2]), np.array([3])]
|
||||
levels = ["a"]
|
||||
|
||||
msg = "Length of levels and codes must be the same"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
MultiIndex(levels=levels, codes=codes)
|
||||
|
||||
length_error = (
|
||||
r"On level 0, code max \(3\) >= length of level \(1\)\. "
|
||||
"NOTE: this index is in an inconsistent state"
|
||||
)
|
||||
label_error = r"Unequal code lengths: \[4, 2\]"
|
||||
code_value_error = r"On level 0, code value \(-2\) < -1"
|
||||
|
||||
# important to check that it's looking at the right thing.
|
||||
with pytest.raises(ValueError, match=length_error):
|
||||
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
|
||||
|
||||
with pytest.raises(ValueError, match=label_error):
|
||||
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])
|
||||
|
||||
# external API
|
||||
with pytest.raises(ValueError, match=length_error):
|
||||
idx.copy().set_levels([["a"], ["b"]])
|
||||
|
||||
with pytest.raises(ValueError, match=label_error):
|
||||
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
|
||||
|
||||
# test set_codes with verify_integrity=False
|
||||
# the setting should not raise any value error
|
||||
idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)
|
||||
|
||||
# code value smaller than -1
|
||||
with pytest.raises(ValueError, match=code_value_error):
|
||||
MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
|
||||
|
||||
|
||||
def test_na_levels():
    """Codes pointing at missing level values are reassigned to -1."""
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    raw_codes = [[0, -1, 1, 2, 3, 4]]

    na_first = [[np.nan, None, pd.NaT, 128, 2]]
    built = MultiIndex(levels=na_first, codes=raw_codes)
    wanted = MultiIndex(levels=na_first, codes=[[-1, -1, -1, -1, 3, 4]])
    tm.assert_index_equal(built, wanted)

    na_mixed = [[np.nan, "s", pd.NaT, 128, None]]
    built = MultiIndex(levels=na_mixed, codes=raw_codes)
    wanted = MultiIndex(levels=na_mixed, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(built, wanted)

    # verify set_levels and set_codes
    via_set_levels = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=raw_codes
    ).set_levels(na_mixed)
    tm.assert_index_equal(via_set_levels, wanted)

    via_set_codes = MultiIndex(
        levels=na_mixed, codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes(raw_codes)
    tm.assert_index_equal(via_set_codes, wanted)
|
||||
|
||||
|
||||
def test_copy_in_constructor():
    """With copy=True, mutating the input arrays does not affect the index."""
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])
    first_code = codes[0]

    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
    assert mi.codes[0][0] == first_code

    # Overwrite the source codes; the index must keep the old value.
    codes[0] = 15
    assert mi.codes[0][0] == first_code

    # Same check for the source level values.
    first_label = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == first_label
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_arrays
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_arrays(idx):
|
||||
arrays = [
|
||||
np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
]
|
||||
|
||||
# list of arrays as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# infer correctly
|
||||
result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
|
||||
assert result.levels[0].equals(Index([Timestamp("20130101")]))
|
||||
assert result.levels[1].equals(Index(["a", "b"]))
|
||||
|
||||
|
||||
def test_from_arrays_iterator(idx):
|
||||
# GH 18434
|
||||
arrays = [
|
||||
np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
]
|
||||
|
||||
# iterator as input
|
||||
result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# invalid iterator input
|
||||
msg = "Input must be a list / sequence of array-likes."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
MultiIndex.from_arrays(0)
|
||||
|
||||
|
||||
def test_from_arrays_tuples(idx):
|
||||
arrays = tuple(
|
||||
tuple(np.asarray(lev).take(level_codes))
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
)
|
||||
|
||||
# tuple of tuples as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="h", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="h", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """from_arrays round-trips datetime-like levels given as Index or Series."""
    sources = (idx1, idx2)

    from_indexes = MultiIndex.from_arrays(list(sources))
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(level), source)

    from_series = MultiIndex.from_arrays([Series(source) for source in sources])
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(level), source)

    tm.assert_index_equal(from_indexes, from_series)
|
||||
|
||||
|
||||
def test_from_arrays_index_datetimelike_mixed():
    """from_arrays keeps four different datetime-like level types intact."""
    sources = [
        date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        date_range("2015-01-01 10:00", freq="h", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    from_indexes = MultiIndex.from_arrays(list(sources))
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(level), source)

    from_series = MultiIndex.from_arrays([Series(source) for source in sources])
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(level), source)

    tm.assert_index_equal(from_indexes, from_series)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_categorical():
    """Categorical levels survive from_arrays as Index, Series or raw values."""
    # GH13743
    unordered = pd.CategoricalIndex(
        list("abcaab"), categories=list("bac"), ordered=False
    )
    ordered = pd.CategoricalIndex(
        list("abcaab"), categories=list("bac"), ordered=True
    )
    sources = (unordered, ordered)

    # Same inputs three ways: as-is, wrapped in Series, and as .values.
    variants = (
        [unordered, ordered],
        [Series(unordered), Series(ordered)],
        [unordered.values, ordered.values],
    )
    for arrays in variants:
        built = MultiIndex.from_arrays(arrays)
        for level, source in enumerate(sources):
            tm.assert_index_equal(built.get_level_values(level), source)
|
||||
|
||||
|
||||
def test_from_arrays_empty():
    """from_arrays with zero, one, or several empty arrays."""
    # 0 levels
    with pytest.raises(
        ValueError, match="Must pass non-zero number of levels/codes"
    ):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name="A"))
    assert single.names == ["A"]

    # N levels
    for n_levels in (2, 3):
        names = ["A", "B", "C"][:n_levels]
        built = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=names)
        wanted = MultiIndex(
            levels=[[]] * n_levels, codes=[[]] * n_levels, names=names
        )
        tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # lists of scalars / partially nested lists
        1, [1], [1, 2], [[1], 2], [1, [2]],
        "a", ["a"], ["a", "b"], [["a"], "b"],
        # tuple equivalents
        (1,), (1, 2), ([1], 2), (1, [2]),
        "a", ("a",), ("a", "b"), (["a"], "b"),
        # mixed list/tuple nesting
        [(1,), 2], [1, (2,)], [("a",), "b"],
        ((1,), 2), (1, (2,)), (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """Inputs that are not a sequence of array-likes raise TypeError."""
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of array-likes"
    ):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx1, idx2",
    [
        ([1, 2, 3], ["a", "b"]),
        ([], ["a", "b"]),
        ([1, 2, 3], []),
    ],
)
def test_from_arrays_different_lengths(idx1, idx2):
    """Arrays of unequal length are rejected with a ValueError."""
    # see gh-13599
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays([idx1, idx2])
|
||||
|
||||
|
||||
def test_from_arrays_respects_none_names():
    """names=None is honoured even when the input Series carry names."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b", "c"], name="bar")

    built = MultiIndex.from_arrays([numbers, letters], names=None)
    unnamed = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]],
        codes=[[0, 1, 2], [0, 1, 2]],
        names=None,
    )
    tm.assert_index_equal(built, unnamed)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_tuples
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_tuples():
    """from_tuples rejects an unnamed empty list and builds from tuples."""
    with pytest.raises(
        TypeError, match="Cannot infer number of levels from empty list"
    ):
        MultiIndex.from_tuples([])

    level_names = ["a", "b"]
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=level_names
    )

    # input tuples
    built = MultiIndex.from_tuples(((1, 2), (3, 4)), names=level_names)
    tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
def test_from_tuples_iterator():
    """from_tuples accepts an iterator of tuples and rejects a non-iterable."""
    # GH 18434
    # input iterator for tuples
    level_names = ["a", "b"]
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=level_names
    )

    pairs = zip([1, 3], [2, 4])
    tm.assert_index_equal(
        MultiIndex.from_tuples(pairs, names=level_names), wanted
    )

    # input non-iterables
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of tuple-likes."
    ):
        MultiIndex.from_tuples(0)
|
||||
|
||||
|
||||
def test_from_tuples_empty():
    """An empty tuple list with names matches from_arrays on empty arrays."""
    # GH 16777
    level_names = ["a", "b"]
    from_tuples = MultiIndex.from_tuples([], names=level_names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=level_names)
    tm.assert_index_equal(from_tuples, from_arrays)
|
||||
|
||||
|
||||
def test_from_tuples_index_values(idx):
|
||||
result = MultiIndex.from_tuples(idx)
|
||||
assert (result.values == idx.values).all()
|
||||
|
||||
|
||||
def test_tuples_with_name_string():
    """A string ``name`` for an Index built from tuples raises ValueError."""
    # GH 15110 and GH 14848
    triples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    msg = "Names should be list-like for a MultiIndex"
    for bad_name in ("abc", "a"):
        with pytest.raises(ValueError, match=msg):
            Index(triples, name=bad_name)
|
||||
|
||||
|
||||
def test_from_tuples_with_tuple_label():
    """A label that is itself a tuple is kept as a single level value."""
    # GH 15457
    via_set_index = pd.DataFrame(
        [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
    ).set_index(["a", "b"])

    tuple_index = MultiIndex.from_tuples(
        [(2, 1), (4, (1, 2))], names=("a", "b")
    )
    via_from_tuples = pd.DataFrame([2, 3], columns=["c"], index=tuple_index)

    tm.assert_frame_equal(via_set_index, via_from_tuples)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_product
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_product_empty_zero_levels():
    """from_product with no iterables raises ValueError."""
    # 0 levels
    with pytest.raises(
        ValueError, match="Must pass non-zero number of levels/codes"
    ):
        MultiIndex.from_product([])
|
||||
|
||||
|
||||
def test_from_product_empty_one_level():
    """A single empty iterable gives one empty, named level."""
    built = MultiIndex.from_product([[]], names=["A"])
    tm.assert_index_equal(built.levels[0], Index([], name="A"))
    assert built.names == ["A"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first, second",
    [
        ([], []),
        (["foo", "bar", "baz"], []),
        ([], ["a", "b", "c"]),
    ],
)
def test_from_product_empty_two_levels(first, second):
    """A product with an empty factor has empty codes but keeps both levels."""
    level_names = ["A", "B"]
    built = MultiIndex.from_product([first, second], names=level_names)
    wanted = MultiIndex(
        levels=[first, second], codes=[[], []], names=level_names
    )
    tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """Three-way product with empty outer factors and an N-item middle level."""
    # GH12258
    level_names = ["A", "B", "C"]
    middle = list(range(N))
    built = MultiIndex.from_product([[], middle, []], names=level_names)
    wanted = MultiIndex(
        levels=[[], middle, []], codes=[[], [], []], names=level_names
    )
    tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_input",
    [
        1,
        [1],
        [1, 2],
        [[1], 2],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
    ],
)
def test_from_product_invalid_input(invalid_input):
    """Inputs that are not a sequence of iterables raise TypeError."""
    # Either of two messages is accepted.
    msg = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(iterables=invalid_input)
|
||||
|
||||
|
||||
def test_from_product_datetimeindex():
    """Product of integers and a DatetimeIndex yields (int, Timestamp) tuples."""
    days = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], days])

    # Expected pairs, with the integer factor varying slowest.
    pairs = [
        (number, Timestamp(day))
        for number in (1, 2)
        for day in ("2000-01-01", "2000-01-02")
    ]
    etalon = construct_1d_object_array_from_listlike(pairs)
    tm.assert_numpy_array_equal(mi.values, etalon)
|
||||
|
||||
|
||||
def test_from_product_rangeindex():
    """The first level built from ``Index(range(5))`` matches it exactly."""
    # RangeIndex is preserved by factorize, so preserved in levels
    rng = Index(range(5))
    mi = MultiIndex.from_product([rng, ["a", "b"]])
    first_level = mi._levels[0]
    tm.assert_index_equal(first_level, rng, exact=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """A categorical factor keeps its categories and order through from_product."""
    # GH13743
    outer = ["foo", "bar"]
    letters = list("abcaab")
    categories = list("bac")

    cat_index = pd.CategoricalIndex(letters, categories=categories, ordered=ordered)
    # Two outer labels, so the inner values repeat twice.
    wanted = pd.CategoricalIndex(
        letters + letters, categories=categories, ordered=ordered
    )

    built = MultiIndex.from_product([outer, f(cat_index)])
    tm.assert_index_equal(built.get_level_values(1), wanted)
|
||||
|
||||
|
||||
def test_from_product():
    """``from_product`` equals ``from_tuples`` fed with the row-major Cartesian product."""
    outer = ["foo", "bar", "buz"]
    inner = ["a", "b", "c"]
    level_names = ["first", "second"]

    product_index = MultiIndex.from_product([outer, inner], names=level_names)

    # Outer label varies slowest, inner label fastest.
    pairs = [(left, right) for left in outer for right in inner]
    from_pairs = MultiIndex.from_tuples(pairs, names=level_names)

    tm.assert_index_equal(product_index, from_pairs)
|
||||
|
||||
|
||||
def test_from_product_iterator():
    """GH 18434: ``from_product`` accepts an iterator of iterables.

    Also checks that a non-iterable argument raises ``TypeError``.
    """
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    # Row-major Cartesian product: ``first`` varies slowest.
    tuples = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(tuples, names=names)

    # iterator as input
    result = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(result, expected)

    # Invalid non-iterable input
    # ``match`` is a regular expression, so the trailing period is escaped to
    # demand a literal "." instead of "any character".
    msg = r"Input must be a list / sequence of iterables\."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    """GH27292: compare level names when ``names`` is not passed to ``from_product``.

    The parametrized cases expect the ``name`` of a Series input to become the
    level name, and ``None`` for plain lists.
    """
    expected_levels = [[1, 2, 3], ["a", "b"]]
    expected_codes = [[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]

    inferred = MultiIndex.from_product([a, b])
    reference = MultiIndex(
        levels=expected_levels, codes=expected_codes, names=expected_names
    )
    tm.assert_index_equal(inferred, reference)
|
||||
|
||||
|
||||
def test_from_product_respects_none_names():
    """GH27292: an explicit ``names=None`` yields an index equal to an unnamed one.

    Both inputs are named Series, yet the expected index is built with
    ``names=None``.
    """
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b"], name="bar")

    unnamed = MultiIndex.from_product([numbers, letters], names=None)

    reference = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    tm.assert_index_equal(unnamed, reference)
|
||||
|
||||
|
||||
def test_from_product_readonly():
    """GH#15286: a read-only ndarray input gives the same index as a writable one."""
    numbers = np.array(range(3))
    letters = ["a", "b"]

    # Build the reference while the array is still writable.
    from_writable = MultiIndex.from_product([numbers, letters])

    numbers.setflags(write=False)
    from_readonly = MultiIndex.from_product([numbers, letters])

    tm.assert_index_equal(from_readonly, from_writable)
|
||||
|
||||
|
||||
def test_create_index_existing_name(idx):
|
||||
# GH11193, when an existing index is passed, and a new name is not
|
||||
# specified, the new index should inherit the previous object name
|
||||
index = idx
|
||||
index.names = ["foo", "bar"]
|
||||
result = Index(index)
|
||||
expected = Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
)
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = Index(index, name="A")
|
||||
expected = Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
name="A",
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_frame
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_frame():
    """GH 22420: ``from_frame`` matches ``from_tuples`` built from the frame's rows.

    The expected index uses the column labels as level names.
    """
    rows = [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")]

    frame = pd.DataFrame([list(row) for row in rows], columns=["L1", "L2"])
    reference = MultiIndex.from_tuples(rows, names=["L1", "L2"])

    tm.assert_index_equal(reference, MultiIndex.from_frame(frame))
|
||||
|
||||
|
||||
def test_from_frame_missing_values_multiIndex():
    """GH 39984: ``from_frame`` on nullable columns that contain missing values.

    The result must equal ``from_arrays`` applied to equivalent arrays. The
    test is skipped when pyarrow cannot be imported.
    """
    pa = pytest.importorskip("pyarrow")

    def nullable_floats():
        # Fresh array per call, converted from a pyarrow array with NaN and null.
        return pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None]))

    frame = pd.DataFrame(
        {
            "a": Series([1, 2, None], dtype="Int64"),
            "b": nullable_floats(),
        }
    )
    from_frame = MultiIndex.from_frame(frame)

    reference = MultiIndex.from_arrays(
        [Series([1, 2, None]).astype("Int64"), nullable_floats()],
        names=["a", "b"],
    )
    tm.assert_index_equal(from_frame, reference)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """GH 22420: each parametrized non-DataFrame input makes ``from_frame`` raise ``TypeError``."""
    expected_error = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=expected_error):
        MultiIndex.from_frame(non_frame)
|
||||
|
||||
|
||||
def test_from_frame_dtype_fidelity():
    """GH 22420: level dtypes after ``from_frame`` equal the source column dtypes."""

    def build_columns():
        # Fresh objects on every call so the frame and the expected index
        # never share their inputs.
        return {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }

    frame = pd.DataFrame(build_columns())
    column_dtypes = frame.dtypes.to_dict()

    reference_columns = build_columns()
    expected_index = MultiIndex.from_arrays(
        list(reference_columns.values()), names=list(reference_columns)
    )

    index = MultiIndex.from_frame(frame)
    level_dtypes = {
        name: level.dtype for name, level in zip(index.names, index.levels)
    }

    tm.assert_index_equal(expected_index, index)
    assert column_dtypes == level_dtypes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names_in,names_out",
    [
        (None, [("L1", "x"), ("L2", "y")]),
        (["x", "y"], ["x", "y"]),
    ],
)
def test_from_frame_valid_names(names_in, names_out):
    """GH 22420: check the resulting level names of ``from_frame`` for valid ``names``.

    The frame has two-level column labels; the parametrized cases expect those
    tuples as names when ``names`` is ``None``, and the given list otherwise.
    """
    column_labels = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=column_labels,
    )

    index = MultiIndex.from_frame(frame, names=names_in)
    assert index.names == names_out
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """GH 22420: invalid ``names`` passed to ``from_frame`` raise ``ValueError``.

    Covers a non-list-like value and a list longer than the number of columns.
    """
    column_labels = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=column_labels,
    )

    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(frame, names=names)
|
||||
|
||||
|
||||
def test_index_equal_empty_iterable():
    """#16844: an empty MultiIndex from the constructor equals one from ``from_arrays``."""
    level_names = ["a", "b"]
    via_constructor = MultiIndex(levels=[[], []], codes=[[], []], names=level_names)
    via_arrays = MultiIndex.from_arrays(arrays=[[], []], names=level_names)
    tm.assert_index_equal(via_constructor, via_arrays)
|
||||
|
||||
|
||||
def test_raise_invalid_sortorder():
    """GH#28518: the MultiIndex constructor raises when an incorrect sortorder is given.

    A consistent ``sortorder`` is accepted first; two code layouts with a
    ``sortorder`` that is too large must then raise ``ValueError``.
    """
    levels = [[0, 1], [0, 1, 2]]

    # Correct sortorder: must construct without raising.
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    # (codes, claimed sortorder, expected message pattern)
    failing_cases = [
        (
            [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
            2,
            r".* sortorder 2 with lexsort_depth 1.*",
        ),
        (
            [[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
            1,
            r".* sortorder 1 with lexsort_depth 0.*",
        ),
    ]
    for codes, claimed_sortorder, pattern in failing_cases:
        with pytest.raises(ValueError, match=pattern):
            MultiIndex(levels=levels, codes=codes, sortorder=claimed_sortorder)
|
||||
|
||||
|
||||
def test_datetimeindex():
|
||||
idx1 = pd.DatetimeIndex(
|
||||
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
|
||||
)
|
||||
idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
|
||||
idx = MultiIndex.from_arrays([idx1, idx2])
|
||||
|
||||
expected1 = pd.DatetimeIndex(
|
||||
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
|
||||
)
|
||||
|
||||
tm.assert_index_equal(idx.levels[0], expected1)
|
||||
tm.assert_index_equal(idx.levels[1], idx2)
|
||||
|
||||
# from datetime combos
|
||||
# GH 7888
|
||||
date1 = np.datetime64("today")
|
||||
date2 = datetime.today()
|
||||
date3 = Timestamp.today()
|
||||
|
||||
for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
|
||||
index = MultiIndex.from_product([[d1], [d2]])
|
||||
assert isinstance(index.levels[0], pd.DatetimeIndex)
|
||||
assert isinstance(index.levels[1], pd.DatetimeIndex)
|
||||
|
||||
# but NOT date objects, matching Index behavior
|
||||
date4 = date.today()
|
||||
index = MultiIndex.from_product([[date4], [date2]])
|
||||
assert not isinstance(index.levels[0], pd.DatetimeIndex)
|
||||
assert isinstance(index.levels[1], pd.DatetimeIndex)
|
||||
|
||||
|
||||
def test_constructor_with_tz():
    """``from_arrays`` keeps names and tz-aware levels for index and Series inputs."""
    pacific = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    tokyo = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    def check(built):
        # Names come from the inputs; each level equals the matching input index.
        assert built.names == ["dt1", "dt2"]
        tm.assert_index_equal(built.levels[0], pacific)
        tm.assert_index_equal(built.levels[1], tokyo)

    check(MultiIndex.from_arrays([pacific, tokyo]))
    check(MultiIndex.from_arrays([Series(pacific), Series(tokyo)]))
|
||||
|
||||
|
||||
def test_multiindex_inference_consistency():
    """Levels built from ``date`` objects have object dtype, like a flat ``Index``."""
    # check that inference behavior matches the base class
    today = date.today()
    dates = [today, today]

    assert Index(dates).dtype == object

    # Each constructor is invoked lazily, in the listed order.
    constructors = (
        lambda: MultiIndex.from_arrays([dates]),
        lambda: MultiIndex.from_product([dates]),
        lambda: MultiIndex.from_tuples([(item,) for item in dates]),
    )
    for construct in constructors:
        first_level = construct().levels[0]
        assert first_level.dtype == object
|
||||
|
||||
|
||||
def test_dtype_representation(using_infer_string):
|
||||
# GH#46900
|
||||
pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
|
||||
result = pmidx.dtypes
|
||||
exp = "object" if not using_infer_string else "string"
|
||||
expected = Series(
|
||||
["int64", exp],
|
||||
index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
|
||||
dtype=object,
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,164 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_to_numpy(idx):
|
||||
result = idx.to_numpy()
|
||||
exp = idx.values
|
||||
tm.assert_numpy_array_equal(result, exp)
|
||||
|
||||
|
||||
def test_to_frame():
    """Exercise ``MultiIndex.to_frame`` with and without names and ``index=False``.

    Sections, in order: unnamed index, named index, explicit ``name`` argument
    (GH-22580), invalid ``name`` arguments, and an index with a datetime level.
    """
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

    # Unnamed index.
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # Named index: the expected columns are the level names.
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # ``match`` is a regular expression, so the trailing periods are escaped
    # to demand a literal "." instead of "any character".
    msg = r"'name' must be a list / sequence of column names\."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = r"'name' should have same length as number of levels on index\."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_frame_dtype_fidelity():
    """GH 22420: column dtypes after ``to_frame`` equal the level dtypes."""

    def build_columns():
        # Fresh objects on every call so the index and the expected frame
        # never share their inputs.
        return {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }

    source = build_columns()
    index = MultiIndex.from_arrays(list(source.values()), names=list(source))
    level_dtypes = {
        name: level.dtype for name, level in zip(index.names, index.levels)
    }

    expected_frame = DataFrame(build_columns())

    frame = index.to_frame(index=False)
    column_dtypes = frame.dtypes.to_dict()

    tm.assert_frame_equal(frame, expected_frame)
    assert level_dtypes == column_dtypes
|
||||
|
||||
|
||||
def test_to_frame_resulting_column_order():
    """GH 22420: the columns of ``to_frame()`` follow the order of the level names."""
    level_names = ["z", 0, "a"]
    arrays = [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]]

    index = MultiIndex.from_arrays(arrays, names=level_names)
    column_labels = index.to_frame().columns.tolist()

    assert column_labels == level_names
|
||||
|
||||
|
||||
def test_to_frame_duplicate_labels():
    """GH 45245: duplicate column labels raise unless ``allow_duplicates=True``.

    Two cases are checked: identical level names, and names ``[None, 0]``
    whose expected columns are ``[0, 0]``.
    """
    data = [(1, 2), (3, 4)]

    # (level names, column labels expected with allow_duplicates=True)
    cases = [
        (["a", "a"], ["a", "a"]),
        ([None, 0], [0, 0]),
    ]
    for level_names, expected_columns in cases:
        index = MultiIndex.from_tuples(data, names=level_names)

        with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
            index.to_frame()

        frame = index.to_frame(allow_duplicates=True)
        reference = DataFrame(data, index=index, columns=expected_columns)
        tm.assert_frame_equal(frame, reference)
|
||||
|
||||
|
||||
def test_to_flat_index(idx):
|
||||
expected = pd.Index(
|
||||
(
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
),
|
||||
tupleize_cols=False,
|
||||
)
|
||||
result = idx.to_flat_index()
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,96 @@
|
||||
from copy import (
|
||||
copy,
|
||||
deepcopy,
|
||||
)
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` is a copy of the MultiIndex ``original``.

    Levels are checked with ``tm.assert_copy``; codes and names must compare
    equal without being the very same object; ``sortorder`` must be equal.

    Parameters
    ----------
    copy : MultiIndex
        The candidate copy. Inside this function the name shadows the
        module-level ``copy`` import.
    original : MultiIndex
        The index ``copy`` was derived from.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    # Levels should be (at least) shallow copied
    tm.assert_copy(copy.levels, original.levels)

    # Codes: it does not matter how they were copied, as long as they are
    # equal and not the same container. (The equality check used to be
    # executed twice; once is enough.)
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names: it does not matter how they were copied either
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|
||||
|
||||
|
||||
def test_copy(idx):
    """The result of ``copy()`` passes ``assert_multiindex_copied``."""
    assert_multiindex_copied(idx.copy(), idx)
|
||||
|
||||
|
||||
def test_shallow_copy(idx):
    """The result of the private ``_view()`` passes ``assert_multiindex_copied``."""
    assert_multiindex_copied(idx._view(), idx)
|
||||
|
||||
|
||||
def test_view(idx):
    """The result of ``view()`` passes ``assert_multiindex_copied``."""
    assert_multiindex_copied(idx.view(), idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """``copy.copy`` and ``copy.deepcopy`` return a distinct but equal MultiIndex."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = func(source)

    assert duplicate is not source
    assert duplicate.equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """``MultiIndex.copy(deep=...)`` returns an index that ``equals`` the source."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    assert source.copy(deep=deep).equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """gh-12309: a keyword passed to ``copy`` shows up as that attribute on the result."""
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    copy_options = {kwarg: value, "deep": deep}
    duplicate = source.copy(**copy_options)

    assert getattr(duplicate, kwarg) == value
|
||||
|
||||
|
||||
def test_copy_deep_false_retains_id():
    """GH#47878: ``copy(deep=False)`` shares the private ``_id`` object with the source."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    shallow = source.copy(deep=False)

    assert shallow._id is source._id
|
@ -0,0 +1,190 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_drop(idx):
    """Exercise ``MultiIndex.drop`` with tuples, MultiIndex labels and level-0 keys.

    Covers successful drops, ``KeyError`` for absent labels, and
    ``errors="ignore"`` for fully and partially absent labels.
    """
    absent_tuple_pattern = r"^\('bar', 'two'\)$"
    absent_key_pattern = r"^'two'$"

    # Full tuples, as a list and as a MultiIndex, must drop the same rows.
    via_list = idx.drop([("foo", "two"), ("qux", "one")])
    present_labels = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    via_index = idx.drop(present_labels)

    remaining = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(via_list, remaining)
    tm.assert_index_equal(via_index, remaining)

    # Level-0 keys, as a list and as a scalar.
    tm.assert_index_equal(idx.drop(["bar"]), idx[[0, 1, 3, 4, 5]])
    tm.assert_index_equal(idx.drop("foo"), idx[[2, 3, 4, 5]])

    # Labels that are not in the index raise.
    absent_labels = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=absent_tuple_pattern):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=absent_tuple_pattern):
        idx.drop(absent_labels)
    with pytest.raises(KeyError, match=absent_key_pattern):
        idx.drop(["foo", "two"])

    # partially correct argument
    partly_absent = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=absent_tuple_pattern):
        idx.drop(partly_absent)

    # error='ignore'
    tm.assert_index_equal(
        idx.drop(absent_labels, errors="ignore"), idx[[0, 1, 2, 3, 4, 5]]
    )
    tm.assert_index_equal(
        idx.drop(partly_absent, errors="ignore"), idx[[0, 1, 2, 3, 5]]
    )
    tm.assert_index_equal(
        idx.drop(["foo", "two"], errors="ignore"), idx[[2, 3, 4, 5]]
    )

    # mixed partial / full drop
    tm.assert_index_equal(idx.drop(["foo", ("qux", "one")]), idx[[2, 3, 5]])

    # mixed partial / full drop / error='ignore'
    mixed_labels = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=absent_key_pattern):
        idx.drop(mixed_labels)
    tm.assert_index_equal(idx.drop(mixed_labels, errors="ignore"), idx[[2, 3, 5]])
|
||||
|
||||
|
||||
def test_droplevel_with_names(idx):
|
||||
index = idx[idx.get_loc("foo")]
|
||||
dropped = index.droplevel(0)
|
||||
assert dropped.name == "second"
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[Index(range(4)), Index(range(4)), Index(range(4))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
names=["one", "two", "three"],
|
||||
)
|
||||
dropped = index.droplevel(0)
|
||||
assert dropped.names == ("two", "three")
|
||||
|
||||
dropped = index.droplevel("two")
|
||||
expected = index.droplevel(1)
|
||||
assert dropped.equals(expected)
|
||||
|
||||
|
||||
def test_droplevel_list():
    """``droplevel`` with a list of level names, including empty and invalid lists."""
    three_levels = MultiIndex(
        levels=[Index(range(4)) for _ in range(3)],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    # A list of names must agree with successive positional drops.
    by_names = three_levels[:2].droplevel(["three", "one"])
    by_positions = three_levels[:2].droplevel(2).droplevel(0)
    assert by_names.equals(by_positions)

    # An empty list leaves the index as it is.
    untouched = three_levels[:2].droplevel([])
    assert untouched.equals(three_levels[:2])

    all_levels_error = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=all_levels_error):
        three_levels[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        three_levels[:2].droplevel(["one", "four"])
|
||||
|
||||
|
||||
def test_drop_not_lexsorted():
    """GH 12078: ``drop`` gives equal results on lexsorted and non-lexsorted indexes.

    The drop is executed inside ``assert_produces_warning(PerformanceWarning)``.
    """
    # define the lexsorted version of the multi-index
    sorted_index = MultiIndex.from_tuples(
        [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
    )
    assert sorted_index._is_lexsorted()

    # and the not-lexsorted version
    raw = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    pivoted = raw.pivot_table(index="a", columns=["b", "c"], values="d")
    unsorted_index = pivoted.reset_index().columns
    assert not unsorted_index._is_lexsorted()

    # compare the results
    tm.assert_index_equal(sorted_index, unsorted_index)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(sorted_index.drop("a"), unsorted_index.drop("a"))
|
||||
|
||||
|
||||
def test_drop_with_nan_in_index(nulls_fixture):
    """GH#18853: dropping an absent Timestamp from a level holding a null raises ``KeyError``."""
    index = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
    absent_stamp = pd.Timestamp("2001")

    expected_error = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
    with pytest.raises(KeyError, match=expected_error):
        index.drop(absent_stamp, level="date")
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    """GH#33494: dropping a tuple removes every occurrence of it."""
    with_duplicates = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    remaining = MultiIndex.from_tuples([(2, 3)])

    tm.assert_index_equal(with_duplicates.drop((1, 2)), remaining)
|
||||
|
||||
|
||||
def test_single_level_drop_partially_missing_elements():
    """GH 37820: ``drop(..., level=0)`` raises ``KeyError`` naming the absent label."""
    integer_index = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])

    # (labels to drop, expected message pattern)
    cases = [
        (4, r"labels \[4\] not found in level"),
        ([1, 4], r"labels \[4\] not found in level"),
        ([np.nan], r"labels \[nan\] not found in level"),
        ([np.nan, 1, 2, 3], r"labels \[nan\] not found in level"),
    ]
    for labels, pattern in cases:
        with pytest.raises(KeyError, match=pattern):
            integer_index.drop(labels, level=0)

    # With NaN present in level 0 the string label is the one reported.
    nan_index = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    with pytest.raises(KeyError, match=r"labels \['a'\] not found in level"):
        nan_index.drop([np.nan, 1, "a"], level=0)
|
||||
|
||||
|
||||
def test_droplevel_multiindex_one_level():
    """GH#37208: ``droplevel([])`` on a one-level MultiIndex equals a named flat Index."""
    single_level = MultiIndex.from_tuples([(2,)], names=("b",))
    flattened = single_level.droplevel([])

    tm.assert_index_equal(flattened, Index([2], name="b"))
|
@ -0,0 +1,363 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import (
|
||||
hashtable,
|
||||
index as libindex,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def idx_dup():
    """Return a two-level MultiIndex whose code pairs contain duplicates.

    Built with ``verify_integrity=False``.
    """
    # compare tests/indexes/multi/conftest.py
    levels = [
        Index(["foo", "bar", "baz", "qux"]),
        Index(["one", "two"]),
    ]
    codes = [
        np.array([0, 0, 1, 0, 1, 1]),
        np.array([0, 1, 0, 1, 0, 1]),
    ]
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    """``MultiIndex.unique`` drops repeated tuples and keeps the level names."""
    # (input arrays, arrays of the expected unique index)
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list("aaaa"), list("abab")], [list("aa"), list("ab")]),
        ([list("aaaa"), list("aaaa")], [["a"], ["a"]]),
    ]
    for arrays, unique_arrays in cases:
        index = MultiIndex.from_arrays(arrays, names=names)
        reference = MultiIndex.from_arrays(unique_arrays, names=index.names)
        tm.assert_index_equal(index.unique(), reference)

    # GH #20568 - empty MI
    empty = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(empty, empty.unique())
|
||||
|
||||
|
||||
def test_unique_datetimelike():
    """``unique`` on datetime levels with ``NaT`` entries, one level being tz-aware."""
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    tokyo = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    deduplicated = MultiIndex.from_arrays([naive, tokyo]).unique()

    reference = MultiIndex.from_arrays(
        [
            DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"]),
            DatetimeIndex(
                ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
            ),
        ]
    )
    tm.assert_index_equal(deduplicated, reference)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    """GH #17896: ``unique(level=...)`` equals the unique level values."""
    # GH #17896 - with level= argument
    tm.assert_index_equal(
        idx.unique(level=level), idx.get_level_values(level).unique()
    )

    level_names = ["first", "second"]
    already_unique = MultiIndex.from_arrays(
        [[1, 3, 2, 4], [1, 3, 2, 5]], names=level_names
    )
    empty = MultiIndex.from_arrays([[], []], names=level_names)

    # With already unique level, then with empty MI: the level values
    # themselves are the expected result.
    for index in (already_unique, empty):
        tm.assert_index_equal(
            index.unique(level=level), index.get_level_values(level)
        )
|
||||
|
||||
|
||||
def test_duplicate_multiindex_codes():
    """GH 17464: duplicate level values raise ``ValueError``.

    Checked for the ``MultiIndex`` constructor and for ``set_levels``.
    """
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    msg = r"Level values must be unique: \[[A', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        # The constructor is expected to raise, so its result is not bound.
        MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    """GH18872, GH19029: duplicate level names can be set in several ways.

    Covers ``from_product(names=...)``, ``rename`` and ``rename(level=...)``.
    """
    at_construction = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert at_construction.names == names

    # With .rename()
    renamed = MultiIndex.from_product([[0, 1]] * 3).rename(names)
    assert renamed.names == names

    # With .rename(., level=)
    renamed.rename(names[1], level=1, inplace=True)
    renamed = renamed.rename([names[0], names[2]], level=[0, 2])
    assert renamed.names == names
|
||||
|
||||
|
||||
def test_duplicate_meta_data():
    """GH 10115: ``drop_duplicates`` keeps the level names, whatever they are."""
    base = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    name_variants = ([None, None], [None, "Num"], ["Upper", "Num"])
    candidates = [base] + [base.set_names(variant) for variant in name_variants]

    for candidate in candidates:
        assert candidate.has_duplicates
        assert candidate.drop_duplicates().names == candidate.names
|
||||
|
||||
|
||||
def test_has_duplicates(idx, idx_dup):
|
||||
# see fixtures
|
||||
assert idx.is_unique is True
|
||||
assert idx.has_duplicates is False
|
||||
assert idx_dup.is_unique is False
|
||||
assert idx_dup.has_duplicates is True
|
||||
|
||||
mi = MultiIndex(
|
||||
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
|
||||
)
|
||||
assert mi.is_unique is False
|
||||
assert mi.has_duplicates is True
|
||||
|
||||
# single instance of NaN
|
||||
mi_nan = MultiIndex(
|
||||
levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
|
||||
)
|
||||
assert mi_nan.is_unique is True
|
||||
assert mi_nan.has_duplicates is False
|
||||
|
||||
# multiple instances of NaN
|
||||
mi_nan_dup = MultiIndex(
|
||||
levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
|
||||
)
|
||||
assert mi_nan_dup.is_unique is False
|
||||
assert mi_nan_dup.has_duplicates is True
|
||||
|
||||
|
||||
def test_has_duplicates_from_tuples():
    """Eight-level tuples that differ in only two positions are unique (GH 9075)."""
    # Only the 4th and 8th entries vary from row to row.
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    rows = [("x", "out", "z", a, "y", "in", "z", b) for a, b in varying]

    mi = MultiIndex.from_tuples(rows)
    assert not mi.has_duplicates
|
||||
|
||||
|
||||
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    """Duplicate detection stays correct when the combined codes may overflow int64.

    No overflow with 4 levels; overflow is possible with 8.
    """
    size = 500
    base_codes = np.tile(np.arange(size), 2)
    level = np.arange(size)

    if not with_nulls:
        codes = [base_codes] * nlevels + [np.arange(2).repeat(size)]
    else:
        # inject some null values; -1 is the common nan value
        base_codes[size] = -1
        codes = [base_codes.copy() for _ in range(nlevels)]
        for pos, level_codes in enumerate(codes):
            level_codes[size + pos - nlevels // 2] = -1
        codes.append(np.array([-1, 1]).repeat(size))

    levels = [level] * nlevels + [[0, 1]]

    # no dups
    unique_mi = MultiIndex(levels=levels, codes=codes)
    assert not unique_mi.has_duplicates

    # with a dup: the first row is repeated at the very end
    if with_nulls:
        dup_codes = [np.insert(arr, 1000, arr[0]) for arr in codes]
        dup_mi = MultiIndex(levels=levels, codes=dup_codes)
    else:
        rows = unique_mi.values.tolist()
        dup_mi = MultiIndex.from_tuples(rows + [rows[0]])

    assert dup_mi.has_duplicates
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"keep, expected",
|
||||
[
|
||||
("first", np.array([False, False, False, True, True, False])),
|
||||
("last", np.array([False, True, True, False, False, False])),
|
||||
(False, np.array([False, True, True, True, True, False])),
|
||||
],
|
||||
)
|
||||
def test_duplicated(idx_dup, keep, expected):
|
||||
result = idx_dup.duplicated(keep=keep)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.arm_slow
def test_duplicated_hashtable_impl(keep, monkeypatch):
    """MultiIndex.duplicated matches hashtable.duplicated on the raw values (GH 9125)."""
    n, k = 6, 10
    levels = [np.arange(n), list(map(str, range(n))), 1000 + np.arange(n)]
    # One freshly seeded generator per level, so each draw is reproducible.
    codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels]

    with monkeypatch.context() as patcher:
        # Lower the engine's size cutoff for the duration of the comparison.
        patcher.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex(levels=levels, codes=codes)

        result = mi.duplicated(keep=keep)
        expected = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
    """A NaN in one level does not make otherwise distinct rows duplicates (GH5873)."""
    first_level = [101, val]
    second_level = [3.5, np.nan]
    mi = MultiIndex.from_arrays([first_level, second_level])
    assert not mi.has_duplicates

    nothing_duplicated = np.zeros(2, dtype="bool")
    tm.assert_numpy_array_equal(mi.duplicated(), nothing_duplicated)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
    """Every code combination, NaN (-1) included, appears once: no duplicates (GH5873)."""
    # all possible unique combinations, including nan
    combos = list(product(range(-1, n), range(-1, m)))
    shuffled = np.random.default_rng(2).permutation(combos)
    mi = MultiIndex(
        levels=[list("abcde")[:n], list("WXYZ")[:m]],
        codes=shuffled.T,
    )
    assert len(mi) == (n + 1) * (m + 1)
    assert not mi.has_duplicates

    nothing_duplicated = np.zeros(len(mi), dtype="bool")
    tm.assert_numpy_array_equal(mi.duplicated(), nothing_duplicated)
|
||||
|
||||
|
||||
def test_duplicated_drop_duplicates():
    """duplicated() and drop_duplicates() agree for every ``keep`` mode (GH#4060)."""
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # (keyword arguments, expected duplicated mask, arrays of the surviving rows)
    cases = [
        (
            {},
            [False, False, False, True, False, False],
            ([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": "last"},
            [True, False, False, False, False, False],
            ([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": False},
            [True, False, False, True, False, False],
            ([2, 3, 2, 3], [1, 1, 2, 2]),
        ),
    ]

    for kwargs, mask, survivors in cases:
        duplicated = idx.duplicated(**kwargs)
        tm.assert_numpy_array_equal(duplicated, np.array(mask, dtype=bool))
        assert duplicated.dtype == bool

        expected = MultiIndex.from_arrays(survivors)
        tm.assert_index_equal(idx.drop_duplicates(**kwargs), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    """Series.duplicated on complex data, including NaN components (GH 17927)."""
    nan_complex = np.nan + np.nan * 1j
    values = [
        nan_complex,
        0,
        1j,
        1j,
        1,
        1 + 1j,
        1 + 2j,
        1 + 1j,
        np.nan,
        nan_complex,
    ]
    flags = [False, False, False, True, False, False, False, True, False, True]

    expected = Series(flags, dtype=bool)
    result = Series(values, dtype=dtype).duplicated()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_midx_unique_ea_dtype():
    """unique() collapses repeated rows whose masked-integer level is NA (GH#48335)."""
    level_names = ["a", "b"]
    midx = MultiIndex.from_arrays(
        [Series([1, 2, NA, NA], dtype="Int64"), np.array([1, 2, 3, 3])],
        names=level_names,
    )
    result = midx.unique()

    expected = MultiIndex.from_arrays(
        [Series([1, 2, NA], dtype="Int64"), np.array([1, 2, 3])],
        names=level_names,
    )
    tm.assert_index_equal(result, expected)
|
@ -0,0 +1,284 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_any_real_numeric_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_equals(idx):
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.equals(idx.to_flat_index())
|
||||
assert idx.equals(idx.to_flat_index().astype("category"))
|
||||
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(np.array(idx))
|
||||
|
||||
same_values = Index(idx, dtype=object)
|
||||
assert idx.equals(same_values)
|
||||
assert same_values.equals(idx)
|
||||
|
||||
if idx.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not idx.equals(Series(idx))
|
||||
|
||||
|
||||
def test_equals_op(idx):
|
||||
# GH9947, GH10637
|
||||
index_a = idx
|
||||
|
||||
n = len(index_a)
|
||||
index_b = index_a[0:-1]
|
||||
index_c = index_a[0:-1].append(index_a[-2:-1])
|
||||
index_d = index_a[0:1]
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == index_b
|
||||
expected1 = np.array([True] * n)
|
||||
expected2 = np.array([True] * (n - 1) + [False])
|
||||
tm.assert_numpy_array_equal(index_a == index_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == index_c, expected2)
|
||||
|
||||
# test comparisons with numpy arrays
|
||||
array_a = np.array(index_a)
|
||||
array_b = np.array(index_a[0:-1])
|
||||
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
|
||||
array_d = np.array(index_a[0:1])
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == array_b
|
||||
tm.assert_numpy_array_equal(index_a == array_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == array_c, expected2)
|
||||
|
||||
# test comparisons with Series
|
||||
series_a = Series(array_a)
|
||||
series_b = Series(array_b)
|
||||
series_c = Series(array_c)
|
||||
series_d = Series(array_d)
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == series_b
|
||||
|
||||
tm.assert_numpy_array_equal(index_a == series_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == series_c, expected2)
|
||||
|
||||
# cases where length is 1 for one of them
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == index_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == series_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == array_d
|
||||
msg = "Can only compare identically-labeled Series objects"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series_a == series_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
series_a == array_d
|
||||
|
||||
# comparing with a scalar should broadcast; note that we are excluding
|
||||
# MultiIndex because in this case each item in the index is a tuple of
|
||||
# length 2, and therefore is considered an array of length 2 in the
|
||||
# comparison instead of a scalar
|
||||
if not isinstance(index_a, MultiIndex):
|
||||
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
|
||||
# assuming the 2nd to last item is unique in the data
|
||||
item = index_a[-2]
|
||||
tm.assert_numpy_array_equal(index_a == item, expected3)
|
||||
tm.assert_series_equal(series_a == item, Series(expected3))
|
||||
|
||||
|
||||
def test_compare_tuple():
    """Comparing a MultiIndex with one of its own tuples works element-wise (GH#21517)."""
    mi = MultiIndex.from_product([[1, 2]] * 2)
    first = mi[0]

    only_first = np.array([True, False, False, False])
    all_false = np.array([False, False, False, False])

    # (comparison result, expected mask) for each of the six operators
    checks = [
        (mi == first, only_first),
        (mi != first, ~only_first),
        (mi < first, all_false),
        (mi <= first, only_first),
        (mi > first, ~only_first),
        (mi >= first, ~all_false),
    ]
    for result, expected in checks:
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_tuple_strs():
    """``==`` with a tuple of strings matches whole rows only (GH#34180)."""
    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    # A full-length tuple matches the one equal row.
    full_match = mi == ("c", "a")
    tm.assert_numpy_array_equal(full_match, np.array([False, False, True]))

    # A shorter tuple matches nothing.
    short_match = mi == ("c",)
    tm.assert_numpy_array_equal(short_match, np.array([False, False, False]))
|
||||
|
||||
|
||||
def test_equals_multi(idx):
|
||||
assert idx.equals(idx)
|
||||
assert not idx.equals(idx.values)
|
||||
assert idx.equals(Index(idx.values))
|
||||
|
||||
assert idx.equal_levels(idx)
|
||||
assert not idx.equals(idx[:-1])
|
||||
assert not idx.equals(idx[-1])
|
||||
|
||||
# different number of levels
|
||||
index = MultiIndex(
|
||||
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
)
|
||||
|
||||
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
|
||||
assert not index.equals(index2)
|
||||
assert not index.equal_levels(index2)
|
||||
|
||||
# levels are different
|
||||
major_axis = Index(list(range(4)))
|
||||
minor_axis = Index(list(range(2)))
|
||||
|
||||
major_codes = np.array([0, 0, 1, 2, 2, 3])
|
||||
minor_codes = np.array([0, 1, 0, 0, 1, 0])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
assert not idx.equal_levels(index)
|
||||
|
||||
# some of the labels are different
|
||||
major_axis = Index(["foo", "bar", "baz", "qux"])
|
||||
minor_axis = Index(["one", "two"])
|
||||
|
||||
major_codes = np.array([0, 0, 2, 2, 3, 3])
|
||||
minor_codes = np.array([0, 1, 0, 1, 0, 1])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
|
||||
|
||||
def test_identical(idx):
|
||||
mi = idx.copy()
|
||||
mi2 = idx.copy()
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi = mi.set_names(["new1", "new2"])
|
||||
assert mi.equals(mi2)
|
||||
assert not mi.identical(mi2)
|
||||
|
||||
mi2 = mi2.set_names(["new1", "new2"])
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi4 = Index(mi.tolist(), tupleize_cols=False)
|
||||
assert not mi.identical(mi4)
|
||||
assert mi.equals(mi4)
|
||||
|
||||
|
||||
def test_equals_operator(idx):
|
||||
# GH9785
|
||||
assert (idx == idx).all()
|
||||
|
||||
|
||||
def test_equals_missing_values():
    """A one-row slice never equals the bare tuple at that position."""
    # make sure take is not using -1
    mi = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    for pos in (0, 1):
        one_row = mi[pos : pos + 1]
        assert not one_row.equals(mi[pos])
|
||||
|
||||
|
||||
def test_equals_missing_values_differently_sorted():
    """Row order matters to ``equals`` even when the rows contain NaN (GH#38439)."""
    rows = [(81.0, np.nan), (np.nan, np.nan)]
    reference = MultiIndex.from_tuples(rows)

    swapped = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not reference.equals(swapped)

    same_order = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert reference.equals(same_order)
|
||||
|
||||
|
||||
def test_is_():
    """``is_`` follows shared identity: views and renames keep it, new levels break it."""
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    for alias in (mi, mi.view(), mi.view().view().view().view()):
        assert mi.is_(alias)

    renamed_view = mi.view()
    # names are metadata, they don't change id
    renamed_view.names = ["A", "B"]
    assert renamed_view.is_(mi)
    assert mi.is_(renamed_view)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    relevelled = renamed_view.set_levels([list(range(10)), list(range(10))])
    assert not relevelled.is_(renamed_view)
    # shouldn't change
    assert renamed_view.is_(mi)

    # GH 17464 - Remove duplicate MultiIndex levels
    relevelled_view = relevelled.view()
    relevelled_view = relevelled_view.set_levels([list(range(10)), list(range(10))])
    assert not relevelled_view.is_(relevelled)

    # Even re-setting the very same levels yields a new identity.
    same_levels = mi.view()
    same_levels = same_levels.set_levels(same_levels.levels)
    assert not same_levels.is_(mi)
|
||||
|
||||
|
||||
def test_is_all_dates(idx):
|
||||
assert not idx._is_all_dates
|
||||
|
||||
|
||||
def test_is_numeric(idx):
|
||||
# MultiIndex is never numeric
|
||||
assert not is_any_real_numeric_dtype(idx)
|
||||
|
||||
|
||||
def test_multiindex_compare():
    """Comparisons on a single-level MultiIndex behave like the multi-level case.

    GH 21149: ensure comparison operations for MultiIndex with nlevels == 1
    behave consistently with those for MultiIndex with nlevels > 1.
    """
    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    equal_to_self = Series(midx == midx)
    tm.assert_series_equal(equal_to_self, Series([True, True]))

    # Greater than comparison: MultiIndex object vs self
    greater_than_self = Series(midx > midx)
    tm.assert_series_equal(greater_than_self, Series([False, False]))
|
||||
|
||||
|
||||
def test_equals_ea_int_regular_int():
    """A masked ``Int64`` level is not equal to a plain integer level (GH#46026)."""
    masked = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    plain = MultiIndex.from_arrays([[1, 2], [3, 4]])

    # The inequality must hold in both directions.
    assert not masked.equals(plain)
    assert not plain.equals(masked)
|
@ -0,0 +1,249 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_format(idx):
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
idx.format()
|
||||
idx[:0].format()
|
||||
|
||||
|
||||
def test_format_integer_names():
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
|
||||
)
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
index.format(names=True)
|
||||
|
||||
|
||||
def test_format_sparse_config(idx):
|
||||
# GH1538
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with pd.option_context("display.multi_sparse", False):
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = idx.format()
|
||||
assert result[1] == "foo two"
|
||||
|
||||
|
||||
def test_format_sparse_display():
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1], [0, 1], [0, 1], [0]],
|
||||
codes=[
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 0, 1, 0, 0, 1],
|
||||
[0, 1, 0, 0, 1, 0],
|
||||
[0, 0, 0, 0, 0, 0],
|
||||
],
|
||||
)
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = index.format()
|
||||
assert result[3] == "1 0 0 0"
|
||||
|
||||
|
||||
def test_repr_with_unicode_data():
    """repr shows non-ASCII labels as-is, without backslash escapes."""
    with pd.option_context("display.encoding", "UTF-8"):
        data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(data)
        index = frame.set_index(["a", "b"]).index
        # we don't want unicode-escaped
        assert "\\" not in repr(index)
|
||||
|
||||
|
||||
def test_repr_roundtrip_raises():
    """Evaluating a MultiIndex repr does not rebuild the index; it raises TypeError."""
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    text = repr(mi)
    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        # The evaluated text is this test's own repr output, not external input.
        eval(text)
|
||||
|
||||
|
||||
def test_unicode_string_with_unicode():
    """str() of an index holding a non-ASCII label must not raise."""
    data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(data)
    idx = frame.set_index(["a", "b"]).index
    str(idx)
|
||||
|
||||
|
||||
def test_repr_max_seq_item_setting(idx):
|
||||
# GH10182
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert "..." not in str(idx)
|
||||
|
||||
|
||||
class TestRepr:
|
||||
def test_unicode_repr_issues(self):
|
||||
levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
|
||||
codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
|
||||
index = MultiIndex(levels=levels, codes=codes)
|
||||
|
||||
repr(index.levels)
|
||||
repr(index.get_level_values(1))
|
||||
|
||||
def test_repr_max_seq_items_equal_to_n(self, idx):
|
||||
# display.max_seq_items == n
|
||||
with pd.option_context("display.max_seq_items", 6):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
def test_repr(self, idx):
|
||||
result = idx[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
with pd.option_context("display.max_seq_items", 5):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
...
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
# display.max_seq_items == 1
|
||||
with pd.option_context("display.max_seq_items", 1):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([...
|
||||
('qux', 'two')],
|
||||
names=['first', ...], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
def test_rjust(self):
|
||||
n = 1000
|
||||
ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
|
||||
dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
|
||||
mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
|
||||
result = mi[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi[::500].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:08:20'),
|
||||
('abc', 10, '2000-01-01 00:16:40'),
|
||||
('abc', 10, '2000-01-01 00:25:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:00:01'),
|
||||
( 'a', 9, '2000-01-01 00:00:02'),
|
||||
( 'a', 9, '2000-01-01 00:00:03'),
|
||||
( 'a', 9, '2000-01-01 00:00:04'),
|
||||
( 'a', 9, '2000-01-01 00:00:05'),
|
||||
( 'a', 9, '2000-01-01 00:00:06'),
|
||||
( 'a', 9, '2000-01-01 00:00:07'),
|
||||
( 'a', 9, '2000-01-01 00:00:08'),
|
||||
( 'a', 9, '2000-01-01 00:00:09'),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10'),
|
||||
('abc', 10, '2000-01-01 00:33:11'),
|
||||
('abc', 10, '2000-01-01 00:33:12'),
|
||||
('abc', 10, '2000-01-01 00:33:13'),
|
||||
('abc', 10, '2000-01-01 00:33:14'),
|
||||
('abc', 10, '2000-01-01 00:33:15'),
|
||||
('abc', 10, '2000-01-01 00:33:16'),
|
||||
('abc', 10, '2000-01-01 00:33:17'),
|
||||
('abc', 10, '2000-01-01 00:33:18'),
|
||||
('abc', 10, '2000-01-01 00:33:19')],
|
||||
names=['a', 'b', 'dti'], length=2000)"""
|
||||
assert result == expected
|
||||
|
||||
def test_tuple_width(self):
|
||||
n = 1000
|
||||
ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
|
||||
dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
|
||||
levels = [ci, ci.codes + 9, dti, dti, dti]
|
||||
names = ["a", "b", "dti_1", "dti_2", "dti_3"]
|
||||
mi = MultiIndex.from_arrays(levels, names=names)
|
||||
result = mi[:1].__repr__()
|
||||
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501
|
||||
assert result == expected
|
||||
|
||||
result = mi[:10].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
|
||||
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
|
||||
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
|
||||
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
|
||||
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
|
||||
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
|
||||
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
|
||||
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
|
||||
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
|
||||
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
|
||||
assert result == expected
|
||||
|
||||
def test_multiindex_long_element(self):
|
||||
# Non-regression test towards GH#52960
|
||||
data = MultiIndex.from_tuples([("c" * 62,)])
|
||||
|
||||
expected = (
|
||||
"MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
|
||||
"cccccccccccccccccccccc',)],\n )"
|
||||
)
|
||||
assert str(data) == expected
|
@ -0,0 +1,124 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetLevelValues:
    """Checks on the element type returned by ``get_level_values``."""

    def test_get_level_values_box_datetime64(self):
        """Elements of a datetime level come back boxed as ``Timestamp``."""
        dates = date_range("1/1/2000", periods=4)
        index = MultiIndex(
            levels=[dates, [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
        )

        first_value = index.get_level_values(0)[0]
        assert isinstance(first_value, Timestamp)
|
||||
|
||||
|
||||
def test_get_level_values(idx):
|
||||
result = idx.get_level_values(0)
|
||||
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == "first"
|
||||
|
||||
result = idx.get_level_values("first")
|
||||
expected = idx.get_level_values(0)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 10460
|
||||
index = MultiIndex(
|
||||
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
|
||||
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
|
||||
)
|
||||
|
||||
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
|
||||
tm.assert_index_equal(index.get_level_values(0), exp)
|
||||
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
|
||||
tm.assert_index_equal(index.get_level_values(1), exp)
|
||||
|
||||
|
||||
def test_get_level_values_all_na():
    """A level consisting entirely of NaN comes back as a float64 index (GH#17924)."""
    # GH#17924 when level entirely consists of nan
    index = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])

    all_nan = index.get_level_values(0)
    tm.assert_index_equal(all_nan, Index([np.nan, np.nan, np.nan], dtype=np.float64))

    mixed = index.get_level_values(1)
    tm.assert_index_equal(mixed, Index(["a", np.nan, 1], dtype=object))
|
||||
|
||||
|
||||
def test_get_level_values_int_with_na():
    """An integer level containing NaN round-trips through get_level_values (GH#17924)."""
    labels = ["a", "b", "b"]
    for numbers in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        index = MultiIndex.from_arrays([labels, numbers])
        result = index.get_level_values(1)
        tm.assert_index_equal(result, Index(numbers))
|
||||
|
||||
|
||||
def test_get_level_values_na():
    """Missing values survive get_level_values for float, object, datetime and empty levels."""
    index = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])
    tm.assert_index_equal(
        index.get_level_values(0), Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(index.get_level_values(1), Index(["a", np.nan, 1]))

    # Datetime level with a NaT entry.
    index = MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        index.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    # Completely empty levels.
    index = MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(index.get_level_values(0), Index([], dtype=object))
|
||||
|
||||
|
||||
def test_get_level_values_when_periods():
    """Levels rebuilt from a period level's values are monotonic increasing.

    GH33131. See also discussion in GH32669.
    This test can probably be removed when PeriodIndex._engine is removed.
    """
    from pandas import (
        Period,
        PeriodIndex,
    )

    quarters = PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")
    idx = MultiIndex.from_arrays([quarters])
    level_values = [idx._get_level_values(level) for level in range(idx.nlevels)]
    idx2 = MultiIndex.from_arrays(level_values)
    assert all(level.is_monotonic_increasing for level in idx2.levels)
|
||||
|
||||
|
||||
def test_values_loses_freq_of_underlying_index():
|
||||
# GH#49054
|
||||
idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME"))
|
||||
expected = idx.copy(deep=True)
|
||||
idx2 = Index([1, 2, 3])
|
||||
midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]])
|
||||
midx.values
|
||||
assert idx.freq is not None
|
||||
tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,384 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_matching(actual, expected, check_dtype=False):
    """Assert that two sequences of array-likes match pair by pair.

    Both sides are converted with ``np.asarray`` before comparing, to avoid
    specifying internal representation as much as possible.

    Parameters
    ----------
    actual, expected : sequences of array-likes, compared position by position
    check_dtype : bool, default False
        Forwarded to ``tm.assert_numpy_array_equal``.

    Raises
    ------
    AssertionError
        If the lengths differ or any pair of arrays is not equal.
    """
    assert len(actual) == len(expected)
    for left, right in zip(actual, expected):
        tm.assert_numpy_array_equal(
            np.asarray(left), np.asarray(right), check_dtype=check_dtype
        )
|
||||
|
||||
|
||||
def test_get_level_number_integer(idx):
|
||||
idx.names = [1, 0]
|
||||
assert idx._get_level_number(1) == 0
|
||||
assert idx._get_level_number(0) == 1
|
||||
msg = "Too many levels: Index has only 2 levels, not 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
idx._get_level_number(2)
|
||||
with pytest.raises(KeyError, match="Level fourth not found"):
|
||||
idx._get_level_number("fourth")
|
||||
|
||||
|
||||
def test_get_dtypes(using_infer_string):
|
||||
# Test MultiIndex.dtypes (# Gh37062)
|
||||
idx_multitype = MultiIndex.from_product(
|
||||
[[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
|
||||
names=["int", "string", "dt"],
|
||||
)
|
||||
|
||||
exp = "object" if not using_infer_string else "string"
|
||||
expected = pd.Series(
|
||||
{
|
||||
"int": np.dtype("int64"),
|
||||
"string": exp,
|
||||
"dt": DatetimeTZDtype(tz="utc"),
|
||||
}
|
||||
)
|
||||
tm.assert_series_equal(expected, idx_multitype.dtypes)
|
||||
|
||||
|
||||
def test_get_dtypes_no_level_name(using_infer_string):
|
||||
# Test MultiIndex.dtypes (# GH38580 )
|
||||
idx_multitype = MultiIndex.from_product(
|
||||
[
|
||||
[1, 2, 3],
|
||||
["a", "b", "c"],
|
||||
pd.date_range("20200101", periods=2, tz="UTC"),
|
||||
],
|
||||
)
|
||||
exp = "object" if not using_infer_string else "string"
|
||||
expected = pd.Series(
|
||||
{
|
||||
"level_0": np.dtype("int64"),
|
||||
"level_1": exp,
|
||||
"level_2": DatetimeTZDtype(tz="utc"),
|
||||
}
|
||||
)
|
||||
tm.assert_series_equal(expected, idx_multitype.dtypes)
|
||||
|
||||
|
||||
def test_get_dtypes_duplicate_level_names(using_infer_string):
|
||||
# Test MultiIndex.dtypes with non-unique level names (# GH45174)
|
||||
result = MultiIndex.from_product(
|
||||
[
|
||||
[1, 2, 3],
|
||||
["a", "b", "c"],
|
||||
pd.date_range("20200101", periods=2, tz="UTC"),
|
||||
],
|
||||
names=["A", "A", "A"],
|
||||
).dtypes
|
||||
exp = "object" if not using_infer_string else "string"
|
||||
expected = pd.Series(
|
||||
[np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
|
||||
index=["A", "A", "A"],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
    """Out-of-range level numbers raise IndexError in both directions."""
    index = multiindex_dataframe_random_data.index

    with pytest.raises(IndexError, match="Too many levels"):
        index._get_level_number(2)
    with pytest.raises(IndexError, match="not a valid level number"):
        index._get_level_number(-3)
|
||||
|
||||
|
||||
def test_set_name_methods(idx):
    """Exercise set_names with and without ``inplace`` and ``level``."""
    # so long as these are synonyms, we don't need to test set_names
    original_names = ["first", "second"]
    assert idx.rename == idx.set_names

    suffixed = [name + "SUFFIX" for name in original_names]
    renamed = idx.set_names(suffixed)
    # only the returned index carries the new names
    assert idx.names == original_names
    assert renamed.names == suffixed

    msg = "Length of names must match number of levels in MultiIndex"
    with pytest.raises(ValueError, match=msg):
        renamed.set_names(suffixed + suffixed)

    suffixed_twice = [name + "SUFFIX2" for name in suffixed]
    outcome = renamed.set_names(suffixed_twice, inplace=True)
    assert outcome is None
    assert renamed.names == suffixed_twice

    # set names for specific level (# GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == original_names
    assert renamed.names == [suffixed[0], original_names[1]]

    outcome = renamed.set_names(suffixed_twice[0], level=0, inplace=True)
    assert outcome is None
    assert renamed.names == [suffixed_twice[0], original_names[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == original_names
    assert renamed.names == suffixed

    outcome = renamed.set_names(suffixed_twice, level=[0, 1], inplace=True)
    assert outcome is None
    assert renamed.names == suffixed_twice
|
||||
|
||||
|
||||
def test_set_levels_codes_directly(idx):
    """Assigning to ``levels`` or ``codes`` directly raises AttributeError."""
    new_levels = [[lev + "a" for lev in level] for level in idx.levels]

    major_codes, minor_codes = idx.codes
    new_codes = [
        [(code + 1) % 3 for code in major_codes],
        [(code + 1) % 1 for code in minor_codes],
    ]

    with pytest.raises(AttributeError, match="Can't set attribute"):
        idx.levels = new_levels

    # the expected message for ``codes`` is selected by the PY311 flag
    if PY311:
        msg = "property 'codes' of 'MultiIndex' object has no setter"
    else:
        msg = "can't set attribute"
    with pytest.raises(AttributeError, match=msg):
        idx.codes = new_codes
|
||||
|
||||
|
||||
def test_set_levels(idx):
    """set_levels returns a changed copy; failed calls leave the index intact."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    old_levels = idx.levels
    suffixed = [[lev + "a" for lev in level] for level in old_levels]

    # replace every level [w/o mutation]
    changed = idx.set_levels(suffixed)
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, old_levels)

    # replace one specific level [w/o mutation]
    changed = idx.set_levels(suffixed[0], level=0)
    assert_matching(changed.levels, [suffixed[0], old_levels[1]])
    assert_matching(idx.levels, old_levels)

    changed = idx.set_levels(suffixed[1], level=1)
    assert_matching(changed.levels, [old_levels[0], suffixed[1]])
    assert_matching(idx.levels, old_levels)

    # replace multiple levels selected explicitly [w/o mutation]
    changed = idx.set_levels(suffixed, level=[0, 1])
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, old_levels)

    # illegal level changing should not change levels
    # GH 13754
    snapshot = idx.copy()
    with pytest.raises(ValueError, match="^On"):
        idx.set_levels(["c"], level=0)
    assert_matching(idx.levels, snapshot.levels, check_dtype=True)

    with pytest.raises(ValueError, match="^On"):
        idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
    assert_matching(idx.codes, snapshot.codes, check_dtype=True)

    with pytest.raises(TypeError, match="^Levels"):
        idx.set_levels("c", level=0)
    assert_matching(idx.levels, snapshot.levels, check_dtype=True)

    with pytest.raises(TypeError, match="^Codes"):
        idx.set_codes(1, level=0)
    assert_matching(idx.codes, snapshot.codes, check_dtype=True)
|
||||
|
||||
|
||||
def test_set_codes(idx):
    """set_codes returns a changed copy and leaves the source index untouched."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    old_codes = idx.codes
    major_codes, minor_codes = old_codes
    shifted = [
        [(code + 1) % 3 for code in major_codes],
        [(code + 1) % 1 for code in minor_codes],
    ]

    # changing codes w/o mutation
    changed = idx.set_codes(shifted)
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, old_codes)

    # codes changing specific level w/o mutation
    changed = idx.set_codes(shifted[0], level=0)
    assert_matching(changed.codes, [shifted[0], old_codes[1]])
    assert_matching(idx.codes, old_codes)

    changed = idx.set_codes(shifted[1], level=1)
    assert_matching(changed.codes, [old_codes[0], shifted[1]])
    assert_matching(idx.codes, old_codes)

    # codes changing multiple levels w/o mutation
    changed = idx.set_codes(shifted, level=[0, 1])
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, old_codes)

    # label changing for levels of different magnitude of categories
    wide = MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_codes = range(129, -1, -1)
    expected = MultiIndex.from_tuples([(0, i) for i in reversed_codes])

    # [w/o mutation]
    result = wide.set_codes(codes=reversed_codes, level=1)
    assert result.equals(expected)
|
||||
|
||||
|
||||
def test_set_levels_codes_names_bad_input(idx):
    """Malformed arguments to set_levels/set_codes/set_names raise."""
    levels, codes, names = idx.levels, idx.codes, idx.names

    # too few entries for the number of levels
    with pytest.raises(ValueError, match="Length of levels"):
        idx.set_levels([levels[0]])

    with pytest.raises(ValueError, match="Length of codes"):
        idx.set_codes([codes[0]])

    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names([names[0]])

    # a single level's values (not a list of them) must be rejected
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0])

    # same for a single codes array
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0])

    # and for a single name
    with pytest.raises(TypeError, match="list-like"):
        idx.set_names(names[0])

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_levels(levels, level=0)

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_codes(codes, level=0)

    # should have equal lengths
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names(names[0], level=[0, 1])

    with pytest.raises(TypeError, match="Names must be a"):
        idx.set_names(names, level=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    """set_names does not raise on a MultiIndex with nlevels == 1 (GH 21149)."""
    expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    single_level = MultiIndex.from_product([[0, 1]])
    result = single_level.set_names("first", level=0, inplace=inplace)

    # in-place mode: compare the mutated index rather than the return value
    if inplace:
        result = single_level

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    """A CategoricalIndex can be installed as a level (GH13854)."""
    index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cat_level = CategoricalIndex(list("bac"), ordered=ordered)

    result = index.set_levels(cat_level, level=0)
    expected = MultiIndex(levels=[cat_level, [0, 1, 2, 3]], codes=index.codes)
    tm.assert_index_equal(result, expected)

    # level values keep the categories and orderedness of the new level
    level_values = result.get_level_values(0)
    expected_values = CategoricalIndex(
        list("bacb"), categories=cat_level.categories, ordered=cat_level.ordered
    )
    tm.assert_index_equal(level_values, expected_values)
|
||||
|
||||
|
||||
def test_set_value_keeps_names():
    """Setting a value at a new key via ``.at`` keeps the index names (#3742)."""
    people = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    numbers = ["1", "2", "3"] * 2
    idx = MultiIndex.from_arrays([people, numbers], names=["Name", "Number"])
    data = np.random.default_rng(2).standard_normal((6, 4))
    df = pd.DataFrame(data, columns=["one", "two", "three", "four"], index=idx)
    df = df.sort_index()
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")

    # ("grethe", "4") is not among the existing keys
    df.at[("grethe", "4"), "one"] = 99.34
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")
|
||||
|
||||
|
||||
def test_set_levels_with_iterable():
    """set_levels accepts a one-shot iterable such as ``map`` (GH23273)."""
    colors = ["black"] * 3
    names = ["size", "color"]
    index = MultiIndex.from_arrays([[1, 2, 3], colors], names=names)

    result = index.set_levels(map(int, ["3", "2", "1"]), level="size")

    expected = MultiIndex.from_arrays([[3, 2, 1], colors], names=["size", "color"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_set_empty_level():
    """An empty level can be replaced by an empty DatetimeIndex (GH#48636)."""
    empty_midx = MultiIndex.from_arrays([[]], names=["A"])
    result = empty_midx.set_levels(pd.DatetimeIndex([]), level=0)

    expected = MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_set_levels_pos_args_removal():
    """Passing ``level`` positionally to set_levels/set_codes raises TypeError."""
    # https://github.com/pandas-dev/pandas/issues/41485
    tuples = [(1, "one"), (3, "one")]
    idx = MultiIndex.from_tuples(tuples, names=["foo", "bar"])

    with pytest.raises(TypeError, match="positional arguments"):
        idx.set_levels(["a", "b", "c"], 0)

    with pytest.raises(TypeError, match="positional arguments"):
        idx.set_codes([[0, 1], [1, 0]], 0)
|
||||
|
||||
|
||||
def test_set_levels_categorical_keep_dtype():
|
||||
# GH#52125
|
||||
midx = MultiIndex.from_arrays([[5, 6]])
|
||||
result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0)
|
||||
expected = MultiIndex.from_arrays([pd.Categorical([1, 2])])
|
||||
tm.assert_index_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,289 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_labels_dtypes():
    """Codes use int8, int16 or int32 depending on level size (GH 8456)."""
    mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert mi.codes[0].dtype == "int8"
    assert mi.codes[1].dtype == "int8"

    size_to_dtype = [(40, "int8"), (400, "int16"), (40000, "int32")]
    for n_values, code_dtype in size_to_dtype:
        mi = MultiIndex.from_product([["a"], range(n_values)])
        assert mi.codes[1].dtype == code_dtype

    mi = MultiIndex.from_product([["a"], range(1000)])
    for level_codes in (mi.codes[0], mi.codes[1]):
        assert (level_codes >= 0).all()
|
||||
|
||||
|
||||
def test_values_boxed():
    """``.values`` matches a 1-D object array of the source tuples."""
    stamp = pd.Timestamp
    tuples = [
        (1, stamp("2000-01-01")),
        (2, pd.NaT),
        (3, stamp("2000-01-03")),
        (1, stamp("2000-01-04")),
        (2, stamp("2000-01-02")),
        (3, stamp("2000-01-03")),
    ]
    result = MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)

    # Check that code branches for boxed values produce identical results
    sliced_after = result.values[:4]
    sliced_before = result[:4].values
    tm.assert_numpy_array_equal(sliced_after, sliced_before)
|
||||
|
||||
|
||||
def test_values_multiindex_datetimeindex():
    """Round-trip naive and tz-aware datetime levels through ``.values``."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)
    aware = pd.DatetimeIndex(ints, tz="US/Central")
    idx = MultiIndex.from_arrays([naive, aware])

    values = idx.values
    tm.assert_index_equal(pd.DatetimeIndex([row[0] for row in values]), naive)
    tm.assert_index_equal(pd.DatetimeIndex([row[1] for row in values]), aware)

    # n_lev > n_lab
    values = idx[:2].values
    tm.assert_index_equal(pd.DatetimeIndex([row[0] for row in values]), naive[:2])
    tm.assert_index_equal(pd.DatetimeIndex([row[1] for row in values]), aware[:2])
|
||||
|
||||
|
||||
def test_values_multiindex_periodindex():
    """Round-trip an integer level and a PeriodIndex level through ``.values``."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")
    idx = MultiIndex.from_arrays([ints, pidx])

    values = idx.values
    outer = Index([row[0] for row in values])
    tm.assert_index_equal(outer, Index(ints, dtype=np.int64))
    inner = pd.PeriodIndex([row[1] for row in values])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    values = idx[:2].values
    outer = Index([row[0] for row in values])
    tm.assert_index_equal(outer, Index(ints[:2], dtype=np.int64))
    inner = pd.PeriodIndex([row[1] for row in values])
    tm.assert_index_equal(inner, pidx[:2])
|
||||
|
||||
|
||||
def test_consistency():
    """Build a MultiIndex with a 70000-value level, then one with repeated codes."""
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    # the fact that it works means it's consistent
    MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[np.arange(70000), np.repeat(range(10), 7000)],
    )

    # inconsistent
    repeated_major = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    repeated_minor = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[repeated_major, repeated_minor]
    )

    assert index.is_unique is False
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    """Non-smoke check that lookups do not hit hash collisions."""
    size_cutoff = 50
    with monkeypatch.context() as patcher:
        patcher.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        positions = index.get_indexer(index.values)
        tm.assert_numpy_array_equal(positions, np.arange(len(index), dtype="intp"))

        # spot-check the first two and last two entries
        for i in [0, 1, len(index) - 2, len(index) - 1]:
            assert index.get_loc(index[i]) == i
|
||||
|
||||
|
||||
def test_dims():
    """Placeholder test; it performs no checks."""
|
||||
|
||||
|
||||
def test_take_invalid_kwargs():
    """``take`` rejects unknown keywords and the ``out`` / ``mode`` arguments."""
    dates = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
    idx = MultiIndex.from_product([["A", "B"], dates], names=["str", "dt"])
    indices = [1, 2]

    unexpected = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=unexpected):
        idx.take(indices, foo=2)

    with pytest.raises(ValueError, match="the 'out' parameter is not supported"):
        idx.take(indices, out=indices)

    with pytest.raises(ValueError, match="the 'mode' parameter is not supported"):
        idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
def test_isna_behavior(idx):
    """``pd.isna`` on a MultiIndex raises NotImplementedError (GH5123)."""
    # should not segfault
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    expected_msg = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_msg):
        pd.isna(idx)
|
||||
|
||||
|
||||
def test_large_multiindex_error(monkeypatch):
    """Missing keys raise KeyError just below and just above the size cutoff."""
    # GH12527
    size_cutoff = 50
    with monkeypatch.context() as patcher:
        patcher.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)

        # first frame sits below the cutoff, second one above it
        for n_inner in (size_cutoff - 1, size_cutoff + 1):
            frame = pd.DataFrame(
                1,
                index=MultiIndex.from_product([[1, 2], range(n_inner)]),
                columns=["dest"],
            )
            with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
                frame.loc[(-1, 0), "dest"]
            with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
                frame.loc[(3, 0), "dest"]
|
||||
|
||||
|
||||
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    """A missing Series attribute raises a plain AttributeError (GH 18165)."""
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    values = range(50)
    index = MultiIndex.from_arrays([values, values])
    df = pd.DataFrame({"a": values, "b": values}, index=index)

    expected_msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=expected_msg):
        df["a"].foo()
|
||||
|
||||
|
||||
def test_can_hold_identifiers(idx):
|
||||
key = idx[0]
|
||||
assert idx._can_hold_identifiers_and_holds_name(key) is True
|
||||
|
||||
|
||||
def test_metadata_immutable(idx):
    """Levels, codes and names reject item assignment."""
    levels, codes = idx.levels, idx.codes
    immutable_msg = re.compile("does not support mutable operations")

    # shouldn't be able to set at either the top level or base level
    with pytest.raises(TypeError, match=immutable_msg):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=immutable_msg):
        levels[0][0] = levels[0][0]

    # ditto for labels
    with pytest.raises(TypeError, match=immutable_msg):
        codes[0] = codes[0]
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]

    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=immutable_msg):
        names[0] = names[0]
|
||||
|
||||
|
||||
def test_level_setting_resets_attributes():
    """Monotonicity is re-evaluated after the levels are replaced."""
    first = ["A", "A", "B", "B", "B"]
    second = [1, 2, 1, 2, 3]
    ind = MultiIndex.from_arrays([first, second])
    assert ind.is_monotonic_increasing

    ind = ind.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic_increasing
|
||||
|
||||
|
||||
def test_rangeindex_fallback_coercion_bug():
|
||||
# GH 12893
|
||||
df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
||||
df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
||||
df = pd.concat(
|
||||
{"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
|
||||
axis=1,
|
||||
)
|
||||
df.index.names = ["fizz", "buzz"]
|
||||
|
||||
expected = pd.DataFrame(
|
||||
{"df2": np.arange(100), "df1": np.arange(100)},
|
||||
index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected, check_like=True)
|
||||
|
||||
result = df.index.get_level_values("fizz")
|
||||
expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = df.index.get_level_values("buzz")
|
||||
expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_memory_usage(idx):
|
||||
result = idx.memory_usage()
|
||||
if len(idx):
|
||||
idx.get_loc(idx[0])
|
||||
result2 = idx.memory_usage()
|
||||
result3 = idx.memory_usage(deep=True)
|
||||
|
||||
# RangeIndex, IntervalIndex
|
||||
# don't have engines
|
||||
if not isinstance(idx, (RangeIndex, IntervalIndex)):
|
||||
assert result2 > result
|
||||
|
||||
if idx.inferred_type == "object":
|
||||
assert result3 > result2
|
||||
|
||||
else:
|
||||
# we report 0 for no-length
|
||||
assert result == 0
|
||||
|
||||
|
||||
def test_nlevels(idx):
|
||||
assert idx.nlevels == 2
|
@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_isin_nan():
    """A tuple containing NaN matches the NaN entry of the index."""
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])

    via_numpy = idx.isin([("bar", np.nan)])
    tm.assert_numpy_array_equal(via_numpy, np.array([False, True]))

    via_float = idx.isin([("bar", float("nan"))])
    tm.assert_numpy_array_equal(via_float, np.array([False, True]))
|
||||
|
||||
|
||||
def test_isin_missing(nulls_fixture):
|
||||
# GH48905
|
||||
mi1 = MultiIndex.from_tuples([(1, nulls_fixture)])
|
||||
mi2 = MultiIndex.from_tuples([(1, 1), (1, 2)])
|
||||
result = mi2.isin(mi1)
|
||||
expected = np.array([False, False])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_isin():
    """``isin`` matches whole tuples and returns a bool array for empty input."""
    values = [("foo", 2), ("bar", 3), ("quux", 4)]

    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    mask = idx.isin(values)
    tm.assert_numpy_array_equal(mask, np.array([False, False, True, True]))

    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    mask = empty.isin(values)
    assert len(mask) == 0
    assert mask.dtype == np.bool_
|
||||
|
||||
|
||||
def test_isin_level_kwarg():
    """``isin(level=...)`` takes positions or names and rejects unknown levels."""
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # positive and negative positions for the same level agree
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))

    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))

    too_high = "Too many levels: Index has only 2 levels, not 6"
    with pytest.raises(IndexError, match=too_high):
        idx.isin(vals_0, level=5)
    too_low = "Too many levels: Index has only 2 levels, -5 is not a valid level number"
    with pytest.raises(IndexError, match=too_low):
        idx.isin(vals_0, level=-5)

    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        idx.isin(vals_0, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        idx.isin(vals_1, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        idx.isin(vals_1, level="A")

    # once the levels are named, the names are accepted
    idx.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))

    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    """``isin`` matches NaN labels, both per tuple and per level (GH 19132)."""
    first = [np.nan, "a", "b"]
    second = ["c", "d", np.nan]
    midx = MultiIndex.from_arrays([first, second])

    mask = midx.isin(labels, level=level)
    tm.assert_numpy_array_equal(mask, expected)
|
||||
|
||||
|
||||
def test_isin_empty():
    """``isin`` with an empty list is False for every entry (GH#51599)."""
    midx = MultiIndex.from_arrays([[1, 2], [3, 4]])
    mask = midx.isin([])
    tm.assert_numpy_array_equal(mask, np.array([False, False]))
|
||||
|
||||
|
||||
def test_isin_generator():
    """``isin`` accepts a generator of tuples (GH#52568)."""
    midx = MultiIndex.from_tuples([(1, 2)])
    candidates = (x for x in [(1, 2)])
    mask = midx.isin(candidates)
    tm.assert_numpy_array_equal(mask, np.array([True]))
|
@ -0,0 +1,268 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
MultiIndex,
|
||||
Series,
|
||||
StringDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Join a flat Index with a MultiIndex on the "second" level."""
    join_index, lidx, ridx = other.join(
        idx, how=join_type, level="second", return_indexers=True
    )

    exp_level = other.join(idx.levels[1], how=join_type)
    assert join_index.levels[0].equals(idx.levels[0])
    assert join_index.levels[1].equals(exp_level)

    # pare down levels
    keep = np.array([entry[1] in exp_level for entry in idx], dtype=bool)
    exp_values = idx.values[keep]
    tm.assert_numpy_array_equal(join_index.values, exp_values)

    if join_type not in ("outer", "inner"):
        return

    # the flipped call returns the two indexers in swapped positions
    join_index2, ridx2, lidx2 = idx.join(
        other, how=join_type, level="second", return_indexers=True
    )

    assert join_index.equals(join_index2)
    tm.assert_numpy_array_equal(lidx, lidx2)
    tm.assert_numpy_array_equal(ridx, ridx2)
    tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
||||
|
||||
|
||||
def test_join_level_corner_case(idx):
    """Level join gives a MultiIndex; MultiIndex-on-MultiIndex with level raises."""
    # some corner cases
    flat = Index(["three", "one", "two"])
    joined = flat.join(idx, level="second")
    assert isinstance(joined, MultiIndex)

    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
|
||||
|
||||
|
||||
def test_join_self(idx, join_type):
|
||||
result = idx.join(idx, how=join_type)
|
||||
expected = idx
|
||||
if join_type == "outer":
|
||||
expected = expected.sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_multi():
    """Join a MultiIndex with a flat Index named after one level (GH 10665)."""
    midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    idx = Index([1, 2, 5], name="b")

    # inner
    exp_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
    exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # flip
    jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # keep MultiIndex
    exp_ridx = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )
    jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # flip
    jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||||
|
||||
|
||||
def test_join_multi_wrong_order():
    """Join two MultiIndexes whose level names are in swapped order."""
    # GH 25760
    # GH 28956
    level_values = [[1, 2], [3, 4]]
    midx1 = MultiIndex.from_product(level_values, names=["a", "b"])
    midx2 = MultiIndex.from_product(level_values, names=["b", "a"])

    join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True)

    tm.assert_index_equal(midx1, join_idx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, np.array([-1, -1, -1, -1], dtype=np.intp))
|
||||
|
||||
|
||||
def test_join_multi_return_indexers():
    """``return_indexers=False`` yields only the joined index (GH 34074)."""
    three_levels = MultiIndex.from_product(
        [[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]
    )
    two_levels = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    joined = three_levels.join(two_levels, return_indexers=False)
    tm.assert_index_equal(joined, three_levels)
|
||||
|
||||
|
||||
def test_join_overlapping_interval_level():
    """Outer join of two MultiIndexes that share an Interval level (GH 44096)."""
    idx_1 = MultiIndex.from_tuples(
        [
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (1, Interval(2.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),  # interval limit is here at 3.0, not at 2.0
            (2, Interval(3.0, 5.0)),
        ],
        names=["num", "interval"],
    )

    # same entries as idx_1, listed in a different order
    idx_2 = MultiIndex.from_tuples(
        [
            (1, Interval(2.0, 5.0)),
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (2, Interval(3.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),
        ],
        names=["num", "interval"],
    )

    expected_tuples = [
        (1, Interval(0.0, 1.0)),
        (1, Interval(1.0, 2.0)),
        (1, Interval(2.0, 5.0)),
        (2, Interval(0.0, 1.0)),
        (2, Interval(1.0, 3.0)),
        (2, Interval(3.0, 5.0)),
    ]
    expected = MultiIndex.from_tuples(expected_tuples, names=["num", "interval"])

    result = idx_1.join(idx_2, how="outer")
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_midx_ea():
    """Inner join of MultiIndexes built from "Int64" Series (GH#49277)."""

    def int64(values):
        # every level in this test uses the "Int64" dtype
        return Series(values, dtype="Int64")

    midx = MultiIndex.from_arrays(
        [int64([1, 1, 3]), int64([1, 2, 3])], names=["a", "b"]
    )
    midx2 = MultiIndex.from_arrays([int64([1]), int64([3])], names=["a", "c"])

    result = midx.join(midx2, how="inner")
    expected = MultiIndex.from_arrays(
        [int64([1, 1]), int64([1, 2]), int64([3, 3])],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_midx_string():
    # GH#49277
    # Same scenario as the Int64 variant, with StringDtype-backed levels.
    def strings(values):
        return Series(values, dtype=StringDtype())

    left = MultiIndex.from_arrays(
        [strings(["a", "a", "c"]), strings(["a", "b", "c"])],
        names=["a", "b"],
    )
    right = MultiIndex.from_arrays(
        [strings(["a"]), strings(["c"])],
        names=["a", "c"],
    )
    expected = MultiIndex.from_arrays(
        [strings(["a", "a"]), strings(["a", "b"]), strings(["c", "c"])],
        names=["a", "b", "c"],
    )

    result = left.join(right, how="inner")
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_multi_with_nan():
    # GH29252
    # The right frame has a NaN key in "id2", so only the ("A", 2.0) row of
    # the left frame receives a "col2" value.
    level_names = ["id1", "id2"]

    left = DataFrame(
        data={"col1": [1.1, 1.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=level_names),
    )
    right = DataFrame(
        data={"col2": [2.1, 2.2]},
        index=MultiIndex.from_product([["A"], [np.nan, 2.0]], names=level_names),
    )
    expected = DataFrame(
        data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=level_names),
    )

    result = left.join(right)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [0, 5])
|
||||
def test_join_dtypes(any_numeric_ea_dtype, val):
|
||||
# GH#49830
|
||||
midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [3, 4]])
|
||||
midx2 = MultiIndex.from_arrays(
|
||||
[Series([1, val, val], dtype=any_numeric_ea_dtype), [3, 4, 4]]
|
||||
)
|
||||
result = midx.join(midx2, how="outer")
|
||||
expected = MultiIndex.from_arrays(
|
||||
[Series([val, val, 1, 2], dtype=any_numeric_ea_dtype), [4, 4, 3, 4]]
|
||||
).sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_dtypes_all_nan(any_numeric_ea_dtype):
|
||||
# GH#49830
|
||||
midx = MultiIndex.from_arrays(
|
||||
[Series([1, 2], dtype=any_numeric_ea_dtype), [np.nan, np.nan]]
|
||||
)
|
||||
midx2 = MultiIndex.from_arrays(
|
||||
[Series([1, 0, 0], dtype=any_numeric_ea_dtype), [np.nan, np.nan, np.nan]]
|
||||
)
|
||||
result = midx.join(midx2, how="outer")
|
||||
expected = MultiIndex.from_arrays(
|
||||
[
|
||||
Series([0, 0, 1, 2], dtype=any_numeric_ea_dtype),
|
||||
[np.nan, np.nan, np.nan, np.nan],
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_index_levels():
|
||||
# GH#53093
|
||||
midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")])
|
||||
midx2 = MultiIndex.from_tuples([("a", "2019-01-31")])
|
||||
result = midx.join(midx2, how="outer")
|
||||
expected = MultiIndex.from_tuples(
|
||||
[("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")]
|
||||
)
|
||||
tm.assert_index_equal(result.levels[1], expected.levels[1])
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,46 @@
|
||||
from pandas import MultiIndex
|
||||
|
||||
|
||||
class TestIsLexsorted:
    def test_is_lexsorted(self):
        # Three code layouts over the same levels: fully sorted, sorted on
        # the first level only, and unsorted on the first level.
        levels = [[0, 1], [0, 1, 2]]

        fully_sorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        )
        assert fully_sorted._is_lexsorted()

        second_level_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
        )
        assert not second_level_unsorted._is_lexsorted()

        first_level_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
        )
        assert not first_level_unsorted._is_lexsorted()
        assert first_level_unsorted._lexsort_depth == 0
|
||||
|
||||
|
||||
class TestLexsortDepth:
    def test_lexsort_depth(self):
        # GH#28518
        # _lexsort_depth must equal the sortorder that was passed to the
        # MultiIndex constructor.
        levels = [[0, 1], [0, 1, 2]]
        cases = [
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], 2),
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], 1),
            ([[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], 0),
        ]

        for codes, sortorder in cases:
            index = MultiIndex(levels=levels, codes=codes, sortorder=sortorder)
            assert index._lexsort_depth == sortorder
|
@ -0,0 +1,111 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_fillna(idx):
|
||||
# GH 11343
|
||||
msg = "isna is not defined for MultiIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.fillna(idx[0])
|
||||
|
||||
|
||||
def test_dropna():
    # GH 6194
    with_gaps = MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # The default and how="any" both keep only rows without missing values.
    complete_rows = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    for dropped in (with_gaps.dropna(), with_gaps.dropna(how="any")):
        tm.assert_index_equal(dropped, complete_rows)

    # how="all" removes only the row that is missing in every level.
    partially_filled = MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(with_gaps.dropna(how="all"), partially_filled)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        with_gaps.dropna(how="xxx")

    # GH26408
    # missing values must also be dropped when the MultiIndex is built
    # directly from levels and codes
    from_codes = MultiIndex(
        levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )

    no_missing = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    for dropped in (from_codes.dropna(), from_codes.dropna(how="any")):
        tm.assert_index_equal(dropped, no_missing)

    some_missing = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(from_codes.dropna(how="all"), some_missing)
|
||||
|
||||
|
||||
def test_nulls(idx):
|
||||
# this is really a smoke test for the methods
|
||||
# as these are adequately tested for function elsewhere
|
||||
|
||||
msg = "isna is not defined for MultiIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.isna()
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="isna is not defined for MultiIndex")
def test_hasnans_isnans(idx):
    # GH 11343, added tests for hasnans / isnans
    clean = idx.copy()

    # no entry of the index is NaN
    nothing_missing = np.array([False] * len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, nothing_missing)
    assert clean.hasnans is False

    # overwrite the second entry with NaN and rebuild the index from values
    raw_values = idx.copy().values
    raw_values[1] = np.nan
    with_nan = type(idx)(raw_values)

    second_missing = np.array([False] * len(with_nan), dtype=bool)
    second_missing[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, second_missing)
    assert with_nan.hasnans is True
|
||||
|
||||
|
||||
def test_nan_stays_float():
    # GH 7031
    # ``empty_second`` has no values in its second level (all codes are -1).
    empty_second = MultiIndex(
        levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
    )
    filled = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = empty_second.join(filled, how="outer")
    assert pd.isna(empty_second.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    # Same check, reached through DataFrame arithmetic.
    frame_empty = pd.DataFrame([[1, 2]], index=empty_second)
    frame_filled = pd.DataFrame([[3, 4]], index=filled)
    difference = frame_empty - frame_filled
    assert pd.isna(frame_empty.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
|
||||
|
||||
|
||||
def test_tuples_have_na():
    # Position 4 has code -1 in the first level, so the first element of
    # that tuple must be NA via both item access and ``.values``.
    mi = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]],
        codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
    )

    from_getitem = mi[4]
    assert pd.isna(from_getitem[0])

    from_values = mi.values[4]
    assert pd.isna(from_values[0])
|
@ -0,0 +1,188 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
|
||||
# string ordering
|
||||
mi = lexsorted_two_level_string_multiindex
|
||||
assert mi.is_monotonic_increasing is False
|
||||
assert Index(mi.values).is_monotonic_increasing is False
|
||||
assert mi._is_strictly_monotonic_increasing is False
|
||||
assert Index(mi.values)._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_monotonic_increasing():
    def check_all(mi, expected):
        # Assert the plain and strict flags on the MultiIndex itself and on
        # the Index rebuilt from its tuple values.
        rebuilt = Index(mi.values)
        assert mi.is_monotonic_increasing is expected
        assert mi._is_strictly_monotonic_increasing is expected
        assert rebuilt.is_monotonic_increasing is expected
        # The original first group re-checked ``mi`` here, leaving the
        # rebuilt Index's strict flag untested.
        assert rebuilt._is_strictly_monotonic_increasing is expected

    # both levels ascending
    i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
    check_all(i, True)

    # first level descending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    check_all(i, False)

    # second level descending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    check_all(i, False)

    # NaN in the middle of the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
    check_all(i, False)

    # string levels, codes ascending
    i = MultiIndex(
        levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    check_all(i, True)

    # mixed levels, hits the TypeError
    # (only the MultiIndex itself is checked here, as in the original)
    i = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    check_all(i, True)
|
||||
|
||||
|
||||
def test_is_monotonic_decreasing():
    def check_all(mi, expected):
        # Assert the plain and strict flags on the MultiIndex itself and on
        # the Index rebuilt from its tuple values.
        rebuilt = Index(mi.values)
        assert mi.is_monotonic_decreasing is expected
        assert mi._is_strictly_monotonic_decreasing is expected
        assert rebuilt.is_monotonic_decreasing is expected
        # The original first group re-checked ``mi`` here, leaving the
        # rebuilt Index's strict flag untested.
        assert rebuilt._is_strictly_monotonic_decreasing is expected

    # both levels descending
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    check_all(i, True)

    # first level ascending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    check_all(i, False)

    # second level ascending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    check_all(i, False)

    # NaN in the middle of the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    check_all(i, False)

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    check_all(i, False)

    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    check_all(i, True)

    # mixed levels, hits the TypeError
    # (only the MultiIndex itself is checked here, as in the original)
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    check_all(i, True)
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_increasing():
    # The first two entries share codes (0, 0), so the index is monotonic
    # but not strictly monotonic.
    level_values = [["bar", "baz"], ["mom", "next"]]
    level_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    mi = MultiIndex(levels=level_values, codes=level_codes)

    assert mi.is_monotonic_increasing is True
    assert mi._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_decreasing():
    # The first two entries share codes (0, 0), so the index is monotonic
    # but not strictly monotonic.
    level_values = [["baz", "bar"], ["next", "mom"]]
    level_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    mi = MultiIndex(levels=level_values, codes=level_codes)

    assert mi.is_monotonic_decreasing is True
    assert mi._is_strictly_monotonic_decreasing is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    # GH: 37220
    # Wherever the NaN sits, neither monotonic flag may be True.
    mi = MultiIndex.from_tuples(values, names=["test"])
    flag = getattr(mi, attr)
    assert flag is False
|
@ -0,0 +1,201 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    # Assert that the ``name`` of every level of ``index`` matches ``names``,
    # position by position.
    actual = [lvl.name for lvl in index.levels]
    assert actual == list(names)
|
||||
|
||||
|
||||
def test_slice_keep_name():
    # Slicing a MultiIndex must carry its level names over to the slice.
    mi = MultiIndex.from_tuples([("a", "b"), (1, 2), ("c", "d")], names=["x", "y"])
    tail = mi[1:]
    assert tail.names == mi.names
|
||||
|
||||
|
||||
def test_index_name_retained():
    # GH9857
    # Adding a row through .loc must leave the index set from "z" equal to
    # the one built with all four rows up front.
    result = pd.DataFrame(
        {"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}
    ).set_index("z")
    result.loc[10] = [9, 10]

    df_expected = pd.DataFrame(
        {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    ).set_index("z")

    tm.assert_frame_equal(result, df_expected)
|
||||
|
||||
|
||||
def test_changing_names(idx):
    original_names = ["first", "second"]
    check_level_names(idx, original_names)

    # Take three derived objects before the names of ``idx`` are touched.
    view = idx.view()
    copy = idx.copy()
    shallow_copy = idx._view()

    # renaming ``idx`` is reflected in the names of its own levels
    idx.names = [f"{name}a" for name in idx.names]
    check_level_names(idx, ["firsta", "seconda"])

    # but not in the objects derived earlier
    for derived in (view, copy, shallow_copy):
        check_level_names(derived, original_names)

    # and renaming a derived object leaves the original untouched
    shallow_copy.names = [f"{name}c" for name in shallow_copy.names]
    check_level_names(idx, ["firsta", "seconda"])
|
||||
|
||||
|
||||
def test_take_preserve_name(idx):
|
||||
taken = idx.take([3, 0, 1])
|
||||
assert taken.names == idx.names
|
||||
|
||||
|
||||
def test_copy_names():
    # Check that adding a "names" parameter to the copy is honored
    # GH14302
    old_names = ["MyName1", "MyName2"]
    new_names = ["NewName1", "NewName2"]
    source = MultiIndex.from_tuples([(1, 2), (3, 4)], names=["MyName1", "MyName2"])

    # plain copy keeps the names
    plain = source.copy()
    assert source.equals(plain)
    assert source.names == old_names
    assert plain.names == old_names

    # ``names=`` replaces them on the copy only
    via_names = source.copy(names=["NewName1", "NewName2"])
    assert source.equals(via_names)
    assert source.names == old_names
    assert via_names.names == new_names

    # ``name=`` is accepted as well
    via_name = source.copy(name=["NewName1", "NewName2"])
    assert source.equals(via_name)
    assert source.names == old_names
    assert via_name.names == new_names

    # gh-35592
    with pytest.raises(ValueError, match="Length of new names must be 2, got 1"):
        source.copy(names=["mario"])

    with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"):
        source.copy(names=[["mario"], ["luigi"]])
|
||||
|
||||
|
||||
def test_names(idx):
|
||||
# names are assigned in setup
|
||||
assert idx.names == ["first", "second"]
|
||||
level_names = [level.name for level in idx.levels]
|
||||
assert level_names == idx.names
|
||||
|
||||
# setting bad names on existing
|
||||
index = idx
|
||||
with pytest.raises(ValueError, match="^Length of names"):
|
||||
setattr(index, "names", list(index.names) + ["third"])
|
||||
with pytest.raises(ValueError, match="^Length of names"):
|
||||
setattr(index, "names", [])
|
||||
|
||||
# initializing with bad names (should always be equivalent)
|
||||
major_axis, minor_axis = idx.levels
|
||||
major_codes, minor_codes = idx.codes
|
||||
with pytest.raises(ValueError, match="^Length of names"):
|
||||
MultiIndex(
|
||||
levels=[major_axis, minor_axis],
|
||||
codes=[major_codes, minor_codes],
|
||||
names=["first"],
|
||||
)
|
||||
with pytest.raises(ValueError, match="^Length of names"):
|
||||
MultiIndex(
|
||||
levels=[major_axis, minor_axis],
|
||||
codes=[major_codes, minor_codes],
|
||||
names=["first", "second", "third"],
|
||||
)
|
||||
|
||||
# names are assigned on index, but not transferred to the levels
|
||||
index.names = ["a", "b"]
|
||||
level_names = [level.name for level in index.levels]
|
||||
assert level_names == ["a", "b"]
|
||||
|
||||
|
||||
def test_duplicate_level_names_access_raises(idx):
|
||||
# GH19029
|
||||
idx.names = ["foo", "foo"]
|
||||
with pytest.raises(ValueError, match="name foo occurs multiple times"):
|
||||
idx._get_level_number("foo")
|
||||
|
||||
|
||||
def test_get_names_from_levels():
    # Each level must expose the name it was given at construction.
    mi = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    first_level, second_level = mi.levels[0], mi.levels[1]
    assert first_level.name == "a"
    assert second_level.name == "b"
|
||||
|
||||
|
||||
def test_setting_names_from_levels_raises():
    mi = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    # Assigning ``name`` on a level object must be refused.
    for position in (0, 1):
        with pytest.raises(RuntimeError, match="set_names"):
            mi.levels[position].name = "foo"

    # The same applies when a level is used as the index of a Series.
    series = pd.Series(1, index=mi.levels[0])
    with pytest.raises(RuntimeError, match="set_names"):
        series.index.name = "bar"

    # The restriction must not leak onto the classes themselves.
    assert pd.Index._no_setting_name is False
    assert pd.RangeIndex._no_setting_name is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y", "z"]),
        ({"x": "z", "y": "x"}, ["z", "x", "z"]),
        ({"y": "z"}, ["x", "z", "x"]),
        ({}, ["x", "y", "x"]),
        ({"z": "a"}, ["x", "y", "x"]),
        ({"y": "z", "a": "b"}, ["x", "z", "x"]),
    ],
)
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names):
    # GH#20421
    # Dict-like renaming on a MultiIndex in which the name "x" occurs twice.
    arrays = [[1, 2], [3, 4], [5, 6]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y", "x"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y"]),
        ({"x": "z", "y": "x"}, ["z", "x"]),
        ({"a": "z"}, ["x", "y"]),
        ({}, ["x", "y"]),
    ],
)
def test_name_mi_with_dict_like(func, rename_dict, exp_names):
    # GH#20421
    # Dict-like renaming on a two-level MultiIndex with unique names.
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_index_name_with_dict_like_raising():
    # GH#20421
    # A flat Index must reject dict-like ``names``.
    flat = pd.Index([1, 2])
    with pytest.raises(
        TypeError, match="Can only pass dict-like as `names` for MultiIndex."
    ):
        flat.set_names({"x": "z"})
|
||||
|
||||
|
||||
def test_multiindex_name_and_level_raising():
    # GH#20421
    # Dict-like ``names`` combined with ``level`` must be rejected.
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
    expected_msg = "Can not pass level for dictlike `names`."
    with pytest.raises(TypeError, match=expected_msg):
        mi.set_names(names={"x": "z"}, level={"x": "z"})
|
@ -0,0 +1,148 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    # Frame with a single column "c1" holding 0..14, indexed by the product
    # of five 12-hourly timestamps and the letters a/b/c:
    #
    #                         c1
    # 2016-01-01 00:00:00 a    0
    #                     b    1
    #                     c    2
    # 2016-01-01 12:00:00 a    3
    #                     b    4
    #                     c    5
    # 2016-01-02 00:00:00 a    6
    #                     b    7
    #                     c    8
    # 2016-01-02 12:00:00 a    9
    #                     b   10
    #                     c   11
    # 2016-01-03 00:00:00 a   12
    #                     b   13
    #                     c   14
    timestamps = date_range("2016-01-01", "2016-01-03", freq="12h")
    letters = ["a", "b", "c"]
    index = MultiIndex.from_product([timestamps, letters])
    return DataFrame({"c1": range(15)}, index=index)
|
||||
|
||||
|
||||
def test_partial_string_matching_single_index(df):
|
||||
# partial string matching on a single index
|
||||
for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]:
|
||||
df_swap = df_swap.sort_index()
|
||||
just_a = df_swap.loc["a"]
|
||||
result = just_a.loc["2016-01-01"]
|
||||
expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
|
||||
expected.index = expected.index.droplevel(1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_loc_partial_timestamp_multiindex(df):
|
||||
mi = df.index
|
||||
key = ("2016-01-01", "a")
|
||||
loc = mi.get_loc(key)
|
||||
|
||||
expected = np.zeros(len(mi), dtype=bool)
|
||||
expected[[0, 3]] = True
|
||||
tm.assert_numpy_array_equal(loc, expected)
|
||||
|
||||
key2 = ("2016-01-02", "a")
|
||||
loc2 = mi.get_loc(key2)
|
||||
expected2 = np.zeros(len(mi), dtype=bool)
|
||||
expected2[[6, 9]] = True
|
||||
tm.assert_numpy_array_equal(loc2, expected2)
|
||||
|
||||
key3 = ("2016-01", "a")
|
||||
loc3 = mi.get_loc(key3)
|
||||
expected3 = np.zeros(len(mi), dtype=bool)
|
||||
expected3[mi.get_level_values(1).get_loc("a")] = True
|
||||
tm.assert_numpy_array_equal(loc3, expected3)
|
||||
|
||||
key4 = ("2016", "a")
|
||||
loc4 = mi.get_loc(key4)
|
||||
expected4 = expected3
|
||||
tm.assert_numpy_array_equal(loc4, expected4)
|
||||
|
||||
# non-monotonic
|
||||
taker = np.arange(len(mi), dtype=np.intp)
|
||||
taker[::2] = taker[::-2]
|
||||
mi2 = mi.take(taker)
|
||||
loc5 = mi2.get_loc(key)
|
||||
expected5 = np.zeros(len(mi2), dtype=bool)
|
||||
expected5[[3, 14]] = True
|
||||
tm.assert_numpy_array_equal(loc5, expected5)
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex(df):
|
||||
# GH10331
|
||||
df_swap = df.swaplevel(0, 1).sort_index()
|
||||
SLC = IndexSlice
|
||||
|
||||
# indexing with IndexSlice
|
||||
result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
|
||||
expected = df
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# match on secondary index
|
||||
result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
|
||||
expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# partial string match on year only
|
||||
result = df.loc["2016"]
|
||||
expected = df
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# partial string match on date
|
||||
result = df.loc["2016-01-01"]
|
||||
expected = df.iloc[0:6]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# partial string match on date and hour, from middle
|
||||
result = df.loc["2016-01-02 12"]
|
||||
# hourly resolution, same as index.levels[0], so we are _not_ slicing on
|
||||
# that level, so that level gets dropped
|
||||
expected = df.iloc[9:12].droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# partial string match on secondary index
|
||||
result = df_swap.loc[SLC[:, "2016-01-02"], :]
|
||||
expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# tuple selector with partial string match on date
|
||||
# "2016-01-01" has daily resolution, so _is_ a slice on the first level.
|
||||
result = df.loc[("2016-01-01", "a"), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
expected = df.iloc[[0, 3]].droplevel(1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Slicing date on first level should break (of course) bc the DTI is the
|
||||
# second level on df_swap
|
||||
with pytest.raises(KeyError, match="'2016-01-01'"):
|
||||
df_swap.loc["2016-01-01"]
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex_str_key_raises(df):
|
||||
# Even though this syntax works on a single index, this is somewhat
|
||||
# ambiguous and we don't want to extend this behavior forward to work
|
||||
# in multi-indexes. This would amount to selecting a scalar from a
|
||||
# column.
|
||||
with pytest.raises(KeyError, match="'2016-01-01'"):
|
||||
df["2016-01-01"]
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex_daily_resolution(df):
|
||||
# GH12685 (partial string with daily resolution or below)
|
||||
result = df.loc[IndexSlice["2013-03":"2013-03", :], :]
|
||||
expected = df.iloc[118:180]
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,10 @@
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
|
||||
|
||||
def test_pickle_compat_construction():
    # Pickle-compat check: building a MultiIndex with no arguments must fail
    # with the message below, because an object is needed to create one.
    expected_msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex()
|
@ -0,0 +1,174 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_reindex(idx):
|
||||
result, indexer = idx.reindex(list(idx[:4]))
|
||||
assert isinstance(result, MultiIndex)
|
||||
assert result.names == ["first", "second"]
|
||||
assert [level.name for level in result.levels] == ["first", "second"]
|
||||
|
||||
result, indexer = idx.reindex(list(idx))
|
||||
assert isinstance(result, MultiIndex)
|
||||
assert indexer is None
|
||||
assert result.names == ["first", "second"]
|
||||
assert [level.name for level in result.levels] == ["first", "second"]
|
||||
|
||||
|
||||
def test_reindex_level(idx):
|
||||
index = Index(["one"])
|
||||
|
||||
target, indexer = idx.reindex(index, level="second")
|
||||
target2, indexer2 = index.reindex(idx, level="second")
|
||||
|
||||
exp_index = idx.join(index, level="second", how="right")
|
||||
exp_index2 = idx.join(index, level="second", how="left")
|
||||
|
||||
assert target.equals(exp_index)
|
||||
exp_indexer = np.array([0, 2, 4])
|
||||
tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)
|
||||
|
||||
assert target2.equals(exp_index2)
|
||||
exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
|
||||
tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)
|
||||
|
||||
with pytest.raises(TypeError, match="Fill method not supported"):
|
||||
idx.reindex(idx, method="pad", level="second")
|
||||
|
||||
|
||||
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
|
||||
# GH6552
|
||||
idx = idx.copy()
|
||||
target = idx.copy()
|
||||
idx.names = target.names = [None, None]
|
||||
|
||||
other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])
|
||||
|
||||
# list & ndarray cases
|
||||
assert idx.reindex([])[0].names == [None, None]
|
||||
assert idx.reindex(np.array([]))[0].names == [None, None]
|
||||
assert idx.reindex(target.tolist())[0].names == [None, None]
|
||||
assert idx.reindex(target.values)[0].names == [None, None]
|
||||
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
|
||||
assert idx.reindex(other_dtype.values)[0].names == [None, None]
|
||||
|
||||
idx.names = ["foo", "bar"]
|
||||
assert idx.reindex([])[0].names == ["foo", "bar"]
|
||||
assert idx.reindex(np.array([]))[0].names == ["foo", "bar"]
|
||||
assert idx.reindex(target.tolist())[0].names == ["foo", "bar"]
|
||||
assert idx.reindex(target.values)[0].names == ["foo", "bar"]
|
||||
assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"]
|
||||
assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    # GH7774
    # An empty target on either level must keep both level names.
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        reindexed, _ = mi.reindex([], level=level)
        assert reindexed.names == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
|
||||
using_infer_string,
|
||||
):
|
||||
# GH7774
|
||||
idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
|
||||
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
|
||||
exp = np.object_ if not using_infer_string else str
|
||||
assert idx.reindex([], level=1)[0].levels[1].dtype.type == exp
|
||||
|
||||
# case with EA levels
|
||||
cat = pd.Categorical(["foo", "bar"])
|
||||
dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
|
||||
mi = MultiIndex.from_product([cat, dti])
|
||||
assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype
|
||||
assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype
|
||||
|
||||
|
||||
def test_reindex_base(idx):
    """``get_indexer`` against itself is the identity; unknown methods raise."""
    positions = idx.get_indexer(idx)
    identity = np.arange(idx.size, dtype=np.intp)
    tm.assert_numpy_array_equal(identity, positions)

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
|
||||
|
||||
|
||||
def test_reindex_non_unique():
    """Reindexing a Series whose MultiIndex holds duplicates raises ValueError."""
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
    ser = pd.Series(np.arange(4), index=duplicated)

    with pytest.raises(ValueError, match="cannot handle a non-unique multi-index!"):
        ser.reindex(target)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """GH41170: level reindex to an absent label gives an empty index and indexer."""
    mi = MultiIndex.from_arrays(values)
    reindexed, indexer = mi.reindex(np.array(["b"]), level=0)

    tm.assert_index_equal(
        reindexed,
        MultiIndex(levels=[["b"], values[1]], codes=[[], []]),
    )
    # The expected indexer borrows the result's dtype; only emptiness is checked.
    tm.assert_numpy_array_equal(indexer, np.array([], dtype=indexer.dtype))
|
||||
|
||||
|
||||
def test_reindex_not_all_tuples():
    """Reindexing onto a flat Index mixing tuples and a scalar marks the scalar -1."""
    entries = [("i", "i"), ("i", "j"), ("j", "i"), "j"]
    target = Index(entries)
    mi = MultiIndex.from_tuples(entries[:-1])

    reindexed, positions = mi.reindex(target)

    tm.assert_index_equal(reindexed, target)
    tm.assert_numpy_array_equal(
        positions, np.array([0, 1, 2, -1], dtype=np.intp)
    )
|
||||
|
||||
|
||||
def test_reindex_limit_arg_with_multiindex():
    """GH21247: ``limit`` without a fill method is rejected on a MultiIndex."""
    source_index = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
    ser = pd.Series([0.02, 0.01, 0.012], index=source_index)

    target_tuples = [
        (3, "A"),
        (3, "B"),
        (4, "A"),
        (4, "B"),
        (4, "C"),
        (5, "B"),
        (5, "C"),
        (6, "B"),
        (6, "C"),
    ]
    target_index = MultiIndex.from_tuples(target_tuples)

    msg = "limit argument only valid if doing pad, backfill or nearest reindexing"
    with pytest.raises(ValueError, match=msg):
        ser.reindex(target_index, fill_value=0, limit=1)
|
||||
|
||||
|
||||
def test_reindex_with_none_in_nested_multiindex():
    """GH42883: ``reindex_like`` works when level values are tuples holding None."""
    left_index = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)])
    right_index = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)])
    left = pd.DataFrame([1, 2], index=left_index)
    right = pd.DataFrame([2, 1], index=right_index)

    # ``right`` holds the same rows in swapped order, so it is also the expected frame.
    tm.assert_frame_equal(left.reindex_like(right), right)
|
@ -0,0 +1,224 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_insert(idx):
    """Cover ``MultiIndex.insert`` and growing a MultiIndex through ``.loc``."""
    # key contained in all levels
    inserted = idx.insert(0, ("bar", "two"))
    assert inserted.equal_levels(idx)
    assert inserted[0] == ("bar", "two")

    # key not contained in all levels
    inserted = idx.insert(0, ("abc", "three"))

    expected_first = Index(list(idx.levels[0]) + ["abc"], name="first")
    tm.assert_index_equal(inserted.levels[0], expected_first)
    assert inserted.names == ["first", "second"]

    expected_second = Index(list(idx.levels[1]) + ["three"], name="second")
    tm.assert_index_equal(inserted.levels[1], expected_second)
    assert inserted[0] == ("abc", "three")

    # key wrong length
    with pytest.raises(
        ValueError, match="Item must have length equal to number of levels"
    ):
        idx.insert(0, ("foo2",))

    base_rows = [["a", "b", 0], ["b", "d", 1]]
    # (index key, value) pairs assigned through ``.loc``, in insertion order
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(base_rows, columns=["1st", "2nd", "3rd"])
    left.set_index(["1st", "2nd"], inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value

    for key, value in additions:
        ts.loc[key] = value

    right = pd.DataFrame(
        base_rows + [[key[0], key[1], value] for key, value in additions],
        columns=["1st", "2nd", "3rd"],
    )
    right.set_index(["1st", "2nd"], inplace=True)
    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])
|
||||
|
||||
|
||||
def test_insert2():
    """GH9250: assigning new tuple keys via ``.loc`` appends them to the Series."""
    existing = [("test1", n) for n in range(5)] + [("test2", n) for n in range(6)]
    appended = [("test", 17), ("test", 18)]

    left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(existing))
    for value, key in enumerate(appended, start=11):
        left.loc[key] = value

    right = pd.Series(
        np.linspace(0, 12, 13), MultiIndex.from_tuples(existing + appended)
    )

    tm.assert_series_equal(left, right)
|
||||
|
||||
|
||||
def test_append(idx):
    """Appending slices of an index (one, several, or none) rebuilds the index."""
    head, tail = idx[:3], idx[3:]
    assert head.append(tail).equals(idx)

    first, *rest = [idx[:1], idx[1:3], idx[3:]]
    assert first.append(rest).equals(idx)

    # empty
    assert idx.append([]).equals(idx)
|
||||
|
||||
|
||||
def test_append_index():
    """Append flat and multi-level indexes to each other in every combination."""
    floats = Index([1.1, 1.2, 1.3])
    dates = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    letters = Index(["A", "B", "C"])

    two_level = MultiIndex.from_arrays([floats, dates])
    three_level = MultiIndex.from_arrays([floats, dates, letters])

    # see gh-7112
    tz = pytz.timezone("Asia/Tokyo")
    stamps = [tz.localize(datetime(2011, 1, day)) for day in (1, 2, 3)]
    pairs = list(zip([1.1, 1.2, 1.3], stamps))
    triples = [
        (number, stamp, letter)
        for (number, stamp), letter in zip(pairs, ["A", "B", "C"])
    ]

    # flat + multi -> flat object index of scalars followed by tuples
    result = floats.append(two_level)
    tm.assert_index_equal(result, Index([1.1, 1.2, 1.3] + pairs))

    # multi + flat -> same content, tuples first
    result = two_level.append(floats)
    tm.assert_index_equal(result, Index(pairs + [1.1, 1.2, 1.3]))

    result = two_level.append(two_level)
    expected = MultiIndex.from_arrays([floats.append(floats), dates.append(dates)])
    tm.assert_index_equal(result, expected)

    # Compared against the same two-level expectation as the previous case.
    result = two_level.append(three_level)
    tm.assert_index_equal(result, expected)

    result = three_level.append(two_level)
    expected = Index._simple_new(
        np.array(triples + pairs, dtype=object),
        None,
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)])
def test_append_names_match(name, exp):
    """GH#48288: a level name survives ``append`` only when both sides agree."""
    left = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_arrays([[3], [5]], names=["a", name])

    combined = left.append(right)

    tm.assert_index_equal(
        combined,
        MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp]),
    )
|
||||
|
||||
|
||||
def test_append_names_dont_match():
    """GH#48288: appending indexes with entirely different names drops all names."""
    left = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_arrays([[3], [5]], names=["x", "y"])

    combined = left.append(right)

    tm.assert_index_equal(
        combined,
        MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None),
    )
|
||||
|
||||
|
||||
def test_append_overlapping_interval_levels():
    """GH 54934: appending MultiIndexes whose interval levels overlap keeps all rows."""

    def _pairs(breaks):
        # Cartesian square of the consecutive intervals defined by ``breaks``.
        intervals = [pd.Interval(lo, hi) for lo, hi in zip(breaks, breaks[1:])]
        return [(outer, inner) for outer in intervals for inner in intervals]

    breaks1 = [0.0, 1.0, 2.0]
    breaks2 = [0.5, 1.5, 2.5]
    ivl1 = pd.IntervalIndex.from_breaks(breaks1)
    ivl2 = pd.IntervalIndex.from_breaks(breaks2)

    mi1 = MultiIndex.from_product([ivl1, ivl1])
    mi2 = MultiIndex.from_product([ivl2, ivl2])

    expected = MultiIndex.from_tuples(_pairs(breaks1) + _pairs(breaks2))
    tm.assert_index_equal(mi1.append(mi2), expected)
|
||||
|
||||
|
||||
def test_repeat():
    """``MultiIndex.repeat`` equals a product built with a repeated inner level."""
    reps = 2
    outer = [1, 2, 3]
    labels = np.array(["foo", "bar"])  # used as inner level values and as names

    mi = MultiIndex.from_product([outer, labels], names=labels)
    repeated_inner = labels.repeat(reps)
    expected = MultiIndex.from_product([outer, repeated_inner], names=labels)

    tm.assert_index_equal(mi.repeat(reps), expected)
|
||||
|
||||
|
||||
def test_insert_base(idx):
    """Re-inserting the dropped first element at position 0 restores the slice."""
    without_first = idx[1:4]

    # test 0th element
    restored = without_first.insert(0, idx[0])
    assert idx[0:4].equals(restored)
|
||||
|
||||
|
||||
def test_delete_base(idx):
    """``delete`` drops the first/last element and rejects out-of-range positions."""
    for position, expected in ((0, idx[1:]), (-1, idx[:-1])):
        result = idx.delete(position)
        assert result.equals(expected)
        assert result.name == expected.name

    with pytest.raises(
        IndexError, match="index 6 is out of bounds for axis 0 with size 6"
    ):
        idx.delete(len(idx))
|
@ -0,0 +1,772 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import (
|
||||
is_float_dtype,
|
||||
is_unsigned_integer_dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation rejects a scalar ``other`` with a TypeError."""
    set_op = getattr(idx, method)

    # non-iterable input
    with pytest.raises(TypeError, match="Input must be Index or array-like"):
        set_op(case, sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_intersection_base(idx, sort, klass):
    """Intersection accepts a MultiIndex or any container of the same tuples."""
    first = idx[2::-1]  # first 3 elements reversed
    second = idx[:5]
    if klass is not MultiIndex:
        # Re-wrap the raw tuples in the parametrized container type.
        second = klass(second.values)

    expected = first.sort_values() if sort is None else first
    tm.assert_index_equal(first.intersection(second, sort=sort), expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.intersection([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.arm_slow
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_union_base(idx, sort, klass):
    """Union accepts a MultiIndex or any container of the same tuples."""
    first = idx[::-1]
    second = idx[:5]
    if klass is not MultiIndex:
        # Re-wrap the raw tuples in the parametrized container type.
        second = klass(second.values)

    expected = first.sort_values() if sort is None else first
    tm.assert_index_equal(first.union(second, sort=sort), expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.union([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_difference_base(idx, sort):
    """Difference accepts a MultiIndex or array-likes of the same tuples."""
    tail = idx[4:]
    expected = idx[:4]
    if sort is None:
        expected = expected.sort_values()

    result = idx.difference(tail, sort=sort)
    assert result.equals(expected)
    tm.assert_index_equal(result, expected)

    # GH 10149
    for container in [klass(tail.values) for klass in (np.array, Series, list)]:
        tm.assert_index_equal(idx.difference(container, sort=sort), expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        idx.difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_symmetric_difference(idx, sort):
    """Symmetric difference of two overlapping slices leaves only the end rows."""
    first = idx[1:]
    second = idx[:-1]
    expected = idx[[-1, 0]]
    if sort is None:
        expected = expected.sort_values()

    tm.assert_index_equal(first.symmetric_difference(second, sort=sort), expected)

    # GH 10149
    for container in [klass(second.values) for klass in (np.array, Series, list)]:
        tm.assert_index_equal(
            first.symmetric_difference(container, sort=sort), expected
        )

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.symmetric_difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_multiindex_symmetric_difference():
    """GH 13490: names are kept for matching operands and dropped otherwise."""
    mi = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
    assert mi.symmetric_difference(mi).names == mi.names

    renamed = mi.copy().rename(["A", "B"])
    assert mi.symmetric_difference(renamed).names == [None, None]
|
||||
|
||||
|
||||
def test_empty(idx):
    """GH 15270: ``empty`` is False for the fixture and True for a zero-length slice."""
    zero_length = idx[:0]
    assert not idx.empty
    assert zero_length.empty
|
||||
|
||||
|
||||
def test_difference(idx, sort):
    """Exercise ``MultiIndex.difference`` on regular and degenerate inputs.

    Parameters
    ----------
    idx : MultiIndex
        Fixture index under test.
        NOTE(review): the literal tuples near the end assume the six-row
        ("first", "second") fixture from the suite's conftest -- confirm.
    sort : object
        Forwarded unchanged to every ``difference`` call; ``None`` is the
        case in which results are expected in sorted order.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values

    if sort is None:
        vals = sorted(vals)

    expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ["foo", "baz"]
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # a non-MultiIndex array of tuples is accepted (it does not raise)
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([("foo", "one")], sort=sort)
    expected = MultiIndex.from_tuples(
        [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
    )
    expected.names = first.names
    assert first.names == result.names
    # Previously ``expected`` was built but never compared; check the content
    # order-independently, and positionally when the result is sorted.
    assert set(result) == set(expected)
    if sort is None:
        tm.assert_index_equal(result, expected)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
|
||||
|
||||
|
||||
def test_difference_sort_special():
    """GH-24959: difference with an empty ``other`` leaves an unsorted index as is."""
    mi = MultiIndex.from_product([[1, 0], ["a", "b"]])

    # sort=None, the default
    tm.assert_index_equal(mi.difference([]), mi)
|
||||
|
||||
|
||||
def test_difference_sort_special_true():
    """With ``sort=True`` the difference against an empty ``other`` is sorted."""
    mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    tm.assert_index_equal(mi.difference([], sort=True), sorted_mi)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable():
    """GH-24959: unsortable levels warn under the default sort, not ``sort=False``."""
    stamp = pd.Timestamp("2000")
    mi = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # sort=None, the default
    with tm.assert_produces_warning(
        RuntimeWarning, match="sort order is undefined for incomparable objects"
    ):
        default_result = mi.difference(other)
    tm.assert_index_equal(default_result, mi)

    # sort=False
    tm.assert_index_equal(mi.difference(other, sort=False), mi)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable_true():
    """``sort=True`` on incomparable level values raises a TypeError."""
    stamp = pd.Timestamp("2000")
    mi = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # TODO: this is raising in constructing a Categorical when calling
    #  algos.safe_sort. Should we catch and re-raise with a better message?
    with pytest.raises(
        TypeError,
        match="'values' is not ordered, please explicitly specify the categories order ",
    ):
        mi.difference(other, sort=True)
|
||||
|
||||
|
||||
def test_union(idx, sort):
    """Union of overlapping pieces, of self, of empty, and of raw tuples."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]

    merged = reversed_head.union(tail, sort=sort)
    if sort in (None, False):
        # Result order is not pinned here; compare after sorting both sides.
        merged, reference = merged.sort_values(), idx.sort_values()
    else:
        reference = idx
    tm.assert_index_equal(merged, reference)

    # corner case, pass self or empty thing:
    tm.assert_index_equal(idx.union(idx, sort=sort), idx)
    tm.assert_index_equal(idx.union(idx[:0], sort=sort), idx)

    as_tuples = idx.values
    result = idx[:4].union(as_tuples[4:], sort=sort)
    if sort is not None:
        assert result.equals(idx)
    else:
        tm.assert_index_equal(result.sort_values(), idx.sort_values())
|
||||
|
||||
|
||||
def test_union_with_regular_index(idx, using_infer_string):
    """Union between a flat string Index and a MultiIndex, in both directions."""
    flat = Index(["A", "B", "C"])

    flat_first = flat.union(idx)
    assert ("foo", "one") in flat_first
    assert "B" in flat_first

    if not using_infer_string:
        with tm.assert_produces_warning(
            RuntimeWarning, match="The values in the array are unorderable"
        ):
            multi_first = idx.union(flat)
        # This is more consistent now, if sorting fails then we don't sort at all
        # in the MultiIndex case.
        assert not flat_first.equals(multi_first)
    else:
        with pytest.raises(NotImplementedError, match="Can only union"):
            idx.union(flat)
|
||||
|
||||
|
||||
def test_intersection(idx, sort):
    """Intersection of overlapping pieces, of self, of disjoint, and of raw tuples."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]

    overlap = reversed_head.intersection(tail, sort=sort)
    if sort not in (None, True):
        # Unsorted result: normalise order before comparing.
        overlap = overlap.sort_values()
    tm.assert_index_equal(overlap, idx[3:5])

    # corner case, pass self
    tm.assert_index_equal(idx.intersection(idx, sort=sort), idx)

    # empty intersection: disjoint
    disjoint = idx[:2].intersection(idx[2:], sort=sort)
    assert disjoint.equals(idx[:0])

    as_tuples = idx.values
    assert idx.intersection(as_tuples).equals(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(idx, sort, method):
    """A categorical flat index of tuples behaves like the MultiIndex, minus names."""
    other = idx.to_flat_index().astype("category")
    res_names = [None] * idx.nlevels

    def _check(categorical_other, multi_other):
        # Same operation on both representations; the expectation drops names.
        result = getattr(idx, method)(categorical_other, sort=sort)
        expected = getattr(idx, method)(multi_other, sort=sort).rename(res_names)
        tm.assert_index_equal(result, expected)

    _check(other, idx)
    _check(other[:5], idx[:5])
|
||||
|
||||
|
||||
def test_intersection_non_object(idx, sort):
    """Intersection with integer data: empty result, or TypeError for raw arrays."""
    other = Index(range(3), name="foo")
    empty_codes = [[]] * idx.nlevels

    result = idx.intersection(other, sort=sort)
    tm.assert_index_equal(
        result,
        MultiIndex(levels=idx.levels, codes=empty_codes, names=None),
        exact=True,
    )

    # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
    result = idx.intersection(np.asarray(other)[:0], sort=sort)
    tm.assert_index_equal(
        result,
        MultiIndex(levels=idx.levels, codes=empty_codes, names=idx.names),
        exact=True,
    )

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        # With non-zero length non-index, we try and fail to convert to tuples
        idx.intersection(np.asarray(other), sort=sort)
|
||||
|
||||
|
||||
def test_intersect_equal_sort():
    """GH-24959: self-intersection keeps order for ``sort=False`` and ``sort=None``."""
    mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort_option in (False, None):
        tm.assert_index_equal(mi.intersection(mi, sort=sort_option), mi)
|
||||
|
||||
|
||||
def test_intersect_equal_sort_true():
    """Self-intersection with ``sort=True`` returns the sorted index."""
    mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    tm.assert_index_equal(mi.intersection(mi, sort=True), sorted_mi)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """Union with an equal or empty ``other`` keeps the original (unsorted) order."""
    # https://github.com/pandas-dev/pandas/issues/24959
    mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = mi[slice_]

    # default, sort=None
    for left, right in ((mi, other), (other, mi)):
        tm.assert_index_equal(left.union(right), mi)

    # sort=False
    tm.assert_index_equal(mi.union(other, sort=False), mi)
|
||||
|
||||
|
||||
def test_union_sort_other_empty_sort():
    """Union with an empty ``other`` and ``sort=True`` returns the sorted index."""
    mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    tm.assert_index_equal(mi.union(mi[:0], sort=True), sorted_mi)
|
||||
|
||||
|
||||
def test_union_sort_other_incomparable():
    """Union of unsortable levels warns under the default sort, not ``sort=False``."""
    # https://github.com/pandas-dev/pandas/issues/24959
    mi = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_row = mi[:1]

    # default, sort=None
    with tm.assert_produces_warning(RuntimeWarning):
        default_result = mi.union(first_row)
    tm.assert_index_equal(default_result, mi)

    # sort=False
    tm.assert_index_equal(mi.union(first_row, sort=False), mi)
|
||||
|
||||
|
||||
def test_union_sort_other_incomparable_sort():
    """``sort=True`` on a union with incomparable level values raises TypeError."""
    mi = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])

    with pytest.raises(
        TypeError,
        match="'<' not supported between instances of 'Timestamp' and 'int'",
    ):
        mi.union(mi[:1], sort=True)
|
||||
|
||||
|
||||
def test_union_non_object_dtype_raises():
    """GH#32646: union with a non-tuple Index raises NotImplementedError."""
    # GH#32646 raise NotImplementedError instead of less-informative error
    mi = MultiIndex.from_product([["a", "b"], [1, 2]])
    int_level = mi.levels[1]

    with pytest.raises(
        NotImplementedError,
        match="Can only union MultiIndex with MultiIndex or Index of tuples",
    ):
        mi.union(int_level)
|
||||
|
||||
|
||||
def test_union_empty_self_different_names():
    """GH#38423: an empty unnamed index unioned with a named one drops the names."""
    empty = MultiIndex.from_arrays([[]])
    named = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])

    tm.assert_index_equal(
        empty.union(named),
        MultiIndex.from_arrays([[1, 2], [3, 4]]),
    )
|
||||
|
||||
|
||||
def test_union_multiindex_empty_rangeindex():
    """GH#41234: union with an empty RangeIndex returns the MultiIndex either way."""
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    empty_range = pd.RangeIndex(0)

    # Names are deliberately excluded from the comparison.
    tm.assert_index_equal(mi, mi.union(empty_range), check_names=False)
    tm.assert_index_equal(mi, empty_range.union(mi), check_names=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_sort_validation(method):
    """Set operations reject an invalid ``sort`` value but accept ``sort=True``."""
    left = MultiIndex.from_product([["a", "b"], [1, 2]])
    right = MultiIndex.from_product([["b", "c"], [1, 2]])
    set_op = getattr(left, method)

    with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
        set_op(right, sort=2)

    # sort=True is supported as of GH#?
    set_op(right, sort=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
    """GH#48606: ``difference`` keeps the extension dtype of a level."""

    def _ea(data):
        # Series of ``data`` in the parametrized extension dtype.
        return Series(data, dtype=any_numeric_ea_dtype)

    midx = MultiIndex.from_arrays([_ea([1, 2]), [2, 1]], names=["a", None])
    midx2 = MultiIndex.from_arrays([_ea([1, 2, val]), [1, 1, 3]])

    tm.assert_index_equal(
        midx.difference(midx2),
        MultiIndex.from_arrays([_ea([1]), [2]]),
    )

    # Difference against a reordered copy of itself is empty but typed and named.
    reordered = midx.sort_values(ascending=False)
    empty_expected = MultiIndex.from_arrays(
        [_ea([]), Series([], dtype=np.int64)],
        names=["a", None],
    )
    tm.assert_index_equal(midx.difference(reordered), empty_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [pd.NA, 5])
def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
    """GH#48607: ``symmetric_difference`` keeps the extension dtype of a level."""

    def _ea(data):
        # Series of ``data`` in the parametrized extension dtype.
        return Series(data, dtype=any_numeric_ea_dtype)

    midx = MultiIndex.from_arrays([_ea([1, 2]), [2, 1]], names=["a", None])
    midx2 = MultiIndex.from_arrays([_ea([1, 2, val]), [1, 1, 3]])

    tm.assert_index_equal(
        midx.symmetric_difference(midx2),
        MultiIndex.from_arrays([_ea([1, 1, val]), [1, 2, 3]]),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("tuples", "exp_tuples"),
    [
        ([("val1", "test1")], [("val1", "test1")]),
        ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
        (
            [("val2", "test2"), ("val1", "test1")],
            [("val2", "test2"), ("val1", "test1")],
        ),
    ],
)
def test_intersect_with_duplicates(tuples, exp_tuples):
    """GH#36915: intersection with duplicated entries returns each match once."""
    level_names = ["first", "second"]
    right_tuples = [("val1", "test1"), ("val1", "test1"), ("val2", "test2")]

    left = MultiIndex.from_tuples(tuples, names=["first", "second"])
    right = MultiIndex.from_tuples(right_tuples, names=["first", "second"])

    tm.assert_index_equal(
        left.intersection(right),
        MultiIndex.from_tuples(exp_tuples, names=level_names),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, names, expected",
    [
        ((1,), None, [None, None]),
        ((1,), ["a"], [None, None]),
        ((1,), ["b"], [None, None]),
        ((1, 2), ["c", "d"], [None, None]),
        ((1, 2), ["b", "a"], [None, None]),
        ((1, 2, 3), ["a", "b", "c"], [None, None]),
        ((1, 2), ["a", "c"], ["a", None]),
        ((1, 2), ["c", "b"], [None, "b"]),
        ((1, 2), ["a", "b"], ["a", "b"]),
        ((1, 2), [None, "b"], [None, "b"]),
    ],
)
def test_maybe_match_names(data, names, expected):
    """GH#38323: ``_maybe_match_names`` keeps a name only where both sides agree."""
    reference = MultiIndex.from_tuples([], names=["a", "b"])
    candidate = MultiIndex.from_tuples([data], names=names)

    assert reference._maybe_match_names(candidate) == expected
|
||||
|
||||
|
||||
def test_intersection_equal_different_names():
    """GH#30302: equal indexes with differing names keep only the shared name."""
    arrays = [[1, 2], [3, 4]]
    left = MultiIndex.from_arrays(arrays, names=["c", "b"])
    right = MultiIndex.from_arrays(arrays, names=["a", "b"])

    tm.assert_index_equal(
        left.intersection(right),
        MultiIndex.from_arrays(arrays, names=[None, "b"]),
    )
|
||||
|
||||
|
||||
def test_intersection_different_names():
    """GH#38323: intersecting a named index with an unnamed one yields no names."""
    named = MultiIndex.from_arrays([[1], [3]], names=["c", "b"])
    unnamed = MultiIndex.from_arrays([[1], [3]])

    tm.assert_index_equal(named.intersection(unnamed), unnamed)
|
||||
|
||||
|
||||
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
    """GH#38623: missing values present on both sides match in an intersection."""
    na = nulls_fixture
    left = MultiIndex.from_arrays([[3, na, 4, na], [1, 2, 4, 2]])
    right = MultiIndex.from_arrays([[3, na, 3], [1, 2, 4]])

    tm.assert_index_equal(
        left.intersection(right),
        MultiIndex.from_arrays([[3, na], [1, 2]]),
    )
|
||||
|
||||
|
||||
def test_union_with_missing_values_on_both_sides(nulls_fixture):
    """GH#38623: a missing value shared by both sides appears once, placed last."""
    na = nulls_fixture
    left = MultiIndex.from_arrays([[1, na]])
    right = MultiIndex.from_arrays([[1, na, 3]])

    tm.assert_index_equal(
        left.union(right),
        MultiIndex.from_arrays([[1, 3, na]]),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
@pytest.mark.parametrize("sort", [None, False])
def test_union_nan_got_duplicated(dtype, sort):
    """GH#38977, GH#49010: a NaN row shared by both sides is not duplicated."""

    def _floats(data):
        # ``data`` as a pandas array of the parametrized float dtype.
        return pd.array(data, dtype=dtype)

    mi1 = MultiIndex.from_arrays([_floats([1.0, np.nan]), [2, 3]])
    mi2 = MultiIndex.from_arrays([_floats([1.0, np.nan, 3.0]), [2, 3, 4]])

    result = mi1.union(mi2, sort=sort)

    if sort is not None:
        expected = mi2
    else:
        expected = MultiIndex.from_arrays([_floats([1.0, 3.0, np.nan]), [2, 4, 3]])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [4, 1])
def test_union_keep_ea_dtype(any_numeric_ea_dtype, val):
    """GH#48505: ``union`` keeps the extension dtype of a level."""

    def _ea(data):
        # Series of ``data`` in the parametrized extension dtype.
        return Series(data, dtype=any_numeric_ea_dtype)

    midx = MultiIndex.from_arrays([_ea([val, 2]), [1, 2]], names=["a", None])
    midx2 = MultiIndex.from_arrays([_ea([2, 1]), [2, 1]])

    result = midx.union(midx2)

    # val == 4 contributes a new row; otherwise both sides hold the same rows.
    first_level, second_level = (
        ([1, 2, 4], [1, 2, 1]) if val == 4 else ([1, 2], [1, 2])
    )
    expected = MultiIndex.from_arrays([_ea(first_level), second_level])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dupe_val", [3, pd.NA])
def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype):
    """GH48900: union with duplicated rows keeps them and the extension dtype."""

    def _two_level(data):
        # MultiIndex whose two levels are separate Series built from ``data``.
        return MultiIndex.from_arrays(
            [Series(data, dtype=any_numeric_ea_dtype) for _ in range(2)]
        )

    mi1 = _two_level([1, dupe_val, 2])
    mi2 = _two_level([2, dupe_val, dupe_val])

    tm.assert_index_equal(
        mi1.union(mi2),
        _two_level([1, 2, dupe_val, dupe_val]),
    )
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_duplicates(index, request):
    """Union with an operand that holds one duplicated row, in both call orders.

    Regression test for GH#38977. Empty, Interval and Categorical indexes
    are skipped.
    """
    if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
        pytest.skip(f"No duplicates in an empty {type(index).__name__}")

    uniques = index.unique().values.tolist()
    count = len(uniques)
    mi1 = MultiIndex.from_arrays([uniques, [1] * count])
    # mi2 repeats the first value once, so it contains a duplicated row.
    mi2 = MultiIndex.from_arrays([[uniques[0], *uniques], [1] * (count + 1)])

    expected = mi2.sort_values()
    tm.assert_index_equal(mi2.union(mi1), expected)

    first_level = mi2.levels[0]
    target_dtype = None
    if (
        is_unsigned_integer_dtype(first_level)
        and (mi2.get_level_values(0) < 2**63).all()
    ):
        # GH#47294 - union uses lib.fast_zip, converting data to Python integers
        # and loses type information. Result is then unsigned only when values
        # are sufficiently large to require unsigned dtype. This happens only if
        # other has dups or one of both have missing values
        target_dtype = np.int64
    elif is_float_dtype(first_level):
        # mi2 has duplicates, which is a different path than above. Fix that
        # path to use the correct float dtype?
        target_dtype = float

    if target_dtype is not None:
        expected = expected.set_levels(
            [expected.levels[0].astype(target_dtype), expected.levels[1]]
        )

    tm.assert_index_equal(mi1.union(mi2), expected)
|
||||
|
||||
|
||||
def test_union_keep_dtype_precision(any_real_numeric_dtype):
    """Union of indexes whose first level uses the fixture's dtype (GH#48498).

    The left operand contains a repeated row; the expected result keeps the
    level names ``["a", None]``.
    """
    dtype = any_real_numeric_dtype
    left = MultiIndex.from_arrays(
        [Series([4, 1, 1], dtype=dtype), [2, 1, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays(
        [Series([1, 4], dtype=dtype), [1, 2]], names=["a", None]
    )

    expected = MultiIndex.from_arrays(
        [Series([1, 1, 4], dtype=dtype), [1, 1, 2]],
        names=["a", None],
    )
    tm.assert_index_equal(left.union(right), expected)
|
||||
|
||||
|
||||
def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype):
    """Union of indexes that each hold a ``pd.NA`` in the first level (GH#48498).

    The two NA rows differ in their second level, so both are expected in
    the result, after the non-missing rows.
    """
    dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([4, pd.NA], dtype=dtype), [2, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([1, pd.NA], dtype=dtype), [1, 2]])

    expected = MultiIndex.from_arrays(
        [Series([1, 4, pd.NA, pd.NA], dtype=dtype), [1, 2, 1, 2]]
    )
    tm.assert_index_equal(left.union(right), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "levels1, levels2, codes1, codes2, names",
    [
        (
            [["a", "b", "c"], [0, ""]],
            [["c", "d", "b"], [""]],
            [[0, 1, 2], [1, 1, 1]],
            [[0, 1, 2], [0, 0, 0]],
            ["name1", "name2"],
        ),
    ],
)
def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
    """The intersection of the two indexes must report a lexsort depth of 2.

    Regression test for GH#25169.
    """
    left = MultiIndex(levels=levels1, codes=codes1, names=names)
    right = MultiIndex(levels=levels2, codes=codes2, names=names)
    common = left.intersection(right)
    assert common._lexsort_depth == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a",
    [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]],
)
@pytest.mark.parametrize(
    "b",
    [
        pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True),
        pd.Categorical(["a", "b"], categories=["b", "a"]),
    ],
)
def test_intersection_with_non_lex_sorted_categories(a, b):
    """Intersect indexes built from frames, with and without pre-sorting.

    The right-hand ``x`` column is a Categorical whose categories are
    listed as ``["b", "a"]``. All four sorted/unsorted combinations must
    give the same result (GH#49974).
    """
    other = ["1", "2"]
    df1 = DataFrame({"x": a, "y": other})
    df2 = DataFrame({"x": b, "y": other})
    expected = MultiIndex.from_arrays([a, other], names=["x", "y"])

    def as_index(frame, presort):
        # Optionally sort the frame by both columns before converting it.
        if presort:
            frame = frame.sort_values(["x", "y"])
        return MultiIndex.from_frame(frame)

    # (sort left?, sort right?) pairs, in the order the cases are checked.
    combos = [(False, True), (False, False), (True, False), (True, True)]
    results = [
        as_index(df1, sort_left).intersection(as_index(df2, sort_right))
        for sort_left, sort_right in combos
    ]

    for res in results:
        tm.assert_index_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
    """Intersection of indexes whose first level uses the fixture's dtype.

    Only the row ``(2, 1)`` is common to both operands (GH#48604).
    """
    dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([1, 2], dtype=dtype), [2, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([1, 2, val], dtype=dtype), [1, 1, 3]])

    common = left.intersection(right)
    expected = MultiIndex.from_arrays([Series([2], dtype=dtype), [1]])
    tm.assert_index_equal(common, expected)
|
||||
|
||||
|
||||
def test_union_with_na_when_constructing_dataframe():
    """Build a DataFrame from two Series whose MultiIndexes share a null key.

    Regression test for GH43222. The first Series' index levels use the
    "string" dtype; the second is built from plain tuples.
    """
    string_null_index = MultiIndex.from_arrays(
        [Series([None], dtype="string"), Series([None], dtype="string")]
    )
    series1 = Series((1,), index=string_null_index)

    tuple_index = MultiIndex.from_tuples(((None, None), ("a", "b")))
    series2 = Series((10, 20), index=tuple_index)

    result = DataFrame([series1, series2])
    expected = DataFrame(
        {(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]}
    )
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,349 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
PerformanceWarning,
|
||||
UnsortedIndexError,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.frozen import FrozenList
|
||||
|
||||
|
||||
def test_sortlevel(idx):
    """Sort a shuffled copy of ``idx`` by each level, ascending and descending.

    The shuffle uses a fixed seed. Expected orderings come from Python's
    ``sorted`` on the tuples.
    """
    shuffled = list(idx)
    np.random.default_rng(2).shuffle(shuffled)
    index = MultiIndex.from_tuples(shuffled)

    # For level 1 the tuples are ordered by (second, first) element.
    orderings = [
        (0, sorted(shuffled)),
        (1, sorted(shuffled, key=lambda pair: (pair[1], pair[0]))),
    ]
    for level, ordered in orderings:
        expected = MultiIndex.from_tuples(ordered)

        sorted_idx, _ = index.sortlevel(level)
        assert sorted_idx.equals(expected)

        sorted_idx, _ = index.sortlevel(level, ascending=False)
        assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
|
||||
def test_sortlevel_not_sort_remaining():
    """With ``sort_remaining=False`` the other levels are left in place.

    Both rows share the same "A" value, so sorting on "A" alone must
    return an index equal to the input.
    """
    rows = [[1, 1, 3], [1, 1, 1]]
    mi = MultiIndex.from_tuples(rows, names=["A", "B", "C"])
    result = mi.sortlevel("A", sort_remaining=False)[0]
    assert result.equals(mi)
|
||||
|
||||
|
||||
def test_sortlevel_deterministic():
    """Sort a fixed, hand-written tuple ordering by each level, both directions."""
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(tuples)

    def check_level(level, ordered):
        # ``ordered`` is the expected ascending order; descending is its reverse.
        expected = MultiIndex.from_tuples(ordered)
        ascending_idx = index.sortlevel(level)[0]
        assert ascending_idx.equals(expected)
        descending_idx = index.sortlevel(level, ascending=False)[0]
        assert descending_idx.equals(expected[::-1])

    check_level(0, sorted(tuples))
    # Level 1 sorts by the second element first, then by the first.
    check_level(1, sorted(tuples, key=lambda pair: (pair[1], pair[0])))
|
||||
|
||||
|
||||
def test_sortlevel_na_position():
|
||||
# GH#51612
|
||||
midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)])
|
||||
result = midx.sortlevel(level=[0, 1], na_position="last")[0]
|
||||
expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_argsort(idx):
    """Check that ``np.argsort(idx)`` matches ``idx.argsort()``.

    The keyword checks below run only when ``idx`` is a CategoricalIndex or
    a RangeIndex; for any other index type only the first comparison runs.
    """
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # Test the instance itself: ``isinstance(type(idx), ...)`` would test the
    # class object and could never be true.
    if isinstance(idx, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, kind="mergesort")

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=("a", "b"))
|
||||
|
||||
|
||||
def test_unsortedindex():
    """Label-based selection on a frame whose MultiIndex is not lexsorted.

    Covers GH 11897 and GH 16734: an exact key lookup works, slicing on
    the second level raises UnsortedIndexError until the frame is sorted.
    """
    keys = [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")]
    mi = MultiIndex.from_tuples(keys, names=["one", "two"])
    rows = [[n, 10 * n] for n in range(6)]
    df = DataFrame(rows, index=mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    first_row = df.loc(axis=0)["z", "a"]
    tm.assert_series_equal(first_row, df.iloc[0])

    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc(axis=0)["z", slice("a")]

    # After sorting in place, slicing on the first level is allowed.
    df.sort_index(inplace=True)
    z_rows = df.loc(axis=0)["z", :]
    assert len(z_rows) == 2

    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
|
||||
|
||||
|
||||
def test_unsortedindex_doc_examples():
    """Exercise the "sorting a MultiIndex" documentation example.

    Before sorting, a full-key lookup emits PerformanceWarning and a tuple
    slice raises UnsortedIndexError. After ``sort_index`` both succeed.
    """
    # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
    frame = DataFrame(
        {
            "jim": [0, 0, 1, 1],
            "joe": ["x", "x", "z", "y"],
            "jolie": np.random.default_rng(2).random(4),
        }
    ).set_index(["jim", "joe"])

    with tm.assert_produces_warning(PerformanceWarning):
        frame.loc[(1, "z")]

    msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)"
    with pytest.raises(UnsortedIndexError, match=msg):
        frame.loc[(0, "y"):(1, "z")]

    unsorted_index = frame.index
    assert not unsorted_index._is_lexsorted()
    assert unsorted_index._lexsort_depth == 1

    # sort it
    frame = frame.sort_index()
    frame.loc[(1, "z")]
    frame.loc[(0, "y"):(1, "z")]

    sorted_index = frame.index
    assert sorted_index._is_lexsorted()
    assert sorted_index._lexsort_depth == 2
|
||||
|
||||
|
||||
def test_reconstruct_sort():
    """``_sort_levels_monotonic`` must return an index equal to its input.

    Three inputs are checked: one that is already monotonic (where the very
    same object must come back) and two that are not monotonic.
    """

    def check_roundtrip(mi, monotonic, same_object=False):
        # Monotonicity must be the same before and after the rebuild.
        assert bool(mi.is_monotonic_increasing) == monotonic
        recons = mi._sort_levels_monotonic()
        assert bool(recons.is_monotonic_increasing) == monotonic
        if same_object:
            assert mi is recons

        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))

    # starts off lexsorted & monotonic
    check_roundtrip(
        MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]),
        monotonic=True,
        same_object=True,
    )

    # cannot convert to lexsorted
    check_roundtrip(
        MultiIndex.from_tuples(
            [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
            names=["one", "two"],
        ),
        monotonic=False,
    )

    # cannot convert to lexsorted
    check_roundtrip(
        MultiIndex(
            levels=[["b", "d", "a"], [1, 2, 3]],
            codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
            names=["col1", "col2"],
        ),
        monotonic=False,
    )
|
||||
|
||||
|
||||
def test_reconstruct_remove_unused():
    """Filtered-out labels stay in the levels until ``remove_unused_levels``.

    Also checks that a second ``remove_unused_levels`` call leaves the
    index unchanged, as judged by ``is_``. xref GH 2770.
    """
    level_names = ["first", "second"]
    df = DataFrame(
        [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
        columns=["first", "second", "third"],
    )
    indexed = df.set_index(["first", "second"], drop=False)
    kept = indexed[indexed["first"] != "deleteMe"]

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=level_names,
    )
    tm.assert_index_equal(kept.index, with_unused)

    without_unused = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=level_names,
    )
    pruned = kept.index.remove_unused_levels()
    tm.assert_index_equal(pruned, without_unused)

    # idempotent
    pruned_again = pruned.remove_unused_levels()
    tm.assert_index_equal(pruned_again, without_unused)
    assert pruned_again.is_(pruned)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    """``remove_unused_levels`` on a large filtered groupby index (GH16556).

    The result must have fewer entries in both levels, still equal the
    original index, and match an index rebuilt via ``reset_index``.
    """
    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.default_rng(10)  # seed is arbitrary value that works

    size = 1 << 16
    # Draw the columns in a fixed order so the random stream is reproducible.
    first = rng.integers(0, 1 << 13, size).astype(first_type)
    second = rng.integers(0, 1 << 10, size).astype(second_type)
    third = rng.random(size)

    grouped = DataFrame({"first": first, "second": second, "third": third})
    grouped = grouped.groupby(["first", "second"]).sum()
    filtered = grouped[grouped.third < 0.1]

    original = filtered.index
    result = original.remove_unused_levels()
    for position in (0, 1):
        assert len(result.levels[position]) < len(original.levels[position])
    assert result.equals(original)

    expected = filtered.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    """``remove_unused_levels`` on an index whose first level has -1 codes.

    The result must equal the input and must not keep an "unused" label
    in either level (GH 18417).
    """
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = MultiIndex(levels=[level0, level1], codes=codes)

    pruned = mi.remove_unused_levels()
    tm.assert_index_equal(pruned, mi)
    for position in range(2):
        assert "unused" not in pruned.levels[position]
|
||||
|
||||
|
||||
def test_argsort(idx):
    """``idx.argsort()`` must match ``argsort`` on the index's ``values`` array."""
    from_values = idx.values.argsort()
    from_index = idx.argsort()
    tm.assert_numpy_array_equal(from_index, from_values)
|
||||
|
||||
|
||||
def test_remove_unused_levels_with_nan():
    """``remove_unused_levels`` after a level has been set to contain NaN.

    Regression test for GH 37510. The resulting levels are compared by
    their string form.
    """
    base = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"])
    relabelled = base.set_levels(["a", np.nan], level="id1")
    levels = relabelled.remove_unused_levels().levels

    expected = FrozenList([["a", np.nan], [4]])
    assert str(levels) == str(expected)
|
||||
|
||||
|
||||
def test_sort_values_nan():
    """``sort_values`` on an index whose second level has -1 codes.

    Regression test for GH48495 and GH48626. The expected result orders the
    rows by the first level and keeps the second-level codes as given.
    """
    levels = [["A", "B", "C"], ["D"]]
    midx = MultiIndex(levels=levels, codes=[[1, 0, 2], [-1, -1, 0]])
    expected = MultiIndex(
        levels=[["A", "B", "C"], ["D"]], codes=[[0, 1, 2], [-1, -1, 0]]
    )
    tm.assert_index_equal(midx.sort_values(), expected)
|
||||
|
||||
|
||||
def test_sort_values_incomparable():
    """``sort_values`` raises TypeError when a level mixes int and Timestamp.

    Regression test for GH48495.
    """
    mixed_level = [1, Timestamp("2000-01-01")]
    mi = MultiIndex.from_arrays([mixed_level, [3, 4]])

    match = "'<' not supported between instances of 'Timestamp' and 'int'"
    with pytest.raises(TypeError, match=match):
        mi.sort_values()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_position", ["first", "last"])
|
||||
@pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"])
|
||||
def test_sort_values_with_na_na_position(dtype, na_position):
|
||||
# 51612
|
||||
arrays = [
|
||||
Series([1, 1, 2], dtype=dtype),
|
||||
Series([1, None, 3], dtype=dtype),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
result = index.sort_values(na_position=na_position)
|
||||
if na_position == "first":
|
||||
arrays = [
|
||||
Series([1, 1, 2], dtype=dtype),
|
||||
Series([None, 1, 3], dtype=dtype),
|
||||
]
|
||||
else:
|
||||
arrays = [
|
||||
Series([1, 1, 2], dtype=dtype),
|
||||
Series([1, None, 3], dtype=dtype),
|
||||
]
|
||||
expected = MultiIndex.from_arrays(arrays)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_sort_unnecessary_warning():
|
||||
# GH#55386
|
||||
midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)])
|
||||
midx = midx.set_levels([2.5, np.nan, 1], level=0)
|
||||
result = midx.sort_values()
|
||||
expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)])
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,78 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_take(idx):
    """``take`` must agree with list indexing, and the index has no ``freq``."""
    positions = [4, 3, 0, 2]
    taken = idx.take(positions)
    assert taken.equals(idx[positions])

    # GH 10791
    msg = "'MultiIndex' object has no attribute 'freq'"
    with pytest.raises(AttributeError, match=msg):
        idx.freq
|
||||
|
||||
|
||||
def test_take_invalid_kwargs(idx):
    """``take`` rejects an unknown keyword and the ``out`` and ``mode`` keywords."""
    indices = [1, 2]

    # (expected exception, message pattern, offending keyword arguments)
    cases = [
        (
            TypeError,
            r"take\(\) got an unexpected keyword argument 'foo'",
            {"foo": 2},
        ),
        (ValueError, "the 'out' parameter is not supported", {"out": indices}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    ]
    for exc_type, msg, kwargs in cases:
        with pytest.raises(exc_type, match=msg):
            idx.take(indices, **kwargs)
|
||||
|
||||
|
||||
def test_take_fill_value():
    """``take`` with and without ``fill_value`` on a str/Timestamp MultiIndex.

    Regression test for GH 12631. Position -1 selects the last row unless
    ``fill_value`` is given with filling allowed, in which case it becomes
    a ``(nan, NaT)`` row.
    """
    day1 = pd.Timestamp("2011-01-01")
    day2 = pd.Timestamp("2011-01-02")
    names = ["str", "dt"]
    idx = pd.MultiIndex.from_product([["A", "B"], [day1, day2]], names=names)

    # Expected when -1 is read as "last row".
    wrapped = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), ("B", day2)], names=names
    )
    # Expected when -1 is read as "missing".
    filled = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), (np.nan, pd.NaT)], names=names
    )

    result = idx.take(np.array([1, 0, -1]))
    tm.assert_index_equal(result, wrapped)

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    tm.assert_index_equal(result, filled)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
    tm.assert_index_equal(result, wrapped)

    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for bad_position in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, bad_position]), fill_value=True)

    msg = "index -5 is out of bounds for( axis 0 with)? size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
|
Reference in New Issue
Block a user