This commit is contained in:
2024-12-04 13:35:57 +05:00
parent d346bf4b2a
commit 73ce681a55
7059 changed files with 1196501 additions and 0 deletions

View File

@ -0,0 +1,80 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestIndexConstructor:
# Tests for the Index constructor, specifically for cases that do
# not return a subclass
@pytest.mark.parametrize("value", [1, np.int64(1)])
def test_constructor_corner(self, value):
# corner case
msg = (
r"Index\(\.\.\.\) must be called with a collection of some "
f"kind, {value} was passed"
)
with pytest.raises(TypeError, match=msg):
Index(value)
@pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
def test_construction_list_mixed_tuples(self, index_vals):
# see gh-10697: if we are constructing from a mixed list of tuples,
# make sure that we are independent of the sorting order.
index = Index(index_vals)
assert isinstance(index, Index)
assert not isinstance(index, MultiIndex)
def test_constructor_cast(self):
msg = "could not convert string to float"
with pytest.raises(ValueError, match=msg):
Index(["a", "b", "c"], dtype=float)
@pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
def test_construct_empty_tuples(self, tuple_list):
# GH #45608
result = Index(tuple_list)
expected = MultiIndex.from_tuples(tuple_list)
tm.assert_index_equal(result, expected)
def test_index_string_inference(self):
# GH#54430
pytest.importorskip("pyarrow")
dtype = "string[pyarrow_numpy]"
expected = Index(["a", "b"], dtype=dtype)
with pd.option_context("future.infer_string", True):
ser = Index(["a", "b"])
tm.assert_index_equal(ser, expected)
expected = Index(["a", 1], dtype="object")
with pd.option_context("future.infer_string", True):
ser = Index(["a", 1])
tm.assert_index_equal(ser, expected)
def test_inference_on_pandas_objects(self):
# GH#56012
idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
result = Index(idx)
assert result.dtype != np.object_
ser = Series([pd.Timestamp("2019-12-31")], dtype=object)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
result = Index(ser)
assert result.dtype != np.object_
def test_constructor_not_read_only(self):
# GH#57130
ser = Series([1, 2], dtype=object)
with pd.option_context("mode.copy_on_write", True):
idx = Index(ser)
assert idx._values.flags.writeable

View File

@ -0,0 +1,163 @@
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas._config.config as cf
from pandas import Index
import pandas._testing as tm
class TestIndexRendering:
    """Tests for the text rendering of ``Index``: ``repr``, ``_summary`` and ``format``."""

    def test_repr_is_valid_construction_code(self):
        """Evaluating ``repr(idx)`` yields an index equal to ``idx``."""
        # for the case of Index, where the repr is traditional rather than
        # stylized
        idx = Index(["a", "b"])
        # eval is applied only to the repr of an index built in this test.
        res = eval(repr(idx))
        tm.assert_index_equal(res, idx)

    # NOTE(review): the continuation lines inside the expected strings below
    # start with a single space. This file appears to have been
    # whitespace-collapsed during extraction, so the literals may no longer
    # match the aligned multi-line repr - confirm against upstream.
    # NOTE(review): the first element of the non-ASCII data is the empty
    # string, while the ASCII data uses 1/2/3-character strings; presumably a
    # one-character string was lost in extraction - confirm.
    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                " dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                " ...\n"
                " 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                " dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["", "いい", "ううう"]),
                """Index(['', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["", "いい", "ううう"] * 10),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', 'ううう', "
                    "'', 'いい', 'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', 'ううう', "
                    "'', 'いい', 'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', "
                    "'ううう'],\n"
                    " dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["", "いい", "ううう"] * 100),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', 'ううう', "
                    "'', 'いい', 'ううう', '',\n"
                    " ...\n"
                    " 'ううう', '', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう'],\n"
                    " dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        """``repr(index)`` must equal the parametrized ``expected`` text exactly."""
        result = repr(index)
        assert result == expected

    # NOTE(review): the same extraction caveats as for test_string_index_repr
    # apply to the expected strings below (single-space continuation indents,
    # empty first element) - confirm against upstream.
    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["", "いい", "ううう"]),
                ("Index(['', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["", "いい", "ううう"] * 10),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう'],\n"
                    " dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["", "いい", "ううう"] * 100),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '',\n"
                    " ...\n"
                    " 'ううう', '', 'いい', 'ううう', '', "
                    "'いい', 'ううう', '', 'いい',\n"
                    " 'ううう'],\n"
                    " dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        """Same exact-match check with ``display.unicode.east_asian_width`` enabled."""
        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        """With ``display.max_seq_items`` at 10, a 1000-element repr is short and elided."""
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
            assert len(result) < 200
            assert "..." in result

    def test_summary_bug(self):
        """Labels containing format-like markers survive ``_summary`` verbatim."""
        # GH#3869
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        """Object index of bools plus NaN: check ``format()`` (deprecated) and ``repr``."""
        # GH32146
        arr = Index([True, False, np.nan], dtype=object)
        msg = "Index.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            exp1 = arr.format()
        # Naming note: exp1/exp2 hold the produced output and out1/out2 the
        # expected text.
        out1 = ["True", "False", "NaN"]
        assert out1 == exp1
        exp2 = repr(arr)
        out2 = "Index([True, False, nan], dtype='object')"
        assert out2 == exp2

    def test_format_different_scalar_lengths(self):
        """``format()`` returns the labels unchanged when their lengths differ."""
        # GH#35439
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"Index\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

View File

@ -0,0 +1,104 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
import pandas as pd
from pandas import (
Index,
NaT,
)
import pandas._testing as tm
class TestGetSliceBounds:
    """Behaviour of ``Index.get_slice_bound`` for present, absent and invalid input."""

    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        """A label that is present resolves to the parametrized position per side."""
        letters = Index(list("abcdef"))
        assert letters.get_slice_bound("e", side=side) == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, side, expected, data, bound):
        """A label beyond either end gives the same bound for both sides."""
        position = Index(data).get_slice_bound(bound, side=side)
        assert position == expected

    def test_get_slice_bounds_invalid_side(self):
        """An unrecognised ``side`` value raises ``ValueError``."""
        expected_msg = "Invalid value for side kwarg"
        with pytest.raises(ValueError, match=expected_msg):
            Index([]).get_slice_bound("a", side="middle")
class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_dtype_mismatch(self):
# GH#25459
indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)
@pytest.mark.parametrize(
"idx_values,idx_non_unique",
[
([np.nan, 100, 200, 100], [np.nan, 100]),
([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
],
)
def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan]))
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
indexes, missing = Index(idx_values).get_indexer_non_unique(
Index(idx_non_unique)
)
tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
class TestGetLoc:
    """Checks for ``Index.get_loc`` on flattened tuples and object-dtype NA values."""

    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
        """A tuple label is found at its own position with a lowered size cutoff."""
        # Go through the libindex path for which using
        # _bin_search vs ndarray.searchsorted makes a difference
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)

            letters = list("ABCD")
            days = pd.date_range("2016-01-01", periods=10)
            flat = pd.MultiIndex.from_product([letters, range(5), days]).to_flat_index()

            middle = len(flat) // 2
            found = flat.get_loc(flat[middle])
        assert found == middle

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        """Each NA flavour only matches entries of the same flavour."""
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # we don't raise KeyError on nan
        assert idx.get_loc(np.nan) == 1

        # we only match on None, not on np.nan
        none_mask = idx.get_loc(None)
        tm.assert_numpy_array_equal(
            none_mask, np.array([False, False, True, False, False, True])
        )

        # we don't match at all on mismatched NA
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)
def test_getitem_boolean_ea_indexer():
    """Index a Series' index with a nullable-boolean mask that contains ``pd.NA`` (GH#45806)."""
    mask = pd.Series([True, False, pd.NA], dtype="boolean")
    selected = mask.index[mask]
    # Only position 0 is expected to be selected.
    tm.assert_index_equal(selected, Index([0]))

View File

@ -0,0 +1,11 @@
from pandas import Index
import pandas._testing as tm
def test_pickle_preserves_object_dtype():
    """An object-dtype index of ints keeps its dtype through a pickle round trip."""
    # GH#43188, GH#43155 don't infer numeric dtype
    original = Index([1, 2, 3], dtype=object)
    restored = tm.round_trip_pickle(original)

    assert restored.dtype == object
    tm.assert_index_equal(original, restored)

View File

@ -0,0 +1,95 @@
"""
Tests for ndarray-like methods on the base Index class
"""
import numpy as np
import pytest
from pandas import Index
import pandas._testing as tm
class TestReshape:
def test_repeat(self):
repeats = 2
index = Index([1, 2, 3])
expected = Index([1, 1, 2, 2, 3, 3])
result = index.repeat(repeats)
tm.assert_index_equal(result, expected)
def test_insert(self):
# GH 7256
# validate neg/pos inserts
result = Index(["b", "c", "d"])
# test 0th element
tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))
# test Nth element that follows Python list behavior
tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))
# test loc +/- neq (0, -1)
tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))
# test empty
null_index = Index([])
tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a"))
def test_insert_missing(self, nulls_fixture, using_infer_string):
# GH#22295
# test there is no mangling of NA values
expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
result = Index(list("abc"), dtype=object).insert(
1, Index([nulls_fixture], dtype=object)
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
)
@pytest.mark.parametrize("loc", [-1, 2])
def test_insert_datetime_into_object(self, loc, val):
# GH#44509
idx = Index(["1", "2", "3"])
result = idx.insert(loc, val)
expected = Index(["1", "2", val, "3"])
tm.assert_index_equal(result, expected)
assert type(expected[2]) is type(val)
def test_insert_none_into_string_numpy(self):
# GH#55365
pytest.importorskip("pyarrow")
index = Index(["a", "b", "c"], dtype="string[pyarrow_numpy]")
result = index.insert(-1, None)
expected = Index(["a", "b", None, "c"], dtype="string[pyarrow_numpy]")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"pos,expected",
[
(0, Index(["b", "c", "d"], name="index")),
(-1, Index(["a", "b", "c"], name="index")),
],
)
def test_delete(self, pos, expected):
index = Index(["a", "b", "c", "d"], name="index")
result = index.delete(pos)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
def test_delete_raises(self):
index = Index(["a", "b", "c", "d"], name="index")
msg = "index 5 is out of bounds for axis 0 with size 4"
with pytest.raises(IndexError, match=msg):
index.delete(5)
def test_append_multiple(self):
index = Index(["a", "b", "c", "d", "e", "f"])
foos = [index[:2], index[2:4], index[4:]]
result = foos[0].append(foos[1:])
tm.assert_index_equal(result, index)
# empty
result = index.append([])
tm.assert_index_equal(result, index)

View File

@ -0,0 +1,266 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort
def equal_contents(arr1, arr2) -> bool:
    """
    Return True when arr1 and arr2 hold the same unique elements.

    Order and multiplicity are ignored; both inputs are reduced to sets
    before being compared.
    """
    unique_first = set(arr1)
    unique_second = set(arr2)
    return unique_first == unique_second
class TestIndexSetOps:
    """Set operations (union, intersection, difference) on the base ``Index``."""

    @pytest.mark.parametrize(
        "method", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_setops_sort_validation(self, method):
        """An unsupported ``sort`` value raises, while ``sort=True`` is accepted."""
        left = Index(["a", "b"])
        right = Index(["b", "c"])

        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
            getattr(left, method)(right, sort=2)

        # sort=True is supported as of GH#??
        getattr(left, method)(right, sort=True)

    def test_setops_preserve_object_dtype(self):
        """Intersection and union of object-dtype ints stay object dtype."""
        idx = Index([1, 2, 3], dtype=object)

        tail_expected = idx[1:]
        tm.assert_index_equal(idx.intersection(idx[1:]), tail_expected)

        # if other is not monotonic increasing, intersection goes through
        # a different route
        tm.assert_index_equal(idx.intersection(idx[1:][::-1]), tail_expected)

        full_expected = idx
        raw_union = idx._union(idx[1:], sort=None)
        tm.assert_numpy_array_equal(raw_union, full_expected.values)

        tm.assert_index_equal(idx.union(idx[1:], sort=None), full_expected)

        # if other is not monotonic increasing, _union goes through
        # a different route
        raw_union = idx._union(idx[1:][::-1], sort=None)
        tm.assert_numpy_array_equal(raw_union, full_expected.values)

        tm.assert_index_equal(idx.union(idx[1:][::-1], sort=None), full_expected)

    def test_union_base(self):
        """Union of two overlapping mixed-type slices comes back sorted."""
        mixed = Index([0, "a", 1, "b", 2, "c"])
        upper, lower = mixed[3:], mixed[:5]

        combined = upper.union(lower)
        tm.assert_index_equal(combined, Index([0, 1, 2, "a", "b", "c"]))

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_union_different_type_base(self, klass):
        """Union accepts array, Series and list arguments (GH 10149)."""
        mixed = Index([0, "a", 1, "b", 2, "c"])
        upper, lower = mixed[3:], mixed[:5]

        combined = upper.union(klass(lower.values))
        assert equal_contents(combined, mixed)

    def test_union_sort_other_incomparable(self):
        """Unsortable unions warn when sorting is attempted, not with ``sort=False``."""
        # https://github.com/pandas-dev/pandas/issues/24959
        idx = Index([1, pd.Timestamp("2000")])

        # default (sort=None)
        with tm.assert_produces_warning(RuntimeWarning):
            by_default = idx.union(idx[:1])
        tm.assert_index_equal(by_default, idx)

        # sort=None
        with tm.assert_produces_warning(RuntimeWarning):
            explicit_none = idx.union(idx[:1], sort=None)
        tm.assert_index_equal(explicit_none, idx)

        # sort=False
        unsorted = idx.union(idx[:1], sort=False)
        tm.assert_index_equal(unsorted, idx)

    def test_union_sort_other_incomparable_true(self):
        """``sort=True`` on incomparable values raises ``TypeError``."""
        idx = Index([1, pd.Timestamp("2000")])
        with pytest.raises(TypeError, match=".*"):
            idx.union(idx[:1], sort=True)

    def test_intersection_equal_sort_true(self):
        """Self-intersection with ``sort=True`` returns sorted labels."""
        idx = Index(["c", "a", "b"])
        in_order = Index(["a", "b", "c"])
        tm.assert_index_equal(idx.intersection(idx, sort=True), in_order)

    def test_intersection_base(self, sort):
        """Intersection of mixed-type slices is sorted only when ``sort is None``."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        mixed = Index([0, "a", 1, "b", 2, "c"])
        first, second = mixed[:5], mixed[:3]

        if sort is None:
            expected = Index([0, 1, "a"])
        else:
            expected = Index([0, "a", 1])

        tm.assert_index_equal(first.intersection(second, sort=sort), expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_intersection_different_type_base(self, klass, sort):
        """Intersection accepts array, Series and list arguments (GH 10149)."""
        mixed = Index([0, "a", 1, "b", 2, "c"])
        first, second = mixed[:5], mixed[:3]

        common = first.intersection(klass(second.values), sort=sort)
        assert equal_contents(common, second)

    def test_intersection_nosort(self):
        """Default intersection with a list keeps the calling index's order here."""
        common = Index(["c", "b", "a"]).intersection(["b", "a"])
        tm.assert_index_equal(common, Index(["b", "a"]))

    def test_intersection_equal_sort(self):
        """Self-intersection leaves order untouched for ``sort`` False and None."""
        idx = Index(["c", "a", "b"])
        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

    def test_intersection_str_dates(self, sort):
        """Object indexes of strings and of datetimes share no elements."""
        stamps = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
        date_index = Index(stamps, dtype=object)
        str_index = Index(["aa"], dtype=object)

        common = str_index.intersection(date_index, sort=sort)
        assert len(common) == 0

    @pytest.mark.parametrize(
        "index2,expected_arr",
        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
    )
    def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
        """Intersection with a non-monotonic, non-unique caller drops duplicates."""
        # non-monotonic non-unique
        index1 = Index(["A", "B", "A", "C"])
        expected = Index(expected_arr)
        if sort is None:
            expected = expected.sort_values()

        tm.assert_index_equal(index1.intersection(index2, sort=sort), expected)

    def test_difference_base(self, sort):
        """Difference of mixed-type slices is sorted only when ``sort is None``."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        mixed = Index([0, "a", 1, "b", 2, "c"])
        first, second = mixed[:4], mixed[3:]

        remainder = first.difference(second, sort)
        expected = Index([0, "a", 1])
        if sort is None:
            expected = Index(safe_sort(expected))
        tm.assert_index_equal(remainder, expected)

    def test_symmetric_difference(self):
        """Symmetric difference of overlapping mixed-type slices comes back sorted."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        mixed = Index([0, "a", 1, "b", 2, "c"])
        first, second = mixed[:4], mixed[3:]

        either_only = first.symmetric_difference(second)
        tm.assert_index_equal(either_only, Index([0, 1, 2, "a", "c"]))

    @pytest.mark.parametrize(
        "method,expected,sort",
        [
            (
                "intersection",
                np.array(
                    [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                False,
            ),
            (
                "intersection",
                np.array(
                    [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
            (
                "union",
                np.array(
                    [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
        ],
    )
    def test_tuple_union_bug(self, method, expected, sort):
        """Set ops on indexes built from structured arrays return 1-D results."""
        record_dtype = [("num", int), ("let", "S1")]
        index1 = Index(
            np.array([(1, "A"), (2, "A"), (1, "B"), (2, "B")], dtype=record_dtype)
        )
        index2 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
                dtype=record_dtype,
            )
        )

        result = getattr(index1, method)(index2, sort=sort)
        assert result.ndim == 1

        tm.assert_index_equal(result, Index(expected))

    @pytest.mark.parametrize("first_list", [["b", "a"], []])
    @pytest.mark.parametrize("second_list", [["a", "b"], []])
    @pytest.mark.parametrize(
        "first_name, second_name, expected_name",
        [("A", "B", None), (None, "B", None), ("A", None, None)],
    )
    def test_union_name_preservation(
        self, first_list, second_list, first_name, second_name, expected_name, sort
    ):
        """A union of indexes with differing names carries the expected name."""
        first = Index(first_list, name=first_name)
        second = Index(second_list, name=second_name)
        union = first.union(second, sort=sort)

        vals = set(first_list).union(second_list)
        both_non_empty = len(first_list) > 0 and len(second_list) > 0

        if sort is None and both_non_empty:
            expected = Index(sorted(vals), name=expected_name)
            tm.assert_index_equal(union, expected)
        else:
            # Order is not pinned down in this branch, so compare sorted copies.
            expected = Index(vals, name=expected_name)
            tm.assert_index_equal(union.sort_values(), expected.sort_values())

    @pytest.mark.parametrize(
        "diff_type, expected",
        [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
    )
    def test_difference_object_type(self, diff_type, expected):
        """Difference variants work on mixed int/str object indexes (GH 13432)."""
        idx1 = Index([0, 1, "A", "B"])
        idx2 = Index([0, 2, "A", "C"])

        result = getattr(idx1, diff_type)(idx2)
        tm.assert_index_equal(result, Index(expected))

View File

@ -0,0 +1,13 @@
import numpy as np
from pandas import Index
import pandas._testing as tm
class TestWhere:
    """Checks for ``Index.where`` on the base ``Index``."""

    def test_where_intlike_str_doesnt_cast_ints(self):
        """Filling with the string ``"2"`` keeps it a string next to the ints."""
        numbers = Index(range(3))
        keep = np.array([True, False, True])

        filled = numbers.where(keep, "2")
        tm.assert_index_equal(filled, Index([0, "2", 2]))

View File

@ -0,0 +1,62 @@
import pytest
from pandas import (
CategoricalIndex,
Index,
)
import pandas._testing as tm
class TestAppend:
    """``append`` behaviour when a ``CategoricalIndex`` is on either side."""

    @pytest.fixture
    def ci(self):
        """Unordered CategoricalIndex over ``"aabbca"`` with categories c, a, b."""
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_append(self, ci):
        """Pieces with identical categories re-assemble into the original."""
        # append cats with the same categories
        rejoined = ci[:3].append(ci[3:])
        tm.assert_index_equal(rejoined, ci, exact=True)

        pieces = [ci[:1], ci[1:3], ci[3:]]
        rejoined = pieces[0].append(pieces[1:])
        tm.assert_index_equal(rejoined, ci, exact=True)

    def test_append_empty(self, ci):
        """Appending an empty list returns an equal index."""
        # empty
        tm.assert_index_equal(ci.append([]), ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        """Passing a bare Categorical (not an Index) raises ``TypeError``."""
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        widened = ci.values.set_categories(list("abcd"))
        with pytest.raises(TypeError, match=msg):
            ci.append(widened)

        reordered = ci.values.reorder_categories(list("abc"))
        with pytest.raises(TypeError, match=msg):
            ci.append(reordered)

    def test_append_category_objects(self, ci):
        """Plain labels that are valid categories keep the categorical dtype."""
        # with objects
        result = ci.append(Index(["c", "a"]))
        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_non_categories(self, ci):
        """Labels outside the categories make the result a plain Index."""
        # invalid objects -> cast to object via concat_compat
        result = ci.append(Index(["a", "d"]))
        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_object(self, ci):
        """A non-categorical caller yields a plain Index (GH#14298)."""
        # GH#14298 - if base object is not categorical -> coerce to object
        result = Index(["c", "a"]).append(ci)
        expected = Index(list("caaabbca"))
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_to_another(self):
        """Appending a CategoricalIndex to a plain Index gives a plain Index."""
        # hits Index._concat
        plain = Index(["a", "b"])
        categorical = CategoricalIndex(["d", "e"])

        result = plain.append(categorical)
        tm.assert_index_equal(result, Index(["a", "b", "d", "e"]))

View File

@ -0,0 +1,90 @@
from datetime import date
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Index,
IntervalIndex,
)
import pandas._testing as tm
class TestAstype:
    """``astype`` conversions starting from or ending at ``CategoricalIndex``."""

    def test_astype(self):
        """Casting to object gives a plain Index; interval categories round-trip."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        as_object = ci.astype(object)
        tm.assert_index_equal(as_object, Index(np.array(ci), dtype=object))

        # this IS equal, but not the same class
        assert as_object.equals(ci)
        assert isinstance(as_object, Index)
        assert not isinstance(as_object, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")
        interval_cat = Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
        ci = CategoricalIndex(interval_cat)

        as_interval = ci.astype("interval")
        expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(as_interval, expected)

        rebuilt = IntervalIndex(as_interval.values)
        tm.assert_index_equal(rebuilt, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        """Casting between categorical dtypes honours categories and ordering (GH#18630)."""
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories
        plain_dtype = CategoricalDtype(ordered=dtype_ordered)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(index.astype(plain_dtype), expected)

        # non-standard categories
        trimmed_dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=trimmed_dtype)
        tm.assert_index_equal(index.astype(trimmed_dtype), expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            tm.assert_index_equal(index.astype("category"), index)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        """``date`` objects survive object -> category -> object unchanged."""
        # astype to categorical and back should preserve date objects
        today = date.today()

        obj = Index([today, today])
        assert obj.dtype == object
        if box:
            obj = obj.array

        round_tripped = obj.astype("category").astype(object)
        assert round_tripped.dtype == object
        assert type(round_tripped[0]) is date

View File

@ -0,0 +1,394 @@
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
from pandas._libs import index as libindex
from pandas._libs.arrays import NDArrayBacked
import pandas as pd
from pandas import (
Categorical,
CategoricalDtype,
)
import pandas._testing as tm
from pandas.core.indexes.api import (
CategoricalIndex,
Index,
)
class TestCategoricalIndex:
@pytest.fixture
def simple_index(self) -> CategoricalIndex:
return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
def test_can_hold_identifiers(self):
idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_insert(self, simple_index):
ci = simple_index
categories = ci.categories
# test 0th element
result = ci.insert(0, "a")
expected = CategoricalIndex(list("aaabbca"), categories=categories)
tm.assert_index_equal(result, expected, exact=True)
# test Nth element that follows Python list behavior
result = ci.insert(-1, "a")
expected = CategoricalIndex(list("aabbcaa"), categories=categories)
tm.assert_index_equal(result, expected, exact=True)
# test empty
result = CategoricalIndex([], categories=categories).insert(0, "a")
expected = CategoricalIndex(["a"], categories=categories)
tm.assert_index_equal(result, expected, exact=True)
# invalid -> cast to object
expected = ci.astype(object).insert(0, "d")
result = ci.insert(0, "d").astype(object)
tm.assert_index_equal(result, expected, exact=True)
# GH 18295 (test missing)
expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
for na in (np.nan, pd.NaT, None):
result = CategoricalIndex(list("aabcb")).insert(1, na)
tm.assert_index_equal(result, expected)
def test_insert_na_mismatched_dtype(self):
ci = CategoricalIndex([0, 1, 1])
result = ci.insert(0, pd.NaT)
expected = Index([pd.NaT, 0, 1, 1], dtype=object)
tm.assert_index_equal(result, expected)
def test_delete(self, simple_index):
ci = simple_index
categories = ci.categories
result = ci.delete(0)
expected = CategoricalIndex(list("abbca"), categories=categories)
tm.assert_index_equal(result, expected, exact=True)
result = ci.delete(-1)
expected = CategoricalIndex(list("aabbc"), categories=categories)
tm.assert_index_equal(result, expected, exact=True)
with tm.external_error_raised((IndexError, ValueError)):
# Either depending on NumPy version
ci.delete(10)
@pytest.mark.parametrize(
"data, non_lexsorted_data",
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
)
def test_is_monotonic(self, data, non_lexsorted_data):
c = CategoricalIndex(data)
assert c.is_monotonic_increasing is True
assert c.is_monotonic_decreasing is False
c = CategoricalIndex(data, ordered=True)
assert c.is_monotonic_increasing is True
assert c.is_monotonic_decreasing is False
c = CategoricalIndex(data, categories=reversed(data))
assert c.is_monotonic_increasing is False
assert c.is_monotonic_decreasing is True
c = CategoricalIndex(data, categories=reversed(data), ordered=True)
assert c.is_monotonic_increasing is False
assert c.is_monotonic_decreasing is True
# test when data is neither monotonic increasing nor decreasing
reordered_data = [data[0], data[2], data[1]]
c = CategoricalIndex(reordered_data, categories=reversed(data))
assert c.is_monotonic_increasing is False
assert c.is_monotonic_decreasing is False
# non lexsorted categories
categories = non_lexsorted_data
c = CategoricalIndex(categories[:2], categories=categories)
assert c.is_monotonic_increasing is True
assert c.is_monotonic_decreasing is False
c = CategoricalIndex(categories[1:3], categories=categories)
assert c.is_monotonic_increasing is True
assert c.is_monotonic_decreasing is False
def test_has_duplicates(self):
idx = CategoricalIndex([0, 0, 0], name="foo")
assert idx.is_unique is False
assert idx.has_duplicates is True
idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
assert idx.is_unique is False
assert idx.has_duplicates is True
idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
assert idx.is_unique is True
assert idx.has_duplicates is False
@pytest.mark.parametrize(
"data, categories, expected",
[
(
[1, 1, 1],
[1, 2, 3],
{
"first": np.array([False, True, True]),
"last": np.array([True, True, False]),
False: np.array([True, True, True]),
},
),
(
[1, 1, 1],
list("abc"),
{
"first": np.array([False, True, True]),
"last": np.array([True, True, False]),
False: np.array([True, True, True]),
},
),
(
[2, "a", "b"],
list("abc"),
{
"first": np.zeros(shape=(3), dtype=np.bool_),
"last": np.zeros(shape=(3), dtype=np.bool_),
False: np.zeros(shape=(3), dtype=np.bool_),
},
),
(
list("abb"),
list("abc"),
{
"first": np.array([False, False, True]),
"last": np.array([False, True, False]),
False: np.array([False, True, True]),
},
),
],
)
def test_drop_duplicates(self, data, categories, expected):
idx = CategoricalIndex(data, categories=categories, name="foo")
for keep, e in expected.items():
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
e = idx[~e]
result = idx.drop_duplicates(keep=keep)
tm.assert_index_equal(result, e)
@pytest.mark.parametrize(
"data, categories, expected_data",
[
([1, 1, 1], [1, 2, 3], [1]),
([1, 1, 1], list("abc"), [np.nan]),
([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
],
)
def test_unique(self, data, categories, expected_data, ordered):
dtype = CategoricalDtype(categories, ordered=ordered)
idx = CategoricalIndex(data, dtype=dtype)
expected = CategoricalIndex(expected_data, dtype=dtype)
tm.assert_index_equal(idx.unique(), expected)
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr doesn't roundtrip")
def test_repr_roundtrip(self):
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
str(ci)
tm.assert_index_equal(eval(repr(ci)), ci, exact=True)
# formatting
str(ci)
# long format
# this is not reprable
ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
str(ci)
def test_isin(self):
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
tm.assert_numpy_array_equal(
ci.isin(["c"]), np.array([False, False, False, True, False, False])
)
tm.assert_numpy_array_equal(
ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
)
tm.assert_numpy_array_equal(
ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
)
# mismatched categorical -> coerced to ndarray so doesn't matter
result = ci.isin(ci.set_categories(list("abcdefghi")))
expected = np.array([True] * 6)
tm.assert_numpy_array_equal(result, expected)
result = ci.isin(ci.set_categories(list("defghi")))
expected = np.array([False] * 5 + [True])
tm.assert_numpy_array_equal(result, expected)
def test_isin_overlapping_intervals(self):
# GH 34974
idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
result = CategoricalIndex(idx).isin(idx)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)
def test_identical(self):
ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
assert ci1.identical(ci1)
assert ci1.identical(ci1.copy())
assert not ci1.identical(ci2)
def test_ensure_copied_data(self):
# gh-12309: Check the "copy" argument of each
# Index.__new__ is honored.
#
# Must be tested separately from other indexes because
# self.values is not an ndarray.
index = CategoricalIndex(list("ab") * 5)
result = CategoricalIndex(index.values, copy=True)
tm.assert_index_equal(index, result)
assert not np.shares_memory(result._data._codes, index._data._codes)
result = CategoricalIndex(index.values, copy=False)
assert result._data._codes is index._data._codes
class TestCategoricalIndex2:
    """CategoricalIndex checks: i8 views, engine type, +/- rejection, category methods."""

    def test_view_i8(self):
        """``view("i8")`` raises for 100 elements and mirrors the codes for 96."""
        # GH#25464
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        # both the index and its backing Categorical must refuse the view
        for container in (ci, ci._data):
            with pytest.raises(ValueError, match=msg):
                container.view("i8")

        ci = ci[:-4]  # length divisible by 8
        cat = ci._data
        expected = cat.codes.view("i8")
        tm.assert_numpy_array_equal(ci.view("i8"), expected)
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        """The index engine class follows the integer width of the codes."""
        if dtype == np.int64:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            widened = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, widened, ci.dtype)
        else:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            uniques_for = {np.int8: 1, np.int16: 128, np.int32: 32768}
            ci = CategoricalIndex(range(uniques_for[dtype]))

        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        """``+`` and ``-`` involving a CategoricalIndex raise TypeError."""
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        # any one of these messages is acceptable
        accepted_messages = (
            f"cannot perform {op_name} with this index type: CategoricalIndex",
            "can only concatenate list",
            rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
        )
        with pytest.raises(TypeError, match="|".join(accepted_messages)):
            func(idx)

    def test_method_delegation(self):
        """Category-manipulation methods are available on the index and return an index."""
        values = list("aabbca")

        ci = CategoricalIndex(values, categories=list("cabdef"))
        tm.assert_index_equal(
            ci.set_categories(list("cab")),
            CategoricalIndex(values, categories=list("cab")),
        )

        ci = CategoricalIndex(values, categories=list("cab"))
        tm.assert_index_equal(
            ci.rename_categories(list("efg")),
            CategoricalIndex(list("ffggef"), categories=list("efg")),
        )

        # GH18862 (let rename_categories take callables)
        tm.assert_index_equal(
            ci.rename_categories(lambda x: x.upper()),
            CategoricalIndex(list("AABBCA"), categories=list("CAB")),
        )

        ci = CategoricalIndex(values, categories=list("cab"))
        tm.assert_index_equal(
            ci.add_categories(["d"]),
            CategoricalIndex(values, categories=list("cabd")),
        )

        ci = CategoricalIndex(values, categories=list("cab"))
        tm.assert_index_equal(
            ci.remove_categories(["c"]),
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(values, categories=list("cabdef"))
        tm.assert_index_equal(ci.as_unordered(), ci)

        ci = CategoricalIndex(values, categories=list("cabdef"))
        tm.assert_index_equal(
            ci.as_ordered(),
            CategoricalIndex(values, categories=list("cabdef"), ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        """Removing a category keeps the reordered category order and orderedness."""
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))

        reordered = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        expected = CategoricalIndex(
            list("abcdda"), categories=list("dcba"), ordered=True
        )
        tm.assert_index_equal(reordered, expected)

        pruned = reordered.remove_categories(["c"])
        expected = CategoricalIndex(
            ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
        )
        tm.assert_index_equal(pruned, expected)

View File

@ -0,0 +1,142 @@
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Index,
)
import pandas._testing as tm
class TestCategoricalIndexConstructors:
    """Construction of CategoricalIndex from lists, Categoricals, indexes and dtypes."""

    def test_construction_disallows_scalar(self):
        """A scalar ``data`` or no ``data`` at all raises TypeError."""
        msg = "must be called with a collection of some kind"
        for data_kwargs in ({"data": 1}, {}):
            with pytest.raises(TypeError, match=msg):
                CategoricalIndex(
                    categories=list("abcd"), ordered=False, **data_kwargs
                )

    def test_construction(self):
        """Categories, codes and orderedness come out as requested for each input."""
        letters = list("aabbca")
        all_present = np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        c_missing = np.array([0, 0, 1, 1, -1, 0], dtype="int8")

        def check(index, expected_categories, expected_codes):
            # categories and codes of ``index`` must match the expectation
            tm.assert_index_equal(index.categories, Index(expected_categories))
            tm.assert_numpy_array_equal(index.codes, expected_codes)

        ci = CategoricalIndex(letters, categories=list("abcd"), ordered=False)
        categories = ci.categories

        # Index() on the index itself or on its values round-trips exactly
        for source in (ci, ci.values):
            result = Index(source)
            tm.assert_index_equal(result, ci, exact=True)
            assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        check(result, categories, np.array([], dtype="int8"))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(letters, categories=categories)
        check(result, categories, all_present)

        c = Categorical(letters)
        result = CategoricalIndex(c)
        check(result, list("abc"), all_present)
        assert not result.ordered

        result = CategoricalIndex(c, categories=categories)
        check(result, categories, all_present)
        assert not result.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        check(result, categories, all_present)
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"))
        check(result, list("ab"), c_missing)
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        check(result, list("ab"), c_missing)
        assert result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        """``Index(..., dtype="category")`` reproduces the CategoricalIndex."""
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)
        as_array = np.array(ci)
        for raw in (as_array, as_array.tolist()):
            tm.assert_index_equal(Index(raw, dtype="category"), ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        rebuilt = Index(np.array(ci), dtype="category")
        rebuilt = rebuilt.reorder_categories(ci.categories)
        tm.assert_index_equal(rebuilt, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        from_index = CategoricalIndex(idx, categories=idx, ordered=True)
        from_list = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        tm.assert_index_equal(from_index, from_list, exact=True)

    def test_construction_empty_with_bool_categories(self):
        """Boolean categories survive construction of an empty index."""
        # see GH#22702
        empty = CategoricalIndex([], categories=[True, False])
        assert sorted(empty.categories.tolist()) == [False, True]

    def test_construction_with_categorical_dtype(self):
        """A CategoricalDtype may replace, but not accompany, categories/ordered."""
        # construction with CategoricalDtype
        # GH#18109
        data = "a a b b".split()
        cats = "c b a".split()
        ordered = True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)
        expected = CategoricalIndex(data, categories=cats, ordered=ordered)

        tm.assert_index_equal(
            CategoricalIndex(data, dtype=dtype), expected, exact=True
        )

        # GH#19032
        tm.assert_index_equal(Index(data, dtype=dtype), expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        for conflicting in ({"categories": cats}, {"ordered": ordered}):
            with pytest.raises(ValueError, match=msg):
                CategoricalIndex(data, dtype=dtype, **conflicting)

View File

@ -0,0 +1,96 @@
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalIndex,
Index,
MultiIndex,
)
class TestEquals:
    """``CategoricalIndex.equals`` and element-wise comparison behaviour."""

    def test_equals_categorical(self):
        """``equals`` and comparison operators for matching and mismatched categoricals."""
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        # element-wise comparisons of the index with itself
        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = "Categoricals can only be compared if 'categories' are the same"
        incompatible = [
            ci2,
            Categorical(ci1.values, ordered=False),
            Categorical(ci1.values, categories=list("abc")),
        ]
        for other in incompatible:
            with pytest.raises(TypeError, match=msg):
                ci1 == other

        # tests
        # make sure that we are testing for category inclusion properly
        plain = list("aabca")
        with_nan = plain + [np.nan]

        ci = CategoricalIndex(plain, categories=["c", "a", "b"])
        assert not ci.equals(plain)
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(plain))
        # Ordered
        assert not ci.equals(CategoricalIndex(plain, ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(with_nan, categories=["c", "a", "b"])
        assert not ci.equals(plain)
        assert not ci.equals(CategoricalIndex(plain))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(with_nan, categories=["c", "a", "b"])
        assert not ci.equals(with_nan)
        assert ci.equals(CategoricalIndex(with_nan))
        assert not ci.equals(CategoricalIndex(with_nan, ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        """Category order is irrelevant to ``equals`` for unordered indexes."""
        # https://github.com/pandas-dev/pandas/issues/16603
        ab_order = CategoricalIndex(["A"], categories=["A", "B"])
        ba_order = CategoricalIndex(["A"], categories=["B", "A"])
        other_value = CategoricalIndex(["C"], categories=["B", "A"])

        assert ab_order.equals(ba_order)
        assert not ab_order.equals(other_value)
        assert not ba_order.equals(other_value)

    def test_equals_non_category(self):
        """An Index holding a non-category value plus NaN is not equal."""
        # GH#37667 Case where other contains a value not among ci's
        # categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        assert not ci.equals(Index(["A", "B", "D", np.nan]))

    def test_equals_multiindex(self):
        """Comparing against the originating MultiIndex returns False."""
        # dont raise NotImplementedError when calling is_dtype_compat
        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        flattened = mi.to_flat_index().astype("category")
        assert not flattened.equals(mi)

    def test_equals_string_dtype(self, any_string_dtype):
        """A string-dtype Index with the same values and name is equal."""
        # GH#55364
        categorical = CategoricalIndex(list("abc"), name="B")
        strings = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
        assert categorical.equals(strings)

View File

@ -0,0 +1,54 @@
import numpy as np
import pytest
from pandas import CategoricalIndex
import pandas._testing as tm
class TestFillNA:
    """``fillna`` on CategoricalIndex compared with the backing Categorical."""

    def test_fillna_categorical(self):
        """Existing categories fill in place; new values raise on the array, cast on the index."""
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")

        # fill by value in categories
        tm.assert_index_equal(
            idx.fillna(1.0), CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        )

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            idx._data.fillna(2.0)

        tm.assert_index_equal(idx.fillna(2.0), idx.astype(object).fillna(2.0))

    def test_fillna_copies_with_no_nas(self):
        """With nothing to fill, the index may share memory but the array copies."""
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        from_index = ci.fillna(0)
        assert from_index is not ci
        assert tm.shares_memory(from_index, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        from_array = cat.fillna(0)
        assert from_array._ndarray is not cat._ndarray
        assert from_array._ndarray.base is None
        assert not tm.shares_memory(from_array, cat)

    def test_fillna_validates_with_no_nas(self):
        """A no-op fill leaves the index unchanged; the Categorical still raises."""
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        msg = "Cannot setitem on a Categorical with a new category"

        # nothing to fill, so we dont cast
        tm.assert_index_equal(ci.fillna(False), ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            ci._data.fillna(False)

View File

@ -0,0 +1,120 @@
"""
Tests for CategoricalIndex.__repr__ and related methods.
"""
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas._config.config as cf
from pandas import CategoricalIndex
import pandas._testing as tm
# Tests that pin the exact text produced by CategoricalIndex.format() and repr().
# NOTE(review): in this view the expected multi-line literals below carry no leading
# alignment whitespace on their continuation lines, and several quoted values are
# empty ('') even though the index a few lines above is built from non-empty
# characters (e.g. list("あいうえおかきくけこさしすせそ")). This looks like extraction
# damage - verify the literals against the upstream file before relying on them.
class TestCategoricalIndexRepr:
def test_format_different_scalar_lengths(self):
# GH#35439
idx = CategoricalIndex(["aaaaaaaaa", "b"])
expected = ["aaaaaaaaa", "b"]
msg = r"CategoricalIndex\.format is deprecated"
# format() must emit the deprecation FutureWarning and return the values as given
with tm.assert_produces_warning(FutureWarning, match=msg):
assert idx.format() == expected
# Marked as an expected failure whenever using_pyarrow_string_dtype() is true.
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
def test_string_categorical_index_repr(self):
# Four scenarios (short / multiple lines / truncated / larger categories) are
# checked three times: ASCII values, Japanese values, and Japanese values with
# the display.unicode.east_asian_width option enabled.
# short
idx = CategoricalIndex(["a", "bb", "ccc"])
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# multiple lines
idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# truncated
idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
...
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa: E501
assert repr(idx) == expected
# larger categories
# (the input string repeats "m", so there are 14 distinct categories for 15 values)
idx = CategoricalIndex(list("abcdefghijklmmo"))
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
'm', 'm', 'o'],
categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# Same scenarios with Japanese values, default display options
# short
idx = CategoricalIndex(["", "いい", "ううう"])
expected = """CategoricalIndex(['', 'いい', 'ううう'], categories=['', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# multiple lines
idx = CategoricalIndex(["", "いい", "ううう"] * 10)
expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '',
'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# truncated
idx = CategoricalIndex(["", "いい", "ううう"] * 100)
expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '',
...
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
assert repr(idx) == expected
# larger categories
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
expected = """CategoricalIndex(['', '', '', '', '', '', '', '', '', '', '', '',
'', '', ''],
categories=['', '', '', '', ..., '', '', '', ''], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# Enable Unicode option -----------------------------------------
# With east_asian_width on, the expected text below wraps after fewer items per
# line than the default-option expectations above.
with cf.option_context("display.unicode.east_asian_width", True):
# short
idx = CategoricalIndex(["", "いい", "ううう"])
expected = """CategoricalIndex(['', 'いい', 'ううう'], categories=['', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# multiple lines
idx = CategoricalIndex(["", "いい", "ううう"] * 10)
expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう',
'', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected
# truncated
idx = CategoricalIndex(["", "いい", "ううう"] * 100)
expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '',
...
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう',
'', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
assert repr(idx) == expected
# larger categories
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
expected = """CategoricalIndex(['', '', '', '', '', '', '', '', '', '',
'', '', '', '', ''],
categories=['', '', '', '', ..., '', '', '', ''], ordered=False, dtype='category')""" # noqa: E501
assert repr(idx) == expected

View File

@ -0,0 +1,420 @@
import numpy as np
import pytest
from pandas.errors import InvalidIndexError
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
IntervalIndex,
Timestamp,
)
import pandas._testing as tm
class TestTake:
    """``CategoricalIndex.take`` with and without ``fill_value``."""

    def test_take_fill_value(self):
        """-1 wraps by default, becomes NaN with ``fill_value``, wraps if ``allow_fill=False``."""
        # GH 12631

        def check(index, take_kwargs, expected):
            # take [1, 0, -1] and compare both the index and its Categorical
            result = index.take(np.array([1, 0, -1]), **take_kwargs)
            tm.assert_index_equal(result, expected)
            tm.assert_categorical_equal(result.values, expected.values)

        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")
        check(idx, {}, CategoricalIndex([2, 1, 3], name="xxx"))

        # fill_value
        check(
            idx,
            {"fill_value": True},
            CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx"),
        )

        # allow_fill=False
        check(
            idx,
            {"allow_fill": False, "fill_value": True},
            CategoricalIndex([2, 1, 3], name="xxx"),
        )

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        check(
            idx,
            {},
            CategoricalIndex(
                list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
            ),
        )

        # fill_value
        check(
            idx,
            {"fill_value": True},
            CategoricalIndex(
                ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
            ),
        )

        # allow_fill=False
        check(
            idx,
            {"allow_fill": False, "fill_value": True},
            CategoricalIndex(
                list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
            ),
        )

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_datetime(self):
        """Same ``take`` semantics for a categorical built from a DatetimeIndex."""
        # datetime category
        dates = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        idx = CategoricalIndex(dates)

        result = idx.take(np.array([1, 0, -1]))
        wrapped = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        tm.assert_index_equal(result, CategoricalIndex(wrapped))

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        with_nat = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        tm.assert_index_equal(result, CategoricalIndex(with_nat, categories=exp_cats))

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        wrapped = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        tm.assert_index_equal(result, CategoricalIndex(wrapped))

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_invalid_kwargs(self):
        """Unknown keywords raise TypeError; ``out`` and ``mode`` raise ValueError."""
        idx = CategoricalIndex([1, 2, 3], name="foo")
        indices = [1, 0, -1]

        # (exception type, message pattern, offending keyword arguments)
        rejected = [
            (TypeError, r"take\(\) got an unexpected keyword argument 'foo'", {"foo": 2}),
            (ValueError, "the 'out' parameter is not supported", {"out": indices}),
            (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
        ]
        for exc_type, msg, bad_kwargs in rejected:
            with pytest.raises(exc_type, match=msg):
                idx.take(indices, **bad_kwargs)
class TestGetLoc:
    """``CategoricalIndex.get_loc`` for unique, repeated and missing keys."""

    def test_get_loc(self):
        """``get_loc`` agrees with a plain Index built from the same values."""
        # GH 12531
        cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
        idx1 = Index(list("abcde"))
        for key in ("a", "e"):
            assert cidx1.get_loc(key) == idx1.get_loc(key)

        for index in (cidx1, idx1):
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                index.get_loc("NOT-EXIST")

        # non-unique
        cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
        idx2 = Index(list("aacded"))

        # results in bool array
        mask = cidx2.get_loc("d")
        tm.assert_numpy_array_equal(mask, idx2.get_loc("d"))
        tm.assert_numpy_array_equal(
            mask, np.array([False, False, False, True, False, True])
        )

        # unique element results in scalar
        position = cidx2.get_loc("e")
        assert position == idx2.get_loc("e")
        assert position == 4

        for index in (cidx2, idx2):
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                index.get_loc("NOT-EXIST")

        # non-unique, sliceable
        cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
        idx3 = Index(list("aabbb"))

        # results in slice
        for key, expected_slice in (("a", slice(0, 2, None)), ("b", slice(2, 5, None))):
            located = cidx3.get_loc(key)
            assert located == idx3.get_loc(key)
            assert located == expected_slice

        for index in (cidx3, idx3):
            with pytest.raises(KeyError, match="'c'"):
                index.get_loc("c")

    def test_get_loc_unique(self):
        """A unique index returns an integer position."""
        assert CategoricalIndex(list("abc")).get_loc("b") == 1

    def test_get_loc_monotonic_nonunique(self):
        """Adjacent duplicates are returned as a slice."""
        located = CategoricalIndex(list("abbc")).get_loc("b")
        assert located == slice(1, 3, None)

    def test_get_loc_nonmonotonic_nonunique(self):
        """Scattered duplicates are returned as a boolean mask."""
        located = CategoricalIndex(list("abcb")).get_loc("b")
        tm.assert_numpy_array_equal(
            located, np.array([False, True, False, True], dtype=bool)
        )

    def test_get_loc_nan(self):
        """NaN can be located like any other key."""
        # GH#41933
        with_nan = CategoricalIndex(["A", "B", np.nan])
        assert with_nan.get_loc(np.nan) == 2
class TestGetIndexer:
    """``get_indexer`` / ``get_indexer_non_unique`` on CategoricalIndex."""

    def test_get_indexer_base(self):
        """Indexing an index with itself yields 0..n-1; a bad method raises."""
        # Determined by cat ordering.
        idx = CategoricalIndex(list("cab"), categories=list("cab"))
        positions = idx.get_indexer(idx)
        tm.assert_numpy_array_equal(np.arange(len(idx), dtype=np.intp), positions)

        with pytest.raises(ValueError, match="Invalid fill method"):
            idx.get_indexer(idx, method="invalid")

    def test_get_indexer_requires_unique(self):
        """``get_indexer`` on an index with duplicates raises InvalidIndexError."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        oidx = Index(np.array(ci))
        msg = "Reindexing only valid with uniquely valued Index objects"

        for size in (1, 2, 5, len(ci)):
            picks = np.random.default_rng(2).integers(0, len(ci), size=size)
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(oidx[picks])

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for target in (list("aabbca"), list("aababca")):
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(target)

    def test_get_indexer_non_unique(self):
        """``get_indexer`` raises while ``get_indexer_non_unique`` returns positions."""
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        for target in (idx2, list("abf"), Index(list("abf"))):
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                idx1.get_indexer(target)

            found, _ = idx1.get_indexer_non_unique(target)
            tm.assert_almost_equal(found, np.array([0, 1, 2, -1], dtype=np.intp))

    def test_get_indexer_method(self):
        """pad / backfill / nearest are rejected with NotImplementedError."""
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        for method in ("pad", "backfill", "nearest"):
            msg = f"method {method} not yet implemented for CategoricalIndex"
            with pytest.raises(NotImplementedError, match=msg):
                idx2.get_indexer(idx1, method=method)

    def test_get_indexer_array(self):
        """An object ndarray of Timestamps is matched against Timestamp categories."""
        first = Timestamp("1999-12-31 00:00:00")
        second = Timestamp("2000-12-31 00:00:00")
        arr = np.array([first, second], dtype=object)
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")

        tm.assert_numpy_array_equal(
            ci.get_indexer(arr), np.array([0, 1], dtype="intp")
        )

    def test_get_indexer_same_categories_same_order(self):
        """Target with identical categories resolves by value."""
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
        target = CategoricalIndex(["b", "b"], categories=["a", "b"])

        tm.assert_numpy_array_equal(
            ci.get_indexer(target), np.array([1, 1], dtype="intp")
        )

    def test_get_indexer_same_categories_different_order(self):
        """Target with reordered categories still resolves by value."""
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
        target = CategoricalIndex(["b", "b"], categories=["b", "a"])

        tm.assert_numpy_array_equal(
            ci.get_indexer(target), np.array([1, 1], dtype="intp")
        )

    def test_get_indexer_nans_in_index_and_target(self):
        """NaN in the target maps to the NaN position; unknown values map to -1."""
        # GH 45361
        ci = CategoricalIndex([1, 2, np.nan, 3])

        # (target values, expected positions)
        cases = [
            ([2, 3, 4, np.nan], [1, 3, -1, 2]),
            ([1, 4, 2, 3], [0, -1, 1, 3]),
        ]
        for target, positions in cases:
            tm.assert_numpy_array_equal(
                ci.get_indexer(target), np.array(positions, dtype=np.intp)
            )
class TestWhere:
    """``CategoricalIndex.where`` with list-like conditions and foreign fill values."""

    def test_where(self, listlike_box):
        """All-True keeps the index; a False position becomes NaN."""
        box = listlike_box
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        size = len(ci)

        keep_all = [True] * size
        tm.assert_index_equal(ci.where(box(keep_all)), ci)

        drop_first = [False] + [True] * (size - 1)
        expected = CategoricalIndex(
            [np.nan] + ci[1:].tolist(), categories=ci.categories
        )
        tm.assert_index_equal(ci.where(box(drop_first)), expected)

    def test_where_non_categories(self):
        """A fill value outside the categories casts the index to object."""
        ci = CategoricalIndex(["a", "b", "c", "d"])
        mask = np.array([True, False, True, False])

        tm.assert_index_equal(
            ci.where(mask, 2), Index(["a", 2, "c", 2], dtype=object)
        )

        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            # Test the Categorical method directly
            ci._data._where(mask, 2)
class TestContains:
    """``in`` checks against CategoricalIndex and its backing Categorical."""

    def test_contains(self):
        """Only values actually present are contained; unused categories and codes are not."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in ci
        for absent in ("z", "e", np.nan):
            assert absent not in ci

        # assert codes NOT in index
        for code in (0, 1):
            assert code not in ci

    def test_contains_nan(self):
        """NaN is contained once the index holds a missing value."""
        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        """Only the NaT flavour matching the categories' dtype is contained."""
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]

        # (categories source, datetime64 NaT contained?, timedelta64 NaT contained?)
        cases = [
            (dti, True, False),
            (tdi, False, True),
            (pi, False, False),
        ]
        for values, has_dt64_nat, has_td64_nat in cases:
            obj = CategoricalIndex(values)
            if unwrap:
                obj = obj._data

            assert np.nan in obj
            assert None in obj
            assert pd.NaT in obj
            assert (np.datetime64("NaT") in obj) is has_dt64_nat
            assert (np.timedelta64("NaT") in obj) is has_td64_nat

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        """Containment on interval categories returns a real bool."""
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        assert (item in ci) is expected

    def test_contains_list(self):
        """A list on the left of ``in`` raises because it is unhashable."""
        # GH#21729
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx

View File

@ -0,0 +1,144 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(data, categories, ordered):
    """Mapping ``str`` keeps the result categorical with the same orderedness."""
    # GH 31202 - override base class since we want to maintain categorical/ordered
    source = CategoricalIndex(data, categories=categories, ordered=ordered)
    mapped = source.map(str)

    values_as_str = map(str, data)
    categories_as_str = map(str, categories)
    expected = CategoricalIndex(
        values_as_str, categories=categories_as_str, ordered=ordered
    )
    tm.assert_index_equal(mapped, expected)
def test_map():
    """``map`` with callables, a Series and a dict on ordered/unordered indexes."""
    ordered_ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
    tm.assert_index_equal(
        ordered_ci.map(lambda x: x.lower()),
        CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True),
    )

    named_ci = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
    )
    tm.assert_index_equal(
        named_ci.map(lambda x: x.lower()),
        CategoricalIndex(
            list("ababc"), categories=list("bac"), ordered=False, name="XXX"
        ),
    )

    # GH 12766: Return an index not an array
    constant = named_ci.map(lambda x: 1)
    tm.assert_index_equal(
        constant, Index(np.array([1] * 5, dtype=np.int64), name="XXX")
    )

    # change categories dtype
    ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)

    def f(x):
        return {"A": 10, "B": 20, "C": 30}.get(x)

    exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)

    # a function, a Series and a dict must all give the same mapping
    mappers = (
        f,
        Series([10, 20, 30], index=["A", "B", "C"]),
        {"A": 10, "B": 20, "C": 30},
    )
    for mapper in mappers:
        tm.assert_index_equal(ci.map(mapper), exp)
def test_map_with_categorical_series():
    """Mapping through a Series gives a categorical result only for a categorical mapper."""
    # GH 12756
    numbers = Index([1, 2, 3, 4])
    labels = ["even", "odd", "even", "odd"]
    categorical_mapper = Series(labels, dtype="category")
    plain_mapper = Series(labels)

    mapped_values = ["odd", "even", "odd", np.nan]
    tm.assert_index_equal(
        numbers.map(categorical_mapper), CategoricalIndex(mapped_values)
    )
    tm.assert_index_equal(numbers.map(plain_mapper), Index(mapped_values))
@pytest.mark.parametrize(
("data", "f", "expected"),
(
([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
(
[1, 1, np.nan],
Series([False, False]),
CategoricalIndex([False, False, np.nan]),
),
(
[1, 2, np.nan],
Series([False, False, False]),
Index([False, False, np.nan]),
),
),
)
def test_map_with_nan_ignore(data, f, expected): # GH 24241
values = CategoricalIndex(data)
result = values.map(f, na_action="ignore")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
("data", "f", "expected"),
(
([1, 1, np.nan], pd.isna, Index([False, False, True])),
([1, 2, np.nan], pd.isna, Index([False, False, True])),
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
(
[1, 1, np.nan],
Series([False, False]),
CategoricalIndex([False, False, np.nan]),
),
(
[1, 2, np.nan],
Series([False, False, False]),
Index([False, False, np.nan]),
),
),
)
def test_map_with_nan_none(data, f, expected): # GH 24241
values = CategoricalIndex(data)
result = values.map(f, na_action=None)
tm.assert_index_equal(result, expected)
def test_map_with_dict_or_series():
    """
    Check that a Series mapper and the equivalent dict mapper give the same
    CategoricalIndex, with the index name preserved.
    """
    orig_values = ["a", "B", 1, "a"]
    new_values = ["one", 2, 3.0, "one"]

    cur_index = CategoricalIndex(orig_values, name="XXX")
    expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])

    # The final element repeats the first, so it is left out of the mappers
    # to keep their keys unique.
    keys = orig_values[:-1]
    targets = new_values[:-1]
    mappers = (Series(targets, index=keys), dict(zip(keys, targets)))

    for mapper in mappers:
        # Order of categories in result can be different
        tm.assert_index_equal(cur_index.map(mapper), expected)

View File

@ -0,0 +1,78 @@
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalIndex,
Index,
Interval,
)
import pandas._testing as tm
class TestReindex:
    """Tests for CategoricalIndex.reindex."""

    def test_reindex_list_non_unique(self):
        """Reindexing a non-unique index with a list raises ValueError."""
        # GH#11586
        duplicated = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            duplicated.reindex(["a", "c"])

    def test_reindex_categorical_non_unique(self):
        """Reindexing a non-unique index with a Categorical raises ValueError."""
        duplicated = CategoricalIndex(["a", "b", "c", "a"])
        target = Categorical(["a", "c"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            duplicated.reindex(target)

    def test_reindex_list_non_unique_unused_category(self):
        """Same as the list case, with an unused category "d" declared."""
        duplicated = CategoricalIndex(
            ["a", "b", "c", "a"], categories=["a", "b", "c", "d"]
        )
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            duplicated.reindex(["a", "c"])

    def test_reindex_categorical_non_unique_unused_category(self):
        """Same as the Categorical case, with an unused category "d" declared."""
        duplicated = CategoricalIndex(
            ["a", "b", "c", "a"], categories=["a", "b", "c", "d"]
        )
        target = Categorical(["a", "c"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            duplicated.reindex(target)

    def test_reindex_duplicate_target(self):
        """A unique index can be reindexed against a target with duplicates."""
        # See GH25459
        all_categories = ["a", "b", "c", "d"]
        labels = ["a", "c", "c"]
        expected_indexer = np.array([0, 2, 2], dtype=np.intp)
        cat = CategoricalIndex(["a", "b", "c"], categories=all_categories)

        # A list target is expected to give a plain Index back.
        new_index, indexer = cat.reindex(labels)
        tm.assert_index_equal(new_index, Index(labels), exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

        # A CategoricalIndex target is expected to stay categorical.
        new_index, indexer = cat.reindex(
            CategoricalIndex(labels, categories=all_categories)
        )
        tm.assert_index_equal(
            new_index,
            CategoricalIndex(labels, categories=all_categories),
            exact=True,
        )
        tm.assert_numpy_array_equal(indexer, expected_indexer)

    def test_reindex_empty_index(self):
        """Reindexing an empty index marks every target label as missing (-1)."""
        # See GH16770
        empty = CategoricalIndex([])
        new_index, indexer = empty.reindex(["a", "b"])
        tm.assert_index_equal(new_index, Index(["a", "b"]), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

    def test_reindex_categorical_added_category(self):
        """Reindexing against a target with extra categories returns the target."""
        # GH 42424
        intervals = [
            Interval(left, left + 1, closed="right") for left in range(4)
        ]
        ci = CategoricalIndex(intervals[:2], ordered=True)
        ci_add = CategoricalIndex(intervals, ordered=True)

        result, _ = ci.reindex(ci_add)
        tm.assert_index_equal(ci_add, result)

View File

@ -0,0 +1,18 @@
import numpy as np
import pytest
from pandas import (
CategoricalIndex,
Index,
)
import pandas._testing as tm
@pytest.mark.parametrize("na_value", [None, np.nan])
def test_difference_with_na(na_value):
    """
    Check CategoricalIndex.difference when both sides contain a missing value.

    The missing entry and "c" are expected to be removed while all three
    categories are retained, for either spelling of the missing value.
    """
    # GH 57318
    left = CategoricalIndex(["a", "b", "c", None])
    right = Index(["c", na_value])
    expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
    tm.assert_index_equal(left.difference(right), expected)

View File

@ -0,0 +1,41 @@
import numpy as np
import pytest
from pandas import (
Series,
array,
)
@pytest.fixture(params=[None, False])
def sort(request):
    """
    Parametrized fixture yielding each valid ``sort`` argument for the Index
    set operations (intersection, union, etc.): ``None`` and ``False``.

    Caution:
        This is not the "sort" fixture used for DataFrame.append or concat,
        whose parameters are [True, False]. The two cannot be merged because
        sort=True is not permitted in the Index setops methods.
    """
    return request.param
@pytest.fixture(params=["D", "3D", "-3D", "h", "2h", "-2h", "min", "2min", "s", "-3s"])
def freq_sample(request):
    """
    Parametrized fixture yielding sample ``freq`` strings (positive and
    negative multiples) for building a date_range or timedelta_range.
    """
    return request.param
@pytest.fixture(params=[list, tuple, np.array, array, Series])
def listlike_box(request):
    """
    Parametrized fixture yielding each list-like constructor whose output
    may be passed as the indexer to searchsorted.
    """
    return request.param

View File

@ -0,0 +1,89 @@
import numpy as np
import pytest
from pandas import (
PeriodIndex,
Series,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class DropDuplicates:
    """
    Shared drop_duplicates checks; subclasses supply the ``idx`` fixture.
    """

    def test_drop_duplicates_metadata(self, idx):
        """Check the values and ``freq`` of the result of drop_duplicates."""
        # GH#10115
        deduped = idx.drop_duplicates()
        tm.assert_index_equal(idx, deduped)
        assert idx.freq == deduped.freq

        doubled = idx.append(idx)
        deduped = doubled.drop_duplicates()

        if isinstance(idx, PeriodIndex):
            expected = idx
            assert deduped.freq == expected.freq
        else:
            # freq is reset except for PeriodIndex
            assert doubled.freq is None
            assert deduped.freq is None
            expected = idx._with_freq(None)

        tm.assert_index_equal(deduped, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.concatenate(([False] * 10, [True] * 5)),
                np.arange(0, 10, dtype=np.int64),
            ),
            (
                "last",
                np.concatenate(([True] * 5, [False] * 10)),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        """
        Check duplicated/drop_duplicates for every ``keep`` mode, on both the
        index and a Series wrapping it.
        """
        # to check Index/Series compat
        duplicated_mask = expected
        # Re-append the first five entries so that they become duplicates.
        extended = idx.append(idx[:5])

        tm.assert_numpy_array_equal(extended.duplicated(keep=keep), duplicated_mask)

        kept = extended[~duplicated_mask]
        tm.assert_index_equal(extended.drop_duplicates(keep=keep), kept)

        series_result = Series(extended).drop_duplicates(keep=keep)
        tm.assert_series_equal(series_result, Series(kept, index=index))
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    """Run the shared drop_duplicates checks against PeriodIndex."""

    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        """Period frequencies to test; only positive multiples are listed."""
        return request.param

    @pytest.fixture
    def idx(self, freq):
        """Ten consecutive periods named "idx" at the given frequency."""
        return period_range(start="2011-01-01", periods=10, freq=freq, name="idx")
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    """Run the shared drop_duplicates checks against DatetimeIndex."""

    @pytest.fixture
    def idx(self, freq_sample):
        """Ten timestamps named "idx" spaced by the sampled frequency."""
        return date_range(start="2011-01-01", periods=10, freq=freq_sample, name="idx")
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    """Run the shared drop_duplicates checks against TimedeltaIndex."""

    @pytest.fixture
    def idx(self, freq_sample):
        """Ten timedeltas named "idx" spaced by the sampled frequency."""
        return timedelta_range(start="1 day", periods=10, freq=freq_sample, name="idx")

View File

@ -0,0 +1,181 @@
"""
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
"""
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalIndex,
DatetimeIndex,
Index,
PeriodIndex,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class EqualsTests:
    """
    Shared ``equals`` checks; subclasses supply the ``index`` fixture.
    """

    def test_not_equals_numeric(self, index):
        """An index never equals a numeric Index of its i8 values."""
        int_values = index.asi8
        numeric_variants = (
            Index(int_values),
            Index(int_values.astype("u8")),
            Index(int_values).astype("f8"),
        )
        for numeric in numeric_variants:
            assert not index.equals(numeric)

    def test_equals(self, index):
        """An index equals itself and its object/categorical casts."""
        as_object = index.astype(object)
        equivalents = (
            index,
            as_object,
            CategoricalIndex(index),
            CategoricalIndex(as_object),
        )
        for equivalent in equivalents:
            assert index.equals(equivalent)

    def test_not_equals_non_arraylike(self, index):
        """A plain list of the same elements is not considered equal."""
        as_list = list(index)
        assert not index.equals(as_list)

    def test_not_equals_strings(self, index):
        """String representations of the elements are not considered equal."""
        as_strings = Index([str(element) for element in index], dtype=object)
        assert not index.equals(as_strings)
        assert not index.equals(CategoricalIndex(as_strings))

    def test_not_equals_misc_strs(self, index):
        """Unrelated strings are not considered equal."""
        letters = Index(list("abc"))
        assert not index.equals(letters)
class TestPeriodIndexEquals(EqualsTests):
    """Run the shared ``equals`` checks against PeriodIndex."""

    @pytest.fixture
    def index(self):
        """Five consecutive daily periods."""
        return period_range(start="2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        """PeriodIndex.equals is sensitive to freq, not only to the ordinals."""
        # GH#13107
        dates = ["2011-01-01", "2011-01-02", "NaT"]
        idx = PeriodIndex(dates, freq=freq)

        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same labels, different freq
        idx2 = PeriodIndex(dates, freq="h")
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)

        # same internal (i8) values, different freq
        hourly_values = idx._values._simple_new(
            idx._values.asi8, dtype=pd.PeriodDtype("h")
        )
        idx3 = PeriodIndex._simple_new(hourly_values)
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (
            idx3,
            idx3.copy(),
            idx3.astype(object),
            list(idx3),
            pd.Series(idx3),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)
class TestDatetimeIndexEquals(EqualsTests):
    """Run the shared ``equals`` checks against DatetimeIndex."""

    @pytest.fixture
    def index(self):
        """Five consecutive timestamps starting 2013-01-01."""
        return date_range(start="2013-01-01", periods=5)

    def test_equals2(self):
        """DatetimeIndex.equals is sensitive to tz and tolerates OOB values."""
        # GH#13107
        dates = ["2011-01-01", "2011-01-02", "NaT"]
        idx = DatetimeIndex(dates)

        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same wall times, different tz
        idx2 = DatetimeIndex(dates, tz="US/Pacific")
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (
            idx3,
            idx3.copy(),
            idx3.astype(object),
            list(idx3),
            pd.Series(idx3),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        for candidate in (idx, idx2, idx3):
            assert not candidate.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        for candidate in (idx, idx2, idx3):
            assert not candidate.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        """A business-day range does not equal the list of its elements."""
        business_days = date_range("2009-01-01", "2010-01-01", freq=freq)
        as_list = list(business_days)
        assert not business_days.equals(as_list)
class TestTimedeltaIndexEquals(EqualsTests):
    """Run the shared ``equals`` checks against TimedeltaIndex."""

    @pytest.fixture
    def index(self):
        """Ten consecutive timedeltas starting at one day."""
        return timedelta_range(start="1 day", periods=10)

    def test_equals2(self):
        """TimedeltaIndex.equals is order-sensitive and tolerates OOB values."""
        # GH#13107
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])

        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same elements, different order
        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))

        # Check that we dont raise OverflowError on comparisons outside the
        # implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        for candidate in (idx, idx2):
            assert not candidate.equals(oob)

        # The same out-of-bounds values held as np.timedelta64 objects.
        oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
        assert (oob == oob2).all()
        for candidate in (idx, idx2):
            assert not candidate.equals(oob2)

        # ... and produced through Index.map instead.
        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        for candidate in (idx, idx2):
            assert not candidate.equals(oob3)

View File

@ -0,0 +1,45 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
)
import pandas._testing as tm
# Datetime-like dtypes used to parametrize both sides of
# test_get_indexer_non_unique_wrong_dtype.
# NOTE: the period dtype must stay last; that test's ``construct`` helper
# recognises it via ``dtlike_dtypes[-1]``.
dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]
@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    """
    Check get_indexer_non_unique across every pair of datetime-like dtypes.

    Both indexes share the same int64 values. Matches are expected only when
    the two dtypes are the same object; otherwise every target is missing.
    """
    # Three hourly steps in nanoseconds, repeated so each value occurs twice.
    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)

    def construct(dtype):
        if dtype is not dtlike_dtypes[-1]:
            return Index(vals, dtype=dtype)
        # PeriodArray will try to cast ints to strings
        return DatetimeIndex(vals).astype(dtype)

    left = construct(ldtype)
    right = construct(rdtype)
    indexer, missing = left.get_indexer_non_unique(right)

    if ldtype is rdtype:
        # Each target value is found at two positions of ``left``.
        expected_indexer = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        expected_missing = np.array([], dtype=np.intp)
    else:
        expected_indexer = np.array([-1] * 6, dtype=np.intp)
        expected_missing = np.arange(6, dtype=np.intp)

    tm.assert_numpy_array_equal(indexer, expected_indexer)
    tm.assert_numpy_array_equal(missing, expected_missing)

View File

@ -0,0 +1,46 @@
from pandas import (
Index,
NaT,
date_range,
)
def test_is_monotonic_with_nat():
    """
    Check monotonicity/uniqueness of datetime-like indexes and their engines.

    Without NaT the objects must be monotonic increasing; once a NaT is
    inserted (at the front or at the end) they must be neither increasing
    nor decreasing, while remaining unique.
    """
    # GH#31437
    # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
    # in particular never be monotonic when we have NaT
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    # Indexes and engines expose the same three properties, so one set of
    # assertions covers both kinds of object.
    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        assert obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT at the front
    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT at the end
    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("h")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

View File

@ -0,0 +1,53 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
NaT,
PeriodIndex,
TimedeltaIndex,
)
import pandas._testing as tm
class NATests:
    """
    Shared NaT checks; subclasses supply a two-element ``index_without_na``.
    """

    def test_nat(self, index_without_na):
        """Check ``_na_value``, ``_can_hold_na``, ``_isnan`` and ``hasnans``."""
        empty_index = index_without_na[:0]

        # Work on a deep copy and overwrite its second element before any of
        # the NaN-related properties are read from it.
        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        for candidate in (empty_index, index_with_na, index_without_na):
            assert candidate._na_value is NaT

        cases = (
            (index_without_na, [False, False], False),
            (index_with_na, [False, True], True),
        )
        for idx, expected_mask, expected_hasnans in cases:
            assert idx._can_hold_na
            tm.assert_numpy_array_equal(idx._isnan, np.array(expected_mask))
            assert idx.hasnans is expected_hasnans
class TestDatetimeIndexNA(NATests):
    """Run the shared NaT checks against DatetimeIndex."""

    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        """Two timestamps without missing values, in the fixture's tz."""
        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz_naive_fixture)
class TestTimedeltaIndexNA(NATests):
    """Run the shared NaT checks against TimedeltaIndex."""

    @pytest.fixture
    def index_without_na(self):
        """Two timedeltas without missing values."""
        values = ["1 days", "2 days"]
        return TimedeltaIndex(values)
class TestPeriodIndexNA(NATests):
    """Run the shared NaT checks against PeriodIndex."""

    @pytest.fixture
    def index_without_na(self):
        """Two daily periods without missing values."""
        values = ["2011-01-01", "2011-01-02"]
        return PeriodIndex(values, freq="D")

View File

@ -0,0 +1,315 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
Index,
NaT,
PeriodIndex,
TimedeltaIndex,
timedelta_range,
)
import pandas._testing as tm
def check_freq_ascending(ordered, orig, ascending):
    """
    Assert the freq of a sorted PeriodIndex/DatetimeIndex/TimedeltaIndex
    whose original index is generated (or generate-able) with
    period_range/date_range/timedelta_range.

    A PeriodIndex must keep the original freq. For DatetimeIndex and
    TimedeltaIndex the freq multiple ``n`` must match the original when
    ``ascending`` is true and be its negation otherwise. Any other index
    type is not checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return
    if not isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        return

    expected_n = orig.freq.n if ascending else -1 * orig.freq.n
    assert ordered.freq.n == expected_n
def check_freq_nonmonotonic(ordered, orig):
    """
    Assert the freq of a sorted PeriodIndex/DatetimeIndex/TimedeltaIndex
    whose original index is _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.

    A PeriodIndex must keep the original freq; a DatetimeIndex or
    TimedeltaIndex must have no freq. Any other index type is not checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return
    if isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        assert ordered.freq is None
class TestSortValues:
    """Tests for sort_values on DatetimeIndex/TimedeltaIndex/PeriodIndex."""

    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        """
        Three-element index of the requested class whose largest value is
        first and smallest value second.
        """
        if request.param is DatetimeIndex:
            return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        elif request.param is PeriodIndex:
            dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
            return dti.to_period("D")
        else:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )

    def test_argmin_argmax(self, non_monotonic_idx):
        """argmin/argmax return the positions of the smallest/largest value."""
        assert non_monotonic_idx.argmin() == 1
        assert non_monotonic_idx.argmax() == 0

    def test_sort_values(self, non_monotonic_idx):
        """Check ordering and returned indexer in both sort directions."""
        idx = non_monotonic_idx
        ordered = idx.sort_values()
        assert ordered.is_monotonic_increasing

        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic_increasing

        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))

    def check_sort_values_with_freq(self, idx):
        """
        Shared checks for an already-sorted three-element ``idx``: values,
        indexer and freq after sorting in each direction.
        """
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        check_freq_ascending(ordered, idx, True)

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        check_freq_ascending(ordered, idx, False)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
        check_freq_ascending(ordered, idx, True)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
        check_freq_ascending(ordered, idx, False)

    @pytest.mark.parametrize("freq", ["D", "h"])
    def test_sort_values_with_freq_timedeltaindex(self, freq):
        """Run the with-freq checks on a regular TimedeltaIndex."""
        # GH#10295
        idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")

        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        """Run the with-freq checks on naive and tz-aware DatetimeIndex."""
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
    def test_sort_values_with_freq_periodindex(self, freq):
        """Run the with-freq checks on a PeriodIndex at several freqs."""
        # here with_freq refers to being period_range-like
        idx = PeriodIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
        )
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        """Run the with-freq checks on a yearly PeriodIndex and an int Index."""
        # here with_freq indicates this is period_range-like
        self.check_sort_values_with_freq(idx)

    def check_sort_values_without_freq(self, idx, expected):
        """
        Shared checks for a five-element, non-monotonic ``idx`` whose sorted
        form (missing values first) is ``expected``.

        The default-``na_position`` ascending sorts are only compared when
        ``idx`` has no missing values.
        """
        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, exp)
            check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

    def test_sort_values_without_freq_timedeltaindex(self):
        """Run the without-freq checks on an irregular TimedeltaIndex."""
        # GH#10295

        idx = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        expected = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )
        self.check_sort_values_without_freq(idx, expected)

    # The no-NaT case used to be listed twice with identical values; a single
    # copy is kept since the duplicate exercised nothing new.
    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_sort_values_without_freq_datetimeindex(
        self, index_dates, expected_dates, tz_naive_fixture
    ):
        """Run the without-freq checks on DatetimeIndex, with and without NaT."""
        tz = tz_naive_fixture

        # without freq
        idx = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "idx,expected",
        [
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        """Run the without-freq checks on PeriodIndex and an int Index."""
        # here without_freq means not generateable by period_range
        self.check_sort_values_without_freq(idx, expected)

    def test_sort_values_without_freq_periodindex_nat(self):
        """Check a four-element PeriodIndex containing a single NaT."""
        # doesn't quite fit into check_sort_values_without_freq
        idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)
def test_order_stability_compat():
    """
    Check that a descending sort yields the same indexer for a yearly
    PeriodIndex and for an integer Index holding the same years (with ties).
    """
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    years = [2011, 2013, 2015, 2012, 2011]
    pidx = PeriodIndex([str(year) for year in years], name="pidx", freq="Y")
    iidx = Index(years, name="idx")

    _, period_indexer = pidx.sort_values(return_indexer=True, ascending=False)
    _, int_indexer = iidx.sort_values(return_indexer=True, ascending=False)
    tm.assert_numpy_array_equal(period_indexer, int_indexer)

View File

@ -0,0 +1,103 @@
import numpy as np
from pandas import (
DatetimeIndex,
NaT,
PeriodIndex,
Series,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class TestValueCounts:
    """Tests for value_counts/unique on datetime-like indexes."""

    # GH#7735

    def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
        """Run the repeated-values checks on an hourly DatetimeIndex."""
        hourly = date_range(
            "2011-01-01 09:00", freq="h", periods=10, tz=tz_naive_fixture
        )
        self._check_value_counts_with_repeats(hourly)

    def test_value_counts_unique_timedeltaindex(self):
        """Run the repeated-values checks on an hourly TimedeltaIndex."""
        hourly = timedelta_range("1 days 09:00:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(hourly)

    def test_value_counts_unique_periodindex(self):
        """Run the repeated-values checks on an hourly PeriodIndex."""
        hourly = period_range("2011-01-01 09:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(hourly)

    def _check_value_counts_with_repeats(self, orig):
        """
        Build an index repeating each element of the ten-element ``orig`` an
        increasing number of times, then check value_counts and unique.
        """
        # create repeated values, 'n'th element is repeated by n+1 times
        repeat_counts = range(1, len(orig) + 1)
        repeated_values = np.repeat(orig._values, repeat_counts)
        idx = type(orig)(repeated_values, dtype=orig.dtype)

        # Most frequent first, i.e. the original order reversed.
        exp_idx = orig[::-1]
        if not isinstance(exp_idx, PeriodIndex):
            exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count")

        for container in (idx, Series(idx)):
            tm.assert_series_equal(container.value_counts(), expected)

        tm.assert_index_equal(idx.unique(), orig)

    def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
        """Run the dropna checks on a DatetimeIndex containing NaT."""
        stamps = ["2013-01-01 09:00"] * 3 + ["2013-01-01 08:00"] * 2 + [NaT]
        idx = DatetimeIndex(stamps, tz=tz_naive_fixture)
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_timedeltaindex2(self):
        """Run the dropna checks on a TimedeltaIndex containing NaT."""
        deltas = ["1 days 09:00:00"] * 3 + ["1 days 08:00:00"] * 2 + [NaT]
        idx = TimedeltaIndex(deltas)
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_periodindex2(self):
        """Run the dropna checks on an hourly PeriodIndex containing NaT."""
        stamps = ["2013-01-01 09:00"] * 3 + ["2013-01-01 08:00"] * 2 + [NaT]
        idx = PeriodIndex(stamps, freq="h")
        self._check_value_counts_dropna(idx)

    def _check_value_counts_dropna(self, idx):
        """
        Check value_counts with and without ``dropna`` and unique, for a
        six-element ``idx`` laid out as 3x value A, 2x value B, 1x NaT.
        """
        # Default: the NaT entry is not counted.
        without_na = idx[[2, 3]]
        expected = Series([3, 2], index=without_na, name="count")
        for container in (idx, Series(idx)):
            tm.assert_series_equal(container.value_counts(), expected)

        # dropna=False: NaT is counted as a value of its own.
        with_na = idx[[2, 3, -1]]
        expected = Series([3, 2, 1], index=with_na, name="count")
        for container in (idx, Series(idx)):
            tm.assert_series_equal(container.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), with_na)

Some files were not shown because too many files have changed in this diff Show More