forked from Alsan/Post_finder
venv
Binary files not shown.
@@ -0,0 +1,80 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
    Series,
)
import pandas._testing as tm


class TestIndexConstructor:
    # Tests for the Index constructor, specifically for cases that do
    # not return a subclass

    @pytest.mark.parametrize("value", [1, np.int64(1)])
    def test_constructor_corner(self, value):
        # corner case
        msg = (
            r"Index\(\.\.\.\) must be called with a collection of some "
            f"kind, {value} was passed"
        )
        with pytest.raises(TypeError, match=msg):
            Index(value)

    @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
    def test_construction_list_mixed_tuples(self, index_vals):
        # see gh-10697: if we are constructing from a mixed list of tuples,
        # make sure that we are independent of the sorting order.
        index = Index(index_vals)
        assert isinstance(index, Index)
        assert not isinstance(index, MultiIndex)

    def test_constructor_cast(self):
        msg = "could not convert string to float"
        with pytest.raises(ValueError, match=msg):
            Index(["a", "b", "c"], dtype=float)

    @pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
    def test_construct_empty_tuples(self, tuple_list):
        # GH #45608
        result = Index(tuple_list)
        expected = MultiIndex.from_tuples(tuple_list)

        tm.assert_index_equal(result, expected)

    def test_index_string_inference(self):
        # GH#54430
        pytest.importorskip("pyarrow")
        dtype = "string[pyarrow_numpy]"
        expected = Index(["a", "b"], dtype=dtype)
        with pd.option_context("future.infer_string", True):
            ser = Index(["a", "b"])
        tm.assert_index_equal(ser, expected)

        expected = Index(["a", 1], dtype="object")
        with pd.option_context("future.infer_string", True):
            ser = Index(["a", 1])
        tm.assert_index_equal(ser, expected)

    def test_inference_on_pandas_objects(self):
        # GH#56012
        idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
        with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
            result = Index(idx)
        assert result.dtype != np.object_

        ser = Series([pd.Timestamp("2019-12-31")], dtype=object)

        with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
            result = Index(ser)
        assert result.dtype != np.object_

    def test_constructor_not_read_only(self):
        # GH#57130
        ser = Series([1, 2], dtype=object)
        with pd.option_context("mode.copy_on_write", True):
            idx = Index(ser)
        assert idx._values.flags.writeable
@@ -0,0 +1,163 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
import pandas._config.config as cf

from pandas import Index
import pandas._testing as tm


class TestIndexRendering:
    def test_repr_is_valid_construction_code(self):
        # for the case of Index, where the repr is traditional rather than
        # stylized
        idx = Index(["a", "b"])
        res = eval(repr(idx))
        tm.assert_index_equal(res, idx)

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                "       ...\n"
                "       'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["あ", "いい", "ううう"]),
                """Index(['あ', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう'],\n"
                    "      dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        result = repr(index)
        assert result == expected

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["あ", "いい", "ううう"]),
                ("Index(['あ', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう'],\n"
                    "      dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', "
                    "'いい', 'ううう', 'あ', 'いい',\n"
                    "       'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
        assert len(result) < 200
        assert "..." in result

    def test_summary_bug(self):
        # GH#3869
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        # GH32146
        arr = Index([True, False, np.nan], dtype=object)
        msg = "Index.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            exp1 = arr.format()
        out1 = ["True", "False", "NaN"]
        assert out1 == exp1

        exp2 = repr(arr)
        out2 = "Index([True, False, nan], dtype='object')"
        assert out2 == exp2

    def test_format_different_scalar_lengths(self):
        # GH#35439
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"Index\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected
@@ -0,0 +1,104 @@
import numpy as np
import pytest

from pandas._libs import index as libindex

import pandas as pd
from pandas import (
    Index,
    NaT,
)
import pandas._testing as tm


class TestGetSliceBounds:
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        index = Index(list("abcdef"))
        result = index.get_slice_bound("e", side=side)
        assert result == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, side, expected, data, bound):
        index = Index(data)
        result = index.get_slice_bound(bound, side=side)
        assert result == expected

    def test_get_slice_bounds_invalid_side(self):
        with pytest.raises(ValueError, match="Invalid value for side kwarg"):
            Index([]).get_slice_bound("a", side="middle")


class TestGetIndexerNonUnique:
    def test_get_indexer_non_unique_dtype_mismatch(self):
        # GH#25459
        indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
        tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)

    @pytest.mark.parametrize(
        "idx_values,idx_non_unique",
        [
            ([np.nan, 100, 200, 100], [np.nan, 100]),
            ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
        ],
    )
    def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
        indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan]))
        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)

        indexes, missing = Index(idx_values).get_indexer_non_unique(
            Index(idx_non_unique)
        )
        tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)


class TestGetLoc:
    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
        # Go through the libindex path for which using
        # _bin_search vs ndarray.searchsorted makes a difference

        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
            lev = list("ABCD")
            dti = pd.date_range("2016-01-01", periods=10)

            mi = pd.MultiIndex.from_product([lev, range(5), dti])
            oidx = mi.to_flat_index()

            loc = len(oidx) // 2
            tup = oidx[loc]

            res = oidx.get_loc(tup)
        assert res == loc

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # we dont raise KeyError on nan
        res = idx.get_loc(np.nan)
        assert res == 1

        # we only match on None, not on np.nan
        res = idx.get_loc(None)
        expected = np.array([False, False, True, False, False, True])
        tm.assert_numpy_array_equal(res, expected)

        # we don't match at all on mismatched NA
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)


def test_getitem_boolean_ea_indexer():
    # GH#45806
    ser = pd.Series([True, False, pd.NA], dtype="boolean")
    result = ser.index[ser]
    expected = Index([0])
    tm.assert_index_equal(result, expected)
@@ -0,0 +1,11 @@
from pandas import Index
import pandas._testing as tm


def test_pickle_preserves_object_dtype():
    # GH#43188, GH#43155 don't infer numeric dtype
    index = Index([1, 2, 3], dtype=object)

    result = tm.round_trip_pickle(index)
    assert result.dtype == object
    tm.assert_index_equal(index, result)
@@ -0,0 +1,95 @@
"""
Tests for ndarray-like method on the base Index class
"""
import numpy as np
import pytest

from pandas import Index
import pandas._testing as tm


class TestReshape:
    def test_repeat(self):
        repeats = 2
        index = Index([1, 2, 3])
        expected = Index([1, 1, 2, 2, 3, 3])

        result = index.repeat(repeats)
        tm.assert_index_equal(result, expected)

    def test_insert(self):
        # GH 7256
        # validate neg/pos inserts
        result = Index(["b", "c", "d"])

        # test 0th element
        tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))

        # test Nth element that follows Python list behavior
        tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))

        # test loc +/- neq (0, -1)
        tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))

        # test empty
        null_index = Index([])
        tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a"))

    def test_insert_missing(self, nulls_fixture, using_infer_string):
        # GH#22295
        # test there is no mangling of NA values
        expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
        result = Index(list("abc"), dtype=object).insert(
            1, Index([nulls_fixture], dtype=object)
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
    )
    @pytest.mark.parametrize("loc", [-1, 2])
    def test_insert_datetime_into_object(self, loc, val):
        # GH#44509
        idx = Index(["1", "2", "3"])
        result = idx.insert(loc, val)
        expected = Index(["1", "2", val, "3"])
        tm.assert_index_equal(result, expected)
        assert type(expected[2]) is type(val)

    def test_insert_none_into_string_numpy(self):
        # GH#55365
        pytest.importorskip("pyarrow")
        index = Index(["a", "b", "c"], dtype="string[pyarrow_numpy]")
        result = index.insert(-1, None)
        expected = Index(["a", "b", None, "c"], dtype="string[pyarrow_numpy]")
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "pos,expected",
        [
            (0, Index(["b", "c", "d"], name="index")),
            (-1, Index(["a", "b", "c"], name="index")),
        ],
    )
    def test_delete(self, pos, expected):
        index = Index(["a", "b", "c", "d"], name="index")
        result = index.delete(pos)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_delete_raises(self):
        index = Index(["a", "b", "c", "d"], name="index")
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(IndexError, match=msg):
            index.delete(5)

    def test_append_multiple(self):
        index = Index(["a", "b", "c", "d", "e", "f"])

        foos = [index[:2], index[2:4], index[4:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, index)

        # empty
        result = index.append([])
        tm.assert_index_equal(result, index)
@@ -0,0 +1,266 @@
from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    Series,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort


def equal_contents(arr1, arr2) -> bool:
    """
    Checks if the set of unique elements of arr1 and arr2 are equivalent.
    """
    return frozenset(arr1) == frozenset(arr2)


class TestIndexSetOps:
    @pytest.mark.parametrize(
        "method", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_setops_sort_validation(self, method):
        idx1 = Index(["a", "b"])
        idx2 = Index(["b", "c"])

        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
            getattr(idx1, method)(idx2, sort=2)

        # sort=True is supported as of GH#??
        getattr(idx1, method)(idx2, sort=True)

    def test_setops_preserve_object_dtype(self):
        idx = Index([1, 2, 3], dtype=object)
        result = idx.intersection(idx[1:])
        expected = idx[1:]
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, intersection goes through
        # a different route
        result = idx.intersection(idx[1:][::-1])
        tm.assert_index_equal(result, expected)

        result = idx._union(idx[1:], sort=None)
        expected = idx
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:], sort=None)
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, _union goes through
        # a different route
        result = idx._union(idx[1:][::-1], sort=None)
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:][::-1], sort=None)
        tm.assert_index_equal(result, expected)

    def test_union_base(self):
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(second)

        expected = Index([0, 1, 2, "a", "b", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_union_different_type_base(self, klass):
        # GH 10149
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(klass(second.values))

        assert equal_contents(result, index)

    def test_union_sort_other_incomparable(self):
        # https://github.com/pandas-dev/pandas/issues/24959
        idx = Index([1, pd.Timestamp("2000")])
        # default (sort=None)
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1])

        tm.assert_index_equal(result, idx)

        # sort=None
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1], sort=None)
        tm.assert_index_equal(result, idx)

        # sort=False
        result = idx.union(idx[:1], sort=False)
        tm.assert_index_equal(result, idx)

    def test_union_sort_other_incomparable_true(self):
        idx = Index([1, pd.Timestamp("2000")])
        with pytest.raises(TypeError, match=".*"):
            idx.union(idx[:1], sort=True)

    def test_intersection_equal_sort_true(self):
        idx = Index(["c", "a", "b"])
        sorted_ = Index(["a", "b", "c"])
        tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

    def test_intersection_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
        result = first.intersection(second, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_intersection_different_type_base(self, klass, sort):
        # GH 10149
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        result = first.intersection(klass(second.values), sort=sort)
        assert equal_contents(result, second)

    def test_intersection_nosort(self):
        result = Index(["c", "b", "a"]).intersection(["b", "a"])
        expected = Index(["b", "a"])
        tm.assert_index_equal(result, expected)

    def test_intersection_equal_sort(self):
        idx = Index(["c", "a", "b"])
        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

    def test_intersection_str_dates(self, sort):
        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

        i1 = Index(dt_dates, dtype=object)
        i2 = Index(["aa"], dtype=object)
        result = i2.intersection(i1, sort=sort)

        assert len(result) == 0

    @pytest.mark.parametrize(
        "index2,expected_arr",
        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
    )
    def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
        # non-monotonic non-unique
        index1 = Index(["A", "B", "A", "C"])
        expected = Index(expected_arr)
        result = index1.intersection(index2, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

    def test_difference_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.difference(second, sort)
        expected = Index([0, "a", 1])
        if sort is None:
            expected = Index(safe_sort(expected))
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.symmetric_difference(second)
        expected = Index([0, 1, 2, "a", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "method,expected,sort",
        [
            (
                "intersection",
                np.array(
                    [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                False,
            ),
            (
                "intersection",
                np.array(
                    [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
            (
                "union",
                np.array(
                    [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
        ],
    )
    def test_tuple_union_bug(self, method, expected, sort):
        index1 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            )
        )
        index2 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
                dtype=[("num", int), ("let", "S1")],
            )
        )

        result = getattr(index1, method)(index2, sort=sort)
        assert result.ndim == 1

        expected = Index(expected)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("first_list", [["b", "a"], []])
    @pytest.mark.parametrize("second_list", [["a", "b"], []])
    @pytest.mark.parametrize(
        "first_name, second_name, expected_name",
        [("A", "B", None), (None, "B", None), ("A", None, None)],
    )
    def test_union_name_preservation(
        self, first_list, second_list, first_name, second_name, expected_name, sort
    ):
        first = Index(first_list, name=first_name)
        second = Index(second_list, name=second_name)
        union = first.union(second, sort=sort)

        vals = set(first_list).union(second_list)

        if sort is None and len(first_list) > 0 and len(second_list) > 0:
            expected = Index(sorted(vals), name=expected_name)
            tm.assert_index_equal(union, expected)
        else:
            expected = Index(vals, name=expected_name)
            tm.assert_index_equal(union.sort_values(), expected.sort_values())

    @pytest.mark.parametrize(
        "diff_type, expected",
        [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
    )
    def test_difference_object_type(self, diff_type, expected):
        # GH 13432
        idx1 = Index([0, 1, "A", "B"])
        idx2 = Index([0, 2, "A", "C"])
        result = getattr(idx1, diff_type)(idx2)
        expected = Index(expected)
        tm.assert_index_equal(result, expected)
@@ -0,0 +1,13 @@
import numpy as np

from pandas import Index
import pandas._testing as tm


class TestWhere:
    def test_where_intlike_str_doesnt_cast_ints(self):
        idx = Index(range(3))
        mask = np.array([True, False, True])
        res = idx.where(mask, "2")
        expected = Index([0, "2", 2])
        tm.assert_index_equal(res, expected)
Binary files not shown.
@@ -0,0 +1,62 @@
import pytest

from pandas import (
    CategoricalIndex,
    Index,
)
import pandas._testing as tm


class TestAppend:
    @pytest.fixture
    def ci(self):
        categories = list("cab")
        return CategoricalIndex(list("aabbca"), categories=categories, ordered=False)

    def test_append(self, ci):
        # append cats with the same categories
        result = ci[:3].append(ci[3:])
        tm.assert_index_equal(result, ci, exact=True)

        foos = [ci[:1], ci[1:3], ci[3:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, ci, exact=True)

    def test_append_empty(self, ci):
        # empty
        result = ci.append([])
        tm.assert_index_equal(result, ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.set_categories(list("abcd")))
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.reorder_categories(list("abc")))

    def test_append_category_objects(self, ci):
        # with objects
        result = ci.append(Index(["c", "a"]))
        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_non_categories(self, ci):
        # invalid objects -> cast to object via concat_compat
        result = ci.append(Index(["a", "d"]))
        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_object(self, ci):
        # GH#14298 - if base object is not categorical -> coerce to object
        result = Index(["c", "a"]).append(ci)
        expected = Index(list("caaabbca"))
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_to_another(self):
        # hits Index._concat
        fst = Index(["a", "b"])
        snd = CategoricalIndex(["d", "e"])
        result = fst.append(snd)
        expected = Index(["a", "b", "d", "e"])
        tm.assert_index_equal(result, expected)
@@ -0,0 +1,90 @@
from datetime import date

import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
    IntervalIndex,
)
import pandas._testing as tm


class TestAstype:
    def test_astype(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = ci.astype(object)
        tm.assert_index_equal(result, Index(np.array(ci), dtype=object))

        # this IS equal, but not the same class
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")

        ci = CategoricalIndex(
            Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
        )

        result = ci.astype("interval")
        expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(result, expected)

        result = IntervalIndex(result.values)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        # GH#18630
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = index.astype("category")
            expected = index
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        # astype to categorical and back should preserve date objects
        v = date.today()

        obj = Index([v, v])
        assert obj.dtype == object
        if box:
            obj = obj.array

        cat = obj.astype("category")

        rtrip = cat.astype(object)
        assert rtrip.dtype == object
        assert type(rtrip[0]) is date
@@ -0,0 +1,394 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas._libs import index as libindex
from pandas._libs.arrays import NDArrayBacked

import pandas as pd
from pandas import (
    Categorical,
    CategoricalDtype,
)
import pandas._testing as tm
from pandas.core.indexes.api import (
    CategoricalIndex,
    Index,
)


class TestCategoricalIndex:
    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is True

    def test_insert(self, simple_index):
        ci = simple_index
        categories = ci.categories

        # test 0th element
        result = ci.insert(0, "a")
        expected = CategoricalIndex(list("aaabbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test Nth element that follows Python list behavior
        result = ci.insert(-1, "a")
        expected = CategoricalIndex(list("aabbcaa"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test empty
        result = CategoricalIndex([], categories=categories).insert(0, "a")
        expected = CategoricalIndex(["a"], categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # invalid -> cast to object
        expected = ci.astype(object).insert(0, "d")
        result = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(result, expected, exact=True)

        # GH 18295 (test missing)
        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for na in (np.nan, pd.NaT, None):
            result = CategoricalIndex(list("aabcb")).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_insert_na_mismatched_dtype(self):
        ci = CategoricalIndex([0, 1, 1])
        result = ci.insert(0, pd.NaT)
        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_delete(self, simple_index):
        ci = simple_index
        categories = ci.categories

        result = ci.delete(0)
        expected = CategoricalIndex(list("abbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        result = ci.delete(-1)
        expected = CategoricalIndex(list("aabbc"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        c = CategoricalIndex(data)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, ordered=True)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        # test when data is neither monotonic increasing nor decreasing
        reordered_data = [data[0], data[2], data[1]]
        c = CategoricalIndex(reordered_data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is False

        # non lexsorted categories
        categories = non_lexsorted_data

        c = CategoricalIndex(categories[:2], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(categories[1:3], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    def test_has_duplicates(self):
        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
        assert idx.is_unique is True
        assert idx.has_duplicates is False

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, e in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
            e = idx[~e]
            result = idx.drop_duplicates(keep=keep)
            tm.assert_index_equal(result, e)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), expected)

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr doesn't roundtrip")
    def test_repr_roundtrip(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        tm.assert_numpy_array_equal(
            ci.isin(["c"]), np.array([False, False, False, True, False, False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
        )

        # mismatched categorical -> coerced to ndarray so doesn't matter
        result = ci.isin(ci.set_categories(list("abcdefghi")))
        expected = np.array([True] * 6)
        tm.assert_numpy_array_equal(result, expected)

        result = ci.isin(ci.set_categories(list("defghi")))
        expected = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(result, expected)

    def test_isin_overlapping_intervals(self):
        # GH 34974
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        result = CategoricalIndex(idx).isin(idx)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_identical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert ci1.identical(ci1)
        assert ci1.identical(ci1.copy())
        assert not ci1.identical(ci2)

    def test_ensure_copied_data(self):
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        result = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, result)
        assert not np.shares_memory(result._data._codes, index._data._codes)

        result = CategoricalIndex(index.values, copy=False)
        assert result._data._codes is index._data._codes


class TestCategoricalIndex2:
    def test_view_i8(self):
        # GH#25464
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        with pytest.raises(ValueError, match=msg):
            ci.view("i8")
        with pytest.raises(ValueError, match=msg):
            ci._data.view("i8")

        ci = ci[:-4]  # length divisible by 8

        res = ci.view("i8")
        expected = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(res, expected)

        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        if dtype != np.int64:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
            ci = CategoricalIndex(range(num_uniques))
        else:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            arr = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, arr, ci.dtype)
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        msg = "|".join(
            [
                f"cannot perform {op_name} with this index type: CategoricalIndex",
                "can only concatenate list",
                rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cab"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("ffggef"), categories=list("efg"))
        )

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.add_categories(["d"])
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
        result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
        )
        result = result.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(
                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
            ),
        )
@@ -0,0 +1,142 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
)
import pandas._testing as tm


class TestCategoricalIndexConstructors:
    def test_construction_disallows_scalar(self):
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        result = Index(ci)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        result = Index(ci.values)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        c = Categorical(list("aabbca"))
        result = CategoricalIndex(c)
        tm.assert_index_equal(result.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        result = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        result = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(result, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_construction_with_categorical_dtype(self):
        # construction with CategoricalDtype
        # GH#18109
        data, cats, ordered = "a a b b".split(), "c b a".split(), True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        result = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(result, expected, exact=True)

        # GH#19032
        result = Index(data, dtype=dtype)
        tm.assert_index_equal(result, expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
@@ -0,0 +1,96 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalIndex,
    Index,
    MultiIndex,
)


class TestEquals:
    def test_equals_categorical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(list("aabca")))
        # Ordered
        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        assert not ci.equals(CategoricalIndex(list("aabca")))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca") + [np.nan])
        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        # https://github.com/pandas-dev/pandas/issues/16603
        a = CategoricalIndex(["A"], categories=["A", "B"])
        b = CategoricalIndex(["A"], categories=["B", "A"])
        c = CategoricalIndex(["C"], categories=["B", "A"])
        assert a.equals(b)
        assert not a.equals(c)
        assert not b.equals(c)

    def test_equals_non_category(self):
        # GH#37667 Case where other contains a value not among ci's
        # categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        other = Index(["A", "B", "D", np.nan])

        assert not ci.equals(other)

    def test_equals_multiindex(self):
        # dont raise NotImplementedError when calling is_dtype_compat

        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        ci = mi.to_flat_index().astype("category")

        assert not ci.equals(mi)

    def test_equals_string_dtype(self, any_string_dtype):
        # GH#55364
        idx = CategoricalIndex(list("abc"), name="B")
        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
        assert idx.equals(other)
@@ -0,0 +1,54 @@
import numpy as np
import pytest

from pandas import CategoricalIndex
import pandas._testing as tm


class TestFillNA:
    def test_fillna_categorical(self):
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
        # fill by value in categories
        exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(idx.fillna(1.0), exp)

        cat = idx._data

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            cat.fillna(2.0)

        result = idx.fillna(2.0)
        expected = idx.astype(object).fillna(2.0)
        tm.assert_index_equal(result, expected)

    def test_fillna_copies_with_no_nas(self):
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        result = ci.fillna(0)
        assert result is not ci
        assert tm.shares_memory(result, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        result = cat.fillna(0)
        assert result._ndarray is not cat._ndarray
        assert result._ndarray.base is None
        assert not tm.shares_memory(result, cat)

    def test_fillna_validates_with_no_nas(self):
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        cat = ci._data

        msg = "Cannot setitem on a Categorical with a new category"
        res = ci.fillna(False)
        # nothing to fill, so we dont cast
        tm.assert_index_equal(res, ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            cat.fillna(False)
@@ -0,0 +1,120 @@
"""
Tests for CategoricalIndex.__repr__ and related methods.
"""
import pytest

from pandas._config import using_pyarrow_string_dtype
import pandas._config.config as cf

from pandas import CategoricalIndex
import pandas._testing as tm


class TestCategoricalIndexRepr:
    def test_format_different_scalar_lengths(self):
        # GH#35439
        idx = CategoricalIndex(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"CategoricalIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    def test_string_categorical_index_repr(self):
        # short
        idx = CategoricalIndex(["a", "bb", "ccc"])
        expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  ...
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("abcdefghijklmmo"))
        expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
                  'm', 'm', 'o'],
                 categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # short
        idx = CategoricalIndex(["あ", "いい", "ううう"])
        expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
        expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
                  'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected
|
||||
|
||||
# Enable Unicode option -----------------------------------------
|
||||
with cf.option_context("display.unicode.east_asian_width", True):
|
||||
# short
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"])
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(idx) == expected
|
||||
|
||||
# multiple lines
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
|
||||
'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
||||
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# truncated
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ',
|
||||
...
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
|
||||
'あ', 'いい', 'ううう'],
|
||||
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# larger categories
|
||||
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
|
||||
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
|
||||
'さ', 'し', 'す', 'せ', 'そ'],
|
||||
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(idx) == expected
|
@@ -0,0 +1,420 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestTake:
|
||||
def test_take_fill_value(self):
|
||||
# GH 12631
|
||||
|
||||
# numeric category
|
||||
idx = CategoricalIndex([1, 2, 3], name="xxx")
|
||||
result = idx.take(np.array([1, 0, -1]))
|
||||
expected = CategoricalIndex([2, 1, 3], name="xxx")
|
||||
tm.assert_index_equal(result, expected)
|
||||
tm.assert_categorical_equal(result.values, expected.values)
|
||||
|
||||
# fill_value
|
||||
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
||||
expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
|
||||
tm.assert_index_equal(result, expected)
|
||||
tm.assert_categorical_equal(result.values, expected.values)
|
||||
|
||||
# allow_fill=False
|
||||
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
|
||||
expected = CategoricalIndex([2, 1, 3], name="xxx")
|
||||
tm.assert_index_equal(result, expected)
|
||||
tm.assert_categorical_equal(result.values, expected.values)
|
||||
|
||||
# object category
|
||||
idx = CategoricalIndex(
|
||||
list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
|
||||
)
|
||||
result = idx.take(np.array([1, 0, -1]))
|
||||
expected = CategoricalIndex(
|
||||
list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
tm.assert_categorical_equal(result.values, expected.values)
|
||||
|
||||
# fill_value
|
||||
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
||||
expected = CategoricalIndex(
|
||||
["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
tm.assert_categorical_equal(result.values, expected.values)
|
||||
|
||||
# allow_fill=False
|
||||
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
|
||||
expected = CategoricalIndex(
|
||||
list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
tm.assert_categorical_equal(result.values, expected.values)
|
||||
|
||||
msg = (
|
||||
"When allow_fill=True and fill_value is not None, "
|
||||
"all indices must be >= -1"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(np.array([1, 0, -2]), fill_value=True)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(np.array([1, 0, -5]), fill_value=True)
|
||||
|
||||
msg = "index -5 is out of bounds for (axis 0 with )?size 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
idx.take(np.array([1, -5]))
|
||||
|
||||
def test_take_fill_value_datetime(self):
|
||||
# datetime category
|
||||
idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
|
||||
idx = CategoricalIndex(idx)
|
||||
result = idx.take(np.array([1, 0, -1]))
|
||||
expected = pd.DatetimeIndex(
|
||||
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
|
||||
)
|
||||
expected = CategoricalIndex(expected)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# fill_value
|
||||
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
||||
expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
|
||||
exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
|
||||
expected = CategoricalIndex(expected, categories=exp_cats)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# allow_fill=False
|
||||
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
|
||||
expected = pd.DatetimeIndex(
|
||||
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
|
||||
)
|
||||
expected = CategoricalIndex(expected)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
msg = (
|
||||
"When allow_fill=True and fill_value is not None, "
|
||||
"all indices must be >= -1"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(np.array([1, 0, -2]), fill_value=True)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(np.array([1, 0, -5]), fill_value=True)
|
||||
|
||||
msg = "index -5 is out of bounds for (axis 0 with )?size 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
idx.take(np.array([1, -5]))
|
||||
|
||||
def test_take_invalid_kwargs(self):
|
||||
idx = CategoricalIndex([1, 2, 3], name="foo")
|
||||
indices = [1, 0, -1]
|
||||
|
||||
msg = r"take\(\) got an unexpected keyword argument 'foo'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx.take(indices, foo=2)
|
||||
|
||||
msg = "the 'out' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(indices, out=indices)
|
||||
|
||||
msg = "the 'mode' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
class TestGetLoc:
|
||||
def test_get_loc(self):
|
||||
# GH 12531
|
||||
cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
|
||||
idx1 = Index(list("abcde"))
|
||||
assert cidx1.get_loc("a") == idx1.get_loc("a")
|
||||
assert cidx1.get_loc("e") == idx1.get_loc("e")
|
||||
|
||||
for i in [cidx1, idx1]:
|
||||
with pytest.raises(KeyError, match="'NOT-EXIST'"):
|
||||
i.get_loc("NOT-EXIST")
|
||||
|
||||
# non-unique
|
||||
cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
|
||||
idx2 = Index(list("aacded"))
|
||||
|
||||
# results in bool array
|
||||
res = cidx2.get_loc("d")
|
||||
tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
|
||||
tm.assert_numpy_array_equal(
|
||||
res, np.array([False, False, False, True, False, True])
|
||||
)
|
||||
# unique element results in scalar
|
||||
res = cidx2.get_loc("e")
|
||||
assert res == idx2.get_loc("e")
|
||||
assert res == 4
|
||||
|
||||
for i in [cidx2, idx2]:
|
||||
with pytest.raises(KeyError, match="'NOT-EXIST'"):
|
||||
i.get_loc("NOT-EXIST")
|
||||
|
||||
# non-unique, sliceable
|
||||
cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
|
||||
idx3 = Index(list("aabbb"))
|
||||
|
||||
# results in slice
|
||||
res = cidx3.get_loc("a")
|
||||
assert res == idx3.get_loc("a")
|
||||
assert res == slice(0, 2, None)
|
||||
|
||||
res = cidx3.get_loc("b")
|
||||
assert res == idx3.get_loc("b")
|
||||
assert res == slice(2, 5, None)
|
||||
|
||||
for i in [cidx3, idx3]:
|
||||
with pytest.raises(KeyError, match="'c'"):
|
||||
i.get_loc("c")
|
||||
|
||||
def test_get_loc_unique(self):
|
||||
cidx = CategoricalIndex(list("abc"))
|
||||
result = cidx.get_loc("b")
|
||||
assert result == 1
|
||||
|
||||
def test_get_loc_monotonic_nonunique(self):
|
||||
cidx = CategoricalIndex(list("abbc"))
|
||||
result = cidx.get_loc("b")
|
||||
expected = slice(1, 3, None)
|
||||
assert result == expected
|
||||
|
||||
def test_get_loc_nonmonotonic_nonunique(self):
|
||||
cidx = CategoricalIndex(list("abcb"))
|
||||
result = cidx.get_loc("b")
|
||||
expected = np.array([False, True, False, True], dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_loc_nan(self):
|
||||
# GH#41933
|
||||
ci = CategoricalIndex(["A", "B", np.nan])
|
||||
res = ci.get_loc(np.nan)
|
||||
|
||||
assert res == 2
|
||||
|
||||
|
||||
class TestGetIndexer:
|
||||
def test_get_indexer_base(self):
|
||||
# Determined by cat ordering.
|
||||
idx = CategoricalIndex(list("cab"), categories=list("cab"))
|
||||
expected = np.arange(len(idx), dtype=np.intp)
|
||||
|
||||
actual = idx.get_indexer(idx)
|
||||
tm.assert_numpy_array_equal(expected, actual)
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid fill method"):
|
||||
idx.get_indexer(idx, method="invalid")
|
||||
|
||||
def test_get_indexer_requires_unique(self):
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
|
||||
oidx = Index(np.array(ci))
|
||||
|
||||
msg = "Reindexing only valid with uniquely valued Index objects"
|
||||
|
||||
for n in [1, 2, 5, len(ci)]:
|
||||
finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]
|
||||
|
||||
with pytest.raises(InvalidIndexError, match=msg):
|
||||
ci.get_indexer(finder)
|
||||
|
||||
# see gh-17323
|
||||
#
|
||||
# Even when indexer is equal to the
|
||||
# members in the index, we should
|
||||
# respect duplicates instead of taking
|
||||
# the fast-track path.
|
||||
for finder in [list("aabbca"), list("aababca")]:
|
||||
with pytest.raises(InvalidIndexError, match=msg):
|
||||
ci.get_indexer(finder)
|
||||
|
||||
def test_get_indexer_non_unique(self):
|
||||
idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
|
||||
idx2 = CategoricalIndex(list("abf"))
|
||||
|
||||
for indexer in [idx2, list("abf"), Index(list("abf"))]:
|
||||
msg = "Reindexing only valid with uniquely valued Index objects"
|
||||
with pytest.raises(InvalidIndexError, match=msg):
|
||||
idx1.get_indexer(indexer)
|
||||
|
||||
r1, _ = idx1.get_indexer_non_unique(indexer)
|
||||
expected = np.array([0, 1, 2, -1], dtype=np.intp)
|
||||
tm.assert_almost_equal(r1, expected)
|
||||
|
||||
def test_get_indexer_method(self):
|
||||
idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
|
||||
idx2 = CategoricalIndex(list("abf"))
|
||||
|
||||
msg = "method pad not yet implemented for CategoricalIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx2.get_indexer(idx1, method="pad")
|
||||
msg = "method backfill not yet implemented for CategoricalIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx2.get_indexer(idx1, method="backfill")
|
||||
|
||||
msg = "method nearest not yet implemented for CategoricalIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx2.get_indexer(idx1, method="nearest")
|
||||
|
||||
def test_get_indexer_array(self):
|
||||
arr = np.array(
|
||||
[Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
|
||||
dtype=object,
|
||||
)
|
||||
cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
|
||||
ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
|
||||
result = ci.get_indexer(arr)
|
||||
expected = np.array([0, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_same_order(self):
|
||||
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
|
||||
expected = np.array([1, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/19551
|
||||
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
|
||||
expected = np.array([1, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_nans_in_index_and_target(self):
|
||||
# GH 45361
|
||||
ci = CategoricalIndex([1, 2, np.nan, 3])
|
||||
other1 = [2, 3, 4, np.nan]
|
||||
res1 = ci.get_indexer(other1)
|
||||
expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res1, expected1)
|
||||
other2 = [1, 4, 2, 3]
|
||||
res2 = ci.get_indexer(other2)
|
||||
expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res2, expected2)
|
||||
|
||||
|
||||
class TestWhere:
|
||||
def test_where(self, listlike_box):
|
||||
klass = listlike_box
|
||||
|
||||
i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
|
||||
cond = [True] * len(i)
|
||||
expected = i
|
||||
result = i.where(klass(cond))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
cond = [False] + [True] * (len(i) - 1)
|
||||
expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
|
||||
result = i.where(klass(cond))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_where_non_categories(self):
|
||||
ci = CategoricalIndex(["a", "b", "c", "d"])
|
||||
mask = np.array([True, False, True, False])
|
||||
|
||||
result = ci.where(mask, 2)
|
||||
expected = Index(["a", 2, "c", 2], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
msg = "Cannot setitem on a Categorical with a new category"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# Test the Categorical method directly
|
||||
ci._data._where(mask, 2)
|
||||
|
||||
|
||||
class TestContains:
|
||||
def test_contains(self):
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)
|
||||
|
||||
assert "a" in ci
|
||||
assert "z" not in ci
|
||||
assert "e" not in ci
|
||||
assert np.nan not in ci
|
||||
|
||||
# assert codes NOT in index
|
||||
assert 0 not in ci
|
||||
assert 1 not in ci
|
||||
|
||||
def test_contains_nan(self):
|
||||
ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
|
||||
assert np.nan in ci
|
||||
|
||||
@pytest.mark.parametrize("unwrap", [True, False])
|
||||
def test_contains_na_dtype(self, unwrap):
|
||||
dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
|
||||
pi = dti.to_period("D")
|
||||
tdi = dti - dti[-1]
|
||||
ci = CategoricalIndex(dti)
|
||||
|
||||
obj = ci
|
||||
if unwrap:
|
||||
obj = ci._data
|
||||
|
||||
assert np.nan in obj
|
||||
assert None in obj
|
||||
assert pd.NaT in obj
|
||||
assert np.datetime64("NaT") in obj
|
||||
assert np.timedelta64("NaT") not in obj
|
||||
|
||||
obj2 = CategoricalIndex(tdi)
|
||||
if unwrap:
|
||||
obj2 = obj2._data
|
||||
|
||||
assert np.nan in obj2
|
||||
assert None in obj2
|
||||
assert pd.NaT in obj2
|
||||
assert np.datetime64("NaT") not in obj2
|
||||
assert np.timedelta64("NaT") in obj2
|
||||
|
||||
obj3 = CategoricalIndex(pi)
|
||||
if unwrap:
|
||||
obj3 = obj3._data
|
||||
|
||||
assert np.nan in obj3
|
||||
assert None in obj3
|
||||
assert pd.NaT in obj3
|
||||
assert np.datetime64("NaT") not in obj3
|
||||
assert np.timedelta64("NaT") not in obj3
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"item, expected",
|
||||
[
|
||||
(pd.Interval(0, 1), True),
|
||||
(1.5, True),
|
||||
(pd.Interval(0.5, 1.5), False),
|
||||
("a", False),
|
||||
(Timestamp(1), False),
|
||||
(pd.Timedelta(1), False),
|
||||
],
|
||||
ids=str,
|
||||
)
|
||||
def test_contains_interval(self, item, expected):
|
||||
# GH 23705
|
||||
ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
|
||||
result = item in ci
|
||||
assert result is expected
|
||||
|
||||
def test_contains_list(self):
|
||||
# GH#21729
|
||||
idx = CategoricalIndex([1, 2, 3])
|
||||
|
||||
assert "a" not in idx
|
||||
|
||||
with pytest.raises(TypeError, match="unhashable type"):
|
||||
["a"] in idx
|
||||
|
||||
with pytest.raises(TypeError, match="unhashable type"):
|
||||
["a", "b"] in idx
|
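For reference, get_loc's return type depends on uniqueness and monotonicity, exactly as the three TestGetLoc cases above assert (illustrative sketch only):

import pandas as pd

pd.CategoricalIndex(list("abc")).get_loc("b")    # 1: unique label -> integer position
pd.CategoricalIndex(list("abbc")).get_loc("b")   # slice(1, 3, None): contiguous duplicates -> slice
pd.CategoricalIndex(list("abcb")).get_loc("b")   # array([False, True, False, True]): scattered duplicates -> boolean mask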
@@ -0,0 +1,144 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, categories",
|
||||
[
|
||||
(list("abcbca"), list("cab")),
|
||||
(pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
|
||||
],
|
||||
ids=["string", "interval"],
|
||||
)
|
||||
def test_map_str(data, categories, ordered):
|
||||
# GH 31202 - override base class since we want to maintain categorical/ordered
|
||||
index = CategoricalIndex(data, categories=categories, ordered=ordered)
|
||||
result = index.map(str)
|
||||
expected = CategoricalIndex(
|
||||
map(str, data), categories=map(str, categories), ordered=ordered
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_map():
|
||||
ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
|
||||
result = ci.map(lambda x: x.lower())
|
||||
exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
ci = CategoricalIndex(
|
||||
list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
|
||||
)
|
||||
result = ci.map(lambda x: x.lower())
|
||||
exp = CategoricalIndex(
|
||||
list("ababc"), categories=list("bac"), ordered=False, name="XXX"
|
||||
)
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
# GH 12766: Return an index not an array
|
||||
tm.assert_index_equal(
|
||||
ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
|
||||
)
|
||||
|
||||
# change categories dtype
|
||||
ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
|
||||
|
||||
def f(x):
|
||||
return {"A": 10, "B": 20, "C": 30}.get(x)
|
||||
|
||||
result = ci.map(f)
|
||||
exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
result = ci.map({"A": 10, "B": 20, "C": 30})
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
|
||||
def test_map_with_categorical_series():
|
||||
# GH 12756
|
||||
a = Index([1, 2, 3, 4])
|
||||
b = Series(["even", "odd", "even", "odd"], dtype="category")
|
||||
c = Series(["even", "odd", "even", "odd"])
|
||||
|
||||
exp = CategoricalIndex(["odd", "even", "odd", np.nan])
|
||||
tm.assert_index_equal(a.map(b), exp)
|
||||
exp = Index(["odd", "even", "odd", np.nan])
|
||||
tm.assert_index_equal(a.map(c), exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("data", "f", "expected"),
|
||||
(
|
||||
([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
|
||||
([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
|
||||
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
|
||||
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
|
||||
(
|
||||
[1, 1, np.nan],
|
||||
Series([False, False]),
|
||||
CategoricalIndex([False, False, np.nan]),
|
||||
),
|
||||
(
|
||||
[1, 2, np.nan],
|
||||
Series([False, False, False]),
|
||||
Index([False, False, np.nan]),
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_map_with_nan_ignore(data, f, expected): # GH 24241
|
||||
values = CategoricalIndex(data)
|
||||
result = values.map(f, na_action="ignore")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("data", "f", "expected"),
|
||||
(
|
||||
([1, 1, np.nan], pd.isna, Index([False, False, True])),
|
||||
([1, 2, np.nan], pd.isna, Index([False, False, True])),
|
||||
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
|
||||
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
|
||||
(
|
||||
[1, 1, np.nan],
|
||||
Series([False, False]),
|
||||
CategoricalIndex([False, False, np.nan]),
|
||||
),
|
||||
(
|
||||
[1, 2, np.nan],
|
||||
Series([False, False, False]),
|
||||
Index([False, False, np.nan]),
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_map_with_nan_none(data, f, expected): # GH 24241
|
||||
values = CategoricalIndex(data)
|
||||
result = values.map(f, na_action=None)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_with_dict_or_series():
|
||||
orig_values = ["a", "B", 1, "a"]
|
||||
new_values = ["one", 2, 3.0, "one"]
|
||||
cur_index = CategoricalIndex(orig_values, name="XXX")
|
||||
expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])
|
||||
|
||||
mapper = Series(new_values[:-1], index=orig_values[:-1])
|
||||
result = cur_index.map(mapper)
|
||||
# Order of categories in result can be different
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
mapper = dict(zip(orig_values[:-1], new_values[:-1]))
|
||||
result = cur_index.map(mapper)
|
||||
# Order of categories in result can be different
|
||||
tm.assert_index_equal(result, expected)
|
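A condensed sketch of the mapping behaviour asserted above (illustrative only; values copied from the tests):

import pandas as pd

ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"))

# A one-to-one mapper (dict, Series or callable) keeps the categorical dtype
# and remaps the categories alongside the values.
ci.map({"A": 10, "B": 20, "C": 30})   # CategoricalIndex([10, 20, 10, 20, 30], ...)

# A mapper that sends different categories to the same value falls back to a plain Index.
ci.map(lambda x: 1)                   # Index([1, 1, 1, 1, 1], dtype='int64')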
@@ -0,0 +1,78 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestReindex:
|
||||
def test_reindex_list_non_unique(self):
|
||||
# GH#11586
|
||||
msg = "cannot reindex on an axis with duplicate labels"
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci.reindex(["a", "c"])
|
||||
|
||||
def test_reindex_categorical_non_unique(self):
|
||||
msg = "cannot reindex on an axis with duplicate labels"
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci.reindex(Categorical(["a", "c"]))
|
||||
|
||||
def test_reindex_list_non_unique_unused_category(self):
|
||||
msg = "cannot reindex on an axis with duplicate labels"
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci.reindex(["a", "c"])
|
||||
|
||||
def test_reindex_categorical_non_unique_unused_category(self):
|
||||
msg = "cannot reindex on an axis with duplicate labels"
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci.reindex(Categorical(["a", "c"]))
|
||||
|
||||
def test_reindex_duplicate_target(self):
|
||||
# See GH25459
|
||||
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
res, indexer = cat.reindex(["a", "c", "c"])
|
||||
exp = Index(["a", "c", "c"])
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
|
||||
|
||||
res, indexer = cat.reindex(
|
||||
CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
|
||||
)
|
||||
exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
|
||||
|
||||
def test_reindex_empty_index(self):
|
||||
# See GH16770
|
||||
c = CategoricalIndex([])
|
||||
res, indexer = c.reindex(["a", "b"])
|
||||
tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
|
||||
|
||||
def test_reindex_categorical_added_category(self):
|
||||
# GH 42424
|
||||
ci = CategoricalIndex(
|
||||
[Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
|
||||
ordered=True,
|
||||
)
|
||||
ci_add = CategoricalIndex(
|
||||
[
|
||||
Interval(0, 1, closed="right"),
|
||||
Interval(1, 2, closed="right"),
|
||||
Interval(2, 3, closed="right"),
|
||||
Interval(3, 4, closed="right"),
|
||||
],
|
||||
ordered=True,
|
||||
)
|
||||
result, _ = ci.reindex(ci_add)
|
||||
expected = ci_add
|
||||
tm.assert_index_equal(expected, result)
|
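The reindexing rules covered above, as a small sketch (illustrative only; values mirror the assertions):

import pandas as pd

cat = pd.CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])

# Reindexing to a target with repeats returns a plain Index plus the positional indexer.
res, indexer = cat.reindex(["a", "c", "c"])
assert list(res) == ["a", "c", "c"] and list(indexer) == [0, 2, 2]

# A CategoricalIndex with duplicate labels cannot be reindexed at all.
try:
    pd.CategoricalIndex(["a", "b", "c", "a"]).reindex(["a", "c"])
except ValueError as err:
    print(err)   # "cannot reindex on an axis with duplicate labels"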
@@ -0,0 +1,18 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_value", [None, np.nan])
|
||||
def test_difference_with_na(na_value):
|
||||
# GH 57318
|
||||
ci = CategoricalIndex(["a", "b", "c", None])
|
||||
other = Index(["c", na_value])
|
||||
result = ci.difference(other)
|
||||
expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
|
||||
tm.assert_index_equal(result, expected)
|
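In short, the set difference asserted above treats None and np.nan in the target as the same missing value, and unused categories survive (illustrative sketch):

import numpy as np
import pandas as pd

ci = pd.CategoricalIndex(["a", "b", "c", None])
ci.difference(pd.Index(["c", np.nan]))   # CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'])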
@@ -0,0 +1,41 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, False])
|
||||
def sort(request):
|
||||
"""
|
||||
Valid values for the 'sort' parameter used in the Index
|
||||
setops methods (intersection, union, etc.)
|
||||
|
||||
Caution:
|
||||
Don't confuse this one with the "sort" fixture used
|
||||
for DataFrame.append or concat. That one has
|
||||
parameters [True, False].
|
||||
|
||||
We can't combine them as sort=True is not permitted
|
||||
in the Index setops methods.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["D", "3D", "-3D", "h", "2h", "-2h", "min", "2min", "s", "-3s"])
|
||||
def freq_sample(request):
|
||||
"""
|
||||
Valid values for 'freq' parameter used to create date_range and
|
||||
timedelta_range.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[list, tuple, np.array, array, Series])
|
||||
def listlike_box(request):
|
||||
"""
|
||||
Types that may be passed as the indexer to searchsorted.
|
||||
"""
|
||||
return request.param
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,89 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
PeriodIndex,
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class DropDuplicates:
|
||||
def test_drop_duplicates_metadata(self, idx):
|
||||
# GH#10115
|
||||
result = idx.drop_duplicates()
|
||||
tm.assert_index_equal(idx, result)
|
||||
assert idx.freq == result.freq
|
||||
|
||||
idx_dup = idx.append(idx)
|
||||
result = idx_dup.drop_duplicates()
|
||||
|
||||
expected = idx
|
||||
if not isinstance(idx, PeriodIndex):
|
||||
# freq is reset except for PeriodIndex
|
||||
assert idx_dup.freq is None
|
||||
assert result.freq is None
|
||||
expected = idx._with_freq(None)
|
||||
else:
|
||||
assert result.freq == expected.freq
|
||||
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"keep, expected, index",
|
||||
[
|
||||
(
|
||||
"first",
|
||||
np.concatenate(([False] * 10, [True] * 5)),
|
||||
np.arange(0, 10, dtype=np.int64),
|
||||
),
|
||||
(
|
||||
"last",
|
||||
np.concatenate(([True] * 5, [False] * 10)),
|
||||
np.arange(5, 15, dtype=np.int64),
|
||||
),
|
||||
(
|
||||
False,
|
||||
np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
|
||||
np.arange(5, 10, dtype=np.int64),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_drop_duplicates(self, keep, expected, index, idx):
|
||||
# to check Index/Series compat
|
||||
idx = idx.append(idx[:5])
|
||||
|
||||
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
|
||||
expected = idx[~expected]
|
||||
|
||||
result = idx.drop_duplicates(keep=keep)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = Series(idx).drop_duplicates(keep=keep)
|
||||
expected = Series(expected, index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestDropDuplicatesPeriodIndex(DropDuplicates):
|
||||
@pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
|
||||
def freq(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def idx(self, freq):
|
||||
return period_range("2011-01-01", periods=10, freq=freq, name="idx")
|
||||
|
||||
|
||||
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
|
||||
@pytest.fixture
|
||||
def idx(self, freq_sample):
|
||||
return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
|
||||
|
||||
|
||||
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
|
||||
@pytest.fixture
|
||||
def idx(self, freq_sample):
|
||||
return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
|
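The freq bookkeeping checked by DropDuplicates, in sketch form (illustrative only; behaviour taken from the assertions above):

import pandas as pd

dti = pd.date_range("2011-01-01", periods=3, freq="D")
dup = dti.append(dti)
assert dup.freq is None                        # appending duplicates drops the freq
assert dup.drop_duplicates().freq is None      # ...and drop_duplicates does not restore it

pi = pd.period_range("2011-01-01", periods=3, freq="D")
assert pi.append(pi).drop_duplicates().freq == pi.freq   # PeriodIndex freq lives in the dtype and is kept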
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
|
||||
"""
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
PeriodIndex,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class EqualsTests:
|
||||
def test_not_equals_numeric(self, index):
|
||||
assert not index.equals(Index(index.asi8))
|
||||
assert not index.equals(Index(index.asi8.astype("u8")))
|
||||
assert not index.equals(Index(index.asi8).astype("f8"))
|
||||
|
||||
def test_equals(self, index):
|
||||
assert index.equals(index)
|
||||
assert index.equals(index.astype(object))
|
||||
assert index.equals(CategoricalIndex(index))
|
||||
assert index.equals(CategoricalIndex(index.astype(object)))
|
||||
|
||||
def test_not_equals_non_arraylike(self, index):
|
||||
assert not index.equals(list(index))
|
||||
|
||||
def test_not_equals_strings(self, index):
|
||||
other = Index([str(x) for x in index], dtype=object)
|
||||
assert not index.equals(other)
|
||||
assert not index.equals(CategoricalIndex(other))
|
||||
|
||||
def test_not_equals_misc_strs(self, index):
|
||||
other = Index(list("abc"))
|
||||
assert not index.equals(other)
|
||||
|
||||
|
||||
class TestPeriodIndexEquals(EqualsTests):
|
||||
@pytest.fixture
|
||||
def index(self):
|
||||
return period_range("2013-01-01", periods=5, freq="D")
|
||||
|
||||
# TODO: de-duplicate with other test_equals2 methods
|
||||
@pytest.mark.parametrize("freq", ["D", "M"])
|
||||
def test_equals2(self, freq):
|
||||
# GH#13107
|
||||
idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.astype(object).equals(idx)
|
||||
assert idx.astype(object).equals(idx.astype(object))
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h")
|
||||
assert not idx.equals(idx2)
|
||||
assert not idx.equals(idx2.copy())
|
||||
assert not idx.equals(idx2.astype(object))
|
||||
assert not idx.astype(object).equals(idx2)
|
||||
assert not idx.equals(list(idx2))
|
||||
assert not idx.equals(pd.Series(idx2))
|
||||
|
||||
# same internal values, different freq
|
||||
idx3 = PeriodIndex._simple_new(
|
||||
idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h"))
|
||||
)
|
||||
tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
|
||||
assert not idx.equals(idx3)
|
||||
assert not idx.equals(idx3.copy())
|
||||
assert not idx.equals(idx3.astype(object))
|
||||
assert not idx.astype(object).equals(idx3)
|
||||
assert not idx.equals(list(idx3))
|
||||
assert not idx.equals(pd.Series(idx3))
|
||||
|
||||
|
||||
class TestDatetimeIndexEquals(EqualsTests):
|
||||
@pytest.fixture
|
||||
def index(self):
|
||||
return date_range("2013-01-01", periods=5)
|
||||
|
||||
def test_equals2(self):
|
||||
# GH#13107
|
||||
idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.astype(object).equals(idx)
|
||||
assert idx.astype(object).equals(idx.astype(object))
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
|
||||
assert not idx.equals(idx2)
|
||||
assert not idx.equals(idx2.copy())
|
||||
assert not idx.equals(idx2.astype(object))
|
||||
assert not idx.astype(object).equals(idx2)
|
||||
assert not idx.equals(list(idx2))
|
||||
assert not idx.equals(pd.Series(idx2))
|
||||
|
||||
# same internal, different tz
|
||||
idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
|
||||
tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
|
||||
assert not idx.equals(idx3)
|
||||
assert not idx.equals(idx3.copy())
|
||||
assert not idx.equals(idx3.astype(object))
|
||||
assert not idx.astype(object).equals(idx3)
|
||||
assert not idx.equals(list(idx3))
|
||||
assert not idx.equals(pd.Series(idx3))
|
||||
|
||||
# check that we do not raise when comparing with OutOfBounds objects
|
||||
oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
|
||||
assert not idx.equals(oob)
|
||||
assert not idx2.equals(oob)
|
||||
assert not idx3.equals(oob)
|
||||
|
||||
# check that we do not raise when comparing with OutOfBounds dt64
|
||||
oob2 = oob.map(np.datetime64)
|
||||
assert not idx.equals(oob2)
|
||||
assert not idx2.equals(oob2)
|
||||
assert not idx3.equals(oob2)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["B", "C"])
|
||||
def test_not_equals_bday(self, freq):
|
||||
rng = date_range("2009-01-01", "2010-01-01", freq=freq)
|
||||
assert not rng.equals(list(rng))
|
||||
|
||||
|
||||
class TestTimedeltaIndexEquals(EqualsTests):
|
||||
@pytest.fixture
|
||||
def index(self):
|
||||
return timedelta_range("1 day", periods=10)
|
||||
|
||||
def test_equals2(self):
|
||||
# GH#13107
|
||||
idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.astype(object).equals(idx)
|
||||
assert idx.astype(object).equals(idx.astype(object))
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
|
||||
assert not idx.equals(idx2)
|
||||
assert not idx.equals(idx2.copy())
|
||||
assert not idx.equals(idx2.astype(object))
|
||||
assert not idx.astype(object).equals(idx2)
|
||||
assert not idx.astype(object).equals(idx2.astype(object))
|
||||
assert not idx.equals(list(idx2))
|
||||
assert not idx.equals(pd.Series(idx2))
|
||||
|
||||
# Check that we don't raise OverflowError on comparisons outside the
|
||||
# implementation range GH#28532
|
||||
oob = Index([timedelta(days=10**6)] * 3, dtype=object)
|
||||
assert not idx.equals(oob)
|
||||
assert not idx2.equals(oob)
|
||||
|
||||
oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
|
||||
assert (oob == oob2).all()
|
||||
assert not idx.equals(oob2)
|
||||
assert not idx2.equals(oob2)
|
||||
|
||||
oob3 = oob.map(np.timedelta64)
|
||||
assert (oob3 == oob).all()
|
||||
assert not idx.equals(oob3)
|
||||
assert not idx2.equals(oob3)
|
@@ -0,0 +1,45 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
dtlike_dtypes = [
|
||||
np.dtype("timedelta64[ns]"),
|
||||
np.dtype("datetime64[ns]"),
|
||||
pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
|
||||
pd.PeriodDtype("ns"),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ldtype", dtlike_dtypes)
|
||||
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
|
||||
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
|
||||
vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)
|
||||
|
||||
def construct(dtype):
|
||||
if dtype is dtlike_dtypes[-1]:
|
||||
# PeriodArray will try to cast ints to strings
|
||||
return DatetimeIndex(vals).astype(dtype)
|
||||
return Index(vals, dtype=dtype)
|
||||
|
||||
left = construct(ldtype)
|
||||
right = construct(rdtype)
|
||||
|
||||
result = left.get_indexer_non_unique(right)
|
||||
|
||||
if ldtype is rdtype:
|
||||
ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
|
||||
ex2 = np.array([], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result[0], ex1)
|
||||
tm.assert_numpy_array_equal(result[1], ex2)
|
||||
|
||||
else:
|
||||
no_matches = np.array([-1] * 6, dtype=np.intp)
|
||||
missing = np.arange(6, dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result[0], no_matches)
|
||||
tm.assert_numpy_array_equal(result[1], missing)
|
@@ -0,0 +1,46 @@
|
||||
from pandas import (
|
||||
Index,
|
||||
NaT,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
def test_is_monotonic_with_nat():
|
||||
# GH#31437
|
||||
# PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
|
||||
# in particular never be monotonic when we have NaT
|
||||
dti = date_range("2016-01-01", periods=3)
|
||||
pi = dti.to_period("D")
|
||||
tdi = Index(dti.view("timedelta64[ns]"))
|
||||
|
||||
for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
|
||||
if isinstance(obj, Index):
|
||||
# i.e. not Engines
|
||||
assert obj.is_monotonic_increasing
|
||||
assert obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_decreasing
|
||||
assert obj.is_unique
|
||||
|
||||
dti1 = dti.insert(0, NaT)
|
||||
pi1 = dti1.to_period("D")
|
||||
tdi1 = Index(dti1.view("timedelta64[ns]"))
|
||||
|
||||
for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
|
||||
if isinstance(obj, Index):
|
||||
# i.e. not Engines
|
||||
assert not obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_decreasing
|
||||
assert obj.is_unique
|
||||
|
||||
dti2 = dti.insert(3, NaT)
|
||||
pi2 = dti2.to_period("h")
|
||||
tdi2 = Index(dti2.view("timedelta64[ns]"))
|
||||
|
||||
for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
|
||||
if isinstance(obj, Index):
|
||||
# i.e. not Engines
|
||||
assert not obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_decreasing
|
||||
assert obj.is_unique
|
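The monotonicity rule being pinned down above, restated as a short sketch (illustrative only):

import pandas as pd

dti = pd.date_range("2016-01-01", periods=3)
assert dti.is_monotonic_increasing
assert not dti.insert(0, pd.NaT).is_monotonic_increasing   # any NaT makes the index non-monotonic
assert dti.insert(0, pd.NaT).is_unique                     # but NaT still counts as a unique label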
@@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
TimedeltaIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class NATests:
|
||||
def test_nat(self, index_without_na):
|
||||
empty_index = index_without_na[:0]
|
||||
|
||||
index_with_na = index_without_na.copy(deep=True)
|
||||
index_with_na._data[1] = NaT
|
||||
|
||||
assert empty_index._na_value is NaT
|
||||
assert index_with_na._na_value is NaT
|
||||
assert index_without_na._na_value is NaT
|
||||
|
||||
idx = index_without_na
|
||||
assert idx._can_hold_na
|
||||
|
||||
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
|
||||
assert idx.hasnans is False
|
||||
|
||||
idx = index_with_na
|
||||
assert idx._can_hold_na
|
||||
|
||||
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
|
||||
assert idx.hasnans is True
|
||||
|
||||
|
||||
class TestDatetimeIndexNA(NATests):
|
||||
@pytest.fixture
|
||||
def index_without_na(self, tz_naive_fixture):
|
||||
tz = tz_naive_fixture
|
||||
return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
|
||||
|
||||
|
||||
class TestTimedeltaIndexNA(NATests):
|
||||
@pytest.fixture
|
||||
def index_without_na(self):
|
||||
return TimedeltaIndex(["1 days", "2 days"])
|
||||
|
||||
|
||||
class TestPeriodIndexNA(NATests):
|
||||
@pytest.fixture
|
||||
def index_without_na(self):
|
||||
return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
|
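The public-facing counterpart of the NA checks above (illustrative sketch; the tests additionally poke the private _isnan/_na_value machinery):

import pandas as pd

assert pd.DatetimeIndex(["2011-01-01", "2011-01-02"]).hasnans is False
assert pd.DatetimeIndex(["2011-01-01", pd.NaT]).hasnans is True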
@@ -0,0 +1,315 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
TimedeltaIndex,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def check_freq_ascending(ordered, orig, ascending):
|
||||
"""
|
||||
Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
|
||||
when the original index is generated (or generate-able) with
|
||||
period_range/date_range/timedelta_range.
|
||||
"""
|
||||
if isinstance(ordered, PeriodIndex):
|
||||
assert ordered.freq == orig.freq
|
||||
elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
|
||||
if ascending:
|
||||
assert ordered.freq.n == orig.freq.n
|
||||
else:
|
||||
assert ordered.freq.n == -1 * orig.freq.n
|
||||
|
||||
|
||||
def check_freq_nonmonotonic(ordered, orig):
|
||||
"""
|
||||
Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
|
||||
when the original index is _not_ generated (or generate-able) with
|
||||
period_range/date_range/timedelta_range.
|
||||
"""
|
||||
if isinstance(ordered, PeriodIndex):
|
||||
assert ordered.freq == orig.freq
|
||||
elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
|
||||
assert ordered.freq is None
|
||||
|
||||
|
||||
class TestSortValues:
|
||||
@pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
|
||||
def non_monotonic_idx(self, request):
|
||||
if request.param is DatetimeIndex:
|
||||
return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
|
||||
elif request.param is PeriodIndex:
|
||||
dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
|
||||
return dti.to_period("D")
|
||||
else:
|
||||
return TimedeltaIndex(
|
||||
["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
|
||||
)
|
||||
|
||||
def test_argmin_argmax(self, non_monotonic_idx):
|
||||
assert non_monotonic_idx.argmin() == 1
|
||||
assert non_monotonic_idx.argmax() == 0
|
||||
|
||||
def test_sort_values(self, non_monotonic_idx):
|
||||
idx = non_monotonic_idx
|
||||
ordered = idx.sort_values()
|
||||
assert ordered.is_monotonic_increasing
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
assert ordered[::-1].is_monotonic_increasing
|
||||
|
||||
ordered, dexer = idx.sort_values(return_indexer=True)
|
||||
assert ordered.is_monotonic_increasing
|
||||
tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))
|
||||
|
||||
ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
assert ordered[::-1].is_monotonic_increasing
|
||||
tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))
|
||||
|
||||
def check_sort_values_with_freq(self, idx):
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
check_freq_ascending(ordered, idx, True)
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
expected = idx[::-1]
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
check_freq_ascending(ordered, idx, False)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
|
||||
check_freq_ascending(ordered, idx, True)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
expected = idx[::-1]
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
|
||||
check_freq_ascending(ordered, idx, False)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "h"])
|
||||
def test_sort_values_with_freq_timedeltaindex(self, freq):
|
||||
# GH#10295
|
||||
idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")
|
||||
|
||||
self.check_sort_values_with_freq(idx)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx",
|
||||
[
|
||||
DatetimeIndex(
|
||||
["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
|
||||
),
|
||||
DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
|
||||
freq="h",
|
||||
name="tzidx",
|
||||
tz="Asia/Tokyo",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_sort_values_with_freq_datetimeindex(self, idx):
|
||||
self.check_sort_values_with_freq(idx)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "2D", "4D"])
|
||||
def test_sort_values_with_freq_periodindex(self, freq):
|
||||
# here with_freq refers to being period_range-like
|
||||
idx = PeriodIndex(
|
||||
["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
|
||||
)
|
||||
self.check_sort_values_with_freq(idx)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx",
|
||||
[
|
||||
PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
|
||||
Index([2011, 2012, 2013], name="idx"), # for compatibility check
|
||||
],
|
||||
)
|
||||
def test_sort_values_with_freq_periodindex2(self, idx):
|
||||
# here with_freq indicates this is period_range-like
|
||||
self.check_sort_values_with_freq(idx)
|
||||
|
||||
def check_sort_values_without_freq(self, idx, expected):
|
||||
ordered = idx.sort_values(na_position="first")
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
if not idx.isna().any():
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
|
||||
exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(indexer, exp)
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
if not idx.isna().any():
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
|
||||
exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(indexer, exp)
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
|
||||
exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(indexer, exp)
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
def test_sort_values_without_freq_timedeltaindex(self):
|
||||
# GH#10295
|
||||
|
||||
idx = TimedeltaIndex(
|
||||
["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
|
||||
)
|
||||
expected = TimedeltaIndex(
|
||||
["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
|
||||
)
|
||||
self.check_sort_values_without_freq(idx, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index_dates,expected_dates",
|
||||
[
|
||||
(
|
||||
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
||||
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
||||
),
|
||||
(
|
||||
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
||||
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
||||
),
|
||||
(
|
||||
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
|
||||
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_sort_values_without_freq_datetimeindex(
|
||||
self, index_dates, expected_dates, tz_naive_fixture
|
||||
):
|
||||
tz = tz_naive_fixture
|
||||
|
||||
# without freq
|
||||
idx = DatetimeIndex(index_dates, tz=tz, name="idx")
|
||||
expected = DatetimeIndex(expected_dates, tz=tz, name="idx")
|
||||
|
||||
self.check_sort_values_without_freq(idx, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx,expected",
|
||||
[
|
||||
(
|
||||
PeriodIndex(
|
||||
[
|
||||
"2011-01-01",
|
||||
"2011-01-03",
|
||||
"2011-01-05",
|
||||
"2011-01-02",
|
||||
"2011-01-01",
|
||||
],
|
||||
freq="D",
|
||||
name="idx1",
|
||||
),
|
||||
PeriodIndex(
|
||||
[
|
||||
"2011-01-01",
|
||||
"2011-01-01",
|
||||
"2011-01-02",
|
||||
"2011-01-03",
|
||||
"2011-01-05",
|
||||
],
|
||||
freq="D",
|
||||
name="idx1",
|
||||
),
|
||||
),
|
||||
(
|
||||
PeriodIndex(
|
||||
[
|
||||
"2011-01-01",
|
||||
"2011-01-03",
|
||||
"2011-01-05",
|
||||
"2011-01-02",
|
||||
"2011-01-01",
|
||||
],
|
||||
freq="D",
|
||||
name="idx2",
|
||||
),
|
||||
PeriodIndex(
|
||||
[
|
||||
"2011-01-01",
|
||||
"2011-01-01",
|
||||
"2011-01-02",
|
||||
"2011-01-03",
|
||||
"2011-01-05",
|
||||
],
|
||||
freq="D",
|
||||
name="idx2",
|
||||
),
|
||||
),
|
||||
(
|
||||
PeriodIndex(
|
||||
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
|
||||
freq="D",
|
||||
name="idx3",
|
||||
),
|
||||
PeriodIndex(
|
||||
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
|
||||
freq="D",
|
||||
name="idx3",
|
||||
),
|
||||
),
|
||||
(
|
||||
PeriodIndex(
|
||||
["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
|
||||
),
|
||||
PeriodIndex(
|
||||
["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
|
||||
),
|
||||
),
|
||||
(
|
||||
# For compatibility check
|
||||
Index([2011, 2013, 2015, 2012, 2011], name="idx"),
|
||||
Index([2011, 2011, 2012, 2013, 2015], name="idx"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_sort_values_without_freq_periodindex(self, idx, expected):
|
||||
# here without_freq means not generate-able by period_range
|
||||
self.check_sort_values_without_freq(idx, expected)
|
||||
|
||||
def test_sort_values_without_freq_periodindex_nat(self):
|
||||
# doesn't quite fit into check_sort_values_without_freq
|
||||
idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
|
||||
expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
|
||||
|
||||
ordered = idx.sort_values(na_position="first")
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
check_freq_nonmonotonic(ordered, idx)
|
||||
|
||||
|
||||
def test_order_stability_compat():
|
||||
# GH#35922. sort_values is stable both for normal and datetime-like Index
|
||||
pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y")
|
||||
iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
|
||||
ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False)
|
||||
ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_numpy_array_equal(indexer1, indexer2)
|
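The freq expectations encoded in check_freq_ascending, restated as a sketch (illustrative only):

import pandas as pd

dti = pd.date_range("2011-01-01", periods=3, freq="D")
assert dti.sort_values(ascending=False).freq.n == -dti.freq.n   # Datetime/Timedelta freq flips sign

pi = pd.period_range("2011-01-01", periods=3, freq="D")
assert pi.sort_values(ascending=False).freq == pi.freq          # PeriodIndex freq is part of the dtype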
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestValueCounts:
|
||||
# GH#7735
|
||||
|
||||
def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
|
||||
tz = tz_naive_fixture
|
||||
orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz)
|
||||
self._check_value_counts_with_repeats(orig)
|
||||
|
||||
def test_value_counts_unique_timedeltaindex(self):
|
||||
orig = timedelta_range("1 days 09:00:00", freq="h", periods=10)
|
||||
self._check_value_counts_with_repeats(orig)
|
||||
|
||||
def test_value_counts_unique_periodindex(self):
|
||||
orig = period_range("2011-01-01 09:00", freq="h", periods=10)
|
||||
self._check_value_counts_with_repeats(orig)
|
||||
|
||||
def _check_value_counts_with_repeats(self, orig):
|
||||
# create repeated values, 'n'th element is repeated by n+1 times
|
||||
idx = type(orig)(
|
||||
np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
|
||||
)
|
||||
|
||||
exp_idx = orig[::-1]
|
||||
if not isinstance(exp_idx, PeriodIndex):
|
||||
exp_idx = exp_idx._with_freq(None)
|
||||
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count")
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(), expected)
|
||||
|
||||
tm.assert_index_equal(idx.unique(), orig)
|
||||
|
||||
def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
|
||||
tz = tz_naive_fixture
|
||||
idx = DatetimeIndex(
|
||||
[
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 08:00",
|
||||
"2013-01-01 08:00",
|
||||
NaT,
|
||||
],
|
||||
tz=tz,
|
||||
)
|
||||
self._check_value_counts_dropna(idx)
|
||||
|
||||
def test_value_counts_unique_timedeltaindex2(self):
|
||||
idx = TimedeltaIndex(
|
||||
[
|
||||
"1 days 09:00:00",
|
||||
"1 days 09:00:00",
|
||||
"1 days 09:00:00",
|
||||
"1 days 08:00:00",
|
||||
"1 days 08:00:00",
|
||||
NaT,
|
||||
]
|
||||
)
|
||||
self._check_value_counts_dropna(idx)
|
||||
|
||||
def test_value_counts_unique_periodindex2(self):
|
||||
idx = PeriodIndex(
|
||||
[
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 08:00",
|
||||
"2013-01-01 08:00",
|
||||
NaT,
|
||||
],
|
||||
freq="h",
|
||||
)
|
||||
self._check_value_counts_dropna(idx)
|
||||
|
||||
def _check_value_counts_dropna(self, idx):
|
||||
exp_idx = idx[[2, 3]]
|
||||
expected = Series([3, 2], index=exp_idx, name="count")
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(), expected)
|
||||
|
||||
exp_idx = idx[[2, 3, -1]]
|
||||
expected = Series([3, 2, 1], index=exp_idx, name="count")
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
|
||||
|
||||
tm.assert_index_equal(idx.unique(), exp_idx)
|
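A compact restatement of the dropna behaviour asserted in _check_value_counts_dropna (illustrative only; values copied from the tests):

import pandas as pd

idx = pd.DatetimeIndex(["2013-01-01 09:00"] * 3 + ["2013-01-01 08:00"] * 2 + [pd.NaT])

idx.value_counts()              # NaT is excluded by default: {09:00: 3, 08:00: 2}
idx.value_counts(dropna=False)  # NaT is counted: {09:00: 3, 08:00: 2, NaT: 1}
idx.unique()                    # unique() always keeps NaT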
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.