venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,248 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
CategoricalDtype,
|
||||
IntervalDtype,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class AstypeTests:
    """
    Shared ``astype`` checks for an IntervalIndex of any subtype.

    Every test consumes an ``index`` fixture, which the concrete subclasses
    in this module define.
    """

    def test_astype_idempotent(self, index):
        """Casting to "interval" or to the index's own dtype changes nothing."""
        for target in ("interval", index.dtype):
            tm.assert_index_equal(index.astype(target), index)

    def test_astype_object(self, index):
        """An object cast matches an object Index and is not ``equals`` the source."""
        as_object = index.astype(object)
        tm.assert_index_equal(as_object, Index(index.values, dtype="object"))
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        """A category cast matches a CategoricalIndex built from the values."""
        default_expected = CategoricalIndex(index.values)
        for target in ("category", CategoricalDtype()):
            tm.assert_index_equal(index.astype(target), default_expected)

        # non-default params: all unique non-NA values but the last, ordered
        kept = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=kept, ordered=True)
        converted = index.astype(ordered_dtype)
        tm.assert_index_equal(
            converted,
            CategoricalIndex(index.values, categories=kept, ordered=True),
        )

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        """Each listed non-interval dtype is rejected with a TypeError."""
        with pytest.raises(TypeError, match="Cannot cast IntervalIndex to dtype"):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        """An unknown dtype string is rejected with a TypeError."""
        pattern = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=pattern):
            index.astype("fake_dtype")
|
||||
|
||||
|
||||
class TestIntSubtype(AstypeTests):
    """``astype`` checks for an IntervalIndex with an int64 or uint64 subtype."""

    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Provide each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        """Casting the subtype matches casting each side and rebuilding."""
        target = IntervalDtype(subtype, index.closed)
        converted = index.astype(target)

        rebuilt = IntervalIndex.from_arrays(
            index.left.astype(subtype),
            index.right.astype(subtype),
            closed=index.closed,
        )
        tm.assert_index_equal(converted, rebuilt)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        """int64 <-> uint64 casts match casting each side and rebuilding."""
        source = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        target = IntervalDtype(subtype_end, source.closed)
        converted = source.astype(target)

        new_left = source.left.astype(subtype_end)
        new_right = source.right.astype(subtype_end)
        rebuilt = IntervalIndex.from_arrays(
            new_left, new_right, closed=source.closed
        )
        tm.assert_index_equal(converted, rebuilt)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        """int64 -> uint64 with negative values should raise a sensible error."""
        negative_index = interval_range(-10, 10)
        unsigned = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        #  assert something that it should _not_ be.
        #  We should _not_ be getting a message suggesting that the -10
        #  has been wrapped around to a large-positive integer
        not_wrapped = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=not_wrapped):
            negative_index.astype(unsigned)
|
||||
|
||||
|
||||
class TestFloatSubtype(AstypeTests):
    """``astype`` checks for an IntervalIndex with a float subtype."""

    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Provide each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        """Float -> integer subtype works without NA and raises with NA."""
        source = interval_range(0.0, 10.0)
        target = IntervalDtype(subtype, "right")
        converted = source.astype(target)

        rebuilt = IntervalIndex.from_arrays(
            source.left.astype(subtype),
            source.right.astype(subtype),
            closed=source.closed,
        )
        tm.assert_index_equal(converted, rebuilt)

        # raises with NA
        non_finite = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=non_finite):
            source.insert(0, np.nan).astype(target)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        """Fractional borders convert the same way as casting each side."""
        source = interval_range(0.0, 3.0, freq=0.25)
        target = IntervalDtype(subtype, "right")
        converted = source.astype(target)

        new_left = source.left.astype(subtype)
        new_right = source.right.astype(subtype)
        rebuilt = IntervalIndex.from_arrays(
            new_left, new_right, closed=source.closed
        )
        tm.assert_index_equal(converted, rebuilt)

    def test_subtype_integer_errors(self):
        """float64 -> uint64 fails with negative values."""
        negative_index = interval_range(-10.0, 10.0)
        unsigned = IntervalDtype("uint64", "right")
        incompatible = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=incompatible):
            negative_index.astype(unsigned)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        """Float -> datetime-like subtype is rejected with a TypeError."""
        target = IntervalDtype(subtype, "right")
        incompatible = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=incompatible):
            index.astype(target)
|
||||
|
||||
|
||||
class TestDatetimelikeSubtype(AstypeTests):
    """``astype`` checks for an IntervalIndex with a datetime-like subtype."""

    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Provide each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        """int64 conversion works; any other requested subtype raises TypeError."""
        target = IntervalDtype(subtype, "right")

        if subtype == "int64":
            converted = index.astype(target)
            left_cast = index.left.astype(subtype)
            right_cast = index.right.astype(subtype)

            rebuilt = IntervalIndex.from_arrays(
                left_cast, right_cast, closed=index.closed
            )
            tm.assert_index_equal(converted, rebuilt)
        else:
            pattern = (
                r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
                r"to interval\[uint64, .*\]"
            )
            with pytest.raises(TypeError, match=pattern):
                index.astype(target)

    def test_subtype_float(self, index):
        """Datetime-like -> float64 subtype is rejected with a TypeError."""
        target = IntervalDtype("float64", "right")
        incompatible = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=incompatible):
            index.astype(target)

    def test_subtype_datetimelike(self):
        """datetime <-> timedelta subtype conversions raise TypeError."""
        incompatible = "Cannot convert .* to .*; subtypes are incompatible"
        to_timedelta = IntervalDtype("timedelta64[ns]", "right")
        to_datetime = IntervalDtype("datetime64[ns]", "right")

        cases = [
            # datetime -> timedelta raises (naive and tz-aware)
            (interval_range(Timestamp("2018-01-01"), periods=10), to_timedelta),
            (
                interval_range(Timestamp("2018-01-01", tz="CET"), periods=10),
                to_timedelta,
            ),
            # timedelta -> datetime raises
            (interval_range(Timedelta("0 days"), periods=10), to_datetime),
        ]
        for source, target in cases:
            with pytest.raises(TypeError, match=incompatible):
                source.astype(target)
|
@ -0,0 +1,535 @@
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas.core.dtypes.common import is_unsigned_integer_dtype
|
||||
from pandas.core.dtypes.dtypes import IntervalDtype
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
date_range,
|
||||
notna,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Provide an index name: first ``None``, then ``"foo"``."""
    index_name = request.param
    return index_name
|
||||
|
||||
|
||||
class ConstructorTests:
    """
    Shared checks for the different ways of building an IntervalIndex.

    Each test starts from data in breaks format and calls the subclass method
    ``get_kwargs_from_breaks`` to turn it into keyword arguments for the
    callable supplied by the ``constructor`` fixture.
    """

    @pytest.fixture(
        params=[
            ([3, 14, 15, 92, 653], np.int64),
            (np.arange(10, dtype="int64"), np.int64),
            (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
            (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
            (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
            (date_range("20180101", periods=10), "<M8[ns]"),
            (
                date_range("20180101", periods=10, tz="US/Eastern"),
                "datetime64[ns, US/Eastern]",
            ),
            (timedelta_range("1 day", periods=10), "<m8[ns]"),
        ]
    )
    def breaks_and_expected_subtype(self, request):
        """Provide ``(breaks, expected subtype)`` pairs."""
        return request.param

    def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
        """closed, name, subtype and both sides match the inputs."""
        breaks, expected_subtype = breaks_and_expected_subtype

        kwargs = self.get_kwargs_from_breaks(breaks, closed)
        built = constructor(closed=closed, name=name, **kwargs)

        assert built.closed == closed
        assert built.name == name
        assert built.dtype.subtype == expected_subtype

        expected_left = Index(breaks[:-1], dtype=expected_subtype)
        tm.assert_index_equal(built.left, expected_left)
        expected_right = Index(breaks[1:], dtype=expected_subtype)
        tm.assert_index_equal(built.right, expected_right)

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        """Passing ``dtype`` converts like casting the breaks beforehand."""
        # GH 19262: conversion via dtype parameter
        precast_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**precast_kwargs)

        raw_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        # the dtype may be given as an object or as its string form
        for dtype_spec in (iv_dtype, str(iv_dtype)):
            built = constructor(dtype=dtype_spec, **raw_kwargs)
            tm.assert_index_equal(built, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Index([0, 1, 2, 3, 4], dtype=np.int64),
            Index([0, 1, 2, 3, 4], dtype=np.uint64),
            Index([0, 1, 2, 3, 4], dtype=np.float64),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        """``closed`` given to the constructor shows up on the result dtype."""
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        iv_dtype = IntervalDtype(breaks.dtype)

        kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype_spec in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(None):
                built = constructor(dtype=dtype_spec, closed="left", **kwargs)
            assert built.dtype.closed == "left"

    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        """All-NaN breaks give a float64 subtype and NaN entries."""
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        nan_values = np.array(breaks[:-1], dtype=object)

        assert built.closed == closed
        assert built.dtype.subtype == np.float64
        tm.assert_numpy_array_equal(np.array(built), nan_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="uint64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        """Empty breaks give an empty index with the breaks' dtype (int64 for a list)."""
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        no_values = np.array([], dtype=object)
        subtype = getattr(breaks, "dtype", np.int64)

        assert built.empty
        assert built.closed == closed
        assert built.dtype.subtype == subtype
        tm.assert_numpy_array_equal(np.array(built), no_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        """String breaks are rejected with a TypeError."""
        # GH 19016
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        """Categorical breaks over integers build the same index as the integers."""
        # GH 21243/21253

        int_breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(int_breaks)

        kwargs = self.get_kwargs_from_breaks(cat_constructor(int_breaks))
        built = constructor(**kwargs)
        tm.assert_index_equal(built, expected)

    def test_generic_errors(self, constructor):
        """Invalid ``closed``, ``dtype`` or input data raise the expected errors."""
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        bad_closed = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=bad_closed):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        not_interval = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=not_interval):
            constructor(dtype="int64", **filler)

        # invalid dtype
        unknown_dtype = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=unknown_dtype):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        period_breaks = period_range("2000-01-01", periods=10)
        period_kwargs = self.get_kwargs_from_breaks(period_breaks)
        no_periods = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=no_periods):
            constructor(**period_kwargs)

        # decreasing values
        descending_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        wrong_order = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=wrong_order):
            constructor(**descending_kwargs)
|
||||
|
||||
|
||||
class TestFromArrays(ConstructorTests):
    """Tests specific to IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        """Provide ``IntervalIndex.from_arrays`` as the constructor under test."""
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert breaks into the ``left``/``right`` keyword arguments expected
        by IntervalIndex.from_arrays; ``closed`` is accepted but not used.
        """
        left_side = breaks[:-1]
        right_side = breaks[1:]
        return {"left": left_side, "right": right_side}

    def test_constructor_errors(self):
        """Categorical sides and sides of unequal length are rejected."""
        # GH 19016: categorical data
        cat = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_arrays(cat[:-1], cat[1:])

        # unequal length
        three_lefts = [0, 1, 2]
        two_rights = [2, 3]
        with pytest.raises(
            ValueError, match="left and right must have the same length"
        ):
            IntervalIndex.from_arrays(three_lefts, two_rights)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        built = IntervalIndex.from_arrays(left, right)

        float_left = Index(left, dtype=np.float64)
        float_right = Index(right, dtype=np.float64)

        tm.assert_index_equal(built.left, float_left)
        tm.assert_index_equal(built.right, float_right)
        assert built.dtype.subtype == np.float64

    @pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
    def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
        """Sides given in "s" and "ms" units match casting the left side to "ms"."""
        # GH#55714
        dt_left = date_range("2016-01-01", periods=3, unit="s")
        dt_right = date_range("2017-01-01", periods=3, unit="ms")
        dt_result = interval_cls.from_arrays(dt_left, dt_right)
        dt_expected = interval_cls.from_arrays(dt_left.as_unit("ms"), dt_right)
        tm.assert_equal(dt_result, dt_expected)

        # td64
        td_left = dt_left - dt_left[0]
        td_right = dt_right - dt_left[0]
        td_result = interval_cls.from_arrays(td_left, td_right)
        td_expected = interval_cls.from_arrays(td_left.as_unit("ms"), td_right)
        tm.assert_equal(td_result, td_expected)

        # dt64tz
        tz_left = dt_left.tz_localize("UTC")
        tz_right = dt_right.tz_localize("UTC")
        tz_result = interval_cls.from_arrays(tz_left, tz_right)
        tz_expected = interval_cls.from_arrays(tz_left.as_unit("ms"), tz_right)
        tm.assert_equal(tz_result, tz_expected)
|
||||
|
||||
|
||||
class TestFromBreaks(ConstructorTests):
    """Tests specific to IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        """Provide ``IntervalIndex.from_breaks`` as the constructor under test."""
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Wrap breaks as the ``breaks`` keyword argument expected by
        IntervalIndex.from_breaks; ``closed`` is accepted but not used.
        """
        kwargs = {"breaks": breaks}
        return kwargs

    def test_constructor_errors(self):
        """Categorical breaks are rejected with a TypeError."""
        # GH 19016: categorical data
        cat = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_breaks(cat)

    def test_length_one(self):
        """breaks of length one produce an empty IntervalIndex"""
        single = IntervalIndex.from_breaks([0])
        empty = IntervalIndex.from_breaks([])
        tm.assert_index_equal(single, empty)

    def test_left_right_dont_share_data(self):
        """The left and right arrays must not be views of one base array."""
        # GH#36310
        arr = IntervalIndex.from_breaks(np.arange(5))._data
        left_base = arr._left.base
        assert left_base is None or left_base is not arr._right.base
|
||||
|
||||
|
||||
class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        """Provide ``IntervalIndex.from_tuples`` as the constructor under test."""
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert breaks into the ``data`` keyword argument expected by
        IntervalIndex.from_tuples.

        Unsigned-integer breaks skip the calling test. Empty breaks are passed
        through unchanged; otherwise consecutive breaks are paired into
        ``(left, right)`` tuples and returned in a container that mirrors the
        input (list, categorical, or tuple-safe array). ``closed`` is accepted
        but not used.
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")

        if len(breaks) == 0:
            return {"data": breaks}

        tuples = list(zip(breaks[:-1], breaks[1:]))
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        """Non-tuple items and tuples whose length is not 2 are rejected."""
        # The patterns are escaped and anchored so they match the message
        # literally. Interpolating the repr of the whole input list into the
        # pattern would turn "[...]" into a regex character class and only
        # match by accident.
        # NOTE(review): the two ValueError patterns assume the message ends
        # with the offending tuple itself - confirm against the installed
        # pandas.

        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = r"IntervalIndex\.from_tuples received an invalid item, 2$"
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too few items
        tuples = [(0, 1), (2,), (3, 4)]
        msg = r"IntervalIndex\.from_tuples requires tuples of length 2, got \(2,\)$"
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too many items
        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        msg = (
            r"IntervalIndex\.from_tuples requires tuples of length 2, "
            r"got \(2, 3, 4\)$"
        )
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        """A ``(NA, NA)`` tuple is treated the same as a bare NA element."""
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
|
||||
|
||||
|
||||
class TestClassConstructors(ConstructorTests):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def klass(self, request):
        """Provide IntervalIndex, then ``Index`` pre-bound with dtype="interval"."""
        # We use a separate fixture here to include Index.__new__ with dtype kwarg
        return request.param

    @pytest.fixture
    def constructor(self):
        """Provide the ``IntervalIndex`` class as the constructor under test."""
        return IntervalIndex

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert breaks into the ``data`` keyword argument expected by the
        IntervalIndex/Index constructors.

        Unsigned-integer breaks skip the calling test. Empty breaks are passed
        through unchanged; otherwise consecutive breaks become ``Interval``
        objects closed on ``closed`` (an NA left break is kept as-is), returned
        in a container that mirrors the input.
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")

        if len(breaks) == 0:
            return {"data": breaks}

        intervals = [
            Interval(lo, hi, closed) if notna(lo) else lo
            for lo, hi in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            data = intervals
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            data = breaks._constructor(intervals)
        else:
            data = np.array(intervals, dtype=object)
        return {"data": data}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """

    def test_constructor_string(self):
        """Override the base test with a no-op."""
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, klass):
        """Mixed ``closed``, scalar input and non-interval items are rejected."""
        # mismatched closed within intervals with no constructor override
        mixed = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        with pytest.raises(
            ValueError, match="intervals must all be closed on the same side"
        ):
            klass(mixed)

        # scalar
        scalar_msg = (
            r"(IntervalIndex|Index)\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=scalar_msg):
            klass(5)

        # not an interval; dtype depends on 32bit/windows builds
        not_interval = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=not_interval):
            klass([0, 1])

    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        """An explicit ``closed`` takes precedence over the data's own."""
        # GH 19370
        endpoints = (
            data.to_tuples()
            if isinstance(data, IntervalIndex)
            else [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        )
        expected = IntervalIndex.from_tuples(endpoints, closed=closed)
        built = constructor(data, closed=closed)
        tm.assert_index_equal(built, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        """``Index(..., dtype=object)`` stays an Index holding the intervals."""
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        values = values_constructor([Interval(0, 1), Interval(1, 2), Interval(2, 3)])
        as_object = Index(values, dtype=object)

        assert type(as_object) is Index
        tm.assert_numpy_array_equal(as_object.values, np.array(values))

    def test_index_mixed_closed(self):
        """Intervals with differing ``closed`` give the same Index as dtype=object."""
        # GH27172
        mixed = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        inferred = Index(mixed)
        tm.assert_index_equal(inferred, Index(mixed, dtype=object))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"])
def test_interval_index_subtype(timezone, inclusive_endpoints_fixture):
    """Date strings plus a tz-aware interval dtype match tz-aware date_range sides."""
    # GH#46999
    closed = inclusive_endpoints_fixture
    dates = date_range("2022", periods=3, tz=timezone)
    dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]"

    lefts = ["2022-01-01", "2022-01-02"]
    rights = ["2022-01-02", "2022-01-03"]
    result = IntervalIndex.from_arrays(lefts, rights, closed=closed, dtype=dtype)

    expected = IntervalIndex.from_arrays(dates[:-1], dates[1:], closed=closed)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_dtype_closed_mismatch():
    """A ``closed`` keyword that disagrees with ``dtype.closed`` raises ValueError."""
    # GH#38394 closed specified in both dtype and IntervalIndex constructor

    left_closed = IntervalDtype(np.int64, "left")
    mismatch = "closed keyword does not match dtype.closed"

    # the same check applies to the index and to the underlying array class
    for interval_cls in (IntervalIndex, IntervalArray):
        with pytest.raises(ValueError, match=mismatch):
            interval_cls([], dtype=left_closed, closed="neither")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype",
    ["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))],
)
def test_ea_dtype(dtype):
    """from_tuples with an extension-array subtype keeps the requested dtype."""
    # GH#56765
    edges = [(0.0, 0.4), (0.4, 0.6)]
    target = IntervalDtype(subtype=dtype, closed="left")

    built = IntervalIndex.from_tuples(edges, closed="left", dtype=target)
    assert built.dtype == target

    # building with the default dtype and casting afterwards must agree
    cast_after = IntervalIndex.from_tuples(edges, closed="left").astype(target)
    tm.assert_index_equal(built, cast_after)
|
@ -0,0 +1,36 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
IntervalIndex,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
class TestEquals:
    """Checks for ``IntervalIndex.equals``."""

    def test_equals(self, closed):
        """``equals`` ignores the name; the other operands listed here compare unequal."""
        base = IntervalIndex.from_breaks(np.arange(5), closed=closed)

        # the index equals itself and its copy
        for same in (base, base.copy()):
            assert base.equals(same)

        # the same intervals held in other containers do not compare equal
        for repackaged in (base.astype(object), np.array(base), list(base)):
            assert not base.equals(repackaged)

        # unrelated data does not compare equal
        unrelated = ([1, 2], np.array([1, 2]), date_range("20130101", periods=2))
        for other in unrelated:
            assert not base.equals(other)

        # names are not part of the comparison
        named_foo = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="foo")
        named_bar = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="bar")
        assert base.equals(named_foo)
        assert named_foo.equals(named_bar)

        # the same breaks closed on any other side are not equal
        for other_closed in ("left", "right", "both", "neither"):
            if other_closed == closed:
                continue
            differently_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed
            )
            assert not base.equals(differently_closed)
|
@ -0,0 +1,119 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndexRendering:
    """Rendering checks for IntervalIndex and objects indexed by one.

    Covers ``repr``/``str`` of Series and DataFrame with an interval index,
    the strings produced by ``_get_values_for_csv``, and the ``repr`` of a
    timezone-aware IntervalIndex.
    """

    # TODO: this is a test for DataFrame/Series, not IntervalIndex
    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                Series,
                (
                    # index labels are left-justified and padded to the
                    # widest label, so the column padding is significant
                    "(0.0, 1.0]    a\n"
                    "NaN           b\n"
                    "(2.0, 3.0]    c\n"
                    "dtype: object"
                ),
            ),
            (DataFrame, ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c")),
        ],
    )
    def test_repr_missing(self, constructor, expected, using_infer_string, request):
        """A missing interval in the index is rendered as ``NaN``."""
        # GH 25984
        if using_infer_string and constructor is Series:
            request.applymarker(pytest.mark.xfail(reason="repr different"))
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    def test_repr_floats(self):
        """Float interval bounds are rendered without truncation."""
        # GH 32553

        markers = Series(
            ["foo", "bar"],
            index=IntervalIndex(
                [
                    Interval(left, right)
                    for left, right in zip(
                        Index([329.973, 345.137], dtype="float64"),
                        Index([345.137, 360.191], dtype="float64"),
                    )
                ]
            ),
        )
        result = str(markers)
        expected = (
            "(329.973, 345.137]    foo\n"
            "(345.137, 360.191]    bar\n"
            "dtype: object"
        )
        assert result == expected

    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    (Timestamp("20180102"), Timestamp("20180103")),
                ],
                "both",
                [
                    "[2018-01-01 00:00:00, 2018-01-02 00:00:00]",
                    "NaN",
                    "[2018-01-02 00:00:00, 2018-01-03 00:00:00]",
                ],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_get_values_for_csv(self, tuples, closed, expected_data):
        """``_get_values_for_csv`` yields one string per interval."""
        # GH 28210
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index._get_values_for_csv(na_rep="NaN")
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)

    def test_timestamp_with_timezone(self, unit):
        """The repr of a tz-aware index shows the UTC offset and dtype unit."""
        # GH 55035
        left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
        right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
        index = IntervalIndex.from_arrays(left, right)
        result = repr(index)
        expected = (
            "IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
            f"dtype='interval[datetime64[{unit}, UTC], right]')"
        )
        assert result == expected
|
@ -0,0 +1,671 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
array,
|
||||
date_range,
|
||||
interval_range,
|
||||
isna,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetItem:
    """Positional ``__getitem__`` on an IntervalIndex."""

    def test_getitem(self, closed):
        """Scalar keys give an Interval (or NA); slices give an IntervalIndex."""
        lefts = (0, 1, np.nan)
        rights = (1, 2, np.nan)
        idx = IntervalIndex.from_arrays(lefts, rights, closed=closed)

        # scalar positions
        assert idx[0] == Interval(0.0, 1.0, closed=closed)
        assert idx[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(idx[2])

        # slices, each paired with the bounds of the expected sub-index
        slice_cases = [
            (slice(0, 1), (0.0,), (1.0,)),
            (slice(0, 2), (0.0, 1), (1.0, 2.0)),
            (slice(1, 3), (1.0, np.nan), (2.0, np.nan)),
        ]
        for key, exp_left, exp_right in slice_cases:
            result = idx[key]
            expected = IntervalIndex.from_arrays(exp_left, exp_right, closed=closed)
            tm.assert_index_equal(result, expected)

    def test_getitem_2d_deprecated(self):
        """Multi-dimensional and boolean-scalar keys raise ValueError."""
        # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
        msg = "multi-dimensional indexing not allowed"
        idx = IntervalIndex.from_breaks(range(11), closed="right")

        # first key is ``idx[:, None]``; the boolean scalars are GH#44051
        for key in ((slice(None), None), True, False):
            with pytest.raises(ValueError, match=msg):
                idx[key]
|
||||
|
||||
|
||||
class TestWhere:
    """``IntervalIndex.where`` with list-like boolean conditions."""

    def test_where(self, listlike_box):
        """An all-True mask is a no-op; a False entry becomes missing."""
        idx = IntervalIndex.from_breaks(range(11), closed="right")

        # every position kept -> the index comes back unchanged
        keep_all = [True] * len(idx)
        result = idx.where(listlike_box(keep_all))
        tm.assert_index_equal(result, idx)

        # first position masked -> replaced by NaN
        tail = idx[1:]
        drop_first = [False] + [True] * len(tail)
        expected = IntervalIndex([np.nan] + tail.tolist())
        result = idx.where(listlike_box(drop_first))
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestTake:
    """``IntervalIndex.take`` with positional indexers."""

    def test_take(self, closed):
        """Taking every position round-trips; repeats are honoured."""
        index = IntervalIndex.from_breaks(range(11), closed=closed)

        # all ten positions, in order
        everything = index.take(range(10))
        tm.assert_index_equal(everything, index)

        # position 0 requested twice
        positions = [0, 0, 1]
        repeated = index.take(positions)
        expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
        tm.assert_index_equal(repeated, expected)
|
||||
|
||||
|
||||
class TestGetLoc:
    """``IntervalIndex.get_loc`` with interval, scalar and missing keys."""

    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        """Interval keys match only exactly (same bounds and closed side)."""
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # bounds present in ``idx`` mapped to their position
        exact_positions = {(0, 1): 0, (2, 3): 1}

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            interval = Interval(*bound, closed=side)
            position = exact_positions.get(tuple(bound)) if closed == side else None
            # compare against None: position 0 is a valid (falsy) hit
            if position is not None:
                assert idx.get_loc(interval) == position
            else:
                msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(interval)

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        """Scalar keys return the position of the containing interval."""
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        answers = correct[closed]
        if scalar in answers:
            assert idx.get_loc(scalar) == answers[scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        """Scalar lookup on a single-interval index."""
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
        """Interval lookup on a single-interval index."""
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                KeyError,
                match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        """Datetime-like keys on a non-overlapping index give an int."""
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101", periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        """A scalar inside two overlapping intervals gives a slice."""
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)

        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        """Lookup of the first element works on a decreasing index."""
        # GH 25860
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        """List and tuple keys raise InvalidIndexError."""
        # GH 31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        """Missing-value keys passed to ``get_loc``.

        Despite the name, this test exercises ``get_loc``: ``None``,
        ``np.nan`` and ``NA`` yield a boolean mask, while NaT-like keys
        raise KeyError.
        """
        # GH#41831
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])

        expected = np.array([True, False, True])
        for key in [None, np.nan, NA]:
            assert key in index
            result = index.get_loc(key)
            tm.assert_numpy_array_equal(result, expected)

        for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)
|
||||
|
||||
|
||||
class TestGetIndexer:
    """``get_indexer`` / ``get_indexer_non_unique`` on an IntervalIndex."""

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        """Interval targets match only identical intervals."""
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (5, 7)], closed="right")

        positions = index.get_indexer(query)
        wanted = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(positions, wanted)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        """Numeric targets map to the interval that contains them."""
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="right")

        positions = index.get_indexer(query)
        wanted = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(positions, wanted)

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        """Every scalar target falls into the only interval."""
        # GH 17284
        single = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        positions = single.get_indexer(item)
        wanted = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(positions, wanted)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        """Repeated identical interval targets all map to position 0."""
        # GH 17284
        single = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        targets = [Interval(0, 5, closed)] * size
        positions = single.get_indexer(targets)
        wanted = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(positions, wanted)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        """A categorical target gives the same indexer as the plain one."""
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        as_categorical = CategoricalIndex(target, ordered=ordered)

        from_categorical = index.get_indexer(as_categorical)
        from_plain = index.get_indexer(target)
        tm.assert_numpy_array_equal(from_categorical, from_plain)

    def test_get_indexer_categorical_with_nans(self):
        """NaN entries in both the index and a categorical target."""
        # GH#41934 nans in both index and in target
        base = IntervalIndex.from_breaks(range(5))
        with_nan = base.append(IntervalIndex([np.nan]))
        cat = CategoricalIndex(with_nan)

        positions = with_nan.get_indexer(cat)
        wanted = np.arange(5, dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

        # not-all-matches
        positions = with_nan[1:].get_indexer(cat[::-1])
        wanted = np.array([3, 2, 1, 0, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

        # non-unique target, non-unique nans
        doubled = cat.append(cat)
        positions = with_nan.get_indexer(doubled)
        wanted = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

    def test_get_indexer_datetime(self):
        """Datetime, string and raw-integer targets on a datetime index."""
        intervals = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4))
        # TODO: with mismatched resolution get_indexer currently raises;
        # this should probably coerce?
        target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]")
        hit = np.array([0], dtype=np.intp)

        positions = intervals.get_indexer(target)
        tm.assert_numpy_array_equal(positions, hit)

        positions = intervals.get_indexer(target.astype(str))
        tm.assert_numpy_array_equal(positions, hit)

        # https://github.com/pandas-dev/pandas/issues/47772
        positions = intervals.get_indexer(target.asi8)
        miss = np.array([-1], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, miss)

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        """Overlapping intervals make ``get_indexer`` raise."""
        # IntervalIndex needs non-overlapping for uniqueness when querying
        overlapping = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=msg):
            overlapping.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        """Overlapping index: every containing interval plus the misses."""
        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed="left")
        exp_indexer, exp_missing = expected

        indexer, missing = index.get_indexer_non_unique(query)

        tm.assert_numpy_array_equal(indexer, np.array(exp_indexer, dtype="intp"))
        tm.assert_numpy_array_equal(missing, np.array(exp_missing, dtype="intp"))

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        """Interval targets against a non-monotonic index."""
        # GH 16410
        shuffled = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        other = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])

        positions = shuffled.get_indexer(other)
        wanted = np.array([2, 0, -1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

        positions = shuffled.get_indexer(shuffled[1:])
        wanted = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

    def test_get_indexer_with_nans(self):
        """A NaN target matches every NaN slot of a non-unique index."""
        # GH#41831
        all_nan = IntervalIndex([np.nan, np.nan])
        one_nan = IntervalIndex([np.nan])

        assert not all_nan._index_as_unique

        positions = all_nan.get_indexer_for(one_nan)
        wanted = np.array([0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

    def test_get_index_non_unique_non_monotonic(self):
        """Duplicated, non-monotonic intervals are all located."""
        # GH#44084 (root cause)
        tuples = [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
        index = IntervalIndex.from_tuples(tuples)

        positions, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
        wanted = np.array([1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

    def test_get_indexer_multiindex_with_intervals(self):
        """Interval level values of a MultiIndex product can be located."""
        # GH#44084 (MultiIndex case as reported)
        interval_level = IntervalIndex.from_tuples(
            [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
        )
        foo_level = Index([1, 2, 3], name="foo")

        product = MultiIndex.from_product([foo_level, interval_level])
        level_values = product.get_level_values("interval")

        positions = level_values.get_indexer_for([Interval(0.0, 1.0)])
        wanted = np.array([1, 4, 7], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

    @pytest.mark.parametrize("box", [IntervalIndex, array, list])
    def test_get_indexer_interval_index(self, box):
        """Interval targets never match entries of a PeriodIndex."""
        # GH#30178
        periods = period_range("2022-07-01", freq="D", periods=3)
        intervals = interval_range(Timestamp("2022-07-01"), freq="3D", periods=3)
        target = box(intervals)

        positions = periods.get_indexer(target)
        wanted = np.array([-1, -1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(positions, wanted)

    def test_get_indexer_read_only(self):
        """A read-only target array is accepted by both indexer methods."""
        idx = interval_range(start=0, end=5)
        target = np.array([1, 2])
        target.flags.writeable = False
        wanted = np.array([0, 1])

        unique_positions = idx.get_indexer(target)
        tm.assert_numpy_array_equal(unique_positions, wanted, check_dtype=False)

        non_unique_positions = idx.get_indexer_non_unique(target)[0]
        tm.assert_numpy_array_equal(non_unique_positions, wanted, check_dtype=False)
|
||||
|
||||
|
||||
class TestSliceLocs:
    """``IntervalIndex.slice_locs`` with interval and numeric bounds."""

    def test_slice_locs_with_interval(self):
        """Interval bounds on sorted, reversed and duplicated indexes."""
        low, high = Interval(0, 2), Interval(2, 4)
        # the same five queries are issued, in this order, against each index
        queries = (
            {"start": low, "end": high},
            {"start": low},
            {"end": high},
            {"end": low},
            {"start": high, "end": low},
        )

        def assert_locs(tuples, expected_locs):
            index = IntervalIndex.from_tuples(tuples)
            for kwargs, expected in zip(queries, expected_locs):
                assert index.slice_locs(**kwargs) == expected

        # increasing monotonically
        assert_locs(
            [(0, 2), (1, 3), (2, 4)],
            [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)],
        )

        # decreasing monotonically
        assert_locs(
            [(2, 4), (1, 3), (0, 2)],
            [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)],
        )

        # sorted duplicates
        assert_locs(
            [(0, 2), (0, 2), (2, 4)],
            [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)],
        )

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
        left_msg = re.escape(
            '"Cannot get left slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )
        right_msg = re.escape(
            '"Cannot get right slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )

        with pytest.raises(KeyError, match=left_msg):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(KeyError, match=left_msg):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(KeyError, match=right_msg):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(KeyError, match=right_msg):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates
        assert_locs(
            [(0, 2), (0, 2), (2, 4), (1, 3)],
            [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)],
        )

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        """Numeric bounds work on monotonic, non-overlapping indexes."""
        bounds = [(0, 1), (0, 2), (0, 3), (3, 1), (3, 4), (0, 4)]

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        increasing_locs = [(0, 1), (0, 2), (0, 2), (2, 1), (2, 3), (0, 3)]
        for (start, stop), expected in zip(bounds, increasing_locs):
            assert index.slice_locs(start, stop) == expected

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        decreasing_locs = [(3, 3), (3, 2), (3, 1), (1, 3), (1, 1), (3, 1)]
        for (start, stop), expected in zip(bounds, decreasing_locs):
            assert index.slice_locs(start, stop) == expected

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        """Numeric bounds raise KeyError on overlapping indexes."""
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start, stop)
|
||||
|
||||
|
||||
class TestPutmask:
    """``IntervalIndex.putmask`` with datetime-like subtypes."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        """Masked positions of a datetime64 index take the given interval."""
        # GH#37968
        breaks = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(breaks)
        filler = idx[-1]

        # overwrite the first three positions only
        mask = np.zeros(idx.shape, dtype=bool)
        mask[:3] = True

        result = idx.putmask(mask, filler)
        expected = IntervalIndex([filler] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)

    def test_putmask_td64(self):
        """Masked positions of a timedelta64 index take the given interval."""
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        breaks = dti - dti[0]
        idx = IntervalIndex.from_breaks(breaks)
        filler = idx[-1]

        # overwrite the first three positions only
        mask = np.zeros(idx.shape, dtype=bool)
        mask[:3] = True

        result = idx.putmask(mask, filler)
        expected = IntervalIndex([filler] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestContains:
    """Membership via ``in`` on an IntervalIndex."""

    # .__contains__, not .contains

    def test_contains_dunder(self):
        """Only an identical interval counts as a member."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        for scalar in (0, 1, 2):
            assert scalar not in index

        assert Interval(0, 1, closed="right") in index

        # different bounds, or the same bounds with another closed side
        non_members = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for candidate in non_members:
            assert candidate not in index
|
@ -0,0 +1,918 @@
|
||||
from itertools import permutations
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
interval_range,
|
||||
isna,
|
||||
notna,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Fixture yielding each candidate index name: ``None`` and ``"foo"``."""
    index_name = request.param
    return index_name
|
||||
|
||||
|
||||
class TestIntervalIndex:
|
||||
index = IntervalIndex.from_arrays([0, 1], [1, 2])
|
||||
|
||||
def create_index(self, closed="right"):
    """Return a ten-interval index with integer breaks 0 through 10."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|
||||
|
||||
def create_index_with_nan(self, closed="right"):
    """Return a ten-interval index whose second entry is missing."""
    # position 1 is the only slot replaced by NaN
    present = [pos != 1 for pos in range(10)]
    lefts = np.where(present, np.arange(10), np.nan)
    rights = np.where(present, np.arange(1, 11), np.nan)
    return IntervalIndex.from_arrays(lefts, rights, closed=closed)
|
||||
|
||||
def test_properties(self, closed):
    """Size, bounds, midpoints, closed side and array form of the index."""
    full = self.create_index(closed=closed)
    assert len(full) == 10
    assert full.size == 10
    assert full.shape == (10,)

    lefts = Index(np.arange(10, dtype=np.int64))
    rights = Index(np.arange(1, 11, dtype=np.int64))
    mids = Index(np.arange(0.5, 10.5, dtype=np.float64))
    tm.assert_index_equal(full.left, lefts)
    tm.assert_index_equal(full.right, rights)
    tm.assert_index_equal(full.mid, mids)

    assert full.closed == closed

    as_objects = np.array(
        [Interval(start, start + 1, closed) for start in range(10)],
        dtype=object,
    )
    tm.assert_numpy_array_equal(np.asarray(full), as_objects)

    # with nans
    holed = self.create_index_with_nan(closed=closed)
    assert len(holed) == 10
    assert holed.size == 10
    assert holed.shape == (10,)

    expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
    expected_right = expected_left + 1
    expected_mid = expected_left + 0.5
    tm.assert_index_equal(holed.left, expected_left)
    tm.assert_index_equal(holed.right, expected_right)
    tm.assert_index_equal(holed.mid, expected_mid)

    assert holed.closed == closed

    # a missing left bound marks a missing interval
    intervals = []
    for left, right in zip(expected_left, expected_right):
        if notna(left):
            intervals.append(Interval(left, right, closed))
        else:
            intervals.append(np.nan)
    as_objects = np.array(intervals, dtype=object)
    tm.assert_numpy_array_equal(np.asarray(holed), as_objects)
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [
        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
        date_range("2017-01-01", "2017-01-04"),
        pytest.param(
            date_range("2017-01-01", "2017-01-04", unit="s"),
            marks=pytest.mark.xfail(reason="mismatched result unit"),
        ),
        pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
    ],
)
def test_length(self, closed, breaks):
    """``IntervalIndex.length`` agrees with each element's own length."""
    # GH 18789
    index = IntervalIndex.from_breaks(breaks, closed=closed)
    lengths = index.length
    per_element = Index(iv.length for iv in index)
    tm.assert_index_equal(lengths, per_element)

    # with NA
    with_na = index.insert(1, np.nan)
    lengths = with_na.length
    # the missing entry is passed through unchanged
    per_element = Index(iv.length if notna(iv) else iv for iv in with_na)
    tm.assert_index_equal(lengths, per_element)
|
||||
|
||||
def test_with_nans(self, closed):
    """``hasnans``, ``isna`` and ``notna`` with and without a missing entry."""
    complete = self.create_index(closed=closed)
    assert complete.hasnans is False

    size = len(complete)
    tm.assert_numpy_array_equal(complete.isna(), np.zeros(size, dtype=bool))
    tm.assert_numpy_array_equal(complete.notna(), np.ones(size, dtype=bool))

    holed = self.create_index_with_nan(closed=closed)
    assert holed.hasnans is True

    # only the second entry is missing
    rest = len(holed) - 2
    missing = np.array([False, True] + [False] * rest)
    tm.assert_numpy_array_equal(holed.isna(), missing)

    present = np.array([True, False] + [True] * rest)
    tm.assert_numpy_array_equal(holed.notna(), present)
|
||||
|
||||
def test_copy(self, closed):
    """Copies compare equal; a deep copy gets a distinct ``left``."""
    original = self.create_index(closed=closed)

    shallow = original.copy()
    assert shallow.equals(original)

    deep = original.copy(deep=True)
    assert deep.equals(original)
    assert deep.left is not original.left
|
||||
|
||||
def test_ensure_copied_data(self, closed):
    """Exercise the ``copy`` flag of the ``IntervalIndex`` constructor."""
    source = self.create_index(closed=closed)

    # not copying: both endpoint arrays must be the same memory
    shared = IntervalIndex(source, copy=False)
    for side in ("left", "right"):
        tm.assert_numpy_array_equal(
            getattr(source, side).values,
            getattr(shared, side).values,
            check_same="same",
        )

    # going through ``np.array`` makes a copy by definition
    copied = IntervalIndex(np.array(source), copy=False)
    for side in ("left", "right"):
        tm.assert_numpy_array_equal(
            getattr(source, side).values,
            getattr(copied, side).values,
            check_same="copy",
        )
|
||||
|
||||
def test_delete(self, closed):
    """Deleting position 0 must leave the intervals built from breaks 1..10."""
    remaining_breaks = np.arange(1, 11, dtype=np.int64)
    expected = IntervalIndex.from_breaks(remaining_breaks, closed=closed)

    trimmed = self.create_index(closed=closed).delete(0)

    tm.assert_index_equal(trimmed, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        interval_range(0, periods=10, closed="neither"),
        interval_range(1.7, periods=8, freq=2.5, closed="both"),
        interval_range(Timestamp("20170101"), periods=12, closed="left"),
        interval_range(Timedelta("1 day"), periods=6, closed="right"),
    ],
)
def test_insert(self, data):
    """
    Check ``IntervalIndex.insert`` for valid, invalid and missing items.

    Valid intervals are inserted at the start, end and middle. Items the
    underlying array rejects (a string, an interval with a different
    ``closed``, ``pd.NaT`` for non-datetimelike data) are expected to cast
    the index to object, while the array-level ``insert`` raises.
    """
    item = data[0]
    idx_item = IntervalIndex([item])

    # start
    expected = idx_item.append(data)
    result = data.insert(0, item)
    tm.assert_index_equal(result, expected)

    # end
    expected = data.append(idx_item)
    result = data.insert(len(data), item)
    tm.assert_index_equal(result, expected)

    # mid
    expected = data[:3].append(idx_item).append(data[3:])
    result = data.insert(3, item)
    tm.assert_index_equal(result, expected)

    # invalid type
    res = data.insert(1, "foo")
    expected = data.astype(object).insert(1, "foo")
    tm.assert_index_equal(res, expected)

    msg = "can only insert Interval objects and NA into an IntervalArray"
    with pytest.raises(TypeError, match=msg):
        data._data.insert(1, "foo")

    # invalid closed: the message is rebuilt for every mismatching ``closed``
    for closed in {"left", "right", "both", "neither"} - {item.closed}:
        msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
        bad_item = Interval(item.left, item.right, closed=closed)
        res = data.insert(1, bad_item)
        expected = data.astype(object).insert(1, bad_item)
        tm.assert_index_equal(res, expected)
        with pytest.raises(ValueError, match=msg):
            data._data.insert(1, bad_item)

    # GH 18295 (test missing)
    na_idx = IntervalIndex([np.nan], closed=data.closed)
    for na in [np.nan, None, pd.NA]:
        expected = data[:1].append(na_idx).append(data[1:])
        result = data.insert(1, na)
        tm.assert_index_equal(result, expected)

    if data.left.dtype.kind not in ["m", "M"]:
        # trying to insert pd.NaT into a numeric-dtyped Index should cast
        expected = data.astype(object).insert(1, pd.NaT)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, pd.NaT)

    result = data.insert(1, pd.NaT)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_is_unique_interval(self, closed):
|
||||
"""
|
||||
Interval specific tests for is_unique in addition to base class tests
|
||||
"""
|
||||
# unique overlapping - distinct endpoints
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# unique overlapping - shared endpoints
|
||||
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# unique nested
|
||||
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# unique NaN
|
||||
idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# non-unique NaN
|
||||
idx = IntervalIndex.from_tuples(
|
||||
[(np.nan, np.nan), (np.nan, np.nan)], closed=closed
|
||||
)
|
||||
assert idx.is_unique is False
|
||||
|
||||
def test_monotonic(self, closed):
|
||||
# increasing non-overlapping
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# decreasing non-overlapping
|
||||
idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
# unordered non-overlapping
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# increasing overlapping
|
||||
idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# decreasing overlapping
|
||||
idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
# unordered overlapping
|
||||
idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# increasing overlapping shared endpoints
|
||||
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# decreasing overlapping shared endpoints
|
||||
idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
# stationary
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
|
||||
assert idx.is_monotonic_increasing is True
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# empty
|
||||
idx = IntervalIndex([], closed=closed)
|
||||
assert idx.is_monotonic_increasing is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
def test_is_monotonic_with_nans(self):
|
||||
# GH#41831
|
||||
index = IntervalIndex([np.nan, np.nan])
|
||||
|
||||
assert not index.is_monotonic_increasing
|
||||
assert not index._is_strictly_monotonic_increasing
|
||||
assert not index.is_monotonic_increasing
|
||||
assert not index._is_strictly_monotonic_decreasing
|
||||
assert not index.is_monotonic_decreasing
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_maybe_convert_i8(self, breaks):
    """GH 20636: ``_maybe_convert_i8`` maps datetime-like keys to i8 values."""
    index = IntervalIndex.from_breaks(breaks)
    first, second = breaks[0], breaks[1]

    # intervalindex
    converted = index._maybe_convert_i8(index)
    tm.assert_index_equal(converted, IntervalIndex.from_breaks(breaks.asi8))

    # interval
    converted = index._maybe_convert_i8(Interval(first, second))
    assert converted == Interval(first._value, second._value)

    # datetimelike index
    converted = index._maybe_convert_i8(breaks)
    tm.assert_index_equal(converted, Index(breaks.asi8))

    # datetimelike scalar
    converted = index._maybe_convert_i8(first)
    assert converted == first._value

    # list-like of datetimelike scalars
    converted = index._maybe_convert_i8(list(breaks))
    tm.assert_index_equal(converted, Index(breaks.asi8))
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
)
def test_maybe_convert_i8_nat(self, breaks):
    """GH 20636: keys holding NaT are expected to convert to float64 with NaN."""
    index = IntervalIndex.from_breaks(breaks)

    # key made only of NaT
    all_nat = breaks._constructor([pd.NaT] * 3).as_unit("ns")
    all_nan = Index([np.nan] * 3, dtype=np.float64)
    converted = index._maybe_convert_i8(all_nat)
    tm.assert_index_equal(converted, all_nan)

    # same key with one valid leading element
    mixed_key = all_nat.insert(0, breaks[0])
    mixed_expected = all_nan.insert(0, float(breaks[0]._value))
    converted = index._maybe_convert_i8(mixed_key)
    tm.assert_index_equal(converted, mixed_expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "make_key",
    [lambda breaks: breaks, list],
    ids=["lambda", "list"],
)
def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
    """GH 20636: numeric keys come back as a 64-bit Index of the same kind."""
    breaks = np.arange(5, dtype=any_real_numpy_dtype)
    index = IntervalIndex.from_breaks(breaks)
    key = make_key(breaks)

    result = index._maybe_convert_i8(key)

    # the expected dtype is the 64-bit dtype of the same kind as ``breaks``
    dtype_by_kind = {"i": np.int64, "u": np.uint64, "f": np.float64}
    expected = Index(key, dtype=dtype_by_kind[breaks.dtype.kind])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks[0],
    ],
    ids=["IntervalIndex", "Interval", "scalar"],
)
def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
    """GH 20636: these numeric keys must be returned as the very same object."""
    numeric_breaks = np.arange(5, dtype=any_real_numpy_dtype)
    index = IntervalIndex.from_breaks(numeric_breaks)
    key = make_key(numeric_breaks)

    # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex
    assert index._maybe_convert_i8(key) is key
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks1, breaks2",
    permutations(
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        2,
    ),
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks,
        lambda breaks: breaks[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
    """GH 20636: a key built from a different datetime-like dtype must raise."""
    index = IntervalIndex.from_breaks(breaks1)
    mismatched_key = make_key(breaks2)

    # the dtype reprs are interpolated into the message, so escape it for regex
    pattern = re.escape(
        f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
        f"values of dtype {breaks2.dtype}"
    )
    with pytest.raises(ValueError, match=pattern):
        index._maybe_convert_i8(mismatched_key)
|
||||
|
||||
def test_contains_method(self):
    """``contains`` reports, per interval, whether a scalar lies inside it."""
    # can select values that are IN the range of a value
    intervals = IntervalIndex.from_arrays([0, 1], [1, 2])

    in_neither = np.array([False, False], dtype="bool")
    for point in (0, 3):
        tm.assert_numpy_array_equal(intervals.contains(point), in_neither)

    in_first_only = np.array([True, False], dtype="bool")
    for point in (0.5, 1):
        tm.assert_numpy_array_equal(intervals.contains(point), in_first_only)

    # __contains__ not implemented for "interval in interval", follow
    # that for the contains method for now
    with pytest.raises(
        NotImplementedError, match="contains not implemented for two"
    ):
        intervals.contains(Interval(0, 1))
|
||||
|
||||
def test_dropna(self, closed):
|
||||
expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
|
||||
|
||||
ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
|
||||
result = ii.dropna()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
|
||||
result = ii.dropna()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_non_contiguous(self, closed):
|
||||
index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
|
||||
target = [0.5, 1.5, 2.5]
|
||||
actual = index.get_indexer(target)
|
||||
expected = np.array([0, -1, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
assert 1.5 not in index
|
||||
|
||||
def test_isin(self, closed):
    """``isin`` accepts IntervalIndex and list arguments and respects ``closed``."""
    index = self.create_index(closed=closed)

    # membership in the first interval only
    first_only = np.array([True] + [False] * (len(index) - 1))
    tm.assert_numpy_array_equal(index.isin(index[:1]), first_only)
    tm.assert_numpy_array_equal(index.isin([index[0]]), first_only)

    # membership in intervals built from breaks -2..9
    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    all_but_last = np.array([True] * (len(index) - 1) + [False])
    tm.assert_numpy_array_equal(index.isin(other), all_but_last)
    tm.assert_numpy_array_equal(index.isin(other.tolist()), all_but_last)

    # same intervals: everything matches only when ``closed`` agrees
    for other_closed in ["right", "left", "both", "neither"]:
        other = self.create_index(closed=other_closed)
        expected = np.repeat(closed == other_closed, len(index))

        tm.assert_numpy_array_equal(index.isin(other), expected)
        tm.assert_numpy_array_equal(index.isin(other.tolist()), expected)
|
||||
|
||||
def test_comparison(self):
    """Comparison operators between ``self.index`` and intervals, arrays, scalars."""
    check = tm.assert_numpy_array_equal
    both_true = np.array([True, True])
    both_false = np.array([False, False])
    second_only = np.array([False, True])

    # against a scalar Interval
    check(Interval(0, 1) < self.index, second_only)
    check(Interval(0.5, 1.5) < self.index, second_only)
    check(self.index > Interval(0.5, 1.5), second_only)

    # against itself
    check(self.index == self.index, both_true)
    check(self.index <= self.index, both_true)
    check(self.index >= self.index, both_true)
    check(self.index < self.index, both_false)
    check(self.index > self.index, both_false)

    # against an index with a different ``closed``
    check(self.index == IntervalIndex.from_breaks([0, 1, 2], "left"), both_false)

    # against its own ``.values``
    check(self.index == self.index.values, np.array([True, True]))
    check(self.index.values == self.index, np.array([True, True]))
    check(self.index <= self.index.values, np.array([True, True]))
    check(self.index != self.index.values, np.array([False, False]))
    check(self.index > self.index.values, np.array([False, False]))
    check(self.index.values > self.index, np.array([False, False]))

    # invalid comparisons
    check(self.index == 0, np.array([False, False]))
    check(self.index == self.index.left, np.array([False, False]))

    msg = "|".join(
        [
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        self.index > 0
    with pytest.raises(TypeError, match=msg):
        self.index <= 0
    with pytest.raises(TypeError, match=msg):
        self.index > np.arange(2)

    msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=msg):
        self.index > np.arange(3)
|
||||
|
||||
def test_missing_values(self, closed):
    """Missing intervals must be missing on both sides, and ``isna`` flags them."""
    from_objects = Index(
        [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
    )
    from_arrays = IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2], closed=closed
    )
    assert from_objects.equals(from_arrays)

    # NaN on the left side only is rejected
    msg = (
        "missing values must be missing in the same location both left "
        "and right sides"
    )
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays(
            [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
        )

    tm.assert_numpy_array_equal(
        isna(from_objects), np.array([True, False, False])
    )
|
||||
|
||||
def test_sort_values(self, closed):
    """``sort_values`` in both directions, including placement of a NaN entry."""
    index = self.create_index(closed=closed)

    ascending = index.sort_values()
    tm.assert_index_equal(ascending, index)

    descending = index.sort_values(ascending=False)
    tm.assert_index_equal(descending, index[::-1])

    # with nan
    low, high = Interval(0, 1), Interval(1, 2)
    index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

    ascending = index.sort_values()
    tm.assert_index_equal(ascending, IntervalIndex([low, high, np.nan]))

    descending = index.sort_values(ascending=False, na_position="first")
    tm.assert_index_equal(descending, IntervalIndex([np.nan, high, low]))
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_datetime(self, tz):
    """``mid``, membership, ``contains`` and ``get_indexer`` on datetime breaks."""

    def stamp(text):
        # every timestamp in this test carries the parametrized timezone
        return Timestamp(text, tz=tz)

    dates = date_range(start=stamp("2000-01-01"), periods=10)
    index = IntervalIndex.from_breaks(dates)

    # test mid
    expected = date_range(start=stamp("2000-01-01T12:00"), periods=9)
    tm.assert_index_equal(index.mid, expected)

    # __contains__ doesn't check individual points
    for point in ("2000-01-01", "2000-01-01T12", "2000-01-02"):
        assert stamp(point) not in index
    iv_true = Interval(stamp("2000-01-02"), stamp("2000-01-03"))
    iv_false = Interval(stamp("1999-12-31"), stamp("2000-01-01"))
    assert iv_true in index
    assert iv_false not in index

    # .contains does check individual points
    assert not index.contains(stamp("2000-01-01")).any()
    assert index.contains(stamp("2000-01-01T12")).any()
    assert index.contains(stamp("2000-01-02")).any()

    # test get_indexer
    target = date_range(start=stamp("1999-12-31T12:00"), periods=7, freq="12h")
    actual = index.get_indexer(target)
    expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
    tm.assert_numpy_array_equal(actual, expected)

    target = date_range(start=stamp("2000-01-08T18:00"), periods=7, freq="6h")
    actual = index.get_indexer(target)
    expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
    tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
def test_append(self, closed):
|
||||
index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
|
||||
index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
|
||||
|
||||
result = index1.append(index2)
|
||||
expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = index1.append([index1, index2])
|
||||
expected = IntervalIndex.from_arrays(
|
||||
[0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
for other_closed in {"left", "right", "both", "neither"} - {closed}:
|
||||
index_other_closed = IntervalIndex.from_arrays(
|
||||
[0, 1], [1, 2], closed=other_closed
|
||||
)
|
||||
result = index1.append(index_other_closed)
|
||||
expected = index1.astype(object).append(index_other_closed.astype(object))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_is_non_overlapping_monotonic(self, closed):
|
||||
# Should be True in all cases
|
||||
tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
# Should be False in all cases (overlapping)
|
||||
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
# Should be False in all cases (non-monotonic)
|
||||
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
# Should be False for closed='both', otherwise True (GH16560)
|
||||
if closed == "both":
|
||||
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
else:
|
||||
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
@pytest.mark.parametrize(
    "start, shift, na_value",
    [
        (0, 1, np.nan),
        (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
        (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
    ],
)
def test_is_overlapping(self, start, shift, na_value, closed):
    """GH 23309: interface checks for ``IntervalIndex.is_overlapping``."""
    # see test_interval_tree.py for extensive tests; interface tests here
    na_pair = (na_value, na_value)

    def overlapping(tuples, with_na=False):
        # optionally surround the intervals with a missing one on each end
        if with_na:
            tuples = [na_pair] + tuples + [na_pair]
        return IntervalIndex.from_tuples(tuples, closed=closed).is_overlapping

    # non-overlapping, then non-overlapping with NA
    tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)]
    assert overlapping(tuples) is False
    assert overlapping(tuples, with_na=True) is False

    # overlapping, then overlapping with NA
    tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)]
    assert overlapping(tuples) is True
    assert overlapping(tuples, with_na=True) is True

    # common endpoints, then common endpoints with NA
    tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)]
    expected = closed == "both"
    assert overlapping(tuples) is expected
    assert overlapping(tuples, with_na=True) is expected

    # intervals with duplicate left values
    a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
    b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
    index = IntervalIndex.from_arrays(a, b, closed="right")
    assert index.is_overlapping is False
|
||||
|
||||
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))),
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        ),
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        ),
    ],
)
def test_to_tuples(self, tuples):
    """GH 18756: ``to_tuples`` gives back the tuples the index was built from."""
    round_tripped = IntervalIndex.from_tuples(tuples).to_tuples()
    tm.assert_index_equal(round_tripped, Index(com.asarray_tuplesafe(tuples)))
|
||||
|
||||
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))) + [np.nan],
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        )
        + [np.nan],
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        )
        + [np.nan],
    ],
)
@pytest.mark.parametrize("na_tuple", [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    """GH 18756: ``na_tuple`` selects a tuple of NAs or a scalar NA for missing."""
    converted = IntervalIndex.from_tuples(tuples).to_tuples(na_tuple=na_tuple)

    # check the non-NA portion
    tm.assert_index_equal(
        converted[:-1], Index(com.asarray_tuplesafe(tuples[:-1]))
    )

    # check the NA portion
    missing = converted[-1]
    if not na_tuple:
        assert isna(missing)
        return
    assert isinstance(missing, tuple)
    assert len(missing) == 2
    assert all(isna(x) for x in missing)
|
||||
|
||||
def test_nbytes(self):
|
||||
# GH 19209
|
||||
left = np.arange(0, 4, dtype="i8")
|
||||
right = np.arange(1, 5, dtype="i8")
|
||||
|
||||
result = IntervalIndex.from_arrays(left, right).nbytes
|
||||
expected = 64 # 4 * 8 * 2
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
    """GH 21670: ``set_closed`` changes only the closed side of the index."""
    original = interval_range(0, 5, closed=closed, name=name)
    expected = interval_range(0, 5, closed=new_closed, name=name)
    tm.assert_index_equal(original.set_closed(new_closed), expected)
|
||||
|
||||
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
def test_set_closed_errors(self, bad_closed):
    """GH 21670: an unrecognised ``closed`` value raises ``ValueError``."""
    expected_message = f"invalid option for 'closed': {bad_closed}"
    with pytest.raises(ValueError, match=expected_message):
        interval_range(0, 5).set_closed(bad_closed)
|
||||
|
||||
def test_is_all_dates(self):
|
||||
# GH 23576
|
||||
year_2017 = Interval(
|
||||
Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
|
||||
)
|
||||
year_2017_index = IntervalIndex([year_2017])
|
||||
assert not year_2017_index._is_all_dates
|
||||
|
||||
|
||||
def test_dir():
    """GH#27571: ``dir`` on an IntervalIndex works and does not list ``str``."""
    # dir(interval_index) should not raise
    listed = dir(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert "str" not in listed
|
||||
|
||||
|
||||
def test_searchsorted_different_argument_classes(listlike_box):
|
||||
# https://github.com/pandas-dev/pandas/issues/32762
|
||||
values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
|
||||
result = values.searchsorted(listlike_box(values))
|
||||
expected = np.array([0, 1], dtype=result.dtype)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = values._data.searchsorted(listlike_box(values))
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """Searching an IntervalIndex for non-interval values raises ``TypeError``."""
    intervals = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    pattern = (
        "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    )
    with pytest.raises(TypeError, match=pattern):
        intervals.searchsorted(arg)
|
@ -0,0 +1,369 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
from pandas import (
|
||||
DateOffset,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
interval_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Day
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Fixture yielding each configured value in turn: ``None``, then ``"foo"``."""
    current = request.param
    return current
|
||||
|
||||
|
||||
class TestIntervalRange:
|
||||
@pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
def test_constructor_numeric(self, closed, name, freq, periods):
    """Every three-argument spelling of ``interval_range`` gives the same index."""
    start, end = 0, 100
    breaks = np.arange(101, step=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    spellings = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for kwargs in spellings:
        result = interval_range(name=name, closed=closed, **kwargs)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
@pytest.mark.parametrize(
    "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)]
)
def test_constructor_timestamp(self, closed, name, freq, periods, tz):
    """``interval_range`` over timestamps agrees with ``date_range`` breaks."""
    start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
    breaks = date_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    spellings = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
    ]
    for kwargs in spellings:
        result = interval_range(name=name, closed=closed, **kwargs)
        tm.assert_index_equal(result, expected)

    # GH 20976: linspace behavior defined from start/end/periods
    # only checked for a frequency multiple other than 1 and tz-naive input
    if breaks.freq.n != 1 and tz is None:
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)]
|
||||
)
|
||||
def test_constructor_timedelta(self, closed, name, freq, periods):
|
||||
start, end = Timedelta("0 days"), Timedelta("100 days")
|
||||
breaks = timedelta_range(start=start, end=end, freq=freq)
|
||||
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
|
||||
|
||||
# defined from start/end/freq
|
||||
result = interval_range(
|
||||
start=start, end=end, freq=freq, name=name, closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# defined from start/periods/freq
|
||||
result = interval_range(
|
||||
start=start, periods=periods, freq=freq, name=name, closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# defined from end/periods/freq
|
||||
result = interval_range(
|
||||
end=end, periods=periods, freq=freq, name=name, closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 20976: linspace behavior defined from start/end/periods
|
||||
result = interval_range(
|
||||
start=start, end=end, periods=periods, name=name, closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"start, end, freq, expected_endpoint",
|
||||
[
|
||||
(0, 10, 3, 9),
|
||||
(0, 10, 1.5, 9),
|
||||
(0.5, 10, 3, 9.5),
|
||||
(Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")),
|
||||
(
|
||||
Timestamp("2018-01-01"),
|
||||
Timestamp("2018-02-09"),
|
||||
"MS",
|
||||
Timestamp("2018-02-01"),
|
||||
),
|
||||
(
|
||||
Timestamp("2018-01-01", tz="US/Eastern"),
|
||||
Timestamp("2018-01-20", tz="US/Eastern"),
|
||||
"5D12h",
|
||||
Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_early_truncation(self, start, end, freq, expected_endpoint):
|
||||
# index truncates early if freq causes end to be skipped
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
result_endpoint = result.right[-1]
|
||||
assert result_endpoint == expected_endpoint
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"start, end, freq",
|
||||
[(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
|
||||
)
|
||||
def test_no_invalid_float_truncation(self, start, end, freq):
|
||||
# GH 21161
|
||||
if freq is None:
|
||||
breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
|
||||
else:
|
||||
breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
|
||||
expected = IntervalIndex.from_breaks(breaks)
|
||||
|
||||
result = interval_range(start=start, end=end, periods=4, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"start, mid, end",
|
||||
[
|
||||
(
|
||||
Timestamp("2018-03-10", tz="US/Eastern"),
|
||||
Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
|
||||
Timestamp("2018-03-12", tz="US/Eastern"),
|
||||
),
|
||||
(
|
||||
Timestamp("2018-11-03", tz="US/Eastern"),
|
||||
Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
|
||||
Timestamp("2018-11-05", tz="US/Eastern"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_linspace_dst_transition(self, start, mid, end):
|
||||
# GH 20976: linspace behavior defined from start/end/periods
|
||||
# accounts for the hour gained/lost during DST transition
|
||||
start = start.as_unit("ns")
|
||||
mid = mid.as_unit("ns")
|
||||
end = end.as_unit("ns")
|
||||
result = interval_range(start=start, end=end, periods=2)
|
||||
expected = IntervalIndex.from_breaks([start, mid, end])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("freq", [2, 2.0])
|
||||
@pytest.mark.parametrize("end", [10, 10.0])
|
||||
@pytest.mark.parametrize("start", [0, 0.0])
|
||||
def test_float_subtype(self, start, end, freq):
|
||||
# Has float subtype if any of start/end/freq are float, even if all
|
||||
# resulting endpoints can safely be upcast to integers
|
||||
|
||||
# defined from start/end/freq
|
||||
index = interval_range(start=start, end=end, freq=freq)
|
||||
result = index.dtype.subtype
|
||||
expected = "int64" if is_integer(start + end + freq) else "float64"
|
||||
assert result == expected
|
||||
|
||||
# defined from start/periods/freq
|
||||
index = interval_range(start=start, periods=5, freq=freq)
|
||||
result = index.dtype.subtype
|
||||
expected = "int64" if is_integer(start + freq) else "float64"
|
||||
assert result == expected
|
||||
|
||||
# defined from end/periods/freq
|
||||
index = interval_range(end=end, periods=5, freq=freq)
|
||||
result = index.dtype.subtype
|
||||
expected = "int64" if is_integer(end + freq) else "float64"
|
||||
assert result == expected
|
||||
|
||||
# GH 20976: linspace behavior defined from start/end/periods
|
||||
index = interval_range(start=start, end=end, periods=5)
|
||||
result = index.dtype.subtype
|
||||
expected = "int64" if is_integer(start + end) else "float64"
|
||||
assert result == expected
|
||||
|
||||
def test_interval_range_fractional_period(self):
|
||||
# float value for periods
|
||||
expected = interval_range(start=0, periods=10)
|
||||
msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = interval_range(start=0, periods=10.5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_constructor_coverage(self):
|
||||
# equivalent timestamp-like start/end
|
||||
start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
|
||||
expected = interval_range(start=start, end=end)
|
||||
|
||||
result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(start=start.asm8, end=end.asm8)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent freq with timestamp
|
||||
equiv_freq = [
|
||||
"D",
|
||||
Day(),
|
||||
Timedelta(days=1),
|
||||
timedelta(days=1),
|
||||
DateOffset(days=1),
|
||||
]
|
||||
for freq in equiv_freq:
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent timedelta-like start/end
|
||||
start, end = Timedelta(days=1), Timedelta(days=10)
|
||||
expected = interval_range(start=start, end=end)
|
||||
|
||||
result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(start=start.asm8, end=end.asm8)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent freq with timedelta
|
||||
equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
|
||||
for freq in equiv_freq:
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_errors(self):
|
||||
# not enough params
|
||||
msg = (
|
||||
"Of the four parameters: start, end, periods, and freq, "
|
||||
"exactly three must be specified"
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(start=0)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(end=5)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(periods=2)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range()
|
||||
|
||||
# too many params
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(start=0, end=5, periods=6, freq=1.5)
|
||||
|
||||
# mixed units
|
||||
msg = "start, end, freq need to be type compatible"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=0, end=Timestamp("20130101"), freq=2)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=0, end=Timedelta("1 day"), freq=2)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=0, end=10, freq="D")
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=Timestamp("20130101"), end=10, freq="D")
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(
|
||||
start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D"
|
||||
)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(
|
||||
start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2
|
||||
)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=Timedelta("1 day"), end=10, freq="D")
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(
|
||||
start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D"
|
||||
)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2)
|
||||
|
||||
# invalid periods
|
||||
msg = "periods must be a number, got foo"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=0, periods="foo")
|
||||
|
||||
# invalid start
|
||||
msg = "start must be numeric or datetime-like, got foo"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(start="foo", periods=10)
|
||||
|
||||
# invalid end
|
||||
msg = r"end must be numeric or datetime-like, got \(0, 1\]"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(end=Interval(0, 1), periods=10)
|
||||
|
||||
# invalid freq for datetime-like
|
||||
msg = "freq must be numeric or convertible to DateOffset, got foo"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(start=0, end=10, freq="foo")
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(start=Timestamp("20130101"), periods=10, freq="foo")
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
interval_range(end=Timedelta("1 day"), periods=10, freq="foo")
|
||||
|
||||
# mixed tz
|
||||
start = Timestamp("2017-01-01", tz="US/Eastern")
|
||||
end = Timestamp("2017-01-07", tz="US/Pacific")
|
||||
msg = "Start and end cannot both be tz-aware with different timezones"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_range(start=start, end=end)
|
||||
|
||||
def test_float_freq(self):
|
||||
# GH 54477
|
||||
result = interval_range(0, 1, freq=0.1)
|
||||
expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(0, 1, freq=0.6)
|
||||
expected = IntervalIndex.from_breaks([0, 0.6])
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,208 @@
|
||||
from itertools import permutations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.interval import IntervalTree
|
||||
from pandas.compat import IS64
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def skipif_32bit(param):
    """
    Wrap ``param`` in a ``pytest.param`` that is skipped when ``IS64`` is false.

    Used for the leaf_size parameters affected by GH 23440.
    """
    return pytest.param(
        param,
        marks=pytest.mark.skipif(
            not IS64, reason="GH 23440: int type mismatch on 32bit"
        ),
    )
|
||||
|
||||
|
||||
@pytest.fixture(params=["int64", "float64", "uint64"])
def dtype(request):
    """Parametrized fixture yielding a numeric dtype name."""
    dtype_name = request.param
    return dtype_name
|
||||
|
||||
|
||||
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Parametrized ``leaf_size`` value for IntervalTree; consumed by the
    ``tree`` fixture. The values 1 and 2 are wrapped with ``skipif_32bit``.
    """
    size = request.param
    return size
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        *(np.arange(5, dtype=kind) for kind in ("int64", "uint64", "float64")),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """IntervalTree whose right bounds are the parametrized left bounds plus 2."""
    lower = request.param
    upper = lower + 2
    return IntervalTree(lower, upper, leaf_size=leaf_size)
|
||||
|
||||
|
||||
class TestIntervalTree:
    """Tests for ``IntervalTree`` lookups, overlap detection and construction.

    ``closed`` is a fixture that is not defined in this module (presumably
    supplied by a conftest).
    """

    def test_get_indexer(self, tree):
        """Unique hits return positions; an ambiguous target raises KeyError."""
        targets = np.array([1.0, 5.5, 6.5])
        result = tree.get_indexer(targets)
        tm.assert_numpy_array_equal(result, np.array([0, 4, -1], dtype="intp"))

        msg = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=msg):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        """A target outside the tree dtype's range is reported as -1."""
        lower = np.array([0, 1], dtype=dtype)
        upper = np.array([1, 2], dtype=dtype)
        tree = IntervalTree(lower, upper)

        target = np.array([target_value], dtype=target_dtype)
        result = tree.get_indexer(target)
        tm.assert_numpy_array_equal(result, np.array([-1], dtype="intp"))

    def test_get_indexer_non_unique(self, tree):
        """Check the indexer slices and the missing array for three targets."""
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        checks = (
            (indexer[:1], [0]),
            (np.sort(indexer[1:3]), [0, 1]),
            (np.sort(indexer[3:]), [-1]),
            (missing, [2]),
        )
        for result, values in checks:
            tm.assert_numpy_array_equal(result, np.array(values, dtype="intp"))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        """An out-of-range target yields indexer -1 and missing position 0."""
        lower = np.array([0, 2], dtype=dtype)
        upper = np.array([1, 3], dtype=dtype)
        tree = IntervalTree(lower, upper)
        target = np.array([target_value], dtype=target_dtype)

        found, not_found = tree.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(found, np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(not_found, np.array([0], dtype="intp"))

    def test_duplicates(self, dtype):
        """Duplicate intervals break get_indexer but not the non-unique variant."""
        lower = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(lower, lower + 1)

        msg = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=msg):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(
            np.sort(indexer), np.array([0, 1, 2], dtype="intp")
        )
        tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        """Endpoint lookups succeed only on the sides the tree is closed on."""
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)

        # interior points always match
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        # left endpoints match only when closed on the left
        at_left = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(at_left, tree.get_indexer(x + 0.0))

        # right endpoints match only when closed on the right
        at_right = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(at_right, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", [list(p) for p in permutations(range(3))])
    def test_is_overlapping(self, closed, order, left, right, expected):
        """``is_overlapping`` matches ``expected`` for every ordering (GH 23309)."""
        tree = IntervalTree(left[order], right[order], closed=closed)
        overlapping = tree.is_overlapping
        assert overlapping is expected

    @pytest.mark.parametrize("order", [list(p) for p in permutations(range(3))])
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        lower = np.arange(3, dtype="int64")
        upper = np.arange(1, 4)
        tree = IntervalTree(lower[order], upper[order], closed=closed)
        overlapping = tree.is_overlapping
        assert overlapping is (closed == "both")

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        """Empty, single-interval and all-NaN trees never overlap (GH 23309)."""
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        """Large int64 right bounds give the expected root pivot (GH 25485)."""
        int64_max = np.iinfo(np.int64).max
        lower = np.arange(101, dtype="int64")
        upper = [int64_max] * 101
        tree = IntervalTree(lower, upper)

        # pivot should be average of left/right medians
        pivot = tree.root.pivot
        assert pivot == (50 + int64_max) / 2

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ([-np.inf, 1.0], [1.0, 2.0], 0.0),
            ([-np.inf, -2.0], [-2.0, -1.0], -2.0),
            ([-2.0, -1.0], [-1.0, np.inf], 0.0),
            ([1.0, 2.0], [2.0, np.inf], 2.0),
        ],
    )
    def test_inf_bound_infinite_recursion(self, left, right, expected):
        """Trees with infinite bounds build and report ``expected`` (GH 46658)."""
        # each two-element bound list is repeated 101 times
        tree = IntervalTree(left * 101, right * 101)

        pivot = tree.root.pivot
        assert pivot == expected
|
@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def range_index():
    """RangeIndex of length 3 named ``"range_index"``."""
    index = RangeIndex(3, name="range_index")
    return index
|
||||
|
||||
|
||||
@pytest.fixture
def interval_index():
    """IntervalIndex named ``"interval_index"`` whose last two intervals overlap."""
    bounds = [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)]
    return IntervalIndex.from_tuples(bounds, name="interval_index")
|
||||
|
||||
|
||||
def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
    """Joining the product MultiIndex to its interval level returns it unchanged."""
    # GH-45661
    product = MultiIndex.from_product([interval_index, range_index])
    joined = product.join(interval_index)

    tm.assert_index_equal(joined, product)
|
||||
|
||||
|
||||
def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
    """Joining the interval index to the product MultiIndex yields the MultiIndex."""
    # GH-45661
    product = MultiIndex.from_product([interval_index, range_index])
    joined = interval_index.join(product)

    tm.assert_index_equal(joined, product)
|
||||
|
||||
|
||||
def test_join_overlapping_interval_to_another_intervalindex(interval_index):
    """Joining an index to its own reversal returns the original ordering."""
    # GH-45661
    reversed_index = interval_index[::-1]
    joined = interval_index.join(reversed_index)

    tm.assert_index_equal(joined, interval_index)
|
@ -0,0 +1,13 @@
|
||||
import pytest
|
||||
|
||||
from pandas import IntervalIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestPickle:
    """Pickle round-trip tests for ``IntervalIndex``."""

    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        """The pickled and restored index equals the original for each ``closed``."""
        # https://github.com/pandas-dev/pandas/issues/35658
        original = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        restored = tm.round_trip_pickle(original)
        tm.assert_index_equal(restored, original)
|
@ -0,0 +1,208 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
IntervalIndex,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def monotonic_index(start, end, dtype="int64", closed="right"):
    """Build an IntervalIndex from the breaks ``np.arange(start, end, dtype=dtype)``."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|
||||
|
||||
|
||||
def empty_index(dtype="int64", closed="right"):
    """Build a zero-length IntervalIndex from an empty array of ``dtype``."""
    no_values = np.array([], dtype=dtype)
    return IntervalIndex(no_values, closed=closed)
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Set-operation tests for ``IntervalIndex``.

    ``closed`` and ``sort`` are fixtures that are not defined in this module
    (presumably supplied by a conftest). ``monotonic_index`` and
    ``empty_index`` are the module-level helpers.
    """

    def test_union(self, closed, sort):
        """Union of two overlapping monotonic indexes covers the combined range."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(0, 13, closed=closed)
        result = index[::-1].union(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            # unsorted result: compare after sorting
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].union(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # union with itself or with a subset returns the index unchanged
        tm.assert_index_equal(index.union(index, sort=sort), index)
        tm.assert_index_equal(index.union(index[:1], sort=sort), index)

    def test_union_empty_result(self, closed, sort):
        """Union of empty indexes keeps or promotes the subtype as expected."""
        # GH 19101: empty result, same dtype
        index = empty_index(dtype="int64", closed=closed)
        result = index.union(index, sort=sort)
        tm.assert_index_equal(result, index)

        # GH 19101: empty result, different numeric dtypes -> common dtype is f8
        other = empty_index(dtype="float64", closed=closed)
        result = index.union(other, sort=sort)
        expected = other
        tm.assert_index_equal(result, expected)

        # the same pair with the operands swapped must agree
        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

        other = empty_index(dtype="uint64", closed=closed)
        result = index.union(other, sort=sort)
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection(self, closed, sort):
        """Intersection of overlapping, nested and NaN-containing indexes."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(5, 11, closed=closed)
        result = index[::-1].intersection(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            # unsorted result: compare after sorting
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].intersection(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: nested intervals
        index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
        other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225
        index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
        other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(0, 2)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225: duplicate nan element
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])
        expected = IntervalIndex([np.nan])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_intersection_empty_result(self, closed, sort):
        """Disjoint indexes intersect to an empty index of the common subtype."""
        index = monotonic_index(0, 11, closed=closed)

        # GH 19101: empty result, same dtype
        other = monotonic_index(300, 314, closed=closed)
        expected = empty_index(dtype="int64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different numeric dtypes -> common dtype is float64
        other = monotonic_index(300, 314, dtype="float64", closed=closed)
        result = index.intersection(other, sort=sort)
        expected = other[:0]
        tm.assert_index_equal(result, expected)

        # int64 vs uint64 is compared against the same float64 empty index
        other = monotonic_index(300, 314, dtype="uint64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection_duplicates(self):
        """Duplicates in the left operand appear once in the result (GH#38743)."""
        index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
        other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_difference(self, closed, sort):
        """Difference drops matching intervals, including across subtypes."""
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
        result = index.difference(index[:1], sort=sort)
        expected = index[1:]
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, same dtype
        result = index.difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.difference(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self, closed, sort):
        """Symmetric difference keeps only the intervals unique to one side."""
        index = monotonic_index(0, 11, closed=closed)
        result = index[1:].symmetric_difference(index[:-1], sort=sort)
        expected = IntervalIndex([index[0], index[-1]])
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            # unsorted result: compare after sorting
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, same dtype
        result = index.symmetric_difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.symmetric_difference(other, sort=sort)
        expected = empty_index(dtype="float64", closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
    @pytest.mark.parametrize(
        "op_name", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_set_incompatible_types(self, closed, op_name, sort):
        """Set ops against incompatible operands match the object-dtype result.

        For ``difference`` the expected result is the original index itself.
        """
        index = monotonic_index(0, 11, closed=closed)
        set_op = getattr(index, op_name)

        # TODO: standardize return type of non-union setops type(self vs other)
        # non-IntervalIndex
        if op_name == "difference":
            expected = index
        else:
            expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
        result = set_op(Index([1, 2, 3]), sort=sort)
        tm.assert_index_equal(result, expected)

        # mixed closed -> cast to object
        for other_closed in {"right", "left", "both", "neither"} - {closed}:
            other = monotonic_index(0, 11, closed=other_closed)
            expected = getattr(index.astype(object), op_name)(other, sort=sort)
            if op_name == "difference":
                expected = index
            result = set_op(other, sort=sort)
            tm.assert_index_equal(result, expected)

        # GH 19016: incompatible dtypes -> cast to object
        other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
        expected = getattr(index.astype(object), op_name)(other, sort=sort)
        if op_name == "difference":
            expected = index
        result = set_op(other, sort=sort)
        tm.assert_index_equal(result, expected)
|
Reference in New Issue
Block a user