forked from Alsan/Post_finder
venv
Binary file not shown.
@@ -0,0 +1,139 @@
import operator

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


@pytest.fixture
def data():
    """Fixture returning boolean array with valid and missing values."""
    return pd.array(
        [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
        dtype="boolean",
    )


@pytest.fixture
def left_array():
    """Fixture returning boolean array with valid and missing values."""
    return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")


@pytest.fixture
def right_array():
    """Fixture returning boolean array with valid and missing values."""
    return pd.array([True, False, None] * 3, dtype="boolean")


# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------


@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [True, True, None, True, False, None, None, None, None]),
        ("mul", [True, False, None, False, False, None, None, None, None]),
    ],
    ids=["add", "mul"],
)
def test_add_mul(left_array, right_array, opname, exp):
    op = getattr(operator, opname)
    result = op(left_array, right_array)
    expected = pd.array(exp, dtype="boolean")
    tm.assert_extension_array_equal(result, expected)


def test_sub(left_array, right_array):
    msg = (
        r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), "
        r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\."
    )
    with pytest.raises(TypeError, match=msg):
        left_array - right_array


def test_div(left_array, right_array):
    msg = "operator '.*' not implemented for bool dtypes"
    with pytest.raises(NotImplementedError, match=msg):
        # check that we are matching the non-masked Series behavior
        pd.Series(left_array._data) / pd.Series(right_array._data)

    with pytest.raises(NotImplementedError, match=msg):
        left_array / right_array


@pytest.mark.parametrize(
    "opname",
    [
        "floordiv",
        "mod",
        "pow",
    ],
)
def test_op_int8(left_array, right_array, opname):
    op = getattr(operator, opname)
    if opname != "mod":
        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            result = op(left_array, right_array)
        return
    result = op(left_array, right_array)
    expected = op(left_array.astype("Int8"), right_array.astype("Int8"))
    tm.assert_extension_array_equal(result, expected)


# Test generic characteristics / errors
# -----------------------------------------------------------------------------


def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
    # invalid ops

    if using_infer_string:
        import pyarrow as pa

        err = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
    else:
        err = TypeError

    op = all_arithmetic_operators
    s = pd.Series(data)
    ops = getattr(s, op)

    # invalid scalars
    msg = (
        "did not contain a loop with signature matching types|"
        "BooleanArray cannot perform the operation|"
        "not supported for the input types, and the inputs could not be safely coerced "
        "to any supported types according to the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=msg):
        ops("foo")
    msg = "|".join(
        [
            r"unsupported operand type\(s\) for",
            "Concatenation operation is not implemented for NumPy arrays",
            "has no kernel",
        ]
    )
    with pytest.raises(err, match=msg):
        ops(pd.Timestamp("20180101"))

    # invalid array-likes
    if op not in ("__mul__", "__rmul__"):
        # TODO(extension) numpy's mul with object array sees booleans as numbers
        msg = "|".join(
            [
                r"unsupported operand type\(s\) for",
                "can only concatenate str",
                "not all arguments converted during string formatting",
                "has no kernel",
                "not implemented",
            ]
        )
        with pytest.raises(err, match=msg):
            ops(pd.Series("foo", index=s.index))
@@ -0,0 +1,53 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


def test_astype():
    # with missing values
    arr = pd.array([True, False, None], dtype="boolean")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        arr.astype("int64")

    with pytest.raises(ValueError, match="cannot convert float NaN to"):
        arr.astype("bool")

    result = arr.astype("float64")
    expected = np.array([1, 0, np.nan], dtype="float64")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.astype("str")
    expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
    tm.assert_numpy_array_equal(result, expected)

    # no missing values
    arr = pd.array([True, False, True], dtype="boolean")
    result = arr.astype("int64")
    expected = np.array([1, 0, 1], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.astype("bool")
    expected = np.array([True, False, True], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)


def test_astype_to_boolean_array():
    # astype to BooleanArray
    arr = pd.array([True, False, None], dtype="boolean")

    result = arr.astype("boolean")
    tm.assert_extension_array_equal(result, arr)
    result = arr.astype(pd.BooleanDtype())
    tm.assert_extension_array_equal(result, arr)


def test_astype_to_integer_array():
    # astype to IntegerArray
    arr = pd.array([True, False, None], dtype="boolean")

    result = arr.astype("Int64")
    expected = pd.array([1, 0, None], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)
@@ -0,0 +1,60 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.tests.arrays.masked_shared import ComparisonOps


@pytest.fixture
def data():
    """Fixture returning boolean array with valid and missing data"""
    return pd.array(
        [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
        dtype="boolean",
    )


@pytest.fixture
def dtype():
    """Fixture returning BooleanDtype"""
    return pd.BooleanDtype()


class TestComparisonOps(ComparisonOps):
    def test_compare_scalar(self, data, comparison_op):
        self._compare_other(data, comparison_op, True)

    def test_compare_array(self, data, comparison_op):
        other = pd.array([True] * len(data), dtype="boolean")
        self._compare_other(data, comparison_op, other)
        other = np.array([True] * len(data))
        self._compare_other(data, comparison_op, other)
        other = pd.Series([True] * len(data))
        self._compare_other(data, comparison_op, other)

    @pytest.mark.parametrize("other", [True, False, pd.NA])
    def test_scalar(self, other, comparison_op, dtype):
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_array(self, comparison_op):
        op = comparison_op
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")

        result = op(a, b)

        values = op(a._data, b._data)
        mask = a._mask | b._mask
        expected = BooleanArray(values, mask)
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = None
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )
@@ -0,0 +1,325 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.arrays.boolean import coerce_to_array
|
||||
|
||||
|
||||
def test_boolean_array_constructor():
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
result = BooleanArray(values, mask)
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
with pytest.raises(TypeError, match="values should be boolean numpy array"):
|
||||
BooleanArray(values.tolist(), mask)
|
||||
|
||||
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
|
||||
BooleanArray(values, mask.tolist())
|
||||
|
||||
with pytest.raises(TypeError, match="values should be boolean numpy array"):
|
||||
BooleanArray(values.astype(int), mask)
|
||||
|
||||
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
|
||||
BooleanArray(values, None)
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
|
||||
BooleanArray(values.reshape(1, -1), mask)
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
|
||||
BooleanArray(values, mask.reshape(1, -1))
|
||||
|
||||
|
||||
def test_boolean_array_constructor_copy():
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
result = BooleanArray(values, mask)
|
||||
assert result._data is values
|
||||
assert result._mask is mask
|
||||
|
||||
result = BooleanArray(values, mask, copy=True)
|
||||
assert result._data is not values
|
||||
assert result._mask is not mask
|
||||
|
||||
|
||||
def test_to_boolean_array():
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True]), np.array([False, False, False])
|
||||
)
|
||||
|
||||
result = pd.array([True, False, True], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([True, False, True]), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True]), np.array([False, False, True])
|
||||
)
|
||||
|
||||
result = pd.array([True, False, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_all_none():
|
||||
expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))
|
||||
|
||||
result = pd.array([None, None, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"a, b",
|
||||
[
|
||||
([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
|
||||
([True, np.nan], [True, None]),
|
||||
([True, pd.NA], [True, None]),
|
||||
([np.nan, np.nan], [None, None]),
|
||||
(np.array([np.nan, np.nan], dtype=float), [None, None]),
|
||||
],
|
||||
)
|
||||
def test_to_boolean_array_missing_indicators(a, b):
|
||||
result = pd.array(a, dtype="boolean")
|
||||
expected = pd.array(b, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values",
|
||||
[
|
||||
["foo", "bar"],
|
||||
["1", "2"],
|
||||
# "foo",
|
||||
[1, 2],
|
||||
[1.0, 2.0],
|
||||
pd.date_range("20130101", periods=2),
|
||||
np.array(["foo"]),
|
||||
np.array([1, 2]),
|
||||
np.array([1.0, 2.0]),
|
||||
[np.nan, {"a": 1}],
|
||||
],
|
||||
)
|
||||
def test_to_boolean_array_error(values):
|
||||
# error in converting existing arrays to BooleanArray
|
||||
msg = "Need to pass bool-like value"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_integer_array():
|
||||
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, False], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_from_float_array():
|
||||
result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, False], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_integer_like():
|
||||
# integers of 0's and 1's
|
||||
result = pd.array([1, 0, 1, 0], dtype="boolean")
|
||||
expected = pd.array([True, False, True, False], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
result = pd.array([1, 0, 1, None], dtype="boolean")
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_coerce_to_array():
|
||||
# TODO this is currently not public API
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask))
|
||||
expected = BooleanArray(values, mask)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
assert result._data is values
|
||||
assert result._mask is mask
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
|
||||
expected = BooleanArray(values, mask)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
assert result._data is not values
|
||||
assert result._mask is not mask
|
||||
|
||||
# mixed missing from values and mask
|
||||
values = [True, False, None, False]
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask))
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True, True]), np.array([False, False, True, True])
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# raise errors for wrong dimension
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
# passing 2D values is OK as long as no mask
|
||||
coerce_to_array(values.reshape(1, -1))
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
|
||||
coerce_to_array(values.reshape(1, -1), mask=mask)
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
|
||||
coerce_to_array(values, mask=mask.reshape(1, -1))
|
||||
|
||||
|
||||
def test_coerce_to_array_from_boolean_array():
|
||||
# passing BooleanArray to coerce_to_array
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
arr = BooleanArray(values, mask)
|
||||
result = BooleanArray(*coerce_to_array(arr))
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
# no copy
|
||||
assert result._data is arr._data
|
||||
assert result._mask is arr._mask
|
||||
|
||||
result = BooleanArray(*coerce_to_array(arr), copy=True)
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
assert result._data is not arr._data
|
||||
assert result._mask is not arr._mask
|
||||
|
||||
with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
|
||||
coerce_to_array(arr, mask=mask)
|
||||
|
||||
|
||||
def test_coerce_to_numpy_array():
|
||||
# with missing values -> object dtype
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
result = np.array(arr)
|
||||
expected = np.array([True, False, pd.NA], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# also with no missing values -> object dtype
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = np.array(arr)
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# force bool dtype
|
||||
result = np.array(arr, dtype="bool")
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
# with missing values will raise error
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
msg = (
|
||||
"cannot convert to 'bool'-dtype NumPy array with missing values. "
|
||||
"Specify an appropriate 'na_value' for this dtype."
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.array(arr, dtype="bool")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings():
|
||||
result = BooleanArray._from_sequence_of_strings(
|
||||
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object),
|
||||
dtype="boolean",
|
||||
)
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True, True, False, False, False]),
|
||||
np.array([False, False, False, False, False, False, True]),
|
||||
)
|
||||
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings_invalid_string():
|
||||
with pytest.raises(ValueError, match="cannot be cast"):
|
||||
BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy(box):
|
||||
con = pd.Series if box else pd.array
|
||||
# default (with or without missing values) -> object dtype
|
||||
arr = con([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([True, False, pd.NA], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
result = arr.to_numpy(dtype="str")
|
||||
expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# no missing values -> can convert to bool, otherwise raises
|
||||
arr = con([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy(dtype="bool")
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
|
||||
result = arr.to_numpy(dtype="bool")
|
||||
|
||||
# specify dtype and na_value
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
result = arr.to_numpy(dtype=object, na_value=None)
|
||||
expected = np.array([True, False, None], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype=bool, na_value=False)
|
||||
expected = np.array([True, False, False], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="int64", na_value=-99)
|
||||
expected = np.array([1, 0, -99], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="float64", na_value=np.nan)
|
||||
expected = np.array([1, 0, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# converting to int or float without specifying na_value raises
|
||||
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
|
||||
arr.to_numpy(dtype="int64")
|
||||
|
||||
|
||||
def test_to_numpy_copy():
|
||||
# to_numpy can be zero-copy if no missing values
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy(dtype=bool)
|
||||
result[0] = False
|
||||
tm.assert_extension_array_equal(
|
||||
arr, pd.array([False, False, True], dtype="boolean")
|
||||
)
|
||||
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy(dtype=bool, copy=True)
|
||||
result[0] = False
|
||||
tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))
|
@@ -0,0 +1,126 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


@pytest.mark.parametrize(
    "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor]
)
def test_ufuncs_binary(ufunc):
    # two BooleanArrays
    a = pd.array([True, False, None], dtype="boolean")
    result = ufunc(a, a)
    expected = pd.array(ufunc(a._data, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    s = pd.Series(a)
    result = ufunc(s, a)
    expected = pd.Series(ufunc(a._data, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_series_equal(result, expected)

    # Boolean with numpy array
    arr = np.array([True, True, False])
    result = ufunc(a, arr)
    expected = pd.array(ufunc(a._data, arr), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(arr, a)
    expected = pd.array(ufunc(arr, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    # BooleanArray with scalar
    result = ufunc(a, True)
    expected = pd.array(ufunc(a._data, True), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(True, a)
    expected = pd.array(ufunc(True, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    # not handled types
    msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__"
    with pytest.raises(TypeError, match=msg):
        ufunc(a, "test")


@pytest.mark.parametrize("ufunc", [np.logical_not])
def test_ufuncs_unary(ufunc):
    a = pd.array([True, False, None], dtype="boolean")
    result = ufunc(a)
    expected = pd.array(ufunc(a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    ser = pd.Series(a)
    result = ufunc(ser)
    expected = pd.Series(ufunc(a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_series_equal(result, expected)


def test_ufunc_numeric():
    # np.sqrt on np.bool_ returns float16, which we upcast to Float32
    # bc we do not have Float16
    arr = pd.array([True, False, None], dtype="boolean")

    res = np.sqrt(arr)

    expected = pd.array([1, 0, None], dtype="Float32")
    tm.assert_extension_array_equal(res, expected)


@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
    arr = pd.array(values, dtype="boolean")

    res = np.add.reduce(arr)
    if arr[-1] is pd.NA:
        expected = pd.NA
    else:
        expected = arr._data.sum()
    tm.assert_almost_equal(res, expected)


def test_value_counts_na():
    arr = pd.array([True, False, pd.NA], dtype="boolean")
    result = arr.value_counts(dropna=False)
    expected = pd.Series([1, 1, 1], index=arr, dtype="Int64", name="count")
    assert expected.index.dtype == arr.dtype
    tm.assert_series_equal(result, expected)

    result = arr.value_counts(dropna=True)
    expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64", name="count")
    assert expected.index.dtype == arr.dtype
    tm.assert_series_equal(result, expected)


def test_value_counts_with_normalize():
    ser = pd.Series([True, False, pd.NA], dtype="boolean")
    result = ser.value_counts(normalize=True)
    expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64", name="proportion") / 2
    assert expected.index.dtype == "boolean"
    tm.assert_series_equal(result, expected)


def test_diff():
    a = pd.array(
        [True, True, False, False, True, None, True, None, False], dtype="boolean"
    )
    result = pd.core.algorithms.diff(a, 1)
    expected = pd.array(
        [None, False, True, False, True, None, None, None, None], dtype="boolean"
    )
    tm.assert_extension_array_equal(result, expected)

    ser = pd.Series(a)
    result = ser.diff()
    expected = pd.Series(expected)
    tm.assert_series_equal(result, expected)
@@ -0,0 +1,13 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


@pytest.mark.parametrize("na", [None, np.nan, pd.NA])
def test_setitem_missing_values(na):
    arr = pd.array([True, False, None], dtype="boolean")
    expected = pd.array([True, None, None], dtype="boolean")
    arr[1] = na
    tm.assert_extension_array_equal(arr, expected)
@@ -0,0 +1,254 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.ops.mask_ops import (
|
||||
kleene_and,
|
||||
kleene_or,
|
||||
kleene_xor,
|
||||
)
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
|
||||
|
||||
class TestLogicalOps(BaseOpsUtil):
|
||||
def test_numpy_scalars_ok(self, all_logical_operators):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
op = getattr(a, all_logical_operators)
|
||||
|
||||
tm.assert_extension_array_equal(op(True), op(np.bool_(True)))
|
||||
tm.assert_extension_array_equal(op(False), op(np.bool_(False)))
|
||||
|
||||
def get_op_from_name(self, op_name):
|
||||
short_opname = op_name.strip("_")
|
||||
short_opname = short_opname if "xor" in short_opname else short_opname + "_"
|
||||
try:
|
||||
op = getattr(operator, short_opname)
|
||||
except AttributeError:
|
||||
# Assume it is the reverse operator
|
||||
rop = getattr(operator, short_opname[1:])
|
||||
op = lambda x, y: rop(y, x)
|
||||
|
||||
return op
|
||||
|
||||
def test_empty_ok(self, all_logical_operators):
|
||||
a = pd.array([], dtype="boolean")
|
||||
op_name = all_logical_operators
|
||||
result = getattr(a, op_name)(True)
|
||||
tm.assert_extension_array_equal(a, result)
|
||||
|
||||
result = getattr(a, op_name)(False)
|
||||
tm.assert_extension_array_equal(a, result)
|
||||
|
||||
result = getattr(a, op_name)(pd.NA)
|
||||
tm.assert_extension_array_equal(a, result)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)]
|
||||
)
|
||||
def test_eq_mismatched_type(self, other):
|
||||
# GH-44499
|
||||
arr = pd.array([True, False])
|
||||
result = arr == other
|
||||
expected = pd.array([False, False])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = arr != other
|
||||
expected = pd.array([True, True])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_logical_length_mismatch_raises(self, all_logical_operators):
|
||||
op_name = all_logical_operators
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
msg = "Lengths must match"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(a, op_name)([True, False])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(a, op_name)(np.array([True, False]))
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(a, op_name)(pd.array([True, False], dtype="boolean"))
|
||||
|
||||
def test_logical_nan_raises(self, all_logical_operators):
|
||||
op_name = all_logical_operators
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
msg = "Got float instead"
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(a, op_name)(np.nan)
|
||||
|
||||
@pytest.mark.parametrize("other", ["a", 1])
|
||||
def test_non_bool_or_na_other_raises(self, other, all_logical_operators):
|
||||
a = pd.array([True, False], dtype="boolean")
|
||||
with pytest.raises(TypeError, match=str(type(other).__name__)):
|
||||
getattr(a, all_logical_operators)(other)
|
||||
|
||||
def test_kleene_or(self):
|
||||
# A clear test of behavior.
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
result = a | b
|
||||
expected = pd.array(
|
||||
[True, True, True, True, False, None, True, None, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = b | a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, expected",
|
||||
[
|
||||
(pd.NA, [True, None, None]),
|
||||
(True, [True, True, True]),
|
||||
(np.bool_(True), [True, True, True]),
|
||||
(False, [True, False, None]),
|
||||
(np.bool_(False), [True, False, None]),
|
||||
],
|
||||
)
|
||||
def test_kleene_or_scalar(self, other, expected):
|
||||
# TODO: test True & False
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = a | other
|
||||
expected = pd.array(expected, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = other | a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True, False, None], dtype="boolean")
|
||||
)
|
||||
|
||||
def test_kleene_and(self):
|
||||
# A clear test of behavior.
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
result = a & b
|
||||
expected = pd.array(
|
||||
[True, False, None, False, False, False, None, False, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = b & a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, expected",
|
||||
[
|
||||
(pd.NA, [None, False, None]),
|
||||
(True, [True, False, None]),
|
||||
(False, [False, False, False]),
|
||||
(np.bool_(True), [True, False, None]),
|
||||
(np.bool_(False), [False, False, False]),
|
||||
],
|
||||
)
|
||||
def test_kleene_and_scalar(self, other, expected):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = a & other
|
||||
expected = pd.array(expected, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = other & a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True, False, None], dtype="boolean")
|
||||
)
|
||||
|
||||
def test_kleene_xor(self):
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
result = a ^ b
|
||||
expected = pd.array(
|
||||
[False, True, None, True, False, None, None, None, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = b ^ a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, expected",
|
||||
[
|
||||
(pd.NA, [None, None, None]),
|
||||
(True, [False, True, None]),
|
||||
(np.bool_(True), [False, True, None]),
|
||||
(np.bool_(False), [True, False, None]),
|
||||
],
|
||||
)
|
||||
def test_kleene_xor_scalar(self, other, expected):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = a ^ other
|
||||
expected = pd.array(expected, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = other ^ a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True, False, None], dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3])
|
||||
def test_no_masked_assumptions(self, other, all_logical_operators):
|
||||
# The logical operations should not assume that masked values are False!
|
||||
a = pd.arrays.BooleanArray(
|
||||
np.array([True, True, True, False, False, False, True, False, True]),
|
||||
np.array([False] * 6 + [True, True, True]),
|
||||
)
|
||||
b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
if isinstance(other, list):
|
||||
other = pd.array(other, dtype="boolean")
|
||||
|
||||
result = getattr(a, all_logical_operators)(other)
|
||||
expected = getattr(b, all_logical_operators)(other)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
if isinstance(other, BooleanArray):
|
||||
other._data[other._mask] = True
|
||||
a._data[a._mask] = False
|
||||
|
||||
result = getattr(a, all_logical_operators)(other)
|
||||
expected = getattr(b, all_logical_operators)(other)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
|
||||
def test_error_both_scalar(operation):
|
||||
msg = r"Either `left` or `right` need to be a np\.ndarray."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# masks need to be non-None, otherwise it ends up in an infinite recursion
|
||||
operation(True, True, np.zeros(1), np.zeros(1))
|
@@ -0,0 +1,27 @@
import pandas as pd
import pandas._testing as tm


class TestUnaryOps:
    def test_invert(self):
        a = pd.array([True, False, None], dtype="boolean")
        expected = pd.array([False, True, None], dtype="boolean")
        tm.assert_extension_array_equal(~a, expected)

        expected = pd.Series(expected, index=["a", "b", "c"], name="name")
        result = ~pd.Series(a, index=["a", "b", "c"], name="name")
        tm.assert_series_equal(result, expected)

        df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"])
        result = ~df
        expected = pd.DataFrame(
            {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"]
        )
        tm.assert_frame_equal(result, expected)

    def test_abs(self):
        # matching numpy behavior, abs is the identity function
        arr = pd.array([True, False, None], dtype="boolean")
        result = abs(arr)

        tm.assert_extension_array_equal(result, arr)
@@ -0,0 +1,62 @@
import numpy as np
import pytest

import pandas as pd


@pytest.fixture
def data():
    """Fixture returning boolean array, with valid and missing values."""
    return pd.array(
        [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
        dtype="boolean",
    )


@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
        # GH-33253: all True / all False values buggy with skipna=False
        ([True, True], True, True, True, True),
        ([False, False], False, False, False, False),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    # the methods return numpy scalars
    exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
    exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
    exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
    exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)

    for con in [pd.array, pd.Series]:
        a = con(values, dtype="boolean")
        assert a.any() is exp_any
        assert a.all() is exp_all
        assert a.any(skipna=False) is exp_any_noskip
        assert a.all(skipna=False) is exp_all_noskip

        assert np.any(a.any()) is exp_any
        assert np.all(a.all()) is exp_all


@pytest.mark.parametrize("dropna", [True, False])
def test_reductions_return_types(dropna, data, all_numeric_reductions):
    op = all_numeric_reductions
    s = pd.Series(data)
    if dropna:
        s = s.dropna()

    if op in ("sum", "prod"):
        assert isinstance(getattr(s, op)(), np.int_)
    elif op == "count":
        # Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
        assert isinstance(getattr(s, op)(), np.integer)
    elif op in ("min", "max"):
        assert isinstance(getattr(s, op)(), np.bool_)
    else:
        # "mean", "std", "var", "median", "kurt", "skew"
        assert isinstance(getattr(s, op)(), np.float64)
@@ -0,0 +1,13 @@
import pandas as pd


def test_repr():
    df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
    expected = " A\n0 True\n1 False\n2 <NA>"
    assert repr(df) == expected

    expected = "0 True\n1 False\n2 <NA>\nName: A, dtype: boolean"
    assert repr(df.A) == expected

    expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
    assert repr(df.A.array) == expected
Binary file not shown.
@@ -0,0 +1,89 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]])
def test_factorize(categories, ordered):
    cat = pd.Categorical(
        ["b", "b", "a", "c", None], categories=categories, ordered=ordered
    )
    codes, uniques = pd.factorize(cat)
    expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
    expected_uniques = pd.Categorical(
        ["b", "a", "c"], categories=categories, ordered=ordered
    )

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_categorical_equal(uniques, expected_uniques)


def test_factorized_sort():
    cat = pd.Categorical(["b", "b", None, "a"])
    codes, uniques = pd.factorize(cat, sort=True)
    expected_codes = np.array([1, 1, -1, 0], dtype=np.intp)
    expected_uniques = pd.Categorical(["a", "b"])

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_categorical_equal(uniques, expected_uniques)


def test_factorized_sort_ordered():
    cat = pd.Categorical(
        ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True
    )

    codes, uniques = pd.factorize(cat, sort=True)
    expected_codes = np.array([0, 0, -1, 1], dtype=np.intp)
    expected_uniques = pd.Categorical(
        ["b", "a"], categories=["c", "b", "a"], ordered=True
    )

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_categorical_equal(uniques, expected_uniques)


def test_isin_cats():
    # GH2003
    cat = pd.Categorical(["a", "b", np.nan])

    result = cat.isin(["a", np.nan])
    expected = np.array([True, False, True], dtype=bool)
    tm.assert_numpy_array_equal(expected, result)

    result = cat.isin(["a", "c"])
    expected = np.array([True, False, False], dtype=bool)
    tm.assert_numpy_array_equal(expected, result)


@pytest.mark.parametrize("value", [[""], [None, ""], [pd.NaT, ""]])
def test_isin_cats_corner_cases(value):
    # GH36550
    cat = pd.Categorical([""])
    result = cat.isin(value)
    expected = np.array([True], dtype=bool)
    tm.assert_numpy_array_equal(expected, result)


@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
    s = pd.Categorical(["a", "b"])
    expected = np.array([False, False], dtype=bool)

    result = s.isin(empty)
    tm.assert_numpy_array_equal(expected, result)


def test_diff():
    ser = pd.Series([1, 2, 3], dtype="category")

    msg = "Convert to a suitable dtype"
    with pytest.raises(TypeError, match=msg):
        ser.diff()

    df = ser.to_frame(name="A")
    with pytest.raises(TypeError, match=msg):
        df.diff()
@@ -0,0 +1,349 @@
|
||||
import re
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
NaT,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_scalar
|
||||
|
||||
|
||||
class TestCategoricalAnalytics:
|
||||
@pytest.mark.parametrize("aggregation", ["min", "max"])
|
||||
def test_min_max_not_ordered_raises(self, aggregation):
|
||||
# unordered cats have no min/max
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=False)
|
||||
msg = f"Categorical is not ordered for operation {aggregation}"
|
||||
agg_func = getattr(cat, aggregation)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
agg_func()
|
||||
|
||||
ufunc = np.minimum if aggregation == "min" else np.maximum
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ufunc.reduce(cat)
|
||||
|
||||
def test_min_max_ordered(self, index_or_series_or_array):
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
obj = index_or_series_or_array(cat)
|
||||
_min = obj.min()
|
||||
_max = obj.max()
|
||||
assert _min == "a"
|
||||
assert _max == "d"
|
||||
|
||||
assert np.minimum.reduce(obj) == "a"
|
||||
assert np.maximum.reduce(obj) == "d"
|
||||
# TODO: raises if we pass axis=0 (on Index and Categorical, not Series)
|
||||
|
||||
cat = Categorical(
|
||||
["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
|
||||
)
|
||||
obj = index_or_series_or_array(cat)
|
||||
_min = obj.min()
|
||||
_max = obj.max()
|
||||
assert _min == "d"
|
||||
assert _max == "a"
|
||||
assert np.minimum.reduce(obj) == "d"
|
||||
assert np.maximum.reduce(obj) == "a"
|
||||
|
||||
def test_min_max_reduce(self):
|
||||
# GH52788
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
df = DataFrame(cat)
|
||||
|
||||
result_max = df.agg("max")
|
||||
expected_max = Series(Categorical(["d"], dtype=cat.dtype))
|
||||
tm.assert_series_equal(result_max, expected_max)
|
||||
|
||||
result_min = df.agg("min")
|
||||
expected_min = Series(Categorical(["a"], dtype=cat.dtype))
|
||||
tm.assert_series_equal(result_min, expected_min)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"categories,expected",
|
||||
[
|
||||
(list("ABC"), np.nan),
|
||||
([1, 2, 3], np.nan),
|
||||
pytest.param(
|
||||
Series(date_range("2020-01-01", periods=3), dtype="category"),
|
||||
NaT,
|
||||
marks=pytest.mark.xfail(
|
||||
reason="https://github.com/pandas-dev/pandas/issues/29962"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("aggregation", ["min", "max"])
|
||||
def test_min_max_ordered_empty(self, categories, expected, aggregation):
|
||||
# GH 30227
|
||||
cat = Categorical([], categories=categories, ordered=True)
|
||||
|
||||
agg_func = getattr(cat, aggregation)
|
||||
result = agg_func()
|
||||
assert result is expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, categories",
|
||||
[(["a", "b", "c", np.nan], list("cba")), ([1, 2, 3, np.nan], [3, 2, 1])],
|
||||
)
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("function", ["min", "max"])
|
||||
def test_min_max_with_nan(self, values, categories, function, skipna):
|
||||
# GH 25303
|
||||
cat = Categorical(values, categories=categories, ordered=True)
|
||||
result = getattr(cat, function)(skipna=skipna)
|
||||
|
||||
if skipna is False:
|
||||
assert result is np.nan
|
||||
else:
|
||||
expected = categories[0] if function == "min" else categories[2]
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("function", ["min", "max"])
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_min_max_only_nan(self, function, skipna):
|
||||
# https://github.com/pandas-dev/pandas/issues/33450
|
||||
cat = Categorical([np.nan], categories=[1, 2], ordered=True)
|
||||
result = getattr(cat, function)(skipna=skipna)
|
||||
assert result is np.nan
|
||||
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_numeric_only_min_max_raises(self, method):
|
||||
# GH 25303
|
||||
cat = Categorical(
|
||||
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
|
||||
)
|
||||
with pytest.raises(TypeError, match=".* got an unexpected keyword"):
|
||||
getattr(cat, method)(numeric_only=True)
|
||||
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_numpy_min_max_raises(self, method):
|
||||
cat = Categorical(["a", "b", "c", "b"], ordered=False)
|
||||
msg = (
|
||||
f"Categorical is not ordered for operation {method}\n"
|
||||
"you can use .as_ordered() to change the Categorical to an ordered one"
|
||||
)
|
||||
method = getattr(np, method)
|
||||
with pytest.raises(TypeError, match=re.escape(msg)):
|
||||
method(cat)
|
||||
|
||||
@pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"])
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg):
|
||||
cat = Categorical(["a", "b", "c", "b"], ordered=True)
|
||||
msg = (
|
||||
f"the '{kwarg}' parameter is not supported in the pandas implementation "
|
||||
f"of {method}"
|
||||
)
|
||||
if kwarg == "axis":
|
||||
msg = r"`axis` must be fewer than the number of dimensions \(1\)"
|
||||
kwargs = {kwarg: 42}
|
||||
method = getattr(np, method)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
method(cat, **kwargs)
|
||||
|
||||
@pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")])
|
||||
def test_numpy_min_max_axis_equals_none(self, method, expected):
|
||||
cat = Categorical(["a", "b", "c", "b"], ordered=True)
|
||||
method = getattr(np, method)
|
||||
result = method(cat, axis=None)
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values,categories,exp_mode",
|
||||
[
|
||||
([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
|
||||
([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
|
||||
([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
|
||||
([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
|
||||
([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
|
||||
([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
|
||||
],
|
||||
)
|
||||
def test_mode(self, values, categories, exp_mode):
|
||||
cat = Categorical(values, categories=categories, ordered=True)
|
||||
res = Series(cat).mode()._values
|
||||
exp = Categorical(exp_mode, categories=categories, ordered=True)
|
||||
tm.assert_categorical_equal(res, exp)
|
||||
|
||||
def test_searchsorted(self, ordered):
|
||||
# https://github.com/pandas-dev/pandas/issues/8420
|
||||
# https://github.com/pandas-dev/pandas/issues/14522
|
||||
|
||||
cat = Categorical(
|
||||
["cheese", "milk", "apple", "bread", "bread"],
|
||||
categories=["cheese", "milk", "apple", "bread"],
|
||||
ordered=ordered,
|
||||
)
|
||||
ser = Series(cat)
|
||||
|
||||
# Searching for single item argument, side='left' (default)
|
||||
res_cat = cat.searchsorted("apple")
|
||||
assert res_cat == 2
|
||||
assert is_scalar(res_cat)
|
||||
|
||||
res_ser = ser.searchsorted("apple")
|
||||
assert res_ser == 2
|
||||
assert is_scalar(res_ser)
|
||||
|
||||
# Searching for single item array, side='left' (default)
|
||||
res_cat = cat.searchsorted(["bread"])
|
||||
res_ser = ser.searchsorted(["bread"])
|
||||
exp = np.array([3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for several items array, side='right'
|
||||
res_cat = cat.searchsorted(["apple", "bread"], side="right")
|
||||
res_ser = ser.searchsorted(["apple", "bread"], side="right")
|
||||
exp = np.array([3, 5], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for a single value that is not from the Categorical
|
||||
with pytest.raises(TypeError, match="cucumber"):
|
||||
cat.searchsorted("cucumber")
|
||||
with pytest.raises(TypeError, match="cucumber"):
|
||||
ser.searchsorted("cucumber")
|
||||
|
||||
# Searching for multiple values one of each is not from the Categorical
|
||||
msg = (
|
||||
"Cannot setitem on a Categorical with a new category, "
|
||||
"set the categories first"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat.searchsorted(["bread", "cucumber"])
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ser.searchsorted(["bread", "cucumber"])
|
||||
|
||||
def test_unique(self, ordered):
|
||||
# GH38140
|
||||
dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered)
|
||||
|
||||
# categories are reordered based on value when ordered=False
|
||||
cat = Categorical(["a", "b", "c"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
tm.assert_categorical_equal(res, cat)
|
||||
|
||||
cat = Categorical(["a", "b", "a", "a"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype))
|
||||
|
||||
cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(["c", "a", "b"], dtype=dtype)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
# nan must be removed
|
||||
cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
def test_unique_index_series(self, ordered):
|
||||
# GH38140
|
||||
dtype = CategoricalDtype([3, 2, 1], ordered=ordered)
|
||||
|
||||
c = Categorical([3, 1, 2, 2, 1], dtype=dtype)
|
||||
# Categorical.unique sorts categories by appearance order
|
||||
# if ordered=False
|
||||
exp = Categorical([3, 1, 2], dtype=dtype)
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
c = Categorical([1, 1, 2, 2], dtype=dtype)
|
||||
exp = Categorical([1, 2], dtype=dtype)
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
def test_shift(self):
|
||||
# GH 9416
|
||||
cat = Categorical(["a", "b", "c", "d", "a"])
|
||||
|
||||
# shift forward
|
||||
sp1 = cat.shift(1)
|
||||
xp1 = Categorical([np.nan, "a", "b", "c", "d"])
|
||||
tm.assert_categorical_equal(sp1, xp1)
|
||||
tm.assert_categorical_equal(cat[:-1], sp1[1:])
|
||||
|
||||
# shift back
|
||||
sn2 = cat.shift(-2)
|
||||
xp2 = Categorical(
|
||||
["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"]
|
||||
)
|
||||
tm.assert_categorical_equal(sn2, xp2)
|
||||
tm.assert_categorical_equal(cat[2:], sn2[:-2])
|
||||
|
||||
# shift by zero
|
||||
tm.assert_categorical_equal(cat, cat.shift(0))
|
||||
|
||||
def test_nbytes(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
|
||||
assert cat.nbytes == exp

    def test_memory_usage(self):
        cat = Categorical([1, 2, 3])

        # .categories is an index, so we include the hashtable
        assert 0 < cat.nbytes <= cat.memory_usage()
        assert 0 < cat.nbytes <= cat.memory_usage(deep=True)

        cat = Categorical(["foo", "foo", "bar"])
        assert cat.memory_usage(deep=True) > cat.nbytes

        if not PYPY:
            # sys.getsizeof will call the .memory_usage with
            # deep=True, and add on some GC overhead
            diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
            assert abs(diff) < 100

    def test_map(self):
        c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True)
        result = c.map(lambda x: x.lower(), na_action=None)
        exp = Categorical(list("ababc"), categories=list("cba"), ordered=True)
        tm.assert_categorical_equal(result, exp)

        c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False)
        result = c.map(lambda x: x.lower(), na_action=None)
        exp = Categorical(list("ababc"), categories=list("abc"), ordered=False)
        tm.assert_categorical_equal(result, exp)

        result = c.map(lambda x: 1, na_action=None)
        # GH 12766: Return an index not an array
        tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))

    @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
    def test_validate_inplace_raises(self, value):
        cat = Categorical(["A", "B", "B", "C", "A"])
        msg = (
            'For argument "inplace" expected type bool, '
            f"received type {type(value).__name__}"
        )

        with pytest.raises(ValueError, match=msg):
            cat.sort_values(inplace=value)

    def test_quantile_empty(self):
        # make sure we have correct itemsize on resulting codes
        cat = Categorical(["A", "B"])
        idx = Index([0.0, 0.5])
        result = cat[:0]._quantile(idx, interpolation="linear")
        assert result._codes.dtype == np.int8

        expected = cat.take([-1, -1], allow_fill=True)
        tm.assert_extension_array_equal(result, expected)
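        # The quantile of an empty slice must come back all-missing with the
        # same int8 code itemsize, and cat.take([-1, -1], allow_fill=True) is
        # the idiomatic way to build that all-NA expected Categorical while
        # keeping the original categories.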
@ -0,0 +1,501 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
StringDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.categorical import recode_for_categories
|
||||
|
||||
|
||||
class TestCategoricalAPI:
|
||||
def test_to_list_deprecated(self):
|
||||
# GH#51254
|
||||
cat1 = Categorical(list("acb"), ordered=False)
|
||||
msg = "Categorical.to_list is deprecated and will be removed"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
cat1.to_list()
|
||||
|
||||
def test_ordered_api(self):
|
||||
# GH 9347
|
||||
cat1 = Categorical(list("acb"), ordered=False)
|
||||
tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"]))
|
||||
assert not cat1.ordered
|
||||
|
||||
cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False)
|
||||
tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"]))
|
||||
assert not cat2.ordered
|
||||
|
||||
cat3 = Categorical(list("acb"), ordered=True)
|
||||
tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"]))
|
||||
assert cat3.ordered
|
||||
|
||||
cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True)
|
||||
tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"]))
|
||||
assert cat4.ordered
|
||||
|
||||
def test_set_ordered(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
cat2 = cat.as_unordered()
|
||||
assert not cat2.ordered
|
||||
cat2 = cat.as_ordered()
|
||||
assert cat2.ordered
|
||||
|
||||
assert cat2.set_ordered(True).ordered
|
||||
assert not cat2.set_ordered(False).ordered
|
||||
|
||||
# removed in 0.19.0
|
||||
msg = (
|
||||
"property 'ordered' of 'Categorical' object has no setter"
|
||||
if PY311
|
||||
else "can't set attribute"
|
||||
)
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = True
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = False
|
||||
|
||||
def test_rename_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
|
||||
# inplace=False: the old one must not be changed
|
||||
res = cat.rename_categories([1, 2, 3])
|
||||
tm.assert_numpy_array_equal(
|
||||
res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)
|
||||
)
|
||||
tm.assert_index_equal(res.categories, Index([1, 2, 3]))
|
||||
|
||||
exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
|
||||
|
||||
exp_cat = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(cat.categories, exp_cat)
|
||||
|
||||
# GH18862 (let rename_categories take callables)
|
||||
result = cat.rename_categories(lambda x: x.upper())
|
||||
expected = Categorical(["A", "B", "C", "A"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]])
|
||||
def test_rename_categories_wrong_length_raises(self, new_categories):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
msg = (
|
||||
"new categories need to have the same number of items as the "
|
||||
"old categories!"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.rename_categories(new_categories)
|
||||
|
||||
def test_rename_categories_series(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/17981
|
||||
c = Categorical(["a", "b"])
|
||||
result = c.rename_categories(Series([0, 1], index=["a", "b"]))
|
||||
expected = Categorical([0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_rename_categories_dict(self):
|
||||
# GH 17336
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1})
|
||||
expected = Index([4, 3, 2, 1])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts of smaller length
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 1, "c": 3})
|
||||
|
||||
expected = Index([1, "b", 3, "d"])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with bigger length
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6})
|
||||
expected = Index([1, 2, 3, 4])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with no items from old categories
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"f": 1, "g": 3})
|
||||
|
||||
expected = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
def test_reorder_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(
|
||||
["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True
|
||||
)
|
||||
|
||||
res = cat.reorder_categories(["c", "b", "a"])
|
||||
# cat must be the same as before
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
# only res is changed
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"new_categories",
|
||||
[
|
||||
["a"], # not all "old" included in "new"
|
||||
["a", "b", "d"], # still not all "old" in "new"
|
||||
["a", "b", "c", "d"], # all "old" included in "new", but too long
|
||||
],
|
||||
)
|
||||
def test_reorder_categories_raises(self, new_categories):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
msg = "items in new_categories are not the same as in old categories"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.reorder_categories(new_categories)
|
||||
|
||||
def test_add_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(
|
||||
["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True
|
||||
)
|
||||
|
||||
res = cat.add_categories("d")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.add_categories(["d"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# GH 9927
|
||||
cat = Categorical(list("abc"), ordered=True)
|
||||
expected = Categorical(list("abc"), categories=list("abcde"), ordered=True)
|
||||
# test with Series, np.array, index, list
|
||||
res = cat.add_categories(Series(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(np.array(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(Index(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(["d", "e"])
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
|
||||
def test_add_categories_existing_raises(self):
|
||||
# new is in old categories
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
msg = re.escape("new categories must not include old categories: {'d'}")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.add_categories(["d"])
|
||||
|
||||
def test_add_categories_losing_dtype_information(self):
|
||||
# GH#48812
|
||||
cat = Categorical(Series([1, 2], dtype="Int64"))
|
||||
ser = Series([4], dtype="Int64")
|
||||
result = cat.add_categories(ser)
|
||||
expected = Categorical(
|
||||
Series([1, 2], dtype="Int64"), categories=Series([1, 2, 4], dtype="Int64")
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
cat = Categorical(Series(["a", "b", "a"], dtype=StringDtype()))
|
||||
ser = Series(["d"], dtype=StringDtype())
|
||||
result = cat.add_categories(ser)
|
||||
expected = Categorical(
|
||||
Series(["a", "b", "a"], dtype=StringDtype()),
|
||||
categories=Series(["a", "b", "d"], dtype=StringDtype()),
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
|
||||
cat = cat.set_categories(["c", "b", "a"])
|
||||
res = cat.set_categories(["a", "b", "c"])
|
||||
# cat must be the same as before
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
# only res is changed
|
||||
exp_categories_back = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(res.categories, exp_categories_back)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_values)
|
||||
|
||||
# not all "old" included in "new" -> all not included ones are now
|
||||
# np.nan
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
res = cat.set_categories(["a"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8))
|
||||
|
||||
# still not all "old" in "new"
|
||||
res = cat.set_categories(["a", "b", "d"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
tm.assert_index_equal(res.categories, Index(["a", "b", "d"]))
|
||||
|
||||
# all "old" included in "new"
|
||||
cat = cat.set_categories(["a", "b", "c", "d"])
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
|
||||
# internals...
|
||||
c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True)
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8))
|
||||
tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))
|
||||
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(np.asarray(c), exp)
|
||||
|
||||
# all "pointers" to '4' must be changed from 3 to 0,...
|
||||
c = c.set_categories([4, 3, 2, 1])
|
||||
|
||||
# positions are changed
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8))
|
||||
|
||||
# categories are now in new order
|
||||
tm.assert_index_equal(c.categories, Index([4, 3, 2, 1]))
|
||||
|
||||
# output is the same
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(np.asarray(c), exp)
|
||||
assert c.min() == 4
|
||||
assert c.max() == 1
|
||||
|
||||
# set_categories should set the ordering if specified
|
||||
c2 = c.set_categories([4, 3, 2, 1], ordered=False)
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2))
|
||||
|
||||
# set_categories should pass thru the ordering
|
||||
c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2))
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, categories, new_categories",
|
||||
[
|
||||
# No NaNs, same cats, same order
|
||||
(["a", "b", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["a", "b", "a"], ["a", "b"], ["b", "a"]),
|
||||
# Same, unsorted
|
||||
(["b", "a", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["b", "a", "a"], ["a", "b"], ["b", "a"]),
|
||||
# NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b", "a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
# Introduce NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
# No overlap
|
||||
(["a", "b", "c"], ["a", "b"], ["d", "e"]),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_set_categories_many(self, values, categories, new_categories, ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c.set_categories(new_categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_rename_less(self):
|
||||
# GH 24675
|
||||
cat = Categorical(["A", "B"])
|
||||
result = cat.set_categories(["A"], rename=True)
|
||||
expected = Categorical(["A", np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_private(self):
|
||||
cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
cat._set_categories(["a", "c", "d", "e"])
|
||||
expected = Categorical(["a", "c", "d"], categories=list("acde"))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
# fastpath
|
||||
cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
cat._set_categories(["a", "c", "d", "e"], fastpath=True)
|
||||
expected = Categorical(["a", "c", "d"], categories=list("acde"))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
def test_remove_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True)
|
||||
|
||||
res = cat.remove_categories("c")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.remove_categories(["c"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
@pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]])
|
||||
def test_remove_categories_raises(self, removals):
|
||||
cat = Categorical(["a", "b", "a"])
|
||||
message = re.escape("removals must all be in old categories: {'c'}")
|
||||
|
||||
with pytest.raises(ValueError, match=message):
|
||||
cat.remove_categories(removals)
|
||||
|
||||
def test_remove_unused_categories(self):
|
||||
c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"])
|
||||
exp_categories_all = Index(["a", "b", "c", "d", "e"])
|
||||
exp_categories_dropped = Index(["a", "b", "c", "d"])
|
||||
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
res = c.remove_unused_categories()
|
||||
tm.assert_index_equal(res.categories, exp_categories_dropped)
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
# with NaN values (GH11599)
|
||||
c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"])
|
||||
res = c.remove_unused_categories()
|
||||
tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"])))
|
||||
exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(res.codes, exp_codes)
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
val = ["F", np.nan, "D", "B", "D", "F", np.nan]
|
||||
cat = Categorical(values=val, categories=list("ABCDEFG"))
|
||||
out = cat.remove_unused_categories()
|
||||
tm.assert_index_equal(out.categories, Index(["B", "D", "F"]))
|
||||
exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(out.codes, exp_codes)
|
||||
assert out.tolist() == val
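# Note the codes are re-numbered against the pruned categories ["B", "D", "F"]:
# "F" maps to 2, "D" to 1 and "B" to 0 in exp_codes above, while the NaNs stay
# encoded as -1.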
|
||||
|
||||
alpha = list("abcdefghijklmnopqrstuvwxyz")
|
||||
val = np.random.default_rng(2).choice(alpha[::2], 10000).astype("object")
|
||||
val[np.random.default_rng(2).choice(len(val), 100)] = np.nan
|
||||
|
||||
cat = Categorical(values=val, categories=alpha)
|
||||
out = cat.remove_unused_categories()
|
||||
assert out.tolist() == val.tolist()
|
||||
|
||||
|
||||
class TestCategoricalAPIWithFactor:
|
||||
def test_describe(self):
|
||||
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
|
||||
# string type
|
||||
desc = factor.describe()
|
||||
assert factor.ordered
|
||||
exp_index = CategoricalIndex(
|
||||
["a", "b", "c"], name="categories", ordered=factor.ordered
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]}, index=exp_index
|
||||
)
|
||||
tm.assert_frame_equal(desc, expected)
|
||||
|
||||
# check unused categories
|
||||
cat = factor.copy()
|
||||
cat = cat.set_categories(["a", "b", "c", "d"])
|
||||
desc = cat.describe()
|
||||
|
||||
exp_index = CategoricalIndex(
|
||||
list("abcd"), ordered=factor.ordered, name="categories"
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]},
|
||||
index=exp_index,
|
||||
)
|
||||
tm.assert_frame_equal(desc, expected)
|
||||
|
||||
# check an integer one
|
||||
cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
|
||||
desc = cat.describe()
|
||||
exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, name="categories")
|
||||
expected = DataFrame(
|
||||
{"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]},
|
||||
index=exp_index,
|
||||
)
|
||||
tm.assert_frame_equal(desc, expected)
|
||||
|
||||
# https://github.com/pandas-dev/pandas/issues/3678
|
||||
# describe should work with NaN
|
||||
cat = Categorical([np.nan, 1, 2, 2])
|
||||
desc = cat.describe()
|
||||
expected = DataFrame(
|
||||
{"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]},
|
||||
index=CategoricalIndex(
|
||||
[1, 2, np.nan], categories=[1, 2], name="categories"
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(desc, expected)
|
||||
|
||||
|
||||
class TestPrivateCategoricalAPI:
|
||||
def test_codes_immutable(self):
|
||||
# Codes should be read only
|
||||
c = Categorical(["a", "b", "c", "a", np.nan])
|
||||
exp = np.array([0, 1, 2, 0, -1], dtype="int8")
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
|
||||
# Assignments to codes should raise
|
||||
msg = (
|
||||
"property 'codes' of 'Categorical' object has no setter"
|
||||
if PY311
|
||||
else "can't set attribute"
|
||||
)
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
c.codes = np.array([0, 1, 2, 0, 1], dtype="int8")
|
||||
|
||||
# changes in the codes array should raise
|
||||
codes = c.codes
|
||||
|
||||
with pytest.raises(ValueError, match="assignment destination is read-only"):
|
||||
codes[4] = 1
|
||||
|
||||
# But even after getting the codes, the original array should still be
|
||||
# writeable!
|
||||
c[4] = "a"
|
||||
exp = np.array([0, 1, 2, 0, 0], dtype="int8")
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
c._codes[4] = 2
|
||||
exp = np.array([0, 1, 2, 0, 2], dtype="int8")
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
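# In short: .codes hands out a read-only view so the codes cannot be put out of
# sync with the categories, while the private ._codes buffer stays writeable
# for internal updates, as exercised above.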
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"codes, old, new, expected",
|
||||
[
|
||||
([0, 1], ["a", "b"], ["a", "b"], [0, 1]),
|
||||
([0, 1], ["b", "a"], ["b", "a"], [0, 1]),
|
||||
([0, 1], ["a", "b"], ["b", "a"], [1, 0]),
|
||||
([0, 1], ["b", "a"], ["a", "b"], [1, 0]),
|
||||
([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]),
|
||||
([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]),
|
||||
([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]),
|
||||
([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]),
|
||||
([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]),
|
||||
([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]),
|
||||
([-1, -1], [], ["a", "b"], [-1, -1]),
|
||||
([1, 0], ["b", "a"], ["a", "b"], [0, 1]),
|
||||
],
|
||||
)
|
||||
def test_recode_to_categories(self, codes, old, new, expected):
|
||||
codes = np.asanyarray(codes, dtype=np.int8)
|
||||
expected = np.asanyarray(expected, dtype=np.int8)
|
||||
old = Index(old)
|
||||
new = Index(new)
|
||||
result = recode_for_categories(codes, old, new)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_recode_to_categories_large(self):
|
||||
N = 1000
|
||||
codes = np.arange(N)
|
||||
old = Index(codes)
|
||||
expected = np.arange(N - 1, -1, -1, dtype=np.int16)
|
||||
new = Index(expected)
|
||||
result = recode_for_categories(codes, old, new)
|
||||
tm.assert_numpy_array_equal(result, expected)
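# recode_for_categories remaps codes that were defined against the old
# categories onto the new categories, emitting -1 wherever an old category is
# missing from the new ones; e.g. codes [0, 1, 2, 2] with old ["a", "b", "c"]
# and new ["a", "b"] recode to [0, 1, -1, -1], as in the cases above.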
@ -0,0 +1,155 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    DatetimeIndex,
    Interval,
    NaT,
    Period,
    Timestamp,
    array,
    to_datetime,
)
import pandas._testing as tm


class TestAstype:
    @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex])
    @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), NaT]])
    def test_astype_nan_to_int(self, cls, values):
        # GH#28406
        obj = cls(values)

        msg = "Cannot (cast|convert)"
        with pytest.raises((ValueError, TypeError), match=msg):
            obj.astype(int)

    @pytest.mark.parametrize(
        "expected",
        [
            array(["2019", "2020"], dtype="datetime64[ns, UTC]"),
            array([0, 0], dtype="timedelta64[ns]"),
            array([Period("2019"), Period("2020")], dtype="period[Y-DEC]"),
            array([Interval(0, 1), Interval(1, 2)], dtype="interval"),
            array([1, np.nan], dtype="Int64"),
        ],
    )
    def test_astype_category_to_extension_dtype(self, expected):
        # GH#28668
        result = expected.astype("category").astype(expected.dtype)

        tm.assert_extension_array_equal(result, expected)
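        # e.g. array([1, np.nan], dtype="Int64").astype("category").astype("Int64")
        # must round-trip to the original nullable values, NA included; the
        # intermediate categorical step may not drop the extension dtype.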

    @pytest.mark.parametrize(
        "dtype, expected",
        [
            (
                "datetime64[ns]",
                np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"),
            ),
            (
                "datetime64[ns, MET]",
                DatetimeIndex([Timestamp("2015-01-01 00:00:00+0100", tz="MET")]).array,
            ),
        ],
    )
    def test_astype_to_datetime64(self, dtype, expected):
        # GH#28448
        result = Categorical(["2015-01-01"]).astype(dtype)
        assert result == expected

    def test_astype_str_int_categories_to_nullable_int(self):
        # GH#39616
        dtype = CategoricalDtype([str(i) for i in range(5)])
        codes = np.random.default_rng(2).integers(5, size=20)
        arr = Categorical.from_codes(codes, dtype=dtype)

        res = arr.astype("Int64")
        expected = array(codes, dtype="Int64")
        tm.assert_extension_array_equal(res, expected)

    def test_astype_str_int_categories_to_nullable_float(self):
        # GH#39616
        dtype = CategoricalDtype([str(i / 2) for i in range(5)])
        codes = np.random.default_rng(2).integers(5, size=20)
        arr = Categorical.from_codes(codes, dtype=dtype)

        res = arr.astype("Float64")
        expected = array(codes, dtype="Float64") / 2
        tm.assert_extension_array_equal(res, expected)

    @pytest.mark.parametrize("ordered", [True, False])
    def test_astype(self, ordered):
        # string
        cat = Categorical(list("abbaaccc"), ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat)
        tm.assert_numpy_array_equal(result, expected)

        msg = r"Cannot cast object|string dtype to float64"
        with pytest.raises(ValueError, match=msg):
            cat.astype(float)

        # numeric
        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(int)
        expected = np.array(cat, dtype="int")
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(float)
        expected = np.array(cat, dtype=float)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("cat_ordered", [True, False])
    def test_astype_category(self, dtype_ordered, cat_ordered):
        # GH#10696/GH#18593
        data = list("abcaacbab")
        cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype("category")
            expected = cat
            tm.assert_categorical_equal(result, expected)

    def test_astype_object_datetime_categories(self):
        # GH#40754
        cat = Categorical(to_datetime(["2021-03-27", NaT]))
        result = cat.astype(object)
        expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
        tm.assert_numpy_array_equal(result, expected)

    def test_astype_object_timestamp_categories(self):
        # GH#18024
        cat = Categorical([Timestamp("2014-01-01")])
        result = cat.astype(object)
        expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
        tm.assert_numpy_array_equal(result, expected)

    def test_astype_category_readonly_mask_values(self):
        # GH#53658
        arr = array([0, 1, 2], dtype="Int64")
        arr._mask.flags["WRITEABLE"] = False
        result = arr.astype("category")
        expected = array([0, 1, 2], dtype="Int64").astype("category")
        tm.assert_extension_array_equal(result, expected)
@ -0,0 +1,783 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_float_dtype,
|
||||
is_integer_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalConstructors:
|
||||
def test_fastpath_deprecated(self):
|
||||
codes = np.array([1, 2, 3])
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False)
|
||||
msg = "The 'fastpath' keyword in Categorical is deprecated"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
Categorical(codes, dtype=dtype, fastpath=True)
|
||||
|
||||
def test_categorical_from_cat_and_dtype_str_preserve_ordered(self):
|
||||
# GH#49309 we should preserve orderedness in `res`
|
||||
cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True)
|
||||
|
||||
res = Categorical(cat, dtype="category")
|
||||
assert res.dtype.ordered
|
||||
|
||||
def test_categorical_disallows_scalar(self):
|
||||
# GH#38433
|
||||
with pytest.raises(TypeError, match="Categorical input must be list-like"):
|
||||
Categorical("A", categories=["A", "B"])
|
||||
|
||||
def test_categorical_1d_only(self):
|
||||
# ndim > 1
|
||||
msg = "> 1 ndim Categorical are not supported at this time"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
Categorical(np.array([list("abcd")]))
|
||||
|
||||
def test_validate_ordered(self):
|
||||
# see gh-14058
|
||||
exp_msg = "'ordered' must either be 'True' or 'False'"
|
||||
exp_err = TypeError
|
||||
|
||||
# This should be a boolean.
|
||||
ordered = np.array([0, 1, 2])
|
||||
|
||||
with pytest.raises(exp_err, match=exp_msg):
|
||||
Categorical([1, 2, 3], ordered=ordered)
|
||||
|
||||
with pytest.raises(exp_err, match=exp_msg):
|
||||
Categorical.from_codes(
|
||||
[0, 0, 1], categories=["a", "b", "c"], ordered=ordered
|
||||
)
|
||||
|
||||
def test_constructor_empty(self):
|
||||
# GH 17248
|
||||
c = Categorical([])
|
||||
expected = Index([])
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
c = Categorical([], categories=[1, 2, 3])
|
||||
expected = Index([1, 2, 3], dtype=np.int64)
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
def test_constructor_empty_boolean(self):
|
||||
# see gh-22702
|
||||
cat = Categorical([], categories=[True, False])
|
||||
categories = sorted(cat.categories.tolist())
|
||||
assert categories == [False, True]
|
||||
|
||||
def test_constructor_tuples(self):
|
||||
values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
|
||||
result = Categorical(values)
|
||||
expected = Index([(1,), (1, 2)], tupleize_cols=False)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
assert result.ordered is False
|
||||
|
||||
def test_constructor_tuples_datetimes(self):
|
||||
# numpy will auto reshape when all of the tuples are the
|
||||
# same len, so add an extra one with 2 items and slice it off
|
||||
values = np.array(
|
||||
[
|
||||
(Timestamp("2010-01-01"),),
|
||||
(Timestamp("2010-01-02"),),
|
||||
(Timestamp("2010-01-01"),),
|
||||
(Timestamp("2010-01-02"),),
|
||||
("a", "b"),
|
||||
],
|
||||
dtype=object,
|
||||
)[:-1]
|
||||
result = Categorical(values)
|
||||
expected = Index(
|
||||
[(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)],
|
||||
tupleize_cols=False,
|
||||
)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
|
||||
def test_constructor_unsortable(self):
|
||||
# it works!
|
||||
arr = np.array([1, 2, 3, datetime.now()], dtype="O")
|
||||
factor = Categorical(arr, ordered=False)
|
||||
assert not factor.ordered
|
||||
|
||||
# this however will raise as cannot be sorted
|
||||
msg = (
|
||||
"'values' is not ordered, please explicitly specify the "
|
||||
"categories order by passing in a categories argument."
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
Categorical(arr, ordered=True)
|
||||
|
||||
def test_constructor_interval(self):
|
||||
result = Categorical(
|
||||
[Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True
|
||||
)
|
||||
ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
|
||||
exp = Categorical(ii, ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
tm.assert_index_equal(result.categories, ii)
|
||||
|
||||
def test_constructor(self):
|
||||
exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
|
||||
c1 = Categorical(exp_arr)
|
||||
tm.assert_numpy_array_equal(c1.__array__(), exp_arr)
|
||||
c2 = Categorical(exp_arr, categories=["a", "b", "c"])
|
||||
tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
|
||||
c2 = Categorical(exp_arr, categories=["c", "b", "a"])
|
||||
tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
|
||||
|
||||
# categories must be unique
|
||||
msg = "Categorical categories must be unique"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical([1, 2], [1, 2, 2])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical(["a", "b"], ["a", "b", "b"])
|
||||
|
||||
# The default should be unordered
|
||||
c1 = Categorical(["a", "b", "c", "a"])
|
||||
assert not c1.ordered
|
||||
|
||||
# Categorical as input
|
||||
c1 = Categorical(["a", "b", "c", "a"])
|
||||
c2 = Categorical(c1)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
c2 = Categorical(c1)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
|
||||
c2 = Categorical(c1)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
|
||||
c2 = Categorical(c1, categories=["a", "b", "c"])
|
||||
tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
|
||||
tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))
|
||||
|
||||
# Series of dtype category
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
c2 = Categorical(Series(c1))
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
|
||||
c2 = Categorical(Series(c1))
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
# Series
|
||||
c1 = Categorical(["a", "b", "c", "a"])
|
||||
c2 = Categorical(Series(["a", "b", "c", "a"]))
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"])
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
# This should result in integer categories, not float!
|
||||
cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
|
||||
assert is_integer_dtype(cat.categories)
|
||||
|
||||
# https://github.com/pandas-dev/pandas/issues/3678
|
||||
cat = Categorical([np.nan, 1, 2, 3])
|
||||
assert is_integer_dtype(cat.categories)
|
||||
|
||||
# this should result in floats
|
||||
cat = Categorical([np.nan, 1, 2.0, 3])
|
||||
assert is_float_dtype(cat.categories)
|
||||
|
||||
cat = Categorical([np.nan, 1.0, 2.0, 3.0])
|
||||
assert is_float_dtype(cat.categories)
|
||||
|
||||
# This doesn't work -> this would probably need some kind of "remember
|
||||
# the original type" feature to try to cast the array interface result
|
||||
# to...
|
||||
|
||||
# vals = np.asarray(cat[cat.notna()])
|
||||
# assert is_integer_dtype(vals)
|
||||
|
||||
# corner cases
|
||||
cat = Categorical([1])
|
||||
assert len(cat.categories) == 1
|
||||
assert cat.categories[0] == 1
|
||||
assert len(cat.codes) == 1
|
||||
assert cat.codes[0] == 0
|
||||
|
||||
cat = Categorical(["a"])
|
||||
assert len(cat.categories) == 1
|
||||
assert cat.categories[0] == "a"
|
||||
assert len(cat.codes) == 1
|
||||
assert cat.codes[0] == 0
|
||||
|
||||
# two arrays
|
||||
# - when the first is an integer dtype and the second is not
|
||||
# - when the resulting codes are all -1/NaN
|
||||
with tm.assert_produces_warning(None):
|
||||
Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])
|
||||
|
||||
with tm.assert_produces_warning(None):
|
||||
Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])
|
||||
|
||||
# the next one are from the old docs
|
||||
with tm.assert_produces_warning(None):
|
||||
Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])
|
||||
cat = Categorical([1, 2], categories=[1, 2, 3])
|
||||
|
||||
# this is a legitimate constructor
|
||||
with tm.assert_produces_warning(None):
|
||||
Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True)
|
||||
|
||||
def test_constructor_with_existing_categories(self):
|
||||
# GH25318: constructing with pd.Series used to bogusly skip recoding
|
||||
# categories
|
||||
c0 = Categorical(["a", "b", "c", "a"])
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"])
|
||||
|
||||
c2 = Categorical(c0, categories=c1.categories)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c3 = Categorical(Series(c0), categories=c1.categories)
|
||||
tm.assert_categorical_equal(c1, c3)
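# With explicit categories, values outside them ("a" here) are coded as -1 and
# therefore show up as NaN; GH25318 is about Series input getting the same
# recoding as a plain list.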
|
||||
|
||||
def test_constructor_not_sequence(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/16022
|
||||
msg = r"^Parameter 'categories' must be list-like, was"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
Categorical(["a", "b"], categories="a")
|
||||
|
||||
def test_constructor_with_null(self):
|
||||
# Cannot have NaN in categories
|
||||
msg = "Categorical categories cannot be null"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical(
|
||||
DatetimeIndex(["nat", "20160101"]),
|
||||
categories=[NaT, Timestamp("20160101")],
|
||||
)
|
||||
|
||||
def test_constructor_with_index(self):
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
tm.assert_categorical_equal(ci.values, Categorical(ci))
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
tm.assert_categorical_equal(
|
||||
ci.values, Categorical(ci.astype(object), categories=ci.categories)
|
||||
)
|
||||
|
||||
def test_constructor_with_generator(self):
|
||||
# This was raising an Error in isna(single_val).any() because isna
|
||||
# returned a scalar for a generator
|
||||
|
||||
exp = Categorical([0, 1, 2])
|
||||
cat = Categorical(x for x in [0, 1, 2])
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
cat = Categorical(range(3))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]])
|
||||
|
||||
# check that categories accept generators and sequences
|
||||
cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
cat = Categorical([0, 1, 2], categories=range(3))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
def test_constructor_with_rangeindex(self):
|
||||
# RangeIndex is preserved in Categories
|
||||
rng = Index(range(3))
|
||||
|
||||
cat = Categorical(rng)
|
||||
tm.assert_index_equal(cat.categories, rng, exact=True)
|
||||
|
||||
cat = Categorical([1, 2, 0], categories=rng)
|
||||
tm.assert_index_equal(cat.categories, rng, exact=True)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtl",
|
||||
[
|
||||
date_range("1995-01-01 00:00:00", periods=5, freq="s"),
|
||||
date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"),
|
||||
timedelta_range("1 day", periods=5, freq="s"),
|
||||
],
|
||||
)
|
||||
def test_constructor_with_datetimelike(self, dtl):
|
||||
# see gh-12077
|
||||
# constructor with a datetimelike and NaT
|
||||
|
||||
s = Series(dtl)
|
||||
c = Categorical(s)
|
||||
|
||||
expected = type(dtl)(s)
|
||||
expected._data.freq = None
|
||||
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8"))
|
||||
|
||||
# with NaT
|
||||
s2 = s.copy()
|
||||
s2.iloc[-1] = NaT
|
||||
c = Categorical(s2)
|
||||
|
||||
expected = type(dtl)(s2.dropna())
|
||||
expected._data.freq = None
|
||||
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
exp = np.array([0, 1, 2, 3, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
|
||||
result = repr(c)
|
||||
assert "NaT" in result
|
||||
|
||||
def test_constructor_from_index_series_datetimetz(self):
|
||||
idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
|
||||
idx = idx._with_freq(None) # freq not preserved in result.categories
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_date_objects(self):
|
||||
# we dont cast date objects to timestamps, matching Index constructor
|
||||
v = date.today()
|
||||
|
||||
cat = Categorical([v, v])
|
||||
assert cat.categories.dtype == object
|
||||
assert type(cat.categories[0]) is date
|
||||
|
||||
def test_constructor_from_index_series_timedelta(self):
|
||||
idx = timedelta_range("1 days", freq="D", periods=3)
|
||||
idx = idx._with_freq(None) # freq not preserved in result.categories
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_from_index_series_period(self):
|
||||
idx = period_range("2015-01-01", freq="D", periods=3)
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values",
|
||||
[
|
||||
np.array([1.0, 1.2, 1.8, np.nan]),
|
||||
np.array([1, 2, 3], dtype="int64"),
|
||||
["a", "b", "c", np.nan],
|
||||
[pd.Period("2014-01"), pd.Period("2014-02"), NaT],
|
||||
[Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT],
|
||||
[
|
||||
Timestamp("2014-01-01", tz="US/Eastern"),
|
||||
Timestamp("2014-01-02", tz="US/Eastern"),
|
||||
NaT,
|
||||
],
|
||||
],
|
||||
)
|
||||
def test_constructor_invariant(self, values):
|
||||
# GH 14190
|
||||
c = Categorical(values)
|
||||
c2 = Categorical(c)
|
||||
tm.assert_categorical_equal(c, c2)
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_constructor_with_dtype(self, ordered):
|
||||
categories = ["b", "a", "c"]
|
||||
dtype = CategoricalDtype(categories, ordered=ordered)
|
||||
result = Categorical(["a", "b", "a", "c"], dtype=dtype)
|
||||
expected = Categorical(
|
||||
["a", "b", "a", "c"], categories=categories, ordered=ordered
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
assert result.ordered is ordered
|
||||
|
||||
def test_constructor_dtype_and_others_raises(self):
|
||||
dtype = CategoricalDtype(["a", "b"], ordered=True)
|
||||
msg = "Cannot specify `categories` or `ordered` together with `dtype`."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical(["a", "b"], categories=["a", "b"], dtype=dtype)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical(["a", "b"], ordered=True, dtype=dtype)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical(["a", "b"], ordered=False, dtype=dtype)
|
||||
|
||||
@pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]])
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_constructor_str_category(self, categories, ordered):
|
||||
result = Categorical(
|
||||
["a", "b"], categories=categories, ordered=ordered, dtype="category"
|
||||
)
|
||||
expected = Categorical(["a", "b"], categories=categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_str_unknown(self):
|
||||
with pytest.raises(ValueError, match="Unknown dtype"):
|
||||
Categorical([1, 2], dtype="foo")
|
||||
|
||||
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
|
||||
def test_constructor_np_strs(self):
|
||||
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
|
||||
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
|
||||
assert all(isinstance(x, np.str_) for x in cat.categories)
|
||||
|
||||
def test_constructor_from_categorical_with_dtype(self):
|
||||
dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
|
||||
values = Categorical(["a", "b", "d"])
|
||||
result = Categorical(values, dtype=dtype)
|
||||
# We use dtype.categories, not values.categories
|
||||
expected = Categorical(
|
||||
["a", "b", "d"], categories=["a", "b", "c"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_from_categorical_with_unknown_dtype(self):
|
||||
dtype = CategoricalDtype(None, ordered=True)
|
||||
values = Categorical(["a", "b", "d"])
|
||||
result = Categorical(values, dtype=dtype)
|
||||
# We use values.categories, not dtype.categories
|
||||
expected = Categorical(
|
||||
["a", "b", "d"], categories=["a", "b", "d"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_from_categorical_string(self):
|
||||
values = Categorical(["a", "b", "d"])
|
||||
# use categories, ordered
|
||||
result = Categorical(
|
||||
values, categories=["a", "b", "c"], ordered=True, dtype="category"
|
||||
)
|
||||
expected = Categorical(
|
||||
["a", "b", "d"], categories=["a", "b", "c"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# No string
|
||||
result = Categorical(values, categories=["a", "b", "c"], ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_with_categorical_categories(self):
|
||||
# GH17884
|
||||
expected = Categorical(["a", "b"], categories=["a", "b", "c"])
|
||||
|
||||
result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"]))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"]))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list])
|
||||
def test_construction_with_null(self, klass, nulls_fixture):
|
||||
# https://github.com/pandas-dev/pandas/issues/31927
|
||||
values = klass(["a", nulls_fixture, "b"])
|
||||
result = Categorical(values)
|
||||
|
||||
dtype = CategoricalDtype(["a", "b"])
|
||||
codes = [0, -1, 1]
|
||||
expected = Categorical.from_codes(codes=codes, dtype=dtype)
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("validate", [True, False])
|
||||
def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype, validate):
|
||||
# GH#39649
|
||||
cats = pd.array(range(5), dtype=any_numeric_ea_dtype)
|
||||
codes = np.random.default_rng(2).integers(5, size=3)
|
||||
dtype = CategoricalDtype(cats)
|
||||
arr = Categorical.from_codes(codes, dtype=dtype, validate=validate)
|
||||
assert arr.categories.dtype == cats.dtype
|
||||
tm.assert_index_equal(arr.categories, Index(cats))
|
||||
|
||||
def test_from_codes_empty(self):
|
||||
cat = ["a", "b", "c"]
|
||||
result = Categorical.from_codes([], categories=cat)
|
||||
expected = Categorical([], categories=cat)
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("validate", [True, False])
|
||||
def test_from_codes_validate(self, validate):
|
||||
# GH53122
|
||||
dtype = CategoricalDtype(["a", "b"])
|
||||
if validate:
|
||||
with pytest.raises(ValueError, match="codes need to be between "):
|
||||
Categorical.from_codes([4, 5], dtype=dtype, validate=validate)
|
||||
else:
|
||||
# passes, though has incorrect codes, but that's the user responsibility
|
||||
Categorical.from_codes([4, 5], dtype=dtype, validate=validate)
|
||||
|
||||
def test_from_codes_too_few_categories(self):
|
||||
dtype = CategoricalDtype(categories=[1, 2])
|
||||
msg = "codes need to be between "
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes([1, 2], categories=dtype.categories)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes([1, 2], dtype=dtype)
|
||||
|
||||
def test_from_codes_non_int_codes(self):
|
||||
dtype = CategoricalDtype(categories=[1, 2])
|
||||
msg = "codes need to be array-like integers"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(["a"], categories=dtype.categories)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(["a"], dtype=dtype)
|
||||
|
||||
def test_from_codes_non_unique_categories(self):
|
||||
with pytest.raises(ValueError, match="Categorical categories must be unique"):
|
||||
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
|
||||
|
||||
def test_from_codes_nan_cat_included(self):
|
||||
with pytest.raises(ValueError, match="Categorical categories cannot be null"):
|
||||
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
|
||||
|
||||
def test_from_codes_too_negative(self):
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c"])
|
||||
msg = r"codes need to be between -1 and len\(categories\)-1"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes([-2, 1, 2], dtype=dtype)
|
||||
|
||||
def test_from_codes(self):
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c"])
|
||||
exp = Categorical(["a", "b", "c"], ordered=False)
|
||||
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
|
||||
tm.assert_categorical_equal(exp, res)
|
||||
|
||||
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
|
||||
tm.assert_categorical_equal(exp, res)
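# from_codes is the low-level constructor: it takes the integer codes at face
# value (-1 meaning missing) instead of matching values against the categories,
# e.g. Categorical.from_codes([0, 1, -1], categories=["a", "b", "c"]) yields
# ["a", "b", NaN] without any value lookup.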
|
||||
|
||||
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
|
||||
def test_from_codes_with_categorical_categories(self, klass):
|
||||
# GH17884
|
||||
expected = Categorical(["a", "b"], categories=["a", "b", "c"])
|
||||
|
||||
result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"]))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
|
||||
def test_from_codes_with_non_unique_categorical_categories(self, klass):
|
||||
with pytest.raises(ValueError, match="Categorical categories must be unique"):
|
||||
Categorical.from_codes([0, 1], klass(["a", "b", "a"]))
|
||||
|
||||
def test_from_codes_with_nan_code(self):
|
||||
# GH21767
|
||||
codes = [1, 2, np.nan]
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c"])
|
||||
with pytest.raises(ValueError, match="codes need to be array-like integers"):
|
||||
Categorical.from_codes(codes, categories=dtype.categories)
|
||||
with pytest.raises(ValueError, match="codes need to be array-like integers"):
|
||||
Categorical.from_codes(codes, dtype=dtype)
|
||||
|
||||
@pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]])
|
||||
def test_from_codes_with_float(self, codes):
|
||||
# GH21767
|
||||
# float codes should raise even if values are equal to integers
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c"])
|
||||
|
||||
msg = "codes need to be array-like integers"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(codes, dtype.categories)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(codes, dtype=dtype)
|
||||
|
||||
def test_from_codes_with_dtype_raises(self):
|
||||
msg = "Cannot specify"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(
|
||||
[0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"])
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(
|
||||
[0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"])
|
||||
)
|
||||
|
||||
def test_from_codes_neither(self):
|
||||
msg = "Both were None"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes([0, 1])
|
||||
|
||||
def test_from_codes_with_nullable_int(self):
|
||||
codes = pd.array([0, 1], dtype="Int64")
|
||||
categories = ["a", "b"]
|
||||
|
||||
result = Categorical.from_codes(codes, categories=categories)
|
||||
expected = Categorical.from_codes(codes.to_numpy(int), categories=categories)
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_codes_with_nullable_int_na_raises(self):
|
||||
codes = pd.array([0, None], dtype="Int64")
|
||||
categories = ["a", "b"]
|
||||
|
||||
msg = "codes cannot contain NA values"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(codes, categories=categories)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "category"])
|
||||
def test_from_inferred_categories(self, dtype):
|
||||
cats = ["a", "b"]
|
||||
codes = np.array([0, 0, 1, 1], dtype="i8")
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical.from_codes(codes, cats)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "category"])
|
||||
def test_from_inferred_categories_sorts(self, dtype):
|
||||
cats = ["b", "a"]
|
||||
codes = np.array([0, 1, 1, 1], dtype="i8")
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_inferred_categories_dtype(self):
|
||||
cats = ["a", "b", "d"]
|
||||
codes = np.array([0, 1, 0, 2], dtype="i8")
|
||||
dtype = CategoricalDtype(["c", "b", "a"], ordered=True)
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical(
|
||||
["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
    def test_from_inferred_categories_coerces(self):
        cats = ["1", "2", "bad"]
        codes = np.array([0, 0, 1, 2], dtype="i8")
        dtype = CategoricalDtype([1, 2])
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical([1, 1, 2, np.nan])
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("ordered", [None, True, False])
    def test_construction_with_ordered(self, ordered):
        # GH 9347, 9190
        cat = Categorical([0, 1, 2], ordered=ordered)
        assert cat.ordered == bool(ordered)

    def test_constructor_imaginary(self):
        values = [1, 2, 3 + 1j]
        c1 = Categorical(values)
        tm.assert_index_equal(c1.categories, Index(values))
        tm.assert_numpy_array_equal(np.array(c1), np.array(values))

    def test_constructor_string_and_tuples(self):
        # GH 21416
        c = Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object))
        expected_index = Index([("a", "b"), ("b", "a"), "c"])
        assert c.categories.equals(expected_index)

    def test_interval(self):
        idx = pd.interval_range(0, 10, periods=10)
        cat = Categorical(idx, categories=idx)
        expected_codes = np.arange(10, dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # infer categories
        cat = Categorical(idx)
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # list values
        cat = Categorical(list(idx))
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # list values, categories
        cat = Categorical(list(idx), categories=list(idx))
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # shuffled
        values = idx.take([1, 2, 0])
        cat = Categorical(values, categories=idx)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8"))
        tm.assert_index_equal(cat.categories, idx)

        # extra
        values = pd.interval_range(8, 11, periods=3)
        cat = Categorical(values, categories=idx)
        expected_codes = np.array([8, 9, -1], dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # overlapping
        idx = IntervalIndex([Interval(0, 2), Interval(0, 1)])
        cat = Categorical(idx, categories=idx)
        expected_codes = np.array([0, 1], dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

    def test_categorical_extension_array_nullable(self, nulls_fixture):
        # GH:
        arr = pd.arrays.StringArray._from_sequence(
            [nulls_fixture] * 2, dtype=pd.StringDtype()
        )
        result = Categorical(arr)
        assert arr.dtype == result.categories.dtype
        expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype))
        tm.assert_categorical_equal(result, expected)

    def test_from_sequence_copy(self):
        cat = Categorical(np.arange(5).repeat(2))
        result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False)

        # more generally, we'd be OK with a view
        assert result._codes is cat._codes

        result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True)

        assert not tm.shares_memory(result, cat)

    def test_constructor_datetime64_non_nano(self):
        categories = np.arange(10).view("M8[D]")
        values = categories[::2].copy()

        cat = Categorical(values, categories=categories)
        assert (cat == values).all()

    def test_constructor_preserves_freq(self):
        # GH33830 freq retention in categorical
        dti = date_range("2016-01-01", periods=5)

        expected = dti.freq

        cat = Categorical(dti)
        result = cat.categories.freq

        assert expected == result
@ -0,0 +1,139 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

from pandas import (
    Categorical,
    CategoricalIndex,
    Index,
    IntervalIndex,
    Series,
    Timestamp,
)
import pandas._testing as tm


class TestCategoricalDtypes:
    def test_categories_match_up_to_permutation(self):
        # test dtype comparisons between cats

        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
        c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
        c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
        assert c1._categories_match_up_to_permutation(c1)
        assert c2._categories_match_up_to_permutation(c2)
        assert c3._categories_match_up_to_permutation(c3)
        assert c1._categories_match_up_to_permutation(c2)
        assert not c1._categories_match_up_to_permutation(c3)
        assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
        assert not c1._categories_match_up_to_permutation(c1.astype(object))
        assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
        assert c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, categories=list("cab"))
        )
        assert not c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, ordered=True)
        )

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1._categories_match_up_to_permutation(s1)
        assert c2._categories_match_up_to_permutation(s2)
        assert c3._categories_match_up_to_permutation(s3)
        assert c1._categories_match_up_to_permutation(s2)
        assert not c1._categories_match_up_to_permutation(s3)
        assert not c1._categories_match_up_to_permutation(s1.astype(object))

    def test_set_dtype_same(self):
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
        tm.assert_categorical_equal(result, c)

    def test_set_dtype_new_categories(self):
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(list("abcd")))
        tm.assert_numpy_array_equal(result.codes, c.codes)
        tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))

    @pytest.mark.parametrize(
        "values, categories, new_categories",
        [
            # No NaNs, same cats, same order
            (["a", "b", "a"], ["a", "b"], ["a", "b"]),
            # No NaNs, same cats, different order
            (["a", "b", "a"], ["a", "b"], ["b", "a"]),
            # Same, unsorted
            (["b", "a", "a"], ["a", "b"], ["a", "b"]),
            # No NaNs, same cats, different order
            (["b", "a", "a"], ["a", "b"], ["b", "a"]),
            # NaNs
            (["a", "b", "c"], ["a", "b"], ["a", "b"]),
            (["a", "b", "c"], ["a", "b"], ["b", "a"]),
            (["b", "a", "c"], ["a", "b"], ["a", "b"]),
            (["b", "a", "c"], ["a", "b"], ["a", "b"]),
            # Introduce NaNs
            (["a", "b", "c"], ["a", "b"], ["a"]),
            (["a", "b", "c"], ["a", "b"], ["b"]),
            (["b", "a", "c"], ["a", "b"], ["a"]),
            (["b", "a", "c"], ["a", "b"], ["a"]),
            # No overlap
            (["a", "b", "c"], ["a", "b"], ["d", "e"]),
        ],
    )
    @pytest.mark.parametrize("ordered", [True, False])
    def test_set_dtype_many(self, values, categories, new_categories, ordered):
        c = Categorical(values, categories)
        expected = Categorical(values, new_categories, ordered)
        result = c._set_dtype(expected.dtype)
        tm.assert_categorical_equal(result, expected)

    def test_set_dtype_no_overlap(self):
        c = Categorical(["a", "b", "c"], ["d", "e"])
        result = c._set_dtype(CategoricalDtype(["a", "b"]))
        expected = Categorical([None, None, None], categories=["a", "b"])
        tm.assert_categorical_equal(result, expected)

    def test_codes_dtypes(self):
        # GH 8453
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"

        result = Categorical([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        result = Categorical([f"foo{i:05d}" for i in range(40000)])
        assert result.codes.dtype == "int32"

        # adding cats
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"
        result = result.add_categories([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        # removing cats
        result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
        assert result.codes.dtype == "int8"

    def test_iter_python_types(self):
        # GH-19909
        cat = Categorical([1, 2])
        assert isinstance(next(iter(cat)), int)
        assert isinstance(cat.tolist()[0], int)

    def test_iter_python_types_datetime(self):
        cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
        assert isinstance(next(iter(cat)), Timestamp)
        assert isinstance(cat.tolist()[0], Timestamp)

    def test_interval_index_category(self):
        # GH 38316
        index = IntervalIndex.from_breaks(np.arange(3, dtype="uint64"))

        result = CategoricalIndex(index).dtype.categories
        expected = IntervalIndex.from_arrays(
            [0, 1], [1, 2], dtype="interval[uint64, right]"
        )
        tm.assert_index_equal(result, expected)
@ -0,0 +1,388 @@
import math

import numpy as np
import pytest

from pandas import (
    NA,
    Categorical,
    CategoricalIndex,
    Index,
    Interval,
    IntervalIndex,
    NaT,
    PeriodIndex,
    Series,
    Timedelta,
    Timestamp,
)
import pandas._testing as tm
import pandas.core.common as com


class TestCategoricalIndexingWithFactor:
    def test_getitem(self):
        factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
        assert factor[0] == "a"
        assert factor[-1] == "c"

        subf = factor[[0, 1, 2]]
        tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8))

        subf = factor[np.asarray(factor) == "c"]
        tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8))

    def test_setitem(self):
        factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
        # int/positional
        c = factor.copy()
        c[0] = "b"
        assert c[0] == "b"
        c[-1] = "a"
        assert c[-1] == "a"

        # boolean
        c = factor.copy()
        indexer = np.zeros(len(c), dtype="bool")
        indexer[0] = True
        indexer[-1] = True
        c[indexer] = "c"
        expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True)

        tm.assert_categorical_equal(c, expected)

    @pytest.mark.parametrize(
        "other",
        [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])],
    )
    def test_setitem_same_but_unordered(self, other):
        # GH-24142
        target = Categorical(["a", "b"], categories=["a", "b"])
        mask = np.array([True, False])
        target[mask] = other[mask]
        expected = Categorical(["b", "b"], categories=["a", "b"])
        tm.assert_categorical_equal(target, expected)

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"], categories=["b", "a", "c"]),
            Categorical(["b", "a"], categories=["a", "b", "c"]),
            Categorical(["a", "a"], categories=["a"]),
            Categorical(["b", "b"], categories=["b"]),
        ],
    )
    def test_setitem_different_unordered_raises(self, other):
        # GH-24142
        target = Categorical(["a", "b"], categories=["a", "b"])
        mask = np.array([True, False])
        msg = "Cannot set a Categorical with another, without identical categories"
        with pytest.raises(TypeError, match=msg):
            target[mask] = other[mask]

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"]),
            Categorical(["b", "a"], categories=["b", "a"], ordered=True),
            Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True),
        ],
    )
    def test_setitem_same_ordered_raises(self, other):
        # GH-24142
        target = Categorical(["a", "b"], categories=["a", "b"], ordered=True)
        mask = np.array([True, False])
        msg = "Cannot set a Categorical with another, without identical categories"
        with pytest.raises(TypeError, match=msg):
            target[mask] = other[mask]

    def test_setitem_tuple(self):
        # GH#20439
        cat = Categorical([(0, 1), (0, 2), (0, 1)])

        # This should not raise
        cat[1] = cat[0]
        assert cat[1] == (0, 1)

    def test_setitem_listlike(self):
        # GH#9469
        # properly coerce the input indexers

        cat = Categorical(
            np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8)
        ).add_categories([-1000])
        indexer = np.array([100000]).astype(np.int64)
        cat[indexer] = -1000

        # we are asserting the code result here
        # which maps to the -1000 category
        result = cat.codes[np.array([100000]).astype(np.int64)]
        tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))


class TestCategoricalIndexing:
    def test_getitem_slice(self):
        cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
        sliced = cat[3]
        assert sliced == "d"

        sliced = cat[3:5]
        expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(sliced, expected)

    def test_getitem_listlike(self):
        # GH 9469
        # properly coerce the input indexers

        c = Categorical(
            np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8)
        )
        result = c.codes[np.array([100000]).astype(np.int64)]
        expected = c[np.array([100000]).astype(np.int64)].codes
        tm.assert_numpy_array_equal(result, expected)

    def test_periodindex(self):
        idx1 = PeriodIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"],
            freq="M",
        )

        cat1 = Categorical(idx1)
        str(cat1)
        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat1._codes, exp_arr)
        tm.assert_index_equal(cat1.categories, exp_idx)

        idx2 = PeriodIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"],
            freq="M",
        )
        cat2 = Categorical(idx2, ordered=True)
        str(cat2)
        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat2._codes, exp_arr)
        tm.assert_index_equal(cat2.categories, exp_idx2)

        idx3 = PeriodIndex(
            [
                "2013-12",
                "2013-11",
                "2013-10",
                "2013-09",
                "2013-08",
                "2013-07",
                "2013-05",
            ],
            freq="M",
        )
        cat3 = Categorical(idx3, ordered=True)
        exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        exp_idx = PeriodIndex(
            [
                "2013-05",
                "2013-07",
                "2013-08",
                "2013-09",
                "2013-10",
                "2013-11",
                "2013-12",
            ],
            freq="M",
        )
        tm.assert_numpy_array_equal(cat3._codes, exp_arr)
        tm.assert_index_equal(cat3.categories, exp_idx)

    @pytest.mark.parametrize(
        "null_val",
        [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"],
    )
    def test_periodindex_on_null_types(self, null_val):
        # GH 46673
        result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D")
        expected = PeriodIndex(["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]")
        assert result[2] is NaT
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]])
    def test_categories_assignments_wrong_length_raises(self, new_categories):
        cat = Categorical(["a", "b", "c", "a"])
        msg = (
            "new categories need to have the same number of items "
            "as the old categories!"
        )
        with pytest.raises(ValueError, match=msg):
            cat.rename_categories(new_categories)

    # Combinations of sorted/unique:
    @pytest.mark.parametrize(
        "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
    )
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    @pytest.mark.parametrize("dtype", [None, "category", "key"])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))

        if dtype == "key":
            dtype = key.dtype

        # Test for flat index and CategoricalIndex with same/different cats:
        idx = Index(idx_values, dtype=dtype)
        expected, exp_miss = idx.get_indexer_non_unique(key_values)
        result, res_miss = idx.get_indexer_non_unique(key)

        tm.assert_numpy_array_equal(expected, result)
        tm.assert_numpy_array_equal(exp_miss, res_miss)

        exp_unique = idx.unique().get_indexer(key_values)
        res_unique = idx.unique().get_indexer(key)
        tm.assert_numpy_array_equal(res_unique, exp_unique)

    def test_where_unobserved_nan(self):
        ser = Series(Categorical(["a", "b"]))
        result = ser.where([True, False])
        expected = Series(Categorical(["a", None], categories=["a", "b"]))
        tm.assert_series_equal(result, expected)

        # all NA
        ser = Series(Categorical(["a", "b"]))
        result = ser.where([False, False])
        expected = Series(Categorical([None, None], categories=["a", "b"]))
        tm.assert_series_equal(result, expected)

    def test_where_unobserved_categories(self):
        ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
        result = ser.where([True, True, False], other="b")
        expected = Series(Categorical(["a", "b", "b"], categories=ser.cat.categories))
        tm.assert_series_equal(result, expected)

    def test_where_other_categorical(self):
        ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
        other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"])
        result = ser.where([True, False, True], other)
        expected = Series(Categorical(["a", "c", "c"], dtype=ser.dtype))
        tm.assert_series_equal(result, expected)

    def test_where_new_category_raises(self):
        ser = Series(Categorical(["a", "b", "c"]))
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            ser.where([True, False, True], "d")

    def test_where_ordered_differs_raises(self):
        ser = Series(
            Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True)
        )
        other = Categorical(
            ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
        )
        with pytest.raises(TypeError, match="without identical categories"):
            ser.where([True, False, True], other)


class TestContains:
    def test_contains(self):
        # GH#21508
        cat = Categorical(list("aabbca"), categories=list("cab"))

        assert "b" in cat
        assert "z" not in cat
        assert np.nan not in cat
        with pytest.raises(TypeError, match="unhashable type: 'list'"):
            assert [1] in cat

        # assert codes NOT in index
        assert 0 not in cat
        assert 1 not in cat

        cat = Categorical(list("aabbca") + [np.nan], categories=list("cab"))
        assert np.nan in cat

    @pytest.mark.parametrize(
        "item, expected",
        [
            (Interval(0, 1), True),
            (1.5, True),
            (Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH#23705
        cat = Categorical(IntervalIndex.from_breaks(range(3)))
        result = item in cat
        assert result is expected

    def test_contains_list(self):
        # GH#21729
        cat = Categorical([1, 2, 3])

        assert "a" not in cat

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in cat

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in cat


@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
    ser = Series(range(3))
    idx = Categorical([True, False, True])
    if index:
        idx = CategoricalIndex(idx)

    assert com.is_bool_indexer(idx)
    result = ser[idx]
    expected = ser[idx.astype("object")]
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean_na_treated_as_false(index):
    # https://github.com/pandas-dev/pandas/issues/31503
    ser = Series(range(3))
    idx = Categorical([True, False, None])
    if index:
        idx = CategoricalIndex(idx)

    result = ser[idx]
    expected = ser[idx.fillna(False)]

    tm.assert_series_equal(result, expected)


@pytest.fixture
def non_coercible_categorical(monkeypatch):
    """
    Monkeypatch Categorical.__array__ to ensure no implicit conversion.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """

    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def array(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as m:
        m.setattr(Categorical, "__array__", array)
        yield


def test_series_at():
    arr = Categorical(["a", "b", "c"])
    ser = Series(arr)
    result = ser.at[0]
    assert result == "a"
@ -0,0 +1,154 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    Categorical,
    Index,
    Series,
)
import pandas._testing as tm


@pytest.fixture(params=[None, "ignore"])
def na_action(request):
    return request.param


@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(data, categories, ordered, na_action):
    # GH 31202 - override base class since we want to maintain categorical/ordered
    cat = Categorical(data, categories=categories, ordered=ordered)
    result = cat.map(str, na_action=na_action)
    expected = Categorical(
        map(str, data), categories=map(str, categories), ordered=ordered
    )
    tm.assert_categorical_equal(result, expected)


def test_map(na_action):
    cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True)
    result = cat.map(lambda x: x.lower(), na_action=na_action)
    exp = Categorical(list("ababc"), categories=list("cba"), ordered=True)
    tm.assert_categorical_equal(result, exp)

    cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False)
    result = cat.map(lambda x: x.lower(), na_action=na_action)
    exp = Categorical(list("ababc"), categories=list("bac"), ordered=False)
    tm.assert_categorical_equal(result, exp)

    # GH 12766: Return an index not an array
    result = cat.map(lambda x: 1, na_action=na_action)
    exp = Index(np.array([1] * 5, dtype=np.int64))
    tm.assert_index_equal(result, exp)

    # change categories dtype
    cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False)

    def f(x):
        return {"A": 10, "B": 20, "C": 30}.get(x)

    result = cat.map(f, na_action=na_action)
    exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
    tm.assert_categorical_equal(result, exp)

    mapper = Series([10, 20, 30], index=["A", "B", "C"])
    result = cat.map(mapper, na_action=na_action)
    tm.assert_categorical_equal(result, exp)

    result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action)
    tm.assert_categorical_equal(result, exp)


@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        ([1, 1, np.nan], pd.isna, Index([False, False, True])),
        ([1, 2, np.nan], pd.isna, Index([False, False, True])),
        ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        (
            [1, 1, np.nan],
            Series([False, False]),
            Categorical([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False] * 3),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_none(data, f, expected):  # GH 24241
    values = Categorical(data)
    result = values.map(f, na_action=None)
    if isinstance(expected, Categorical):
        tm.assert_categorical_equal(result, expected)
    else:
        tm.assert_index_equal(result, expected)


@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])),
        ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
        ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        (
            [1, 1, np.nan],
            Series([False, False]),
            Categorical([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_ignore(data, f, expected):  # GH 24241
    values = Categorical(data)
    result = values.map(f, na_action="ignore")
    if data[1] == 1:
        tm.assert_categorical_equal(result, expected)
    else:
        tm.assert_index_equal(result, expected)


def test_map_with_dict_or_series(na_action):
    orig_values = ["a", "B", 1, "a"]
    new_values = ["one", 2, 3.0, "one"]
    cat = Categorical(orig_values)

    mapper = Series(new_values[:-1], index=orig_values[:-1])
    result = cat.map(mapper, na_action=na_action)

    # Order of categories in result can be different
    expected = Categorical(new_values, categories=[3.0, 2, "one"])
    tm.assert_categorical_equal(result, expected)

    mapper = dict(zip(orig_values[:-1], new_values[:-1]))
    result = cat.map(mapper, na_action=na_action)
    # Order of categories in result can be different
    tm.assert_categorical_equal(result, expected)


def test_map_na_action_no_default_deprecated():
    # GH51645
    cat = Categorical(["a", "b", "c"])
    msg = (
        "The default value of 'ignore' for the `na_action` parameter in "
        "pandas.Categorical.map is deprecated and will be "
        "changed to 'None' in a future version. Please set na_action to the "
        "desired value to avoid seeing this warning"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        cat.map(lambda x: x)
@ -0,0 +1,216 @@
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalMissing:
|
||||
def test_isna(self):
|
||||
exp = np.array([False, False, True])
|
||||
cat = Categorical(["a", "b", np.nan])
|
||||
res = cat.isna()
|
||||
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
def test_na_flags_int_categories(self):
|
||||
# #1457
|
||||
|
||||
categories = list(range(10))
|
||||
labels = np.random.default_rng(2).integers(0, 10, 20)
|
||||
labels[::5] = -1
|
||||
|
||||
cat = Categorical(labels, categories)
|
||||
repr(cat)
|
||||
|
||||
tm.assert_numpy_array_equal(isna(cat), labels == -1)
|
||||
|
||||
def test_nan_handling(self):
|
||||
# Nans are represented as -1 in codes
|
||||
c = Categorical(["a", "b", np.nan, "a"])
|
||||
tm.assert_index_equal(c.categories, Index(["a", "b"]))
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
c[1] = np.nan
|
||||
tm.assert_index_equal(c.categories, Index(["a", "b"]))
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))
|
||||
|
||||
# Adding nan to categories should make assigned nan point to the
|
||||
# category!
|
||||
c = Categorical(["a", "b", np.nan, "a"])
|
||||
tm.assert_index_equal(c.categories, Index(["a", "b"]))
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
|
||||
def test_set_dtype_nans(self):
|
||||
c = Categorical(["a", "b", np.nan])
|
||||
result = c._set_dtype(CategoricalDtype(["a", "c"]))
|
||||
tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))
|
||||
|
||||
def test_set_item_nan(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
cat[1] = np.nan
|
||||
|
||||
exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fillna_kwargs, msg",
|
||||
[
|
||||
(
|
||||
{"value": 1, "method": "ffill"},
|
||||
"Cannot specify both 'value' and 'method'.",
|
||||
),
|
||||
({}, "Must specify a fill 'value' or 'method'."),
|
||||
({"method": "bad"}, "Invalid fill method. Expecting .* bad"),
|
||||
(
|
||||
{"value": Series([1, 2, 3, 4, "a"])},
|
||||
"Cannot setitem on a Categorical with a new category",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_fillna_raises(self, fillna_kwargs, msg):
|
||||
# https://github.com/pandas-dev/pandas/issues/19682
|
||||
# https://github.com/pandas-dev/pandas/issues/13628
|
||||
cat = Categorical([1, 2, 3, None, None])
|
||||
|
||||
if len(fillna_kwargs) == 1 and "value" in fillna_kwargs:
|
||||
err = TypeError
|
||||
else:
|
||||
err = ValueError
|
||||
|
||||
with pytest.raises(err, match=msg):
|
||||
cat.fillna(**fillna_kwargs)
|
||||
|
||||
@pytest.mark.parametrize("named", [True, False])
|
||||
def test_fillna_iterable_category(self, named):
|
||||
# https://github.com/pandas-dev/pandas/issues/21097
|
||||
if named:
|
||||
Point = collections.namedtuple("Point", "x y")
|
||||
else:
|
||||
Point = lambda *args: args # tuple
|
||||
cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object))
|
||||
result = cat.fillna(Point(0, 0))
|
||||
expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# Case where the Point is not among our categories; we want ValueError,
|
||||
# not NotImplementedError GH#41914
|
||||
cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object))
|
||||
msg = "Cannot setitem on a Categorical with a new category"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat.fillna(Point(0, 0))
|
||||
|
||||
def test_fillna_array(self):
|
||||
# accept Categorical or ndarray value if it holds appropriate values
|
||||
cat = Categorical(["A", "B", "C", None, None])
|
||||
|
||||
other = cat.fillna("C")
|
||||
result = cat.fillna(other)
|
||||
tm.assert_categorical_equal(result, other)
|
||||
assert isna(cat[-1]) # didn't modify original inplace
|
||||
|
||||
other = np.array(["A", "B", "C", "B", "A"])
|
||||
result = cat.fillna(other)
|
||||
expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
assert isna(cat[-1]) # didn't modify original inplace
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, expected",
|
||||
[
|
||||
([1, 2, 3], np.array([False, False, False])),
|
||||
([1, 2, np.nan], np.array([False, False, True])),
|
||||
([1, 2, np.inf], np.array([False, False, True])),
|
||||
([1, 2, pd.NA], np.array([False, False, True])),
|
||||
],
|
||||
)
|
||||
def test_use_inf_as_na(self, values, expected):
|
||||
# https://github.com/pandas-dev/pandas/issues/33594
|
||||
msg = "use_inf_as_na option is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
with pd.option_context("mode.use_inf_as_na", True):
|
||||
cat = Categorical(values)
|
||||
result = cat.isna()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = Series(cat).isna()
|
||||
expected = Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = DataFrame(cat).isna()
|
||||
expected = DataFrame(expected)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, expected",
|
||||
[
|
||||
([1, 2, 3], np.array([False, False, False])),
|
||||
([1, 2, np.nan], np.array([False, False, True])),
|
||||
([1, 2, np.inf], np.array([False, False, True])),
|
||||
([1, 2, pd.NA], np.array([False, False, True])),
|
||||
],
|
||||
)
|
||||
def test_use_inf_as_na_outside_context(self, values, expected):
|
||||
# https://github.com/pandas-dev/pandas/issues/33594
|
||||
# Using isna directly for Categorical will fail in general here
|
||||
cat = Categorical(values)
|
||||
|
||||
msg = "use_inf_as_na option is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
with pd.option_context("mode.use_inf_as_na", True):
|
||||
result = isna(cat)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = isna(Series(cat))
|
||||
expected = Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = isna(DataFrame(cat))
|
||||
expected = DataFrame(expected)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"a1, a2, categories",
|
||||
[
|
||||
(["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]),
|
||||
([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]),
|
||||
],
|
||||
)
|
||||
def test_compare_categorical_with_missing(self, a1, a2, categories):
|
||||
# GH 28384
|
||||
cat_type = CategoricalDtype(categories)
|
||||
|
||||
# !=
|
||||
result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type)
|
||||
expected = Series(a1) != Series(a2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# ==
|
||||
result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)
|
||||
expected = Series(a1) == Series(a2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"na_value, dtype",
|
||||
[
|
||||
(pd.NaT, "datetime64[ns]"),
|
||||
(None, "float64"),
|
||||
(np.nan, "float64"),
|
||||
(pd.NA, "float64"),
|
||||
],
|
||||
)
|
||||
def test_categorical_only_missing_values_no_cast(self, na_value, dtype):
|
||||
# GH#44900
|
||||
result = Categorical([na_value, na_value])
|
||||
tm.assert_index_equal(result.categories, Index([], dtype=dtype))
|
@ -0,0 +1,414 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalOpsWithFactor:
|
||||
def test_categories_none_comparisons(self):
|
||||
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
|
||||
tm.assert_categorical_equal(factor, factor)
|
||||
|
||||
def test_comparisons(self):
|
||||
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
|
||||
result = factor[factor == "a"]
|
||||
expected = factor[np.asarray(factor) == "a"]
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = factor[factor != "a"]
|
||||
expected = factor[np.asarray(factor) != "a"]
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = factor[factor < "c"]
|
||||
expected = factor[np.asarray(factor) < "c"]
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = factor[factor > "a"]
|
||||
expected = factor[np.asarray(factor) > "a"]
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = factor[factor >= "b"]
|
||||
expected = factor[np.asarray(factor) >= "b"]
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = factor[factor <= "b"]
|
||||
expected = factor[np.asarray(factor) <= "b"]
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
n = len(factor)
|
||||
|
||||
other = factor[np.random.default_rng(2).permutation(n)]
|
||||
result = factor == other
|
||||
expected = np.asarray(factor) == np.asarray(other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = factor == "d"
|
||||
expected = np.zeros(len(factor), dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# comparisons with categoricals
|
||||
cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
|
||||
cat_rev_base = Categorical(
|
||||
["b", "b", "b"], categories=["c", "b", "a"], ordered=True
|
||||
)
|
||||
cat = Categorical(["a", "b", "c"], ordered=True)
|
||||
cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True)
|
||||
|
||||
# comparisons need to take categories ordering into account
|
||||
res_rev = cat_rev > cat_rev_base
|
||||
exp_rev = np.array([True, False, False])
|
||||
tm.assert_numpy_array_equal(res_rev, exp_rev)
|
||||
|
||||
res_rev = cat_rev < cat_rev_base
|
||||
exp_rev = np.array([False, False, True])
|
||||
tm.assert_numpy_array_equal(res_rev, exp_rev)
|
||||
|
||||
res = cat > cat_base
|
||||
exp = np.array([False, False, True])
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
# Only categories with same categories can be compared
|
||||
msg = "Categoricals can only be compared if 'categories' are the same"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > cat_rev
|
||||
|
||||
cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"])
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > cat_rev_base2
|
||||
|
||||
# Only categories with same ordering information can be compared
|
||||
cat_unordered = cat.set_ordered(False)
|
||||
assert not (cat > cat).any()
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > cat_unordered
|
||||
|
||||
# comparison (in both directions) with Series will raise
|
||||
s = Series(["b", "b", "b"], dtype=object)
|
||||
msg = (
|
||||
"Cannot compare a Categorical for op __gt__ with type "
|
||||
r"<class 'numpy\.ndarray'>"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat_rev
|
||||
|
||||
# comparison with numpy.array will raise in both direction, but only on
|
||||
# newer numpy versions
|
||||
a = np.array(["b", "b", "b"], dtype=object)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > a
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > a
|
||||
|
||||
# Make sure that unequal comparisons take the categories order into
# account
|
||||
cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True)
|
||||
exp = np.array([True, False, False])
|
||||
res = cat_rev > "b"
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
# check that zero-dim array gets unboxed
|
||||
res = cat_rev > np.array("b")
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
|
||||
class TestCategoricalOps:
|
||||
@pytest.mark.parametrize(
|
||||
"categories",
|
||||
[["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]],
|
||||
)
|
||||
def test_not_equal_with_na(self, categories):
|
||||
# https://github.com/pandas-dev/pandas/issues/32276
|
||||
c1 = Categorical.from_codes([-1, 0], categories=categories)
|
||||
c2 = Categorical.from_codes([0, 1], categories=categories)
|
||||
|
||||
result = c1 != c2
|
||||
|
||||
assert result.all()
|
||||
|
||||
def test_compare_frame(self):
|
||||
# GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
|
||||
data = ["a", "b", 2, "a"]
|
||||
cat = Categorical(data)
|
||||
|
||||
df = DataFrame(cat)
|
||||
|
||||
result = cat == df.T
|
||||
expected = DataFrame([[True, True, True, True]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = cat[::-1] != df.T
|
||||
expected = DataFrame([[False, True, True, False]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_compare_frame_raises(self, comparison_op):
|
||||
# alignment raises unless we transpose
|
||||
op = comparison_op
|
||||
cat = Categorical(["a", "b", 2, "a"])
|
||||
df = DataFrame(cat)
|
||||
msg = "Unable to coerce to Series, length must be 1: given 4"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
op(cat, df)
|
||||
|
||||
def test_datetime_categorical_comparison(self):
|
||||
dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True)
|
||||
tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True]))
|
||||
tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True]))
|
||||
|
||||
def test_reflected_comparison_with_scalars(self):
|
||||
# GH8658
|
||||
cat = Categorical([1, 2, 3], ordered=True)
|
||||
tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True]))
|
||||
tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True]))
|
||||
|
||||
def test_comparison_with_unknown_scalars(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
|
||||
# and following comparisons with scalars not in categories should raise
|
||||
# for unequal comps, but not for equal/not equal
|
||||
cat = Categorical([1, 2, 3], ordered=True)
|
||||
|
||||
msg = "Invalid comparison between dtype=category and int"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat < 4
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > 4
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
4 < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
4 > cat
|
||||
|
||||
tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False]))
|
||||
tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True]))
|
||||
|
||||
def test_comparison_with_tuple(self):
|
||||
cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object))
|
||||
|
||||
result = cat == "foo"
|
||||
expected = np.array([True, False, False, False], dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat == (0, 1)
|
||||
expected = np.array([False, True, False, True], dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat != (0, 1)
|
||||
tm.assert_numpy_array_equal(result, ~expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
|
||||
def test_comparison_of_ordered_categorical_with_nan_to_scalar(
|
||||
self, compare_operators_no_eq_ne
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/26504
|
||||
# BUG: fix ordered categorical comparison with missing values (#26504 )
|
||||
# and following comparisons with scalars in categories with missing
|
||||
# values should be evaluated as False
|
||||
|
||||
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
|
||||
scalar = 2
|
||||
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar)
|
||||
actual = getattr(cat, compare_operators_no_eq_ne)(scalar)
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
|
||||
def test_comparison_of_ordered_categorical_with_nan_to_listlike(
|
||||
self, compare_operators_no_eq_ne
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/26504
|
||||
# and following comparisons of missing values in ordered Categorical
|
||||
# with listlike should be evaluated as False
|
||||
|
||||
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
|
||||
other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
|
||||
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
|
||||
actual = getattr(cat, compare_operators_no_eq_ne)(other)
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,reverse,base",
|
||||
[(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])],
|
||||
)
|
||||
def test_comparisons(self, data, reverse, base):
|
||||
cat_rev = Series(Categorical(data, categories=reverse, ordered=True))
|
||||
cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True))
|
||||
cat = Series(Categorical(data, ordered=True))
|
||||
cat_base = Series(
|
||||
Categorical(base, categories=cat.cat.categories, ordered=True)
|
||||
)
|
||||
s = Series(base, dtype=object if base == list("bbb") else None)
|
||||
a = np.array(base)
|
||||
|
||||
# comparisons need to take categories ordering into account
|
||||
res_rev = cat_rev > cat_rev_base
|
||||
exp_rev = Series([True, False, False])
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
|
||||
res_rev = cat_rev < cat_rev_base
|
||||
exp_rev = Series([False, False, True])
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
|
||||
res = cat > cat_base
|
||||
exp = Series([False, False, True])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
scalar = base[1]
|
||||
res = cat > scalar
|
||||
exp = Series([False, False, True])
|
||||
exp2 = cat.values > scalar
|
||||
tm.assert_series_equal(res, exp)
|
||||
tm.assert_numpy_array_equal(res.values, exp2)
|
||||
res_rev = cat_rev > scalar
|
||||
exp_rev = Series([True, False, False])
|
||||
exp_rev2 = cat_rev.values > scalar
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
tm.assert_numpy_array_equal(res_rev.values, exp_rev2)
|
||||
|
||||
# Only categories with same categories can be compared
|
||||
msg = "Categoricals can only be compared if 'categories' are the same"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > cat_rev
|
||||
|
||||
# categorical cannot be compared to Series or numpy array, and also
|
||||
# not the other way around
|
||||
msg = (
|
||||
"Cannot compare a Categorical for op __gt__ with type "
|
||||
r"<class 'numpy\.ndarray'>"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > a
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > a
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat_rev
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a < cat_rev
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ctor",
|
||||
[
|
||||
lambda *args, **kwargs: Categorical(*args, **kwargs),
|
||||
lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
|
||||
],
|
||||
)
|
||||
def test_unordered_different_order_equal(self, ctor):
|
||||
# https://github.com/pandas-dev/pandas/issues/16014
|
||||
c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 == c2).all()
|
||||
|
||||
c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 != c2).all()
|
||||
|
||||
c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 != c2).all()
|
||||
|
||||
c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
|
||||
result = c1 == c2
|
||||
tm.assert_numpy_array_equal(np.array(result), np.array([True, False]))
|
||||
|
||||
def test_unordered_different_categories_raises(self):
|
||||
c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False)
|
||||
|
||||
with pytest.raises(TypeError, match=("Categoricals can only be compared")):
|
||||
c1 == c2
|
||||
|
||||
def test_compare_different_lengths(self):
|
||||
c1 = Categorical([], categories=["a", "b"])
|
||||
c2 = Categorical([], categories=["a"])
|
||||
|
||||
msg = "Categoricals can only be compared if 'categories' are the same."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
c1 == c2
|
||||
|
||||
def test_compare_unordered_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
|
||||
# 349290078
|
||||
a = Categorical(["a"], categories=["a", "b"])
|
||||
b = Categorical(["b"], categories=["b", "a"])
|
||||
assert not a.equals(b)
|
||||
|
||||
def test_numeric_like_ops(self):
|
||||
df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)})
|
||||
labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)]
|
||||
cat_labels = Categorical(labels, labels)
|
||||
|
||||
df = df.sort_values(by=["value"], ascending=True)
|
||||
df["value_group"] = pd.cut(
|
||||
df.value, range(0, 10500, 500), right=False, labels=cat_labels
|
||||
)
|
||||
|
||||
# numeric ops should not succeed
|
||||
for op, str_rep in [
|
||||
("__add__", r"\+"),
|
||||
("__sub__", "-"),
|
||||
("__mul__", r"\*"),
|
||||
("__truediv__", "/"),
|
||||
]:
|
||||
msg = f"Series cannot perform the operation {str_rep}|unsupported operand"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(df, op)(df)
|
||||
|
||||
# reduction ops should not succeed (unless specifically defined, e.g.
|
||||
# min/max)
|
||||
s = df["value_group"]
|
||||
for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]:
|
||||
msg = f"does not support reduction '{op}'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(s, op)(numeric_only=False)
|
||||
|
||||
def test_numeric_like_ops_series(self):
|
||||
# numpy ops
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
with pytest.raises(TypeError, match="does not support reduction 'sum'"):
|
||||
np.sum(s)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, str_rep",
|
||||
[
|
||||
("__add__", r"\+"),
|
||||
("__sub__", "-"),
|
||||
("__mul__", r"\*"),
|
||||
("__truediv__", "/"),
|
||||
],
|
||||
)
|
||||
def test_numeric_like_ops_series_arith(self, op, str_rep):
|
||||
# numeric ops on a Series
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
msg = f"Series cannot perform the operation {str_rep}|unsupported operand"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(s, op)(2)
|
||||
|
||||
def test_numeric_like_ops_series_invalid(self):
|
||||
# invalid ufunc
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
msg = "Object with dtype category cannot perform the numpy op log"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
np.log(s)
|
@ -0,0 +1,111 @@
import pytest

import pandas as pd
from pandas import Categorical
import pandas._testing as tm


@pytest.mark.parametrize(
    "to_replace,value,expected,flip_categories",
    [
        # one-to-one
        (1, 2, [2, 2, 3], False),
        (1, 4, [4, 2, 3], False),
        (4, 1, [1, 2, 3], False),
        (5, 6, [1, 2, 3], False),
        # many-to-one
        ([1], 2, [2, 2, 3], False),
        ([1, 2], 3, [3, 3, 3], False),
        ([1, 2], 4, [4, 4, 3], False),
        ((1, 2, 4), 5, [5, 5, 3], False),
        ((5, 6), 2, [1, 2, 3], False),
        ([1], [2], [2, 2, 3], False),
        ([1, 4], [5, 2], [5, 2, 3], False),
        # GH49404: overlap between to_replace and value
        ([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
        # GH50872, GH46884: replace with null
        (1, None, [None, 2, 3], False),
        (1, pd.NA, [None, 2, 3], False),
        # check_categorical sorts categories, which crashes on mixed dtypes
        (3, "4", [1, 2, "4"], False),
        ([1, 2, "3"], "5", ["5", "5", 3], True),
    ],
)
@pytest.mark.filterwarnings(
    "ignore:.*with CategoricalDtype is deprecated:FutureWarning"
)
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
    # GH 31720

    ser = pd.Series([1, 2, 3], dtype="category")
    result = ser.replace(to_replace, value)
    expected = pd.Series(expected, dtype="category")
    ser.replace(to_replace, value, inplace=True)

    if flip_categories:
        expected = expected.cat.set_categories(expected.cat.categories[::-1])

    tm.assert_series_equal(expected, result, check_category_order=False)
    tm.assert_series_equal(expected, ser, check_category_order=False)


@pytest.mark.parametrize(
    "to_replace, value, result, expected_error_msg",
    [
        ("b", "c", ["a", "c"], "Categorical.categories are different"),
        ("c", "d", ["a", "b"], None),
        # https://github.com/pandas-dev/pandas/issues/33288
        ("a", "a", ["a", "b"], None),
        ("b", None, ["a", None], "Categorical.categories length are different"),
    ],
)
def test_replace_categorical(to_replace, value, result, expected_error_msg):
    # GH#26988
    cat = Categorical(["a", "b"])
    expected = Categorical(result)
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    warn = FutureWarning if expected_error_msg is not None else None
    with tm.assert_produces_warning(warn, match=msg):
        result = pd.Series(cat, copy=False).replace(to_replace, value)._values

    tm.assert_categorical_equal(result, expected)
    if to_replace == "b":  # the "c" test is supposed to be unchanged
        with pytest.raises(AssertionError, match=expected_error_msg):
            # ensure non-inplace call does not affect original
            tm.assert_categorical_equal(cat, expected)

    ser = pd.Series(cat, copy=False)
    with tm.assert_produces_warning(warn, match=msg):
        ser.replace(to_replace, value, inplace=True)
    tm.assert_categorical_equal(cat, expected)


def test_replace_categorical_ea_dtype():
    # GH49404
    cat = Categorical(pd.array(["a", "b"], dtype="string"))
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
    expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
    tm.assert_categorical_equal(result, expected)


def test_replace_maintain_ordering():
    # GH51016
    dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
    ser = pd.Series([0, 1, 2], dtype=dtype)
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.replace(0, 2)
    expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
    expected = pd.Series([2, 1, 2], dtype=expected_dtype)
    tm.assert_series_equal(expected, result, check_category_order=True)
@ -0,0 +1,550 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
option_context,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
|
||||
|
||||
class TestCategoricalReprWithFactor:
|
||||
def test_print(self, using_infer_string):
|
||||
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
|
||||
if using_infer_string:
|
||||
expected = [
|
||||
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
|
||||
"Categories (3, string): [a < b < c]",
|
||||
]
|
||||
else:
|
||||
expected = [
|
||||
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
|
||||
"Categories (3, object): ['a' < 'b' < 'c']",
|
||||
]
|
||||
expected = "\n".join(expected)
|
||||
actual = repr(factor)
|
||||
assert actual == expected
|
||||
|
||||
|
||||
class TestCategoricalRepr:
|
||||
def test_big_print(self):
|
||||
codes = np.array([0, 1, 2, 0, 1, 2] * 100)
|
||||
dtype = CategoricalDtype(categories=Index(["a", "b", "c"], dtype=object))
|
||||
factor = Categorical.from_codes(codes, dtype=dtype)
|
||||
expected = [
|
||||
"['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
|
||||
"Length: 600",
|
||||
"Categories (3, object): ['a', 'b', 'c']",
|
||||
]
|
||||
expected = "\n".join(expected)
|
||||
|
||||
actual = repr(factor)
|
||||
|
||||
assert actual == expected
|
||||
|
||||
def test_empty_print(self):
|
||||
factor = Categorical([], Index(["a", "b", "c"], dtype=object))
|
||||
expected = "[], Categories (3, object): ['a', 'b', 'c']"
|
||||
actual = repr(factor)
|
||||
assert actual == expected
|
||||
|
||||
assert expected == actual
|
||||
factor = Categorical([], Index(["a", "b", "c"], dtype=object), ordered=True)
|
||||
expected = "[], Categories (3, object): ['a' < 'b' < 'c']"
|
||||
actual = repr(factor)
|
||||
assert expected == actual
|
||||
|
||||
factor = Categorical([], [])
|
||||
expected = "[], Categories (0, object): []"
|
||||
assert expected == repr(factor)
|
||||
|
||||
def test_print_none_width(self):
|
||||
# GH10087
|
||||
a = Series(Categorical([1, 2, 3, 4]))
|
||||
exp = (
|
||||
"0 1\n1 2\n2 3\n3 4\n"
|
||||
"dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
|
||||
)
|
||||
|
||||
with option_context("display.width", None):
|
||||
assert exp == repr(a)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
using_pyarrow_string_dtype(),
|
||||
reason="Change once infer_string is set to True by default",
|
||||
)
|
||||
def test_unicode_print(self):
|
||||
c = Categorical(["aaaaa", "bb", "cccc"] * 20)
|
||||
expected = """\
|
||||
['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc']
|
||||
Length: 60
|
||||
Categories (3, object): ['aaaaa', 'bb', 'cccc']"""
|
||||
|
||||
assert repr(c) == expected
|
||||
|
||||
c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
|
||||
expected = """\
|
||||
['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
|
||||
Length: 60
|
||||
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
|
||||
|
||||
assert repr(c) == expected
|
||||
|
||||
# unicode option should not affect to Categorical, as it doesn't care
|
||||
# the repr width
|
||||
with option_context("display.unicode.east_asian_width", True):
|
||||
c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
|
||||
expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
|
||||
Length: 60
|
||||
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
|
||||
|
||||
assert repr(c) == expected
|
||||
|
||||
def test_categorical_repr(self):
|
||||
c = Categorical([1, 2, 3])
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1, 2, 3, 4, 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20, dtype=np.int64))
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_ordered(self):
|
||||
c = Categorical([1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1 < 2 < 3 < 4 < 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20, dtype=np.int64), ordered=True)
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx)
|
||||
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]"
|
||||
)
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]"
|
||||
)
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
c = Categorical(idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]"
|
||||
)
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, "
|
||||
"2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, "
|
||||
"2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]"
|
||||
)
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime_ordered(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_int_with_nan(self):
|
||||
c = Categorical([1, 2, np.nan])
|
||||
c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]"""
|
||||
assert repr(c) == c_exp
|
||||
|
||||
s = Series([1, 2, np.nan], dtype="object").astype("category")
|
||||
s_exp = """0 1\n1 2\n2 NaN
|
||||
dtype: category
|
||||
Categories (2, int64): [1, 2]"""
|
||||
assert repr(s) == s_exp
|
||||
|
||||
def test_categorical_repr_period(self):
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_period_ordered(self):
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=20)
|
||||
c = Categorical(idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=20)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_index_repr(self):
|
||||
idx = CategoricalIndex(Categorical([1, 2, 3]))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(idx) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64)))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_ordered(self):
|
||||
i = CategoricalIndex(Categorical([1, 2, 3], ordered=True))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64), ordered=True))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime_ordered(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
|
||||
'2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
|
||||
'2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period(self):
|
||||
# test all length
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=1)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=2)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=3)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx)))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00',
|
||||
'2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00',
|
||||
'2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period_ordered(self):
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=10)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=10)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_str_repr(self):
|
||||
# GH 33676
|
||||
result = repr(Categorical([1, "2", 3, 4]))
|
||||
expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']"
|
||||
assert result == expected
|
@ -0,0 +1,128 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    Index,
)
import pandas._testing as tm


class TestCategoricalSort:
    def test_argsort(self):
        c = Categorical([5, 3, 1, 4, 2], ordered=True)

        expected = np.array([2, 4, 1, 3, 0])
        tm.assert_numpy_array_equal(
            c.argsort(ascending=True), expected, check_dtype=False
        )

        expected = expected[::-1]
        tm.assert_numpy_array_equal(
            c.argsort(ascending=False), expected, check_dtype=False
        )

    def test_numpy_argsort(self):
        c = Categorical([5, 3, 1, 4, 2], ordered=True)

        expected = np.array([2, 4, 1, 3, 0])
        tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False)

        tm.assert_numpy_array_equal(
            np.argsort(c, kind="mergesort"), expected, check_dtype=False
        )

        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(c, axis=0)

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(c, order="C")

    def test_sort_values(self):
        # unordered cats are sortable
        cat = Categorical(["a", "b", "b", "a"], ordered=False)
        cat.sort_values()

        cat = Categorical(["a", "c", "b", "d"], ordered=True)

        # sort_values
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=object)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        cat = Categorical(
            ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
        )
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=object)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        res = cat.sort_values(ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=object)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        # sort (inplace order)
        cat1 = cat.copy()
        orig_codes = cat1._codes
        cat1.sort_values(inplace=True)
        assert cat1._codes is orig_codes
        exp = np.array(["a", "b", "c", "d"], dtype=object)
        tm.assert_numpy_array_equal(cat1.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        # reverse
        cat = Categorical(["a", "c", "c", "b", "d"], ordered=True)
        res = cat.sort_values(ascending=False)
        exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object)
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(res.__array__(), exp_val)
        tm.assert_index_equal(res.categories, exp_categories)

    def test_sort_values_na_position(self):
        # see gh-12882
        cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True)
        exp_categories = Index([2, 5])

        exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
        res = cat.sort_values()  # default arguments
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0])
        res = cat.sort_values(ascending=True, na_position="first")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0])
        res = cat.sort_values(ascending=False, na_position="first")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
        res = cat.sort_values(ascending=True, na_position="last")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan])
        res = cat.sort_values(ascending=False, na_position="last")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
        res = cat.sort_values(ascending=False, na_position="last")
        exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object)
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(res.__array__(), exp_val)
        tm.assert_index_equal(res.categories, exp_categories)

        cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
        res = cat.sort_values(ascending=False, na_position="first")
        exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object)
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(res.__array__(), exp_val)
        tm.assert_index_equal(res.categories, exp_categories)
@ -0,0 +1,26 @@
from pandas import Categorical
import pandas._testing as tm


class SubclassedCategorical(Categorical):
    pass


class TestCategoricalSubclassing:
    def test_constructor(self):
        sc = SubclassedCategorical(["a", "b", "c"])
        assert isinstance(sc, SubclassedCategorical)
        tm.assert_categorical_equal(sc, Categorical(["a", "b", "c"]))

    def test_from_codes(self):
        sc = SubclassedCategorical.from_codes([1, 0, 2], ["a", "b", "c"])
        assert isinstance(sc, SubclassedCategorical)
        exp = Categorical.from_codes([1, 0, 2], ["a", "b", "c"])
        tm.assert_categorical_equal(sc, exp)

    def test_map(self):
        sc = SubclassedCategorical(["a", "b", "c"])
        res = sc.map(lambda x: x.upper(), na_action=None)
        assert isinstance(res, SubclassedCategorical)
        exp = Categorical(["A", "B", "C"])
        tm.assert_categorical_equal(res, exp)
@ -0,0 +1,89 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def allow_fill(request):
|
||||
"""Boolean 'allow_fill' parameter for Categorical.take"""
|
||||
return request.param
|
||||
|
||||
|
||||
class TestTake:
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
|
||||
def test_take_default_allow_fill(self):
|
||||
cat = Categorical(["a", "b"])
|
||||
with tm.assert_produces_warning(None):
|
||||
result = cat.take([0, -1])
|
||||
|
||||
assert result.equals(cat)
|
||||
|
||||
def test_take_positive_no_warning(self):
|
||||
cat = Categorical(["a", "b"])
|
||||
with tm.assert_produces_warning(None):
|
||||
cat.take([0, 0])
|
||||
|
||||
def test_take_bounds(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = Categorical(["a", "b", "a"])
|
||||
if allow_fill:
|
||||
msg = "indices are out-of-bounds"
|
||||
else:
|
||||
msg = "index 4 is out of bounds for( axis 0 with)? size 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
cat.take([4, 5], allow_fill=allow_fill)
|
||||
|
||||
def test_take_empty(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = Categorical([], categories=["a", "b"])
|
||||
if allow_fill:
|
||||
msg = "indices are out-of-bounds"
|
||||
else:
|
||||
msg = "cannot do a non-empty take from an empty axes"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
cat.take([0], allow_fill=allow_fill)
|
||||
|
||||
def test_positional_take(self, ordered):
|
||||
cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered)
|
||||
result = cat.take([0, 1, 2], allow_fill=False)
|
||||
expected = Categorical(
|
||||
["a", "a", "b"], categories=cat.categories, ordered=ordered
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_positional_take_unobserved(self, ordered):
|
||||
cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered)
|
||||
result = cat.take([1, 0], allow_fill=False)
|
||||
expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_allow_fill(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = Categorical(["a", "a", "b"])
|
||||
result = cat.take([0, -1, -1], allow_fill=True)
|
||||
expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_with_negative_one(self):
|
||||
# -1 was a category
|
||||
cat = Categorical([-1, 0, 1])
|
||||
result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
|
||||
expected = Categorical([-1, -1, 0], categories=[-1, 0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = Categorical(["a", "b", "c"])
|
||||
result = cat.take([0, 1, -1], fill_value="a", allow_fill=True)
|
||||
expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value_new_raises(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = Categorical(["a", "b", "c"])
|
||||
xpr = r"Cannot setitem on a Categorical with a new category \(d\)"
|
||||
with pytest.raises(TypeError, match=xpr):
|
||||
cat.take([0, 1, -1], fill_value="d", allow_fill=True)
|
@ -0,0 +1,19 @@
import pytest

import pandas._testing as tm


class TestCategoricalWarnings:
    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; c = pd.Categorical([])"
        ip.run_cell(code)

        # GH 31324 newer jedi version raises Deprecation warning;
        # appears resolved 2021-02-02
        with tm.assert_produces_warning(None, raise_on_extra_warnings=False):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("c.", 1))
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,284 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import iNaT
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
|
||||
|
||||
class TestDatetimeArrayConstructor:
|
||||
def test_from_sequence_invalid_type(self):
|
||||
mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
|
||||
with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
|
||||
DatetimeArray._from_sequence(mi, dtype="M8[ns]")
|
||||
|
||||
def test_only_1dim_accepted(self):
|
||||
arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")
|
||||
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Only 1-dimensional"):
|
||||
# 3-dim, we allow 2D to sneak in for ops purposes GH#29853
|
||||
DatetimeArray(arr.reshape(2, 2, 1))
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Only 1-dimensional"):
|
||||
# 0-dim
|
||||
DatetimeArray(arr[[0]].squeeze())
|
||||
|
||||
def test_freq_validation(self):
|
||||
# GH#24623 check that invalid instances cannot be created with the
|
||||
# public constructor
|
||||
arr = np.arange(5, dtype=np.int64) * 3600 * 10**9
|
||||
|
||||
msg = (
|
||||
"Inferred frequency h from passed values does not "
|
||||
"conform to passed frequency W-SUN"
|
||||
)
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr, freq="W")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"meth",
|
||||
[
|
||||
DatetimeArray._from_sequence,
|
||||
pd.to_datetime,
|
||||
pd.DatetimeIndex,
|
||||
],
|
||||
)
|
||||
def test_mixing_naive_tzaware_raises(self, meth):
|
||||
# GH#24569
|
||||
arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])
|
||||
|
||||
msg = (
|
||||
"Cannot mix tz-aware with tz-naive values|"
|
||||
"Tz-aware datetime.datetime cannot be converted "
|
||||
"to datetime64 unless utc=True"
|
||||
)
|
||||
|
||||
for obj in [arr, arr[::-1]]:
|
||||
# check that we raise regardless of whether naive is found
|
||||
# before aware or vice-versa
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
meth(obj)
|
||||
|
||||
def test_from_pandas_array(self):
|
||||
arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9
|
||||
|
||||
result = DatetimeArray._from_sequence(arr, dtype="M8[ns]")._with_freq("infer")
|
||||
|
||||
expected = pd.date_range("1970-01-01", periods=5, freq="h")._data
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
def test_mismatched_timezone_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
arr = DatetimeArray(
|
||||
np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"),
|
||||
dtype=DatetimeTZDtype(tz="US/Central"),
|
||||
)
|
||||
dtype = DatetimeTZDtype(tz="US/Eastern")
|
||||
msg = r"dtype=datetime64\[ns.*\] does not match data dtype datetime64\[ns.*\]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray(arr, dtype=dtype)
|
||||
|
||||
# also with mismatched tzawareness
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray(arr, dtype=np.dtype("M8[ns]"))
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray(arr.tz_localize(None), dtype=arr.dtype)
|
||||
|
||||
def test_non_array_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="list"):
|
||||
DatetimeArray([1, 2, 3])
|
||||
|
||||
def test_bool_dtype_raises(self):
|
||||
arr = np.array([1, 2, 3], dtype="bool")
|
||||
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
msg = "Unexpected value for 'dtype': 'bool'. Must be"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr)
|
||||
|
||||
msg = r"dtype bool cannot be converted to datetime64\[ns\]"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray._from_sequence(arr, dtype="M8[ns]")
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.DatetimeIndex(arr)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.to_datetime(arr)
|
||||
|
||||
def test_incorrect_dtype_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="m8[s]")
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="M8[D]")
|
||||
|
||||
def test_mismatched_values_dtype_units(self):
|
||||
arr = np.array([1, 2, 3], dtype="M8[s]")
|
||||
dtype = np.dtype("M8[ns]")
|
||||
msg = "Values resolution does not match dtype."
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr, dtype=dtype)
|
||||
|
||||
dtype2 = DatetimeTZDtype(tz="UTC", unit="ns")
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr, dtype=dtype2)
|
||||
|
||||
def test_freq_infer_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Frequency inference"):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer")
|
||||
|
||||
def test_copy(self):
|
||||
data = np.array([1, 2, 3], dtype="M8[ns]")
|
||||
arr = DatetimeArray._from_sequence(data, copy=False)
|
||||
assert arr._ndarray is data
|
||||
|
||||
arr = DatetimeArray._from_sequence(data, copy=True)
|
||||
assert arr._ndarray is not data
|
||||
|
||||
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
|
||||
def test_numpy_datetime_unit(self, unit):
|
||||
data = np.array([1, 2, 3], dtype=f"M8[{unit}]")
|
||||
arr = DatetimeArray._from_sequence(data)
|
||||
assert arr.unit == unit
|
||||
assert arr[0].unit == unit
|
||||
|
||||
|
||||
class TestSequenceToDT64NS:
|
||||
def test_tz_dtype_mismatch_raises(self):
|
||||
arr = DatetimeArray._from_sequence(
|
||||
["2000"], dtype=DatetimeTZDtype(tz="US/Central")
|
||||
)
|
||||
with pytest.raises(TypeError, match="data is already tz-aware"):
|
||||
DatetimeArray._from_sequence(arr, dtype=DatetimeTZDtype(tz="UTC"))
|
||||
|
||||
def test_tz_dtype_matches(self):
|
||||
dtype = DatetimeTZDtype(tz="US/Central")
|
||||
arr = DatetimeArray._from_sequence(["2000"], dtype=dtype)
|
||||
result = DatetimeArray._from_sequence(arr, dtype=dtype)
|
||||
tm.assert_equal(arr, result)
|
||||
|
||||
@pytest.mark.parametrize("order", ["F", "C"])
|
||||
def test_2d(self, order):
|
||||
dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
|
||||
arr = np.array(dti, dtype=object).reshape(3, 2)
|
||||
if order == "F":
|
||||
arr = arr.T
|
||||
|
||||
res = DatetimeArray._from_sequence(arr, dtype=dti.dtype)
|
||||
expected = DatetimeArray._from_sequence(arr.ravel(), dtype=dti.dtype).reshape(
|
||||
arr.shape
|
||||
)
|
||||
tm.assert_datetime_array_equal(res, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Arrow interaction
|
||||
|
||||
|
||||
EXTREME_VALUES = [0, 123456789, None, iNaT, 2**63 - 1, -(2**63) + 1]
|
||||
FINE_TO_COARSE_SAFE = [123_000_000_000, None, -123_000_000_000]
|
||||
COARSE_TO_FINE_SAFE = [123, None, -123]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("pa_unit", "pd_unit", "pa_tz", "pd_tz", "data"),
|
||||
[
|
||||
("s", "s", "UTC", "UTC", EXTREME_VALUES),
|
||||
("ms", "ms", "UTC", "Europe/Berlin", EXTREME_VALUES),
|
||||
("us", "us", "US/Eastern", "UTC", EXTREME_VALUES),
|
||||
("ns", "ns", "US/Central", "Asia/Kolkata", EXTREME_VALUES),
|
||||
("ns", "s", "UTC", "UTC", FINE_TO_COARSE_SAFE),
|
||||
("us", "ms", "UTC", "Europe/Berlin", FINE_TO_COARSE_SAFE),
|
||||
("ms", "us", "US/Eastern", "UTC", COARSE_TO_FINE_SAFE),
|
||||
("s", "ns", "US/Central", "Asia/Kolkata", COARSE_TO_FINE_SAFE),
|
||||
],
|
||||
)
|
||||
def test_from_arrow_with_different_units_and_timezones_with(
|
||||
pa_unit, pd_unit, pa_tz, pd_tz, data
|
||||
):
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
pa_type = pa.timestamp(pa_unit, tz=pa_tz)
|
||||
arr = pa.array(data, type=pa_type)
|
||||
dtype = DatetimeTZDtype(unit=pd_unit, tz=pd_tz)
|
||||
|
||||
result = dtype.__from_arrow__(arr)
|
||||
expected = DatetimeArray._from_sequence(data, dtype=f"M8[{pa_unit}, UTC]").astype(
|
||||
dtype, copy=False
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = dtype.__from_arrow__(pa.chunked_array([arr]))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("unit", "tz"),
|
||||
[
|
||||
("s", "UTC"),
|
||||
("ms", "Europe/Berlin"),
|
||||
("us", "US/Eastern"),
|
||||
("ns", "Asia/Kolkata"),
|
||||
("ns", "UTC"),
|
||||
],
|
||||
)
|
||||
def test_from_arrow_from_empty(unit, tz):
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
data = []
|
||||
arr = pa.array(data)
|
||||
dtype = DatetimeTZDtype(unit=unit, tz=tz)
|
||||
|
||||
result = dtype.__from_arrow__(arr)
|
||||
expected = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]"))
|
||||
expected = expected.tz_localize(tz=tz)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = dtype.__from_arrow__(pa.chunked_array([arr]))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_arrow_from_integers():
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
data = [0, 123456789, None, 2**63 - 1, iNaT, -123456789]
|
||||
arr = pa.array(data)
|
||||
dtype = DatetimeTZDtype(unit="ns", tz="UTC")
|
||||
|
||||
result = dtype.__from_arrow__(arr)
|
||||
expected = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]"))
|
||||
expected = expected.tz_localize("UTC")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = dtype.__from_arrow__(pa.chunked_array([arr]))
|
||||
tm.assert_extension_array_equal(result, expected)
|
@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
|
||||
|
||||
class TestAccumulator:
|
||||
def test_accumulators_freq(self):
|
||||
# GH#50297
|
||||
arr = DatetimeArray._from_sequence(
|
||||
[
|
||||
"2000-01-01",
|
||||
"2000-01-02",
|
||||
"2000-01-03",
|
||||
],
|
||||
dtype="M8[ns]",
|
||||
)._with_freq("infer")
|
||||
result = arr._accumulate("cummin")
|
||||
expected = DatetimeArray._from_sequence(["2000-01-01"] * 3, dtype="M8[ns]")
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = arr._accumulate("cummax")
|
||||
expected = DatetimeArray._from_sequence(
|
||||
[
|
||||
"2000-01-01",
|
||||
"2000-01-02",
|
||||
"2000-01-03",
|
||||
],
|
||||
dtype="M8[ns]",
|
||||
)
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("func", ["cumsum", "cumprod"])
|
||||
def test_accumulators_disallowed(self, func):
|
||||
# GH#50297
|
||||
arr = DatetimeArray._from_sequence(
|
||||
[
|
||||
"2000-01-01",
|
||||
"2000-01-02",
|
||||
],
|
||||
dtype="M8[ns]",
|
||||
)._with_freq("infer")
|
||||
with pytest.raises(TypeError, match=f"Accumulation {func}"):
|
||||
arr._accumulate(func)
|
@ -0,0 +1,183 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import NaT
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
|
||||
|
||||
class TestReductions:
|
||||
@pytest.fixture(params=["s", "ms", "us", "ns"])
|
||||
def unit(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def arr1d(self, tz_naive_fixture):
|
||||
"""Fixture returning DatetimeArray with parametrized timezones"""
|
||||
tz = tz_naive_fixture
|
||||
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
|
||||
arr = DatetimeArray._from_sequence(
|
||||
[
|
||||
"2000-01-03",
|
||||
"2000-01-03",
|
||||
"NaT",
|
||||
"2000-01-02",
|
||||
"2000-01-05",
|
||||
"2000-01-04",
|
||||
],
|
||||
dtype=dtype,
|
||||
)
|
||||
return arr
|
||||
|
||||
def test_min_max(self, arr1d, unit):
|
||||
arr = arr1d
|
||||
arr = arr.as_unit(unit)
|
||||
tz = arr.tz
|
||||
|
||||
result = arr.min()
|
||||
expected = pd.Timestamp("2000-01-02", tz=tz).as_unit(unit)
|
||||
assert result == expected
|
||||
assert result.unit == expected.unit
|
||||
|
||||
result = arr.max()
|
||||
expected = pd.Timestamp("2000-01-05", tz=tz).as_unit(unit)
|
||||
assert result == expected
|
||||
assert result.unit == expected.unit
|
||||
|
||||
result = arr.min(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.max(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Central"])
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_min_max_empty(self, skipna, tz):
|
||||
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
|
||||
arr = DatetimeArray._from_sequence([], dtype=dtype)
|
||||
result = arr.min(skipna=skipna)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.max(skipna=skipna)
|
||||
assert result is NaT
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Central"])
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_median_empty(self, skipna, tz):
|
||||
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
|
||||
arr = DatetimeArray._from_sequence([], dtype=dtype)
|
||||
result = arr.median(skipna=skipna)
|
||||
assert result is NaT
|
||||
|
||||
arr = arr.reshape(0, 3)
|
||||
result = arr.median(axis=0, skipna=skipna)
|
||||
expected = type(arr)._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = arr.median(axis=1, skipna=skipna)
|
||||
expected = type(arr)._from_sequence([], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_median(self, arr1d):
|
||||
arr = arr1d
|
||||
|
||||
result = arr.median()
|
||||
assert result == arr[0]
|
||||
result = arr.median(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.dropna().median(skipna=False)
|
||||
assert result == arr[0]
|
||||
|
||||
result = arr.median(axis=0)
|
||||
assert result == arr[0]
|
||||
|
||||
def test_median_axis(self, arr1d):
|
||||
arr = arr1d
|
||||
assert arr.median(axis=0) == arr.median()
|
||||
assert arr.median(axis=0, skipna=False) is NaT
|
||||
|
||||
msg = r"abs\(axis\) must be less than ndim"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.median(axis=1)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
|
||||
def test_median_2d(self, arr1d):
|
||||
arr = arr1d.reshape(1, -1)
|
||||
|
||||
# axis = None
|
||||
assert arr.median() == arr1d.median()
|
||||
assert arr.median(skipna=False) is NaT
|
||||
|
||||
# axis = 0
|
||||
result = arr.median(axis=0)
|
||||
expected = arr1d
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# Since column 3 is all-NaT, we get NaT there with or without skipna
|
||||
result = arr.median(axis=0, skipna=False)
|
||||
expected = arr1d
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = arr.median(axis=1)
|
||||
expected = type(arr)._from_sequence([arr1d.median()], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = arr.median(axis=1, skipna=False)
|
||||
expected = type(arr)._from_sequence([NaT], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_mean(self, arr1d):
|
||||
arr = arr1d
|
||||
|
||||
# manually verified result
|
||||
expected = arr[0] + 0.4 * pd.Timedelta(days=1)
|
||||
|
||||
result = arr.mean()
|
||||
assert result == expected
|
||||
result = arr.mean(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.dropna().mean(skipna=False)
|
||||
assert result == expected
|
||||
|
||||
result = arr.mean(axis=0)
|
||||
assert result == expected
|
||||
|
||||
def test_mean_2d(self):
|
||||
dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
|
||||
dta = dti._data.reshape(3, 2)
|
||||
|
||||
result = dta.mean(axis=0)
|
||||
expected = dta[1]
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = dta.mean(axis=1)
|
||||
expected = dta[:, 0] + pd.Timedelta(hours=12)
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = dta.mean(axis=None)
|
||||
expected = dti.mean()
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_mean_empty(self, arr1d, skipna):
|
||||
arr = arr1d[:0]
|
||||
|
||||
assert arr.mean(skipna=skipna) is NaT
|
||||
|
||||
arr2d = arr.reshape(0, 3)
|
||||
result = arr2d.mean(axis=0, skipna=skipna)
|
||||
expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = arr2d.mean(axis=1, skipna=skipna)
|
||||
expected = arr # i.e. 1D, empty
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = arr2d.mean(axis=None, skipna=skipna)
|
||||
assert result is NaT
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,48 @@
import numpy as np
import pytest

import pandas as pd
from pandas.core.arrays.floating import (
    Float32Dtype,
    Float64Dtype,
)


@pytest.fixture(params=[Float32Dtype, Float64Dtype])
def dtype(request):
    """Parametrized fixture returning a float 'dtype'"""
    return request.param()


@pytest.fixture
def data(dtype):
    """Fixture returning 'data' array according to parametrized float 'dtype'"""
    return pd.array(
        list(np.arange(0.1, 0.9, 0.1))
        + [pd.NA]
        + list(np.arange(1, 9.8, 0.1))
        + [pd.NA]
        + [9.9, 10.0],
        dtype=dtype,
    )


@pytest.fixture
def data_missing(dtype):
    """
    Fixture returning array with missing data according to parametrized float
    'dtype'.
    """
    return pd.array([np.nan, 0.1], dtype=dtype)


@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning 'data' or 'data_missing' float arrays.

    Used to test dtype conversion with and without missing values.
    """
    if request.param == "data":
        return data
    elif request.param == "data_missing":
        return data_missing
@ -0,0 +1,244 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"opname, exp",
|
||||
[
|
||||
("add", [1.1, 2.2, None, None, 5.5]),
|
||||
("mul", [0.1, 0.4, None, None, 2.5]),
|
||||
("sub", [0.9, 1.8, None, None, 4.5]),
|
||||
("truediv", [10.0, 10.0, None, None, 10.0]),
|
||||
("floordiv", [9.0, 9.0, None, None, 10.0]),
|
||||
("mod", [0.1, 0.2, None, None, 0.0]),
|
||||
],
|
||||
ids=["add", "mul", "sub", "div", "floordiv", "mod"],
|
||||
)
|
||||
def test_array_op(dtype, opname, exp):
|
||||
a = pd.array([1.0, 2.0, None, 4.0, 5.0], dtype=dtype)
|
||||
b = pd.array([0.1, 0.2, 0.3, None, 0.5], dtype=dtype)
|
||||
|
||||
op = getattr(operator, opname)
|
||||
|
||||
result = op(a, b)
|
||||
expected = pd.array(exp, dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
|
||||
def test_divide_by_zero(dtype, zero, negative):
|
||||
# TODO pending NA/NaN discussion
|
||||
# https://github.com/pandas-dev/pandas/issues/32265/
|
||||
a = pd.array([0, 1, -1, None], dtype=dtype)
|
||||
result = a / zero
|
||||
expected = FloatingArray(
|
||||
np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype),
|
||||
np.array([False, False, False, True]),
|
||||
)
|
||||
if negative:
|
||||
expected *= -1
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_pow_scalar(dtype):
|
||||
a = pd.array([-1, 0, 1, None, 2], dtype=dtype)
|
||||
result = a**0
|
||||
expected = pd.array([1, 1, 1, 1, 1], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = a**1
|
||||
expected = pd.array([-1, 0, 1, None, 2], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = a**pd.NA
|
||||
expected = pd.array([None, None, 1, None, None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = a**np.nan
|
||||
# TODO np.nan should be converted to pd.NA / missing before operation?
|
||||
expected = FloatingArray(
|
||||
np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype),
|
||||
mask=a._mask,
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# reversed
|
||||
a = a[1:] # Can't raise integers to negative powers.
|
||||
|
||||
result = 0**a
|
||||
expected = pd.array([1, 0, None, 0], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = 1**a
|
||||
expected = pd.array([1, 1, 1, 1], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = pd.NA**a
|
||||
expected = pd.array([1, None, None, None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = np.nan**a
|
||||
expected = FloatingArray(
|
||||
np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_pow_array(dtype):
|
||||
a = pd.array([0, 0, 0, 1, 1, 1, None, None, None], dtype=dtype)
|
||||
b = pd.array([0, 1, None, 0, 1, None, 0, 1, None], dtype=dtype)
|
||||
result = a**b
|
||||
expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_rpow_one_to_na():
|
||||
# https://github.com/pandas-dev/pandas/issues/22022
|
||||
# https://github.com/pandas-dev/pandas/issues/29997
|
||||
arr = pd.array([np.nan, np.nan], dtype="Float64")
|
||||
result = np.array([1.0, 2.0]) ** arr
|
||||
expected = pd.array([1.0, np.nan], dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
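A hedged illustration of the pow corner cases exercised above: 1**x and x**0 are 1 for any x, so the result is defined even when an operand is pd.NA; every other exponentiation involving NA stays NA. Sketch assuming the Float64 dtype used in these tests.

import pandas as pd

a = pd.array([0.5, None], dtype="Float64")
print(1 ** a)      # [1.0, 1.0] -- known even for the NA entry
print(a ** 0)      # [1.0, 1.0]
print(pd.NA ** a)  # [<NA>, <NA>] -- exponent is not 0, so the result stays unknown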
@pytest.mark.parametrize("other", [0, 0.5])
|
||||
def test_arith_zero_dim_ndarray(other):
|
||||
arr = pd.array([1, None, 2], dtype="Float64")
|
||||
result = arr + np.array(other)
|
||||
expected = arr + other
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
|
||||
op = all_arithmetic_operators
|
||||
s = pd.Series(data)
|
||||
ops = getattr(s, op)
|
||||
|
||||
if using_infer_string:
|
||||
import pyarrow as pa
|
||||
|
||||
errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
|
||||
else:
|
||||
errs = TypeError
|
||||
|
||||
# invalid scalars
|
||||
msg = "|".join(
|
||||
[
|
||||
r"can only perform ops with numeric values",
|
||||
r"FloatingArray cannot perform the operation mod",
|
||||
"unsupported operand type",
|
||||
"not all arguments converted during string formatting",
|
||||
"can't multiply sequence by non-int of type 'float'",
|
||||
"ufunc 'subtract' cannot use operands with types dtype",
|
||||
r"can only concatenate str \(not \"float\"\) to str",
|
||||
"ufunc '.*' not supported for the input types, and the inputs could not",
|
||||
"ufunc '.*' did not contain a loop with signature matching types",
|
||||
"Concatenation operation is not implemented for NumPy arrays",
|
||||
"has no kernel",
|
||||
"not implemented",
|
||||
]
|
||||
)
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops("foo")
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops(pd.Timestamp("20180101"))
|
||||
|
||||
# invalid array-likes
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops(pd.Series("foo", index=s.index))
|
||||
|
||||
msg = "|".join(
|
||||
[
|
||||
"can only perform ops with numeric values",
|
||||
"cannot perform .* with this index type: DatetimeArray",
|
||||
"Addition/subtraction of integers and integer-arrays "
|
||||
"with DatetimeArray is no longer supported. *",
|
||||
"unsupported operand type",
|
||||
"not all arguments converted during string formatting",
|
||||
"can't multiply sequence by non-int of type 'float'",
|
||||
"ufunc 'subtract' cannot use operands with types dtype",
|
||||
(
|
||||
"ufunc 'add' cannot use operands with types "
|
||||
rf"dtype\('{tm.ENDIAN}M8\[ns\]'\)"
|
||||
),
|
||||
r"ufunc 'add' cannot use operands with types dtype\('float\d{2}'\)",
|
||||
"cannot subtract DatetimeArray from ndarray",
|
||||
"has no kernel",
|
||||
"not implemented",
|
||||
]
|
||||
)
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
|
||||
|
||||
|
||||
# Various
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"A": pd.array([1, 2, np.nan], dtype="Float64"),
|
||||
"B": pd.array([1, np.nan, 3], dtype="Float32"),
|
||||
"C": np.array([1, 2, 3], dtype="float64"),
|
||||
}
|
||||
)
|
||||
|
||||
result = df.A + df.C
|
||||
expected = pd.Series([2, 4, np.nan], dtype="Float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = (df.A + df.C) * 3 == 12
|
||||
expected = pd.Series([False, True, None], dtype="boolean")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.A + df.B
|
||||
expected = pd.Series([2, np.nan, np.nan], dtype="Float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
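A hedged note on the cross-type behaviour above: adding a masked float column to a plain numpy float column (or to a narrower masked float) keeps the result in the widest masked dtype, and comparisons on such data use the nullable boolean dtype so NA can propagate. Minimal sketch under that assumption.

import numpy as np
import pandas as pd

a = pd.array([1.0, None], dtype="Float64")
b = pd.array([1.0, 2.0], dtype="Float32")
c = np.array([1.0, 2.0], dtype="float64")

print((a + b).dtype)   # Float64 -- the wider masked dtype wins
print((a + c).dtype)   # Float64 -- the plain numpy operand is absorbed into the masked result
print((a == c).dtype)  # boolean -- nullable result, so NA can propagate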
@pytest.mark.parametrize(
|
||||
"source, neg_target, abs_target",
|
||||
[
|
||||
([1.1, 2.2, 3.3], [-1.1, -2.2, -3.3], [1.1, 2.2, 3.3]),
|
||||
([1.1, 2.2, None], [-1.1, -2.2, None], [1.1, 2.2, None]),
|
||||
([-1.1, 0.0, 1.1], [1.1, 0.0, -1.1], [1.1, 0.0, 1.1]),
|
||||
],
|
||||
)
|
||||
def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target):
|
||||
# GH38794
|
||||
dtype = float_ea_dtype
|
||||
arr = pd.array(source, dtype=dtype)
|
||||
neg_result, pos_result, abs_result = -arr, +arr, abs(arr)
|
||||
neg_target = pd.array(neg_target, dtype=dtype)
|
||||
abs_target = pd.array(abs_target, dtype=dtype)
|
||||
|
||||
tm.assert_extension_array_equal(neg_result, neg_target)
|
||||
tm.assert_extension_array_equal(pos_result, arr)
|
||||
assert not tm.shares_memory(pos_result, arr)
|
||||
tm.assert_extension_array_equal(abs_result, abs_target)
|
||||
|
||||
|
||||
def test_bitwise(dtype):
|
||||
left = pd.array([1, None, 3, 4], dtype=dtype)
|
||||
right = pd.array([None, 3, 5, 4], dtype=dtype)
|
||||
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left | right
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left & right
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left ^ right
|
@ -0,0 +1,128 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype():
|
||||
# with missing values
|
||||
arr = pd.array([0.1, 0.2, None], dtype="Float64")
|
||||
|
||||
with pytest.raises(ValueError, match="cannot convert NA to integer"):
|
||||
arr.astype("int64")
|
||||
|
||||
with pytest.raises(ValueError, match="cannot convert float NaN to bool"):
|
||||
arr.astype("bool")
|
||||
|
||||
result = arr.astype("float64")
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# no missing values
|
||||
arr = pd.array([0.0, 1.0, 0.5], dtype="Float64")
|
||||
result = arr.astype("int64")
|
||||
expected = np.array([0, 1, 0], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.astype("bool")
|
||||
expected = np.array([False, True, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_to_floating_array():
|
||||
# astype to FloatingArray
|
||||
arr = pd.array([0.0, 1.0, None], dtype="Float64")
|
||||
|
||||
result = arr.astype("Float64")
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
result = arr.astype(pd.Float64Dtype())
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
result = arr.astype("Float32")
|
||||
expected = pd.array([0.0, 1.0, None], dtype="Float32")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_to_boolean_array():
|
||||
# astype to BooleanArray
|
||||
arr = pd.array([0.0, 1.0, None], dtype="Float64")
|
||||
|
||||
result = arr.astype("boolean")
|
||||
expected = pd.array([False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = arr.astype(pd.BooleanDtype())
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_to_integer_array():
|
||||
# astype to IntegerArray
|
||||
arr = pd.array([0.0, 1.5, None], dtype="Float64")
|
||||
|
||||
result = arr.astype("Int64")
|
||||
expected = pd.array([0, 1, None], dtype="Int64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_str():
|
||||
a = pd.array([0.1, 0.2, None], dtype="Float64")
|
||||
expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
|
||||
|
||||
tm.assert_numpy_array_equal(a.astype(str), expected)
|
||||
tm.assert_numpy_array_equal(a.astype("str"), expected)
|
||||
|
||||
|
||||
def test_astype_copy():
|
||||
arr = pd.array([0.1, 0.2, None], dtype="Float64")
|
||||
orig = pd.array([0.1, 0.2, None], dtype="Float64")
|
||||
|
||||
# copy=True -> ensure both data and mask are actual copies
|
||||
result = arr.astype("Float64", copy=True)
|
||||
assert result is not arr
|
||||
assert not tm.shares_memory(result, arr)
|
||||
result[0] = 10
|
||||
tm.assert_extension_array_equal(arr, orig)
|
||||
result[0] = pd.NA
|
||||
tm.assert_extension_array_equal(arr, orig)
|
||||
|
||||
# copy=False
|
||||
result = arr.astype("Float64", copy=False)
|
||||
assert result is arr
|
||||
assert np.shares_memory(result._data, arr._data)
|
||||
assert np.shares_memory(result._mask, arr._mask)
|
||||
result[0] = 10
|
||||
assert arr[0] == 10
|
||||
result[0] = pd.NA
|
||||
assert arr[0] is pd.NA
|
||||
|
||||
# astype to different dtype -> always needs a copy -> even with copy=False
|
||||
# we need to ensure that also the mask is actually copied
|
||||
arr = pd.array([0.1, 0.2, None], dtype="Float64")
|
||||
orig = pd.array([0.1, 0.2, None], dtype="Float64")
|
||||
|
||||
result = arr.astype("Float32", copy=False)
|
||||
assert not tm.shares_memory(result, arr)
|
||||
result[0] = 10
|
||||
tm.assert_extension_array_equal(arr, orig)
|
||||
result[0] = pd.NA
|
||||
tm.assert_extension_array_equal(arr, orig)
|
||||
|
||||
|
||||
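A hedged recap of the copy semantics pinned down by test_astype_copy: a same-dtype astype with copy=False may return the very same array, while casting to another width always allocates fresh data and a fresh mask.

import pandas as pd

arr = pd.array([0.1, 0.2, None], dtype="Float64")

same = arr.astype("Float64", copy=False)
assert same is arr               # no copy needed for an identical dtype

narrower = arr.astype("Float32", copy=False)
narrower[0] = 10
assert arr[0] == 0.1             # the cast copied, so the original is untouched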
def test_astype_object(dtype):
|
||||
arr = pd.array([1.0, pd.NA], dtype=dtype)
|
||||
|
||||
result = arr.astype(object)
|
||||
expected = np.array([1.0, pd.NA], dtype=object)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
# check exact element types
|
||||
assert isinstance(result[0], float)
|
||||
assert result[1] is pd.NA
|
||||
|
||||
|
||||
def test_Float64_conversion():
|
||||
# GH#40729
|
||||
testseries = pd.Series(["1", "2", "3", "4"], dtype="object")
|
||||
result = testseries.astype(pd.Float64Dtype())
|
||||
|
||||
expected = pd.Series([1.0, 2.0, 3.0, 4.0], dtype=pd.Float64Dtype())
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
from pandas.tests.arrays.masked_shared import (
|
||||
ComparisonOps,
|
||||
NumericOps,
|
||||
)
|
||||
|
||||
|
||||
class TestComparisonOps(NumericOps, ComparisonOps):
|
||||
@pytest.mark.parametrize("other", [True, False, pd.NA, -1.0, 0.0, 1])
|
||||
def test_scalar(self, other, comparison_op, dtype):
|
||||
ComparisonOps.test_scalar(self, other, comparison_op, dtype)
|
||||
|
||||
def test_compare_with_integerarray(self, comparison_op):
|
||||
op = comparison_op
|
||||
a = pd.array([0, 1, None] * 3, dtype="Int64")
|
||||
b = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Float64")
|
||||
other = b.astype("Int64")
|
||||
expected = op(a, other)
|
||||
result = op(a, b)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
expected = op(other, a)
|
||||
result = op(b, a)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_equals():
|
||||
# GH-30652
|
||||
# equals is generally tested in /tests/extension/base/methods, but this
|
||||
# specifically tests that two arrays of the same class but different dtype
|
||||
# do not evaluate equal
|
||||
a1 = pd.array([1, 2, None], dtype="Float64")
|
||||
a2 = pd.array([1, 2, None], dtype="Float32")
|
||||
assert a1.equals(a2) is False
|
||||
|
||||
|
||||
def test_equals_nan_vs_na():
|
||||
# GH#44382
|
||||
|
||||
mask = np.zeros(3, dtype=bool)
|
||||
data = np.array([1.0, np.nan, 3.0], dtype=np.float64)
|
||||
|
||||
left = FloatingArray(data, mask)
|
||||
assert left.equals(left)
|
||||
tm.assert_extension_array_equal(left, left)
|
||||
|
||||
assert left.equals(left.copy())
|
||||
assert left.equals(FloatingArray(data.copy(), mask.copy()))
|
||||
|
||||
mask2 = np.array([False, True, False], dtype=bool)
|
||||
data2 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
|
||||
right = FloatingArray(data2, mask2)
|
||||
assert right.equals(right)
|
||||
tm.assert_extension_array_equal(right, right)
|
||||
|
||||
assert not left.equals(right)
|
||||
|
||||
# with mask[1] = True, the only difference is data[1], which should
|
||||
# not matter for equals
|
||||
mask[1] = True
|
||||
assert left.equals(right)
|
@ -0,0 +1,20 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"to_concat_dtypes, result_dtype",
|
||||
[
|
||||
(["Float64", "Float64"], "Float64"),
|
||||
(["Float32", "Float64"], "Float64"),
|
||||
(["Float32", "Float32"], "Float32"),
|
||||
],
|
||||
)
|
||||
def test_concat_series(to_concat_dtypes, result_dtype):
|
||||
result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes])
|
||||
expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype(
|
||||
result_dtype
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,204 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
from pandas.core.arrays.floating import (
|
||||
Float32Dtype,
|
||||
Float64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_uses_pandas_na():
|
||||
a = pd.array([1, None], dtype=Float64Dtype())
|
||||
assert a[1] is pd.NA
|
||||
|
||||
|
||||
def test_floating_array_constructor():
|
||||
values = np.array([1, 2, 3, 4], dtype="float64")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
result = FloatingArray(values, mask)
|
||||
expected = pd.array([1, 2, 3, np.nan], dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
tm.assert_numpy_array_equal(result._data, values)
|
||||
tm.assert_numpy_array_equal(result._mask, mask)
|
||||
|
||||
msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
FloatingArray(values.tolist(), mask)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
FloatingArray(values, mask.tolist())
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
FloatingArray(values.astype(int), mask)
|
||||
|
||||
msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
FloatingArray(values)
|
||||
|
||||
|
||||
def test_floating_array_disallows_float16():
|
||||
# GH#44715
|
||||
arr = np.array([1, 2], dtype=np.float16)
|
||||
mask = np.array([False, False])
|
||||
|
||||
msg = "FloatingArray does not support np.float16 dtype"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
FloatingArray(arr, mask)
|
||||
|
||||
|
||||
def test_floating_array_disallows_Float16_dtype(request):
|
||||
# GH#44715
|
||||
with pytest.raises(TypeError, match="data type 'Float16' not understood"):
|
||||
pd.array([1.0, 2.0], dtype="Float16")
|
||||
|
||||
|
||||
def test_floating_array_constructor_copy():
|
||||
values = np.array([1, 2, 3, 4], dtype="float64")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
result = FloatingArray(values, mask)
|
||||
assert result._data is values
|
||||
assert result._mask is mask
|
||||
|
||||
result = FloatingArray(values, mask, copy=True)
|
||||
assert result._data is not values
|
||||
assert result._mask is not mask
|
||||
|
||||
|
||||
def test_to_array():
|
||||
result = pd.array([0.1, 0.2, 0.3, 0.4])
|
||||
expected = pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"a, b",
|
||||
[
|
||||
([1, None], [1, pd.NA]),
|
||||
([None], [pd.NA]),
|
||||
([None, np.nan], [pd.NA, pd.NA]),
|
||||
([1, np.nan], [1, pd.NA]),
|
||||
([np.nan], [pd.NA]),
|
||||
],
|
||||
)
|
||||
def test_to_array_none_is_nan(a, b):
|
||||
result = pd.array(a, dtype="Float64")
|
||||
expected = pd.array(b, dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_array_mixed_integer_float():
|
||||
result = pd.array([1, 2.0])
|
||||
expected = pd.array([1.0, 2.0], dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = pd.array([1, None, 2.0])
|
||||
expected = pd.array([1.0, None, 2.0], dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values",
|
||||
[
|
||||
["foo", "bar"],
|
||||
"foo",
|
||||
1,
|
||||
1.0,
|
||||
pd.date_range("20130101", periods=2),
|
||||
np.array(["foo"]),
|
||||
[[1, 2], [3, 4]],
|
||||
[np.nan, {"a": 1}],
|
||||
# GH#44514 all-NA case used to get quietly swapped out before checking ndim
|
||||
np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
|
||||
],
|
||||
)
|
||||
def test_to_array_error(values):
|
||||
# error in converting existing arrays to FloatingArray
|
||||
msg = "|".join(
|
||||
[
|
||||
"cannot be converted to FloatingDtype",
|
||||
"values must be a 1D list-like",
|
||||
"Cannot pass scalar",
|
||||
r"float\(\) argument must be a string or a (real )?number, not 'dict'",
|
||||
"could not convert string to float: 'foo'",
|
||||
r"could not convert string to float: np\.str_\('foo'\)",
|
||||
]
|
||||
)
|
||||
with pytest.raises((TypeError, ValueError), match=msg):
|
||||
pd.array(values, dtype="Float64")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [["1", "2", None], ["1.5", "2", None]])
|
||||
def test_construct_from_float_strings(values):
|
||||
# see also test_to_integer_array_str
|
||||
expected = pd.array([float(values[0]), 2, None], dtype="Float64")
|
||||
|
||||
res = pd.array(values, dtype="Float64")
|
||||
tm.assert_extension_array_equal(res, expected)
|
||||
|
||||
res = FloatingArray._from_sequence(values)
|
||||
tm.assert_extension_array_equal(res, expected)
|
||||
|
||||
|
||||
def test_to_array_inferred_dtype():
|
||||
# if values has dtype -> respect it
|
||||
result = pd.array(np.array([1, 2], dtype="float32"))
|
||||
assert result.dtype == Float32Dtype()
|
||||
|
||||
# if values have no dtype -> always float64
|
||||
result = pd.array([1.0, 2.0])
|
||||
assert result.dtype == Float64Dtype()
|
||||
|
||||
|
||||
def test_to_array_dtype_keyword():
|
||||
result = pd.array([1, 2], dtype="Float32")
|
||||
assert result.dtype == Float32Dtype()
|
||||
|
||||
# if values has dtype -> override it
|
||||
result = pd.array(np.array([1, 2], dtype="float32"), dtype="Float64")
|
||||
assert result.dtype == Float64Dtype()
|
||||
|
||||
|
||||
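A hedged summary of the inference rules these constructor tests pin down: plain Python floats carry no dtype and default to Float64, a numpy float dtype is respected, and an explicit dtype= keyword overrides whatever the input carries.

import numpy as np
import pandas as pd

print(pd.array([1.0, 2.0]).dtype)                          # Float64
print(pd.array(np.array([1, 2], dtype="float32")).dtype)   # Float32
print(pd.array(np.array([1, 2], dtype="float32"), dtype="Float64").dtype)  # Float64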
def test_to_array_integer():
|
||||
result = pd.array([1, 2], dtype="Float64")
|
||||
expected = pd.array([1.0, 2.0], dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# for integer dtypes, the itemsize is not preserved
|
||||
# TODO can we specify "floating" in general?
|
||||
result = pd.array(np.array([1, 2], dtype="int32"), dtype="Float64")
|
||||
assert result.dtype == Float64Dtype()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"bool_values, values, target_dtype, expected_dtype",
|
||||
[
|
||||
([False, True], [0, 1], Float64Dtype(), Float64Dtype()),
|
||||
([False, True], [0, 1], "Float64", Float64Dtype()),
|
||||
([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()),
|
||||
],
|
||||
)
|
||||
def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
|
||||
result = pd.array(bool_values, dtype=target_dtype)
|
||||
assert result.dtype == expected_dtype
|
||||
expected = pd.array(values, dtype=target_dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_from_float(data):
|
||||
# construct from our dtype & string dtype
|
||||
dtype = data.dtype
|
||||
|
||||
# from float
|
||||
expected = pd.Series(data)
|
||||
result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# from list
|
||||
expected = pd.Series(data)
|
||||
result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,12 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def test_contains_nan():
|
||||
# GH#52840
|
||||
arr = pd.array(range(5)) / 0
|
||||
|
||||
assert np.isnan(arr._data[0])
|
||||
assert not arr.isna()[0]
|
||||
assert np.nan in arr
|
@ -0,0 +1,194 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import IS64
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
|
||||
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
|
||||
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
|
||||
def test_ufuncs_single(ufunc):
|
||||
a = pd.array([1, 2, -3, np.nan], dtype="Float64")
|
||||
result = ufunc(a)
|
||||
expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
s = pd.Series(a)
|
||||
result = ufunc(s)
|
||||
expected = pd.Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
|
||||
def test_ufuncs_single_float(ufunc):
|
||||
a = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64")
|
||||
with np.errstate(invalid="ignore"):
|
||||
result = ufunc(a)
|
||||
expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
s = pd.Series(a)
|
||||
with np.errstate(invalid="ignore"):
|
||||
result = ufunc(s)
|
||||
expected = pd.Series(ufunc(s.astype(float)), dtype="Float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
|
||||
def test_ufuncs_binary_float(ufunc):
|
||||
# two FloatingArrays
|
||||
a = pd.array([1, 0.2, -3, np.nan], dtype="Float64")
|
||||
result = ufunc(a, a)
|
||||
expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# FloatingArray with numpy array
|
||||
arr = np.array([1, 2, 3, 4])
|
||||
result = ufunc(a, arr)
|
||||
expected = pd.array(ufunc(a.astype(float), arr), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = ufunc(arr, a)
|
||||
expected = pd.array(ufunc(arr, a.astype(float)), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# FloatingArray with scalar
|
||||
result = ufunc(a, 1)
|
||||
expected = pd.array(ufunc(a.astype(float), 1), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = ufunc(1, a)
|
||||
expected = pd.array(ufunc(1, a.astype(float)), dtype="Float64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
|
||||
def test_ufunc_reduce_raises(values):
|
||||
arr = pd.array(values, dtype="Float64")
|
||||
|
||||
res = np.add.reduce(arr)
|
||||
expected = arr.sum(skipna=False)
|
||||
tm.assert_almost_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
|
||||
@pytest.mark.parametrize(
|
||||
"pandasmethname, kwargs",
|
||||
[
|
||||
("var", {"ddof": 0}),
|
||||
("var", {"ddof": 1}),
|
||||
("std", {"ddof": 0}),
|
||||
("std", {"ddof": 1}),
|
||||
("kurtosis", {}),
|
||||
("skew", {}),
|
||||
("sem", {}),
|
||||
],
|
||||
)
|
||||
def test_stat_method(pandasmethname, kwargs):
|
||||
s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64")
|
||||
pandasmeth = getattr(s, pandasmethname)
|
||||
result = pandasmeth(**kwargs)
|
||||
s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64")
|
||||
pandasmeth = getattr(s2, pandasmethname)
|
||||
expected = pandasmeth(**kwargs)
|
||||
assert expected == result
|
||||
|
||||
|
||||
def test_value_counts_na():
|
||||
arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
|
||||
result = arr.value_counts(dropna=False)
|
||||
idx = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype)
|
||||
assert idx.dtype == arr.dtype
|
||||
expected = pd.Series([2, 1, 1], index=idx, dtype="Int64", name="count")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = arr.value_counts(dropna=True)
|
||||
expected = pd.Series([2, 1], index=idx[:-1], dtype="Int64", name="count")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_empty():
|
||||
ser = pd.Series([], dtype="Float64")
|
||||
result = ser.value_counts()
|
||||
idx = pd.Index([], dtype="Float64")
|
||||
assert idx.dtype == "Float64"
|
||||
expected = pd.Series([], index=idx, dtype="Int64", name="count")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
|
||||
ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
|
||||
result = ser.value_counts(normalize=True)
|
||||
expected = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion") / 3
|
||||
assert expected.index.dtype == ser.dtype
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("min_count", [0, 4])
|
||||
def test_floating_array_sum(skipna, min_count, dtype):
|
||||
arr = pd.array([1, 2, 3, None], dtype=dtype)
|
||||
result = arr.sum(skipna=skipna, min_count=min_count)
|
||||
if skipna and min_count == 0:
|
||||
assert result == 6.0
|
||||
else:
|
||||
assert result is pd.NA
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)]
|
||||
)
|
||||
def test_floating_array_numpy_sum(values, expected):
|
||||
arr = pd.array(values, dtype="Float64")
|
||||
result = np.sum(arr)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
|
||||
def test_preserve_dtypes(op):
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"A": ["a", "b", "b"],
|
||||
"B": [1, None, 3],
|
||||
"C": pd.array([0.1, None, 3.0], dtype="Float64"),
|
||||
}
|
||||
)
|
||||
|
||||
# op
|
||||
result = getattr(df.C, op)()
|
||||
assert isinstance(result, np.float64)
|
||||
|
||||
# groupby
|
||||
result = getattr(df.groupby("A"), op)()
|
||||
|
||||
expected = pd.DataFrame(
|
||||
{"B": np.array([1.0, 3.0]), "C": pd.array([0.1, 3], dtype="Float64")},
|
||||
index=pd.Index(["a", "b"], name="A"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_floating_array_min_max(skipna, method, dtype):
|
||||
arr = pd.array([0.0, 1.0, None], dtype=dtype)
|
||||
func = getattr(arr, method)
|
||||
result = func(skipna=skipna)
|
||||
if skipna:
|
||||
assert result == (0 if method == "min" else 1)
|
||||
else:
|
||||
assert result is pd.NA
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("min_count", [0, 9])
|
||||
def test_floating_array_prod(skipna, min_count, dtype):
|
||||
arr = pd.array([1.0, 2.0, None], dtype=dtype)
|
||||
result = arr.prod(skipna=skipna, min_count=min_count)
|
||||
if skipna and min_count == 0:
|
||||
assert result == 2
|
||||
else:
|
||||
assert result is pd.NA
|
@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.floating import (
|
||||
Float32Dtype,
|
||||
Float64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
|
||||
# smoke tests on auto dtype construction
|
||||
|
||||
    assert np.dtype(dtype.type).kind == "f"
|
||||
assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, expected",
|
||||
[(Float32Dtype(), "Float32Dtype()"), (Float64Dtype(), "Float64Dtype()")],
|
||||
)
|
||||
def test_repr_dtype(dtype, expected):
|
||||
assert repr(dtype) == expected
|
||||
|
||||
|
||||
def test_repr_array():
|
||||
result = repr(pd.array([1.0, None, 3.0]))
|
||||
expected = "<FloatingArray>\n[1.0, <NA>, 3.0]\nLength: 3, dtype: Float64"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_repr_array_long():
|
||||
data = pd.array([1.0, 2.0, None] * 1000)
|
||||
expected = """<FloatingArray>
|
||||
[ 1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,
|
||||
...
|
||||
 <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>]
|
||||
Length: 3000, dtype: Float64"""
|
||||
result = repr(data)
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
|
||||
df = pd.DataFrame({"A": data_missing})
|
||||
result = repr(df)
|
||||
expected = " A\n0 <NA>\n1 0.1"
|
||||
assert result == expected
|
@ -0,0 +1,132 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy(box):
|
||||
con = pd.Series if box else pd.array
|
||||
|
||||
    # default (with or without missing values) -> float64 dtype
|
||||
arr = con([0.1, 0.2, 0.3], dtype="Float64")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([0.1, 0.2, None], dtype="Float64")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_float(box):
|
||||
con = pd.Series if box else pd.array
|
||||
|
||||
# no missing values -> can convert to float, otherwise raises
|
||||
arr = con([0.1, 0.2, 0.3], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="float64")
|
||||
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([0.1, 0.2, None], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="float64")
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="float64", na_value=np.nan)
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_int(box):
|
||||
con = pd.Series if box else pd.array
|
||||
|
||||
# no missing values -> can convert to int, otherwise raises
|
||||
arr = con([1.0, 2.0, 3.0], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="int64")
|
||||
expected = np.array([1, 2, 3], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([1.0, 2.0, None], dtype="Float64")
|
||||
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
|
||||
result = arr.to_numpy(dtype="int64")
|
||||
|
||||
# automatic casting (floors the values)
|
||||
arr = con([0.1, 0.9, 1.1], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="int64")
|
||||
expected = np.array([0, 0, 1], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_na_value(box):
|
||||
con = pd.Series if box else pd.array
|
||||
|
||||
arr = con([0.0, 1.0, None], dtype="Float64")
|
||||
result = arr.to_numpy(dtype=object, na_value=None)
|
||||
expected = np.array([0.0, 1.0, None], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype=bool, na_value=False)
|
||||
expected = np.array([False, True, False], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="int64", na_value=-99)
|
||||
expected = np.array([0, 1, -99], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_numpy_na_value_with_nan():
|
||||
# array with both NaN and NA -> only fill NA with `na_value`
|
||||
arr = FloatingArray(np.array([0.0, np.nan, 0.0]), np.array([False, False, True]))
|
||||
result = arr.to_numpy(dtype="float64", na_value=-1)
|
||||
expected = np.array([0.0, np.nan, -1.0], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_dtype(box, dtype):
|
||||
con = pd.Series if box else pd.array
|
||||
arr = con([0.0, 1.0], dtype="Float64")
|
||||
|
||||
result = arr.to_numpy(dtype=dtype)
|
||||
expected = np.array([0, 1], dtype=dtype)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_na_raises(box, dtype):
|
||||
con = pd.Series if box else pd.array
|
||||
arr = con([0.0, 1.0, None], dtype="Float64")
|
||||
with pytest.raises(ValueError, match=dtype):
|
||||
arr.to_numpy(dtype=dtype)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_string(box, dtype):
|
||||
con = pd.Series if box else pd.array
|
||||
arr = con([0.0, 1.0, None], dtype="Float64")
|
||||
|
||||
result = arr.to_numpy(dtype="str")
|
||||
expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_numpy_copy():
|
||||
# to_numpy can be zero-copy if no missing values
|
||||
arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="float64")
|
||||
result[0] = 10
|
||||
tm.assert_extension_array_equal(arr, pd.array([10, 0.2, 0.3], dtype="Float64"))
|
||||
|
||||
arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="float64", copy=True)
|
||||
result[0] = 10
|
||||
tm.assert_extension_array_equal(arr, pd.array([0.1, 0.2, 0.3], dtype="Float64"))
|
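A hedged sketch of the zero-copy path shown in test_to_numpy_copy: with no missing values and a matching numpy dtype, to_numpy may reuse the underlying buffer, so writes flow back unless copy=True is passed.

import pandas as pd

arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")

view = arr.to_numpy(dtype="float64")   # no NA -> may be a view of the data
view[0] = 10
print(arr[0])                          # 10.0 -- the array saw the write

safe = arr.to_numpy(dtype="float64", copy=True)
safe[1] = 20
print(arr[1])                          # 0.2 -- the explicit copy is detached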
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.