forked from Alsan/Post_finder
venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """Return an instance of the integer dtype class chosen by the current param."""
    dtype_cls = request.param
    return dtype_cls()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """
    Build an array of the parametrized integer 'dtype' that mixes valid
    integers with two missing entries.

    Used to test dtype conversion with and without missing values.
    """
    values = [*range(8), np.nan, *range(10, 98), np.nan, 99, 100]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """
    Build a two-element array of the parametrized integer 'dtype': one NaN
    followed by one valid integer.

    Used to test dtype conversion with and without missing values.
    """
    values = [np.nan, 1]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Hand back either the 'data' or the 'data_missing' integer array.

    Used to test dtype conversion with and without missing values.
    """
    # Lookup by param name; an unknown name yields None, like falling off
    # the end of an if/elif chain.
    by_name = {"data": data, "data_missing": data_missing}
    return by_name.get(request.param)
|
@ -0,0 +1,385 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core import ops
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname, exp",
    [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    """Check array/array add and mul through both the forward and reflected op."""
    left = pd.array([0, 1, None, 3, 4], dtype=dtype)
    right = pd.array([1, 2, 3, None, 5], dtype=dtype)
    expected = pd.array(exp, dtype=dtype)

    # array / array: forward op from `operator`, then the reflected op from
    # pandas' `ops` module.
    for namespace, attr in ((operator, opname), (ops, "r" + opname)):
        func = getattr(namespace, attr)
        tm.assert_extension_array_equal(func(left, right), expected)
|
||||
|
||||
|
||||
def test_sub(dtype):
    """Element-wise subtraction yields NA wherever either operand is NA."""
    minuend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    subtrahend = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([1, 1, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(minuend - subtrahend, expected)
|
||||
|
||||
|
||||
def test_div(dtype):
    """True division of two integer arrays is compared against a Float64 array."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")

    tm.assert_extension_array_equal(numerator / denominator, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(zero, negative):
    """Dividing an Int64 array by a zero scalar gives nan/inf values, sign-flipped for -0.0."""
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    values = np.array([np.nan, np.inf, -np.inf, 1], dtype="float64")
    mask = np.array([False, False, False, True])
    expected = FloatingArray(values, mask)
    if negative:
        expected *= -1

    arr = pd.array([0, 1, -1, None], dtype="Int64")
    tm.assert_extension_array_equal(arr / zero, expected)
|
||||
|
||||
|
||||
def test_floordiv(dtype):
    """Floor division of two integer arrays keeps the integer dtype."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)

    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    expected = pd.array([0, 2, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(numerator // denominator, expected)
|
||||
|
||||
|
||||
def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
    """`1 // ser` gives inf for a zero divisor, for both masked and numpy-backed Series."""
    # GH 48223: Aligns with non-masked floordiv
    # but differs from numpy
    # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
    masked = pd.Series([0, 1], dtype=any_int_ea_dtype)
    expected_masked = pd.Series([np.inf, 1.0], dtype="Float64")
    tm.assert_series_equal(1 // masked, expected_masked)

    # Same check against the plain numpy-backed equivalent.
    plain = masked.astype(masked.dtype.numpy_dtype)
    expected_plain = expected_masked.astype(np.float64)
    tm.assert_series_equal(1 // plain, expected_plain)
|
||||
|
||||
|
||||
def test_mod(dtype):
    """Modulo of two integer arrays keeps the integer dtype."""
    dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([0, 0, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(dividend % divisor, expected)
|
||||
|
||||
|
||||
def test_pow_scalar():
    """Check `array ** scalar` and `scalar ** array` for 0, 1, pd.NA and np.nan."""

    def int64(values):
        return pd.array(values, dtype="Int64")

    def floating(values, mask):
        return FloatingArray(np.array(values, dtype="float64"), np.array(mask))

    a = int64([-1, 0, 1, None, 2])
    forward_cases = [
        (0, int64([1, 1, 1, 1, 1])),
        (1, int64([-1, 0, 1, None, 2])),
        (pd.NA, int64([None, None, 1, None, None])),
        (
            np.nan,
            floating(
                [np.nan, np.nan, 1, np.nan, np.nan],
                [False, False, False, True, False],
            ),
        ),
    ]
    for exponent, expected in forward_cases:
        tm.assert_extension_array_equal(a**exponent, expected)

    # reversed
    a = a[1:]  # Can't raise integers to negative powers.
    reversed_cases = [
        (0, int64([1, 0, None, 0])),
        (1, int64([1, 1, 1, 1])),
        (pd.NA, int64([1, None, None, None])),
        (
            np.nan,
            floating([1, np.nan, np.nan, np.nan], [False, False, True, False]),
        ),
    ]
    for base, expected in reversed_cases:
        tm.assert_extension_array_equal(base**a, expected)
|
||||
|
||||
|
||||
def test_pow_array():
    """Check element-wise power over every pairing of 0, 1 and missing."""
    bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])

    tm.assert_extension_array_equal(bases**exponents, expected)
|
||||
|
||||
|
||||
def test_rpow_one_to_na():
    """A numpy float array raised to an all-NA Int64 array: 1.0 stays 1.0, 2.0 becomes NA."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    exponents = pd.array([np.nan, np.nan], dtype="Int64")
    bases = np.array([1.0, 2.0])
    expected = pd.array([1.0, np.nan], dtype="Float64")

    tm.assert_extension_array_equal(bases**exponents, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    """Adding a zero-dim ndarray gives the same result as adding the bare scalar."""
    arr = pd.array([1, None, 2])
    with_ndarray = arr + np.array(other)
    with_scalar = arr + other

    tm.assert_equal(with_ndarray, with_scalar)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
    """Arithmetic between an integer Series and non-numeric operands must raise.

    The one exception exercised here is multiplication by a string Series
    when string inference is off, which is checked against string repetition.
    """
    op_name = all_arithmetic_operators
    ser = pd.Series(data)
    bound_op = getattr(ser, op_name)

    if using_infer_string:
        import pyarrow as pa

        errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
    else:
        errs = TypeError

    # invalid scalars
    scalar_patterns = [
        r"can only perform ops with numeric values",
        r"IntegerArray cannot perform the operation mod",
        r"unsupported operand type",
        r"can only concatenate str \(not \"int\"\) to str",
        "not all arguments converted during string",
        "ufunc '.*' not supported for the input types, and the inputs could not",
        "ufunc '.*' did not contain a loop with signature matching types",
        "Addition/subtraction of integers and integer-arrays with Timestamp",
        "has no kernel",
        "not implemented",
        "The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
    ]
    msg = "|".join(scalar_patterns)
    for bad_scalar in ("foo", pd.Timestamp("20180101")):
        with pytest.raises(errs, match=msg):
            bound_op(bad_scalar)

    # invalid array-likes
    str_ser = pd.Series("foo", index=ser.index)
    is_multiplication = op_name in ("__mul__", "__rmul__")
    if is_multiplication and not using_infer_string:
        res = bound_op(str_ser)
        expected = pd.Series(["foo" * x for x in data], index=ser.index)
        # TODO: doing this fillna to keep tests passing as we make
        #  assert_almost_equal stricter, but the expected with pd.NA seems
        #  more-correct than np.nan here.
        expected = expected.fillna(np.nan)
        tm.assert_series_equal(res, expected)
    else:
        with pytest.raises(errs, match=msg):
            bound_op(str_ser)

    datetime_patterns = [
        "can only perform ops with numeric values",
        "cannot perform .* with this index type: DatetimeArray",
        "Addition/subtraction of integers and integer-arrays "
        "with DatetimeArray is no longer supported. *",
        "unsupported operand type",
        r"can only concatenate str \(not \"int\"\) to str",
        "not all arguments converted during string",
        "cannot subtract DatetimeArray from ndarray",
        "has no kernel",
        "not implemented",
    ]
    msg = "|".join(datetime_patterns)
    dates = pd.Series(pd.date_range("20180101", periods=len(ser)))
    with pytest.raises(errs, match=msg):
        bound_op(dates)
|
||||
|
||||
|
||||
# Various
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# TODO test unsigned overflow
|
||||
|
||||
|
||||
def test_arith_coerce_scalar(data, all_arithmetic_operators):
    """An op with the float scalar 0.01 must match the same op on a float Series cast to Float64."""
    op = tm.get_op_from_name(all_arithmetic_operators)
    ser = pd.Series(data)
    scalar = 0.01

    result = op(ser, scalar)
    expected = op(ser.astype(float), scalar).astype("Float64")

    # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
    if all_arithmetic_operators == "__rmod__":
        zero_positions = (ser == 0).fillna(False).to_numpy(bool)
        expected.array._mask[zero_positions] = False

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
def test_arithmetic_conversion(all_arithmetic_operators, other):
    """A float operand gives a Float64 result even when it equals an integer."""
    # if we have a float operand we should have a float result
    # if that is equal to an integer
    ser = pd.Series([1, 2, 3], dtype="Int64")
    op = tm.get_op_from_name(all_arithmetic_operators)

    assert op(ser, other).dtype == "Float64"
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
    """Arithmetic across Int64, UInt8 and plain-int columns of one frame."""
    frame = pd.DataFrame(
        {
            "A": pd.Series([1, 2, np.nan], dtype="Int64"),
            "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
            "C": [1, 2, 3],
        }
    )

    # Int64 + plain ints
    a_plus_c = frame["A"] + frame["C"]
    tm.assert_series_equal(a_plus_c, pd.Series([2, 4, np.nan], dtype="Int64"))

    # a comparison on the arithmetic result yields a nullable boolean Series
    is_twelve = (frame["A"] + frame["C"]) * 3 == 12
    tm.assert_series_equal(
        is_twelve, pd.Series([False, True, None], dtype="boolean")
    )

    # Int64 + UInt8
    a_plus_b = frame["A"] + frame["B"]
    tm.assert_series_equal(a_plus_b, pd.Series([2, np.nan, np.nan], dtype="Int64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """Reductions listed in the params return floats, directly and through groupby."""
    # some reduce ops always return float, even if the result
    # is a rounded number
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    scalar = getattr(frame.C, op)()
    assert isinstance(scalar, float)

    # groupby
    grouped = getattr(frame.groupby("A"), op)()
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(grouped, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
        ([1, 2, None], [-1, -2, None], [1, 2, None]),
        ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
    ],
)
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
    """Check unary minus, unary plus and abs() on signed nullable integer arrays."""
    dtype = any_signed_int_ea_dtype
    arr = pd.array(source, dtype=dtype)

    negated = -arr
    positive = +arr
    absolute = abs(arr)

    expected_neg = pd.array(neg_target, dtype=dtype)
    expected_abs = pd.array(abs_target, dtype=dtype)

    tm.assert_extension_array_equal(negated, expected_neg)
    # unary plus must equal the input without sharing its memory
    tm.assert_extension_array_equal(positive, arr)
    assert not tm.shares_memory(positive, arr)
    tm.assert_extension_array_equal(absolute, expected_abs)
|
||||
|
||||
|
||||
def test_values_multiplying_large_series_by_NA():
    """pd.NA times a 10001-element Series of zeros is all pd.NA."""
    # GH#33701
    length = 10001
    product = pd.NA * pd.Series(np.zeros(length))
    all_na = pd.Series([pd.NA] * length)

    tm.assert_series_equal(product, all_na)
|
||||
|
||||
|
||||
def test_bitwise(dtype):
    """Check |, & and ^ between integer arrays, and that Float64 operands raise."""
    left = pd.array([1, None, 3, 4], dtype=dtype)
    right = pd.array([None, 3, 5, 4], dtype=dtype)
    bit_ops = (operator.or_, operator.and_, operator.xor)

    for bit_op in bit_ops:
        # the first two positions are missing in one of the operands
        expected = pd.array([None, None, bit_op(3, 5), bit_op(4, 4)], dtype=dtype)
        tm.assert_extension_array_equal(bit_op(left, right), expected)

    # TODO: desired behavior when operating with boolean? defer?

    floats = right.astype("Float64")
    for bit_op in bit_ops:
        with pytest.raises(TypeError, match="unsupported operand type"):
            bit_op(left, floats)
|
@ -0,0 +1,39 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.arrays.masked_shared import (
|
||||
ComparisonOps,
|
||||
NumericOps,
|
||||
)
|
||||
|
||||
|
||||
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests for nullable integer data, built on the shared masked-array test mixins."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        # Delegate to the shared implementation with this module's scalar set.
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_to_int(self, dtype, comparison_op):
        """Comparing a nullable integer Series to 2 must match the float Series result, with NA kept."""
        # GH 28930
        dunder = f"__{comparison_op.__name__}__"
        nullable = pd.Series([1, None, 3], dtype=dtype)
        floats = pd.Series([1, None, 3], dtype="float")

        result = getattr(nullable, dunder)(2)

        # Expected values come from the float Series, with missing positions
        # set back to pd.NA.
        expected = getattr(floats, dunder)(2).astype("boolean")
        expected[floats.isna()] = pd.NA

        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_equals():
    """Arrays of the same class but different dtype must not compare equal."""
    # GH-30652
    # equals is generally tested in /tests/extension/base/methods, but this
    # specifically tests that two arrays of the same class but different dtype
    # do not evaluate equal
    values = [1, 2, None]
    as_int64 = pd.array(values, dtype="Int64")
    as_int32 = pd.array(values, dtype="Int32")

    assert as_int64.equals(as_int32) is False
|
@ -0,0 +1,69 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "Int64"], "Int64"),
        (["UInt64", "UInt64"], "UInt64"),
        (["Int8", "Int8"], "Int8"),
        (["Int8", "Int16"], "Int16"),
        (["UInt8", "Int8"], "Int16"),
        (["Int32", "UInt32"], "Int64"),
        (["Int64", "UInt64"], "Float64"),
        (["Int64", "boolean"], "object"),
        (["UInt8", "boolean"], "object"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Concatenating two masked Series gives `result_dtype`, in either order."""
    # we expect the same dtypes as we would get with non-masked inputs,
    # just masked where available.

    def build_expected():
        as_object = pd.Series([0, 1, pd.NA], dtype=object)
        return pd.concat([as_object] * 2).astype(result_dtype)

    # order doesn't matter for result, so check given and reversed order
    for dtypes in (to_concat_dtypes, to_concat_dtypes[::-1]):
        pieces = [pd.Series([0, 1, pd.NA], dtype=t) for t in dtypes]
        result = pd.concat(pieces)
        tm.assert_series_equal(result, build_expected())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "int64"], "Int64"),
        (["UInt64", "uint64"], "UInt64"),
        (["Int8", "int8"], "Int8"),
        (["Int8", "int16"], "Int16"),
        (["UInt8", "int8"], "Int16"),
        (["Int32", "uint32"], "Int64"),
        (["Int64", "uint64"], "Float64"),
        (["Int64", "bool"], "object"),
        (["UInt8", "bool"], "object"),
    ],
)
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
    """Concatenating a masked Series with a numpy-backed one gives `result_dtype`."""
    # we expect the same dtypes as we would get with non-masked inputs,
    # just masked where available.
    masked_dtype, numpy_dtype = to_concat_dtypes[0], to_concat_dtypes[1]
    masked = pd.Series([0, 1, pd.NA], dtype=masked_dtype)
    plain = pd.Series(np.array([0, 1], dtype=numpy_dtype))

    result = pd.concat([masked, plain], ignore_index=True)
    expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype)
    tm.assert_series_equal(result, expected)

    # order doesn't matter for result
    result = pd.concat([plain, masked], ignore_index=True)
    expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype)
    tm.assert_series_equal(result, expected)
|
@ -0,0 +1,245 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_integer
|
||||
from pandas.core.arrays import IntegerArray
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])
def constructor(request):
    """Return one of the two parametrized callables that build an array from a sequence.

    Used to test dtype conversions.
    """
    build = request.param
    return build
|
||||
|
||||
|
||||
def test_uses_pandas_na():
    """A missing entry in an Int64 array is the pd.NA singleton."""
    arr = pd.array([1, None], dtype=Int64Dtype())
    missing = arr[1]
    assert missing is pd.NA
|
||||
|
||||
|
||||
def test_from_dtype_from_float(data):
    """Build a Series from float, list and integer-array input using the dtype's string name."""
    # construct from our dtype & string dtype
    dtype = data.dtype
    dtype_name = str(dtype)

    # from float
    as_float = data.to_numpy(na_value=np.nan, dtype="float")
    tm.assert_series_equal(pd.Series(as_float, dtype=dtype_name), pd.Series(data))

    # from int / list
    as_list = np.array(data).tolist()
    tm.assert_series_equal(pd.Series(as_list, dtype=dtype_name), pd.Series(data))

    # from int / array
    expected = pd.Series(data).dropna().reset_index(drop=True)
    dropped = np.array(data.dropna()).astype(np.dtype(dtype.type))
    tm.assert_series_equal(pd.Series(dropped, dtype=dtype_name), expected)
|
||||
|
||||
|
||||
def test_conversions(data_missing):
    """Casting to object keeps pd.NA and the integer scalars, as a Series and as an ndarray."""
    frame = pd.DataFrame({"A": data_missing})

    # astype to object series
    as_object = frame["A"].astype("object")
    expected_series = pd.Series(np.array([pd.NA, 1], dtype=object), name="A")
    tm.assert_series_equal(as_object, expected_series)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = frame["A"].astype("object").values
    expected = np.array([pd.NA, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for got, want in zip(result, expected):
        if pd.isnull(got):
            assert pd.isnull(want)
            continue
        assert got == want
        if is_integer(got):
            assert is_integer(want)
        else:
            assert type(got) == type(want)
|
||||
|
||||
|
||||
def test_integer_array_constructor():
    """IntegerArray(values, mask) accepts numpy arrays and raises TypeError for other inputs."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    built = IntegerArray(values, mask)
    expected = pd.array([1, 2, 3, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(built, expected)

    # lists for either argument, or float values, are rejected
    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    bad_arguments = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(float), mask),
    ]
    for bad_values, bad_mask in bad_arguments:
        with pytest.raises(TypeError, match=msg):
            IntegerArray(bad_values, bad_mask)

    # the mask is a required argument
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)
|
||||
|
||||
|
||||
def test_integer_array_constructor_copy():
    """Without copy the constructor keeps the passed arrays; with copy=True it does not."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    shared = IntegerArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    copied = IntegerArray(values, mask, copy=True)
    assert copied._data is not values
    assert copied._mask is not mask
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_to_integer_array_none_is_nan(a, b):
    """None and np.nan inputs produce equal Int64 arrays."""
    from_a = pd.array(a, dtype="Int64")
    from_b = pd.array(b, dtype="Int64")
    tm.assert_extension_array_equal(from_a, from_b)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    """Inputs that cannot become an integer array raise ValueError or TypeError."""
    # error in converting existing arrays to IntegerArrays
    expected_errors = (ValueError, TypeError)
    patterns = (
        r"cannot be converted to IntegerDtype",
        r"invalid literal for int\(\) with base 10:",
        r"values must be a 1D list-like",
        r"Cannot pass scalar",
        r"int\(\) argument must be a string",
    )
    msg = "|".join(patterns)

    with pytest.raises(expected_errors, match=msg):
        pd.array(values, dtype="Int64")

    with pytest.raises(expected_errors, match=msg):
        IntegerArray._from_sequence(values)
|
||||
|
||||
|
||||
def test_to_integer_array_inferred_dtype(constructor):
    """With no dtype given, the input's dtype is kept and a plain list becomes Int64."""
    # if values has dtype -> respect it
    from_int8 = constructor(np.array([1, 2], dtype="int8"))
    assert from_int8.dtype == Int8Dtype()

    from_int32 = constructor(np.array([1, 2], dtype="int32"))
    assert from_int32.dtype == Int32Dtype()

    # if values have no dtype -> always int64
    from_list = constructor([1, 2])
    assert from_list.dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_dtype_keyword(constructor):
    """An explicit dtype keyword wins, even over the dtype of a numpy input."""
    from_list = constructor([1, 2], dtype="Int8")
    assert from_list.dtype == Int8Dtype()

    # if values has dtype -> override it
    int8_input = np.array([1, 2], dtype="int8")
    overridden = constructor(int8_input, dtype="Int32")
    assert overridden.dtype == Int32Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_float():
    """Whole-number floats convert to Int64; fractional floats raise TypeError."""
    converted = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64")
    tm.assert_extension_array_equal(converted, pd.array([1, 2], dtype="Int64"))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        IntegerArray._from_sequence([1.5, 2.0], dtype="Int64")

    # for float dtypes, the itemsize is not preserved
    float32_input = np.array([1.0, 2.0], dtype="float32")
    from_float32 = IntegerArray._from_sequence(float32_input, dtype="Int64")
    assert from_float32.dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_str():
    """Integer strings convert to Int64; empty or fractional strings raise ValueError."""
    converted = IntegerArray._from_sequence(["1", "2", None], dtype="Int64")
    tm.assert_extension_array_equal(
        converted, pd.array([1, 2, np.nan], dtype="Int64")
    )

    invalid_literal = r"invalid literal for int\(\) with base 10: .*"
    for bad_strings in (["1", "2", ""], ["1.5", "2.0"]):
        with pytest.raises(ValueError, match=invalid_literal):
            IntegerArray._from_sequence(bad_strings, dtype="Int64")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(
    constructor, bool_values, int_values, target_dtype, expected_dtype
):
    """Boolean input built with an integer dtype equals the matching 0/1 integer input."""
    from_bools = constructor(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_ints = pd.array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_ints)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    """_from_sequence on a numpy array gives the expected nullable dtype and values."""
    # convert existing arrays to IntegerArrays
    target = result_dtype()
    converted = IntegerArray._from_sequence(values, dtype=to_dtype)
    assert converted.dtype == target

    tm.assert_extension_array_equal(converted, pd.array(values, dtype=target))
|
||||
|
||||
|
||||
def test_integer_array_from_boolean():
    """Object-dtype booleans convert to Int64 the same way bool-dtype booleans do."""
    # GH31104
    flags = [True, False]
    from_bool_dtype = pd.array(np.array(flags), dtype="Int64")
    from_object_dtype = pd.array(np.array(flags, dtype=object), dtype="Int64")

    tm.assert_extension_array_equal(from_object_dtype, from_bool_dtype)
|
@ -0,0 +1,294 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCIndex
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
UInt32Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """The numpy kind of dtype.type matches is_signed_integer, and the dtype has a name."""
    # smoke tests on auto dtype construction
    expected_kind = "i" if dtype.is_signed_integer else "u"
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """These reductions return integers directly and keep Int64 through groupby."""
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    scalar = getattr(frame.C, op)()
    expected_type = np.int64 if op in {"sum", "prod", "min", "max"} else int
    assert isinstance(scalar, expected_type)

    # groupby
    grouped = getattr(frame.groupby("A"), op)()
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(grouped, expected)
|
||||
|
||||
|
||||
def test_astype_nansafe():
    """Casting an Int8 array that holds NA to numpy uint32 raises ValueError."""
    # see gh-22343
    with_na = pd.array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("uint32")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_construct_index(all_data, dropna):
    """Building an Index from masked integer data keeps the masked dtype."""
    # ensure that we do not coerce to different Index dtype or non-index
    head = all_data[:10]
    other = np.array(head[~head.isna()]) if dropna else head

    from_array = pd.Index(pd.array(other, dtype=head.dtype))
    from_values = pd.Index(other, dtype=head.dtype)
    assert head.dtype == from_values.dtype  # dont coerce to object

    tm.assert_index_equal(from_array, from_values)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_astype_index(all_data, dropna):
    """Index.astype to the masked dtype matches going through object first."""
    # as an int/uint index to Index
    head = all_data[:10]
    other = head[~head.isna()] if dropna else head
    target_dtype = head.dtype

    idx = pd.Index(np.array(other))
    assert isinstance(idx, ABCIndex)

    direct = idx.astype(target_dtype)
    via_object = idx.astype(object).astype(target_dtype)
    tm.assert_index_equal(direct, via_object)
|
||||
|
||||
|
||||
def test_astype(all_data):
    """Series.astype to the same dtype, Int8, the numpy dtype and object, with and without NA."""
    head = all_data[:10]
    own_dtype = head.dtype
    numpy_dtype = own_dtype.numpy_dtype
    other_dtype = Int8Dtype()

    ints = head[~head.isna()]
    mixed = head

    def check_masked_targets(values):
        # coerce to same type
        tm.assert_series_equal(pd.Series(values).astype(own_dtype), pd.Series(values))
        # coerce to same other
        tm.assert_series_equal(
            pd.Series(values).astype(other_dtype),
            pd.Series(values, dtype=other_dtype),
        )

    # ints: masked targets, then the plain numpy dtype
    check_masked_targets(ints)
    result = pd.Series(ints).astype(numpy_dtype)
    expected = pd.Series(ints._data.astype(numpy_dtype))
    tm.assert_series_equal(result, expected)

    # mixed: masked targets work, the plain numpy dtype cannot hold NA
    check_masked_targets(mixed)
    ser = pd.Series(mixed)
    msg = "cannot convert NA to integer"
    with pytest.raises(ValueError, match=msg):
        ser.astype(numpy_dtype)

    # coerce to object
    result = pd.Series(mixed).astype("object")
    expected = pd.Series(np.asarray(mixed, dtype=object))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_copy():
    """Check when ``IntegerArray.astype`` copies its data and mask buffers."""
    values = [1, 2, 3, None]
    source = pd.array(values, dtype="Int64")
    pristine = pd.array(values, dtype="Int64")

    # copy=True -> both data and mask must be real copies, so writing a
    # value or a missing marker into the result leaves the source untouched
    copied = source.astype("Int64", copy=True)
    assert copied is not source
    assert not tm.shares_memory(copied, source)
    for replacement in (10, pd.NA):
        copied[0] = replacement
        tm.assert_extension_array_equal(source, pristine)

    # copy=False with an unchanged dtype -> the very same object comes back
    alias = source.astype("Int64", copy=False)
    assert alias is source
    assert np.shares_memory(alias._data, source._data)
    assert np.shares_memory(alias._mask, source._mask)
    alias[0] = 10
    assert source[0] == 10
    alias[0] = pd.NA
    assert source[0] is pd.NA

    # a different target dtype always needs a copy, even with copy=False;
    # the mask in particular must not be shared
    source = pd.array(values, dtype="Int64")
    pristine = pd.array(values, dtype="Int64")

    narrowed = source.astype("Int32", copy=False)
    assert not tm.shares_memory(narrowed, source)
    for replacement in (10, pd.NA):
        narrowed[0] = replacement
        tm.assert_extension_array_equal(source, pristine)
|
||||
|
||||
|
||||
def test_astype_to_larger_numpy():
    """Casting NA-free 32-bit masked arrays to 64-bit NumPy dtypes."""
    for masked_dtype, numpy_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
        widened = pd.array([1, 2], dtype=masked_dtype).astype(numpy_dtype)
        tm.assert_numpy_array_equal(widened, np.array([1, 2], dtype=numpy_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    """Int64 Series cast to another masked dtype, without and with a missing value."""
    for values in ([1, 2, 3], [1, 2, 3, None]):
        converted = pd.Series(values, dtype="Int64").astype(dtype)
        tm.assert_series_equal(converted, pd.Series(values, dtype=dtype))
|
||||
|
||||
|
||||
def test_astype_floating():
    """Int64 -> Float64 keeps the missing entry masked."""
    ints = pd.array([1, 2, None], dtype="Int64")
    tm.assert_extension_array_equal(
        ints.astype("Float64"),
        pd.array([1.0, 2.0, None], dtype="Float64"),
    )
|
||||
|
||||
|
||||
def test_astype_dt64():
    """Masked integers cast to datetime64[ns]; the missing entry becomes NaT."""
    # GH#32435
    nanoseconds = pd.array([1, 2, 3, pd.NA]) * 10**9

    converted = nanoseconds.astype("datetime64[ns]")

    in_seconds = np.array([1, 2, 3, "NaT"], dtype="M8[s]")
    tm.assert_numpy_array_equal(converted, in_seconds.astype("M8[ns]"))
|
||||
|
||||
|
||||
def test_construct_cast_invalid(dtype):
    """Non-integral floats must be rejected by both construction and astype."""
    msg = "cannot safely"
    # same checks without and with a NaN in the input
    for floats in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
        with pytest.raises(TypeError, match=msg):
            pd.array(floats, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(floats).astype(dtype)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("in_series", [True, False])
def test_to_numpy_na_nan(in_series):
    """``to_numpy`` fills the missing entry with the requested ``na_value``."""
    container = pd.array([0, 1, None], dtype="Int64")
    if in_series:
        container = pd.Series(container)

    # (target dtype, na_value, expected contents)
    cases = (
        ("float64", np.nan, [0.0, 1.0, np.nan]),
        ("int64", -1, [0, 1, -1]),
        ("bool", False, [False, True, False]),
    )
    for target, fill, contents in cases:
        converted = container.to_numpy(dtype=target, na_value=fill)
        tm.assert_numpy_array_equal(converted, np.array(contents, dtype=target))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("in_series", [True, False])
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
def test_to_numpy_dtype(dtype, in_series):
    """``to_numpy`` on NA-free Int64 data honours the requested dtype."""
    masked = pd.array([0, 1], dtype="Int64")
    container = pd.Series(masked) if in_series else masked

    tm.assert_numpy_array_equal(
        container.to_numpy(dtype=dtype), np.array([0, 1], dtype=dtype)
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int64", "bool"])
def test_to_numpy_na_raises(dtype):
    """Without ``na_value``, a missing entry cannot be converted; the error names the dtype."""
    with_missing = pd.array([0, 1, None], dtype="Int64")
    with pytest.raises(ValueError, match=dtype):
        with_missing.to_numpy(dtype=dtype)
|
||||
|
||||
|
||||
def test_astype_str():
    """Casting to ``str`` (type or alias) yields a unicode ndarray with '<NA>'."""
    ints = pd.array([1, 2, None], dtype="Int64")
    as_text = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")

    for target in (str, "str"):
        tm.assert_numpy_array_equal(ints.astype(target), as_text)
|
||||
|
||||
|
||||
def test_astype_boolean():
    """Int64 -> boolean: zero is False, non-zero is True, missing stays missing."""
    # https://github.com/pandas-dev/pandas/issues/31102
    ints = pd.array([1, 0, -1, 2, None], dtype="Int64")
    tm.assert_extension_array_equal(
        ints.astype("boolean"),
        pd.array([True, False, True, True, None], dtype="boolean"),
    )
|
@ -0,0 +1,203 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
def test_ufuncs_single_int(ufunc):
    """Unary ufuncs checked here must return Int64 for array and Series input."""
    ints = pd.array([1, 2, -3, np.nan])

    def reference():
        # apply the ufunc on the float view, then convert back to Int64
        return pd.array(ufunc(ints.astype(float)), dtype="Int64")

    on_array = ufunc(ints)
    tm.assert_extension_array_equal(on_array, reference())

    on_series = ufunc(pd.Series(ints))
    tm.assert_series_equal(on_series, pd.Series(reference()))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-producing unary ufuncs return a FloatingArray that reuses the input mask."""
    ints = pd.array([1, 2, -3, np.nan])

    # invalid-value warnings are silenced while the ufunc runs
    with np.errstate(invalid="ignore"):
        on_array = ufunc(ints)
        reference = FloatingArray(ufunc(ints.astype(float)), mask=ints._mask)
    tm.assert_extension_array_equal(on_array, reference)

    as_series = pd.Series(ints)
    with np.errstate(invalid="ignore"):
        on_series = ufunc(as_series)
    tm.assert_series_equal(on_series, pd.Series(reference))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
    """Binary ufuncs with an IntegerArray on either side return Int64.

    Operand pairs: two IntegerArrays, IntegerArray with ndarray (both
    orders) and IntegerArray with a scalar (both orders).
    """
    ints = pd.array([1, 2, -3, np.nan])
    ndarr = np.array([1, 2, 3, 4])

    def as_reference_operand(operand):
        # only the IntegerArray is swapped for its float view
        return operand.astype(float) if operand is ints else operand

    operand_pairs = (
        (ints, ints),
        (ints, ndarr),
        (ndarr, ints),
        (ints, 1),
        (1, ints),
    )
    for lhs, rhs in operand_pairs:
        outcome = ufunc(lhs, rhs)
        reference = pd.array(
            ufunc(as_reference_operand(lhs), as_reference_operand(rhs)),
            dtype="Int64",
        )
        tm.assert_extension_array_equal(outcome, reference)
|
||||
|
||||
|
||||
def test_ufunc_binary_output():
    """A two-output ufunc (``np.modf``) returns a tuple of two arrays."""
    ints = pd.array([1, 2, np.nan])
    outputs = np.modf(ints)

    as_float = ints.to_numpy(na_value=np.nan, dtype="float")
    fractional, integral = np.modf(as_float)
    references = (pd.array(fractional), pd.array(integral))

    assert isinstance(outputs, tuple)
    assert len(outputs) == 2

    for got, want in zip(outputs, references):
        tm.assert_extension_array_equal(got, want)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` agrees with ``sum(skipna=False)``.

    Despite the name, nothing is expected to raise here.
    """
    masked = pd.array(values)

    reduced = np.add.reduce(masked)
    tm.assert_almost_equal(reduced, masked.sum(skipna=False))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("std", {"ddof": 0}),
        ("std", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """A statistic over Int64 data with trailing NaNs equals the one without them."""
    observed = [1, 2, 3, 4, 5, 6]
    padded = pd.Series(data=observed + [np.nan, np.nan], dtype="Int64")
    dense = pd.Series(data=observed, dtype="Int64")

    with_missing = getattr(padded, pandasmethname)(**kwargs)
    without_missing = getattr(dense, pandasmethname)(**kwargs)
    assert without_missing == with_missing
|
||||
|
||||
|
||||
def test_value_counts_na():
    """``value_counts`` on an IntegerArray, keeping and dropping the NA bucket."""
    ints = pd.array([1, 2, 1, pd.NA], dtype="Int64")

    # dropna=False: NA gets its own entry in an Int64 index
    counted = ints.value_counts(dropna=False)
    index_with_na = pd.Index([1, 2, pd.NA], dtype="Int64")
    assert index_with_na.dtype == "Int64"
    reference = pd.Series(
        [2, 1, 1], index=index_with_na, dtype="Int64", name="count"
    )
    tm.assert_series_equal(counted, reference)

    # dropna=True: only the observed values remain, index dtype preserved
    counted = ints.value_counts(dropna=True)
    reference = pd.Series([2, 1], index=ints[:2], dtype="Int64", name="count")
    assert reference.index.dtype == ints.dtype
    tm.assert_series_equal(counted, reference)
|
||||
|
||||
|
||||
def test_value_counts_empty():
    """``value_counts`` on an empty Int64 Series keeps Int64 for values and index."""
    # https://github.com/pandas-dev/pandas/issues/33317
    empty = pd.Series([], dtype="Int64")
    counted = empty.value_counts()

    empty_index = pd.Index([], dtype=empty.dtype)
    assert empty_index.dtype == empty.dtype
    tm.assert_series_equal(
        counted, pd.Series([], index=empty_index, dtype="Int64", name="count")
    )
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
    """``normalize=True`` yields Float64 proportions over the non-missing values."""
    # GH 33172
    ints = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
    proportions = ints.value_counts(normalize=True)

    raw_counts = pd.Series(
        [2, 1], index=ints[:2], dtype="Float64", name="proportion"
    )
    reference = raw_counts / 3
    assert reference.index.dtype == ints.dtype
    tm.assert_series_equal(proportions, reference)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_integer_array_sum(skipna, min_count, any_int_ea_dtype):
    """``sum`` is 6 only when NA is skipped and ``min_count`` is met; otherwise NA."""
    masked = pd.array([1, 2, 3, None], dtype=any_int_ea_dtype)
    total = masked.sum(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        assert total is pd.NA
        return
    assert total == 6
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_integer_array_min_max(skipna, method, any_int_ea_dtype):
    """``min``/``max`` return the extreme when skipping NA, and NA otherwise."""
    masked = pd.array([0, 1, None], dtype=any_int_ea_dtype)
    outcome = getattr(masked, method)(skipna=skipna)

    if not skipna:
        assert outcome is pd.NA
        return
    assert outcome == (1 if method != "min" else 0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_integer_array_prod(skipna, min_count, any_int_ea_dtype):
    """``prod`` is 2 only when NA is skipped and ``min_count`` is met; otherwise NA."""
    masked = pd.array([1, 2, None], dtype=any_int_ea_dtype)
    product = masked.prod(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        assert product is pd.NA
        return
    assert product == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)]
)
def test_integer_array_numpy_sum(values, expected):
    """``np.sum`` over an Int64 array matches the expected total for each case."""
    total = np.sum(pd.array(values, dtype="Int64"))
    assert total == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
def test_dataframe_reductions(op):
    """DataFrame reductions on an Int64 column must stay integer.

    Parameters
    ----------
    op : str
        Name of the DataFrame reduction method to call.
    """
    # https://github.com/pandas-dev/pandas/pull/32867
    # ensure the integers are not cast to float during reductions
    df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
    # dispatch on the parametrized reduction; previously ``op`` was ignored
    # and ``df.max()`` was exercised four times
    result = getattr(df, op)()
    assert isinstance(result["a"], np.int64)
|
||||
|
||||
|
||||
# TODO(jreback) - these need testing / are broken
|
||||
|
||||
# shift
|
||||
|
||||
# set_index (destroys type)
|
@ -0,0 +1,19 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_array_setitem_nullable_boolean_mask():
    """``Series.where`` with a nullable boolean condition masks the rejected entries."""
    # GH 31446
    ints = pd.Series([1, 2], dtype="Int64")
    kept = ints.where(ints > 1)
    tm.assert_series_equal(kept, pd.Series([pd.NA, 2], dtype="Int64"))
|
||||
|
||||
|
||||
def test_array_setitem():
    """In-place assignment through a nullable boolean mask on an IntegerArray."""
    # GH 31446
    masked = pd.Series([1, 2], dtype="Int64").array
    above_one = masked > 1
    masked[above_one] = 1

    tm.assert_extension_array_equal(masked, pd.array([1, 1], dtype="Int64"))
|
@ -0,0 +1,125 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op, expected",
    [
        ("sum", np.int64(3)),
        ("prod", np.int64(2)),
        ("min", np.int64(1)),
        ("max", np.int64(2)),
        ("mean", np.float64(1.5)),
        ("median", np.float64(1.5)),
        ("var", np.float64(0.5)),
        ("std", np.float64(0.5**0.5)),
        ("skew", pd.NA),
        ("kurt", pd.NA),
        ("any", True),
        ("all", True),
    ],
)
def test_series_reductions(op, expected):
    """Scalar result of each reduction on a two-element Int64 Series."""
    reducer = getattr(Series([1, 2], dtype="Int64"), op)
    tm.assert_equal(reducer(), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, expected",
|
||||
[
|
||||
["sum", Series([3], index=["a"], dtype="Int64")],
|
||||
["prod", Series([2], index=["a"], dtype="Int64")],
|
||||
["min", Series([1], index=["a"], dtype="Int64")],
|
||||
["max", Series([2], index=["a"], dtype="Int64")],
|
||||
["mean", Series([1.5], index=["a"], dtype="Float64")],
|
||||
["median", Series([1.5], index=["a"], dtype="Float64")],
|
||||
["var", Series([0.5], index=["a"], dtype="Float64")],
|
||||
["std", Series([0.5**0.5], index=["a"], dtype="Float64")],
|
||||
["skew", Series([pd.NA], index=["a"], dtype="Float64")],
|
||||
["kurt", Series([pd.NA], index=["a"], dtype="Float64")],
|
||||
["any", Series([True], index=["a"], dtype="boolean")],
|
||||
["all", Series([True], index=["a"], dtype="boolean")],
|
||||
],
|
||||
)
|
||||
def test_dataframe_reductions(op, expected):
|
||||
df = DataFrame({"a": array([1, 2], dtype="Int64")})
|
||||
result = getattr(df, op)()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, expected",
|
||||
[
|
||||
["sum", array([1, 3], dtype="Int64")],
|
||||
["prod", array([1, 3], dtype="Int64")],
|
||||
["min", array([1, 3], dtype="Int64")],
|
||||
["max", array([1, 3], dtype="Int64")],
|
||||
["mean", array([1, 3], dtype="Float64")],
|
||||
["median", array([1, 3], dtype="Float64")],
|
||||
["var", array([pd.NA], dtype="Float64")],
|
||||
["std", array([pd.NA], dtype="Float64")],
|
||||
["skew", array([pd.NA], dtype="Float64")],
|
||||
["any", array([True, True], dtype="boolean")],
|
||||
["all", array([True, True], dtype="boolean")],
|
||||
],
|
||||
)
|
||||
def test_groupby_reductions(op, expected):
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["a", "b", "b"],
|
||||
"B": array([1, None, 3], dtype="Int64"),
|
||||
}
|
||||
)
|
||||
result = getattr(df.groupby("A"), op)()
|
||||
expected = DataFrame(expected, index=pd.Index(["a", "b"], name="A"), columns=["B"])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op, expected",
    [
        ("sum", Series([4, 4], index=["B", "C"], dtype="Float64")),
        ("prod", Series([3, 3], index=["B", "C"], dtype="Float64")),
        ("min", Series([1, 1], index=["B", "C"], dtype="Float64")),
        ("max", Series([3, 3], index=["B", "C"], dtype="Float64")),
        ("mean", Series([2, 2], index=["B", "C"], dtype="Float64")),
        ("median", Series([2, 2], index=["B", "C"], dtype="Float64")),
        ("var", Series([2, 2], index=["B", "C"], dtype="Float64")),
        ("std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")),
        ("skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")),
        ("kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")),
        ("any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")),
        ("all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")),
    ],
)
def test_mixed_reductions(op, expected, using_infer_string):
    """Reductions on a frame mixing string, float and Int64 columns.

    The Int64 column is checked on its own first, then the whole frame;
    numeric reductions pass ``numeric_only=True`` while ``any``/``all``
    use the defaults.
    """
    is_boolean_op = op in ["any", "all"]
    if is_boolean_op and using_infer_string:
        expected = expected.astype("bool")

    frame = DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": array([1, None, 3], dtype="Int64"),
        }
    )

    # series
    column_result = getattr(frame.C, op)()
    tm.assert_equal(column_result, expected["C"])

    # frame
    frame_kwargs = {} if is_boolean_op else {"numeric_only": True}
    frame_result = getattr(frame, op)(**frame_kwargs)
    tm.assert_series_equal(frame_result, expected)
|
@ -0,0 +1,67 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """Smoke test: the dtype's scalar type has the matching NumPy kind and a name."""
    # smoke tests on auto dtype construction
    expected_kind = "i" if dtype.is_signed_integer else "u"
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["dtype", "expected"],
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """``repr`` of each masked integer dtype is its class name followed by ``()``."""
    rendered = repr(dtype)
    assert rendered == expected
|
||||
|
||||
|
||||
def test_repr_array():
    """``repr`` of a short IntegerArray shows the values, length and dtype."""
    rendered = repr(pd.array([1, None, 3]))
    assert rendered == "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
|
||||
|
||||
|
||||
def test_repr_array_long():
    """``repr`` of a long IntegerArray is truncated and column-aligned.

    Each value is right-justified to the width of ``<NA>``; the runs of
    spaces in the expected text are significant.
    """
    data = pd.array([1, 2, None] * 1000)
    # NOTE: the padding below is part of the expected output; it had been
    # collapsed to single spaces, which can never match the aligned repr.
    expected = (
        "<IntegerArray>\n"
        "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n"
        " ...\n"
        " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n"
        "Length: 3000, dtype: Int64"
    )
    result = repr(data)
    assert result == expected
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
    """``repr`` of a one-column DataFrame holding ``[<NA>, 1]``.

    Parameters
    ----------
    data_missing : fixture
        Masked integer array with one missing and one valid value.
    """
    df = pd.DataFrame({"A": data_missing})
    result = repr(df)
    # NOTE: the column is right-aligned to the width of "<NA>"; the padding
    # had been collapsed to single spaces, which cannot match the real repr.
    expected = "      A\n0  <NA>\n1     1"
    assert result == expected
|
Reference in New Issue
Block a user