forked from Alsan/Post_finder
venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,253 @@
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestSeriesAccessor:
    """Tests for the ``Series.sparse`` accessor."""

    def test_to_dense(self):
        """``to_dense`` on a sparse Series equals the Series built without a sparse dtype."""
        ser = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]")
        result = ser.sparse.to_dense()
        expected = pd.Series([0, 1, 0, 10])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"])
    def test_get_attributes(self, attr):
        """Each accessor attribute equals the same attribute on the wrapped SparseArray."""
        arr = SparseArray([0, 1])
        ser = pd.Series(arr)

        result = getattr(ser.sparse, attr)
        expected = getattr(arr, attr)
        assert result == expected

    def test_from_coo(self):
        """``Series.sparse.from_coo`` builds a MultiIndexed sparse Series from a COO matrix."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        row = [0, 3, 1, 0]
        col = [0, 3, 1, 2]
        data = [4, 5, 7, 9]

        sp_array = scipy_sparse.coo_matrix((data, (row, col)))
        result = pd.Series.sparse.from_coo(sp_array)

        # The expected index lists the (row, col) pairs in sorted order.
        index = pd.MultiIndex.from_arrays(
            [
                np.array([0, 0, 1, 3], dtype=np.int32),
                np.array([0, 2, 1, 3], dtype=np.int32),
            ],
        )
        expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "sort_labels, expected_rows, expected_cols, expected_values_pos",
        [
            (
                False,
                [("b", 2), ("a", 2), ("b", 1), ("a", 1)],
                [("z", 1), ("z", 2), ("x", 2), ("z", 0)],
                {1: (1, 0), 3: (3, 3)},
            ),
            (
                True,
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)],
                [("x", 2), ("z", 0), ("z", 1), ("z", 2)],
                {1: (1, 2), 3: (0, 1)},
            ),
        ],
    )
    def test_to_coo(
        self, sort_labels, expected_rows, expected_cols, expected_values_pos
    ):
        """``to_coo`` returns the matrix plus row/column labels, with and without sorting.

        ``expected_values_pos`` maps each expected non-zero value to its
        ``(row, col)`` position in the 4x4 result.
        """
        sp_sparse = pytest.importorskip("scipy.sparse")

        values = SparseArray([0, np.nan, 1, 0, None, 3], fill_value=0)
        index = pd.MultiIndex.from_tuples(
            [
                ("b", 2, "z", 1),
                ("a", 2, "z", 2),
                ("a", 2, "z", 1),
                ("a", 2, "x", 2),
                ("b", 1, "z", 1),
                ("a", 1, "z", 0),
            ]
        )
        ss = pd.Series(values, index=index)

        expected_A = np.zeros((4, 4))
        for value, (row, col) in expected_values_pos.items():
            expected_A[row, col] = value

        A, rows, cols = ss.sparse.to_coo(
            row_levels=(0, 1), column_levels=(2, 3), sort_labels=sort_labels
        )
        assert isinstance(A, sp_sparse.coo_matrix)
        tm.assert_numpy_array_equal(A.toarray(), expected_A)
        assert rows == expected_rows
        assert cols == expected_cols

    def test_non_sparse_raises(self):
        """Using ``.sparse`` on a non-sparse Series raises AttributeError."""
        ser = pd.Series([1, 2, 3])
        with pytest.raises(AttributeError, match=".sparse"):
            ser.sparse.density
|
||||
|
||||
|
||||
class TestFrameAccessor:
    """Tests for the ``DataFrame.sparse`` accessor (plus ``Series.sparse.from_coo``)."""

    def test_accessor_raises(self):
        """Accessing ``.sparse`` on a DataFrame without sparse columns raises."""
        df = pd.DataFrame({"A": [0, 1]})
        with pytest.raises(AttributeError, match="sparse"):
            df.sparse

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
    @pytest.mark.parametrize("dtype", ["float64", "int64"])
    def test_from_spmatrix(self, format, labels, dtype):
        """``from_spmatrix`` on an identity matrix matches a dense frame cast to sparse."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        # Fill value is a Python scalar zero of the requested dtype.
        sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item())

        mat = sp_sparse.eye(10, format=format, dtype=dtype)
        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
        expected = pd.DataFrame(
            np.eye(10, dtype=dtype), index=labels, columns=labels
        ).astype(sp_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    def test_from_spmatrix_including_explicit_zero(self, format):
        """``from_spmatrix`` handles a matrix whose stored data contains a zero."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        mat = sp_sparse.random(10, 2, density=0.5, format=format)
        mat.data[0] = 0  # overwrite one stored entry with an explicit zero
        result = pd.DataFrame.sparse.from_spmatrix(mat)
        dtype = SparseDtype("float64", 0.0)
        expected = pd.DataFrame(mat.todense()).astype(dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "columns",
        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
    )
    def test_from_spmatrix_columns(self, columns):
        """``from_spmatrix`` accepts flat, MultiIndex and duplicated column labels."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        dtype = SparseDtype("float64", 0.0)

        mat = sp_sparse.random(10, 2, density=0.5)
        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
        expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
    )
    def test_to_coo(self, colnames):
        """``DataFrame.sparse.to_coo`` matches a COO matrix built from the dense values."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        df = pd.DataFrame(
            {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]"
        )
        result = df.sparse.to_coo()
        expected = sp_sparse.coo_matrix(np.asarray(df))
        # No stored element differs between the two matrices.
        assert (result != expected).nnz == 0

    @pytest.mark.parametrize("fill_value", [1, np.nan])
    def test_to_coo_nonzero_fill_val_raises(self, fill_value):
        """``to_coo`` raises ValueError when the columns' fill value is not 0."""
        pytest.importorskip("scipy")
        df = pd.DataFrame(
            {
                "A": SparseArray(
                    [fill_value, fill_value, fill_value, 2], fill_value=fill_value
                ),
                "B": SparseArray(
                    [fill_value, 2, fill_value, fill_value], fill_value=fill_value
                ),
            }
        )
        with pytest.raises(ValueError, match="fill value must be 0"):
            df.sparse.to_coo()

    def test_to_coo_midx_categorical(self):
        """``Series.sparse.to_coo`` works with categorical MultiIndex levels."""
        # GH#50996
        sp_sparse = pytest.importorskip("scipy.sparse")

        midx = pd.MultiIndex.from_arrays(
            [
                pd.CategoricalIndex(list("ab"), name="x"),
                pd.CategoricalIndex([0, 1], name="y"),
            ]
        )

        ser = pd.Series(1, index=midx, dtype="Sparse[int]")
        result = ser.sparse.to_coo(row_levels=["x"], column_levels=["y"])[0]
        expected = sp_sparse.coo_matrix(
            (np.array([1, 1]), (np.array([0, 1]), np.array([0, 1]))), shape=(2, 2)
        )
        assert (result != expected).nnz == 0

    def test_to_dense(self):
        """``to_dense`` converts sparse columns with differing fill values to plain columns."""
        df = pd.DataFrame(
            {
                "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)),
                "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)),
                "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)),
            },
            index=["b", "a"],
        )
        result = df.sparse.to_dense()
        expected = pd.DataFrame(
            {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"]
        )
        tm.assert_frame_equal(result, expected)

    def test_density(self):
        """``density`` is 0.75 for a frame with 6 non-fill values out of 8."""
        df = pd.DataFrame(
            {
                "A": SparseArray([1, 0, 2, 1], fill_value=0),
                "B": SparseArray([0, 1, 1, 1], fill_value=0),
            }
        )
        res = df.sparse.density
        expected = 0.75
        assert res == expected

    @pytest.mark.parametrize("dtype", ["int64", "float64"])
    @pytest.mark.parametrize("dense_index", [True, False])
    def test_series_from_coo(self, dtype, dense_index):
        """``Series.sparse.from_coo`` on an identity matrix, with and without dense_index."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        A = sp_sparse.eye(3, format="coo", dtype=dtype)
        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)

        index = pd.MultiIndex.from_tuples(
            [
                np.array([0, 0], dtype=np.int32),
                np.array([1, 1], dtype=np.int32),
                np.array([2, 2], dtype=np.int32),
            ],
        )
        expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
        if dense_index:
            # With dense_index the result covers the full product of the levels.
            expected = expected.reindex(pd.MultiIndex.from_product(index.levels))

        tm.assert_series_equal(result, expected)

    def test_series_from_coo_incorrect_format_raises(self):
        """``from_coo`` rejects a CSR matrix with a TypeError."""
        # gh-26554
        sp_sparse = pytest.importorskip("scipy.sparse")

        m = sp_sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
        with pytest.raises(
            TypeError, match="Expected coo_matrix. Got csr_matrix instead."
        ):
            pd.Series.sparse.from_coo(m)

    def test_with_column_named_sparse(self):
        """A column literally named "sparse" does not shadow the ``.sparse`` accessor."""
        # https://github.com/pandas-dev/pandas/issues/30758
        df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])})
        assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor)
|
@ -0,0 +1,514 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
@pytest.fixture(params=["integer", "block"])
def kind(request):
    """kind kwarg to pass to SparseArray"""
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def mix(request):
    """
    Fixture returning True or False, determining whether to operate
    op(sparse, dense) instead of op(sparse, sparse)
    """
    return request.param
|
||||
|
||||
|
||||
class TestSparseArrayArithmetics:
    """Arithmetic, comparison and logical operator tests for SparseArray.

    The ``kind`` and ``mix`` arguments are the fixtures defined in this module;
    ``all_arithmetic_functions`` is a fixture not defined in this file
    (presumably supplied by a shared conftest -- verify).
    """

    def _assert(self, a, b):
        """Assert that two dense arrays are equal."""
        # TODO(review): the original note reads "We have to use
        # tm.assert_sp_array_equal. See GH #45126", yet the dense comparison
        # below is what is actually performed.
        tm.assert_numpy_array_equal(a, b)

    def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op):
        """Check that ``op`` on sparse operands matches non-sparse Series arithmetic.

        Parameters
        ----------
        a, b : sparse operands (``b`` may also be a scalar).
        a_dense, b_dense : dense counterparts; at least one must be an ndarray.
        mix : when True, evaluate ``op(a, b_dense)`` instead of ``op(a, b)``.
        op : binary callable under test.

        Raises
        ------
        NotImplementedError
            If neither dense operand is an ndarray.
        """
        if isinstance(a_dense, np.ndarray):
            expected = op(pd.Series(a_dense), b_dense).values
        elif isinstance(b_dense, np.ndarray):
            expected = op(a_dense, pd.Series(b_dense)).values
        else:
            raise NotImplementedError

        with np.errstate(invalid="ignore", divide="ignore"):
            rhs = b_dense if mix else b
            result = op(a, rhs).to_dense()

        self._assert(result, expected)

    def _check_bool_result(self, res):
        """Assert ``res`` is a SparseArray of bool subtype with a Python bool fill value."""
        assert isinstance(res, SparseArray)
        assert isinstance(res.dtype, SparseDtype)
        assert res.dtype.subtype == np.bool_
        assert isinstance(res.fill_value, bool)

    def _check_comparison_ops(self, a, b, a_dense, b_dense):
        """Check ==, !=, >=, <=, > and < for sparse-vs-sparse and sparse-vs-dense."""
        comparisons = (
            operator.eq,
            operator.ne,
            operator.ge,
            operator.le,
            operator.gt,
            operator.lt,
        )
        with np.errstate(invalid="ignore"):
            # Wrapping the computation of each expected value in its own
            # np.errstate() block would be too tedious, so one block covers all.
            for rhs in (b, b_dense):  # sparse & sparse first, then sparse & dense
                for compare in comparisons:
                    result = compare(a, rhs)
                    self._check_bool_result(result)
                    self._assert(result.to_dense(), compare(a_dense, b_dense))

    def _check_logical_ops(self, a, b, a_dense, b_dense):
        """Check ``&`` and ``|`` for sparse-vs-sparse and sparse-vs-dense operands."""
        for rhs in (b, b_dense):  # sparse & sparse first, then sparse & dense
            for logical in (operator.and_, operator.or_):
                result = logical(a, rhs)
                self._check_bool_result(result)
                self._assert(result.to_dense(), logical(a_dense, b_dense))

    @pytest.mark.parametrize("scalar", [0, 1, 3])
    @pytest.mark.parametrize("fill_value", [None, 0, 2])
    def test_float_scalar(self, kind, mix, all_arithmetic_functions, fill_value, scalar):
        """Arithmetic between a float SparseArray and an integer scalar."""
        op = all_arithmetic_functions
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        a = SparseArray(values, kind=kind, fill_value=fill_value)
        self._check_numeric_ops(a, scalar, values, scalar, mix, op)

    def test_float_scalar_comparison(self, kind):
        """Comparisons between a float SparseArray and scalars, for several fill values."""
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        # fill_value=None is the constructor default, i.e. "not specified".
        for fill_value in (None, 0, 2):
            a = SparseArray(values, kind=kind, fill_value=fill_value)
            for scalar in (1, 0, 3):
                self._check_comparison_ops(a, scalar, values, scalar)

    def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions):
        """Arithmetic on two NaN-free float arrays whose zeros share positions."""
        # when sp_index are the same
        op = all_arithmetic_functions

        values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_same_index_with_nans(self, kind, mix, all_arithmetic_functions):
        """Arithmetic on two float arrays whose NaNs share positions."""
        # when sp_index are the same
        op = all_arithmetic_functions
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_same_index_comparison(self, kind):
        """Comparisons on float arrays whose fill positions coincide."""
        # when sp_index are the same
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_float_array(self, kind, mix, all_arithmetic_functions):
        """Arithmetic on float arrays with differing NaN positions and fill values."""
        op = all_arithmetic_functions

        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_different_kind(self, mix, all_arithmetic_functions):
        """Arithmetic between an "integer"-kind and a "block"-kind SparseArray."""
        op = all_arithmetic_functions

        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = SparseArray(values, kind="integer")
        b = SparseArray(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, kind="integer", fill_value=0)
        b = SparseArray(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind="integer", fill_value=0)
        b = SparseArray(rvalues, kind="block", fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind="integer", fill_value=1)
        b = SparseArray(rvalues, kind="block", fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_comparison(self, kind):
        """Comparisons on float arrays with differing NaN positions and fill values."""
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_int_array(self, kind, mix, all_arithmetic_functions):
        """Arithmetic on int64 arrays, also asserting the resulting SparseDtype."""
        op = all_arithmetic_functions

        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = SparseArray(values, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = SparseArray(rvalues, fill_value=0, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, fill_value=1, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype, fill_value=1)
        b = SparseArray(rvalues, fill_value=2, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_int_array_comparison(self, kind):
        """Comparisons on int64 arrays with several fill-value combinations."""
        dtype = "int64"
        # int32 NI ATM

        values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = SparseArray(values, dtype=dtype, kind=kind)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0)
        b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, dtype=dtype, kind=kind, fill_value=1)
        b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
    def test_bool_same_index(self, kind, fill_value):
        """Logical ops on two identical boolean arrays."""
        # GH 14000
        # when sp_index are the same
        values = np.array([True, False, True, True], dtype=np.bool_)
        rvalues = np.array([True, False, True, True], dtype=np.bool_)

        a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
        b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
        self._check_logical_ops(a, b, values, rvalues)

    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
    def test_bool_array_logical(self, kind, fill_value):
        """Logical ops on two differing boolean arrays."""
        # GH 14000
        # when sp_index are the same
        values = np.array([True, False, True, False, True, True], dtype=np.bool_)
        rvalues = np.array([True, False, False, True, False, True], dtype=np.bool_)

        a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
        b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
        self._check_logical_ops(a, b, values, rvalues)

    def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between a float SparseArray and an int64 SparseArray."""
        op = all_arithmetic_functions
        rdtype = "int64"
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_mixed_array_comparison(self, kind):
        """Comparisons between a float SparseArray and an int64 SparseArray."""
        rdtype = "int64"
        # int32 NI ATM

        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_xor(self):
        """``^`` on two boolean SparseArrays yields the expected sparse result."""
        s = SparseArray([True, True, False, False])
        t = SparseArray([True, False, True, False])
        result = s ^ t
        sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32"))
        expected = SparseArray([False, True, True], sparse_index=sp_index)
        tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.eq, operator.add])
def test_with_list(op):
    """A plain list operand gives the same result as the equivalent SparseArray."""
    arr = SparseArray([0, 1], fill_value=0)
    result = op(arr, [0, 1])
    expected = op(arr, SparseArray([0, 1]))
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_with_dataframe():
    """``SparseArray.__add__`` returns NotImplemented for a DataFrame operand."""
    # GH#27910
    arr = SparseArray([0, 1], fill_value=0)
    df = pd.DataFrame([[1, 2], [3, 4]])
    result = arr.__add__(df)
    assert result is NotImplemented
|
||||
|
||||
|
||||
def test_with_zerodim_ndarray():
    """Multiplying by a zero-dimensional ndarray matches multiplying by the scalar."""
    # GH#27910
    arr = SparseArray([0, 1], fill_value=0)

    result = arr * np.array(2)
    expected = arr * 2
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.exp])
@pytest.mark.parametrize(
    "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])]
)
def test_ufuncs(ufunc, arr):
    """A unary ufunc is applied to both the values and the fill value."""
    result = ufunc(arr)
    fill_value = ufunc(arr.fill_value)
    expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value)
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        # The source listed the fill_value=1 case four times verbatim; the
        # duplicates added no coverage and are collapsed into one.
        (SparseArray([0, 0, 0]), np.array([0, 1, 2])),
        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
    ],
)
@pytest.mark.parametrize("ufunc", [np.add, np.greater])
def test_binary_ufuncs(ufunc, a, b):
    """A binary ufunc on (SparseArray, ndarray) returns a SparseArray with dense-equal values."""
    # can't say anything about fill value here.
    result = ufunc(a, b)
    expected = ufunc(np.asarray(a), np.asarray(b))
    assert isinstance(result, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(result), expected)
|
||||
|
||||
|
||||
def test_ndarray_inplace():
    """``ndarray += SparseArray`` updates the ndarray with the element-wise sum."""
    sparray = SparseArray([0, 2, 0, 0])
    ndarray = np.array([0, 1, 2, 3])
    ndarray += sparray
    expected = np.array([0, 3, 2, 3])
    tm.assert_numpy_array_equal(ndarray, expected)
|
||||
|
||||
|
||||
def test_sparray_inplace():
    """``SparseArray += ndarray`` rebinds to a SparseArray holding the element-wise sum."""
    sparray = SparseArray([0, 2, 0, 0])
    ndarray = np.array([0, 1, 2, 3])
    sparray += ndarray
    expected = SparseArray([0, 3, 2, 3], fill_value=0)
    tm.assert_sp_array_equal(sparray, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cons", [list, np.array, SparseArray])
def test_mismatched_length_cmp_op(cons):
    """``&`` between operands of different lengths raises ValueError."""
    left = SparseArray([True, True])
    right = cons([True, True, True])
    with pytest.raises(ValueError, match="operands have mismatched length"):
        left & right
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
@pytest.mark.parametrize("fill_value", [np.nan, 3])
def test_binary_operators(op, fill_value):
    """Binary operators agree across sparse/sparse, sparse/dense, dense/sparse and scalar."""
    op = getattr(operator, op)
    # Draw both operands from ONE seeded generator.  Seeding two separate
    # generators with the same value would make data1 and data2 identical,
    # which barely exercises the non-commutative operators.
    rng = np.random.default_rng(2)
    data1 = rng.standard_normal(20)
    data2 = rng.standard_normal(20)

    data1[::2] = fill_value
    data2[::3] = fill_value

    first = SparseArray(data1, fill_value=fill_value)
    second = SparseArray(data2, fill_value=fill_value)

    with np.errstate(all="ignore"):
        res = op(first, second)
        exp = SparseArray(
            op(first.to_dense(), second.to_dense()), fill_value=first.fill_value
        )
        assert isinstance(res, SparseArray)
        tm.assert_almost_equal(res.to_dense(), exp.to_dense())

        res2 = op(first, second.to_dense())
        assert isinstance(res2, SparseArray)
        tm.assert_sp_array_equal(res, res2)

        res3 = op(first.to_dense(), second)
        assert isinstance(res3, SparseArray)
        tm.assert_sp_array_equal(res, res3)

        res4 = op(first, 4)
        assert isinstance(res4, SparseArray)

        # Ignore this if the actual op raises (e.g. pow).
        try:
            exp = op(first.to_dense(), 4)
            exp_fv = op(first.fill_value, 4)
        except ValueError:
            pass
        else:
            tm.assert_almost_equal(res4.fill_value, exp_fv)
            tm.assert_almost_equal(res4.to_dense(), exp)
|
@ -0,0 +1,480 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.sparse import IntIndex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
SparseDtype,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
@pytest.fixture
def arr_data():
    """Fixture returning numpy array with valid and missing entries"""
    return np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
|
||||
|
||||
|
||||
@pytest.fixture
def arr(arr_data):
    """Fixture returning SparseArray from 'arr_data'"""
    return SparseArray(arr_data)
|
||||
|
||||
|
||||
@pytest.fixture
def zarr():
    """Fixture returning SparseArray with integer entries and 'fill_value=0'"""
    return SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
|
||||
|
||||
|
||||
class TestSparseArray:
|
||||
@pytest.mark.parametrize("fill_value", [0, None, np.nan])
def test_shift_fill_value(self, fill_value):
    """``shift(1, fill_value=...)`` puts the given value in the vacated first slot.

    A missing ``fill_value`` is replaced by the result dtype's ``na_value``
    when the expected array is built.
    """
    # GH #24128
    sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0)
    res = sparse.shift(1, fill_value=fill_value)
    if isna(fill_value):
        fill_value = res.dtype.na_value
    exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0)
    tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
def test_set_fill_value(self):
|
||||
arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan)
|
||||
arr.fill_value = 2
|
||||
assert arr.fill_value == 2
|
||||
|
||||
arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
|
||||
arr.fill_value = 2
|
||||
assert arr.fill_value == 2
|
||||
|
||||
msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
arr.fill_value = 3.1
|
||||
assert arr.fill_value == 3.1
|
||||
|
||||
arr.fill_value = np.nan
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
|
||||
arr.fill_value = True
|
||||
assert arr.fill_value is True
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
arr.fill_value = 0
|
||||
|
||||
arr.fill_value = np.nan
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
@pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
|
||||
def test_set_fill_invalid_non_scalar(self, val):
|
||||
arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
|
||||
msg = "fill_value must be a scalar"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.fill_value = val
|
||||
|
||||
def test_copy(self, arr):
|
||||
arr2 = arr.copy()
|
||||
assert arr2.sp_values is not arr.sp_values
|
||||
assert arr2.sp_index is arr.sp_index
|
||||
|
||||
def test_values_asarray(self, arr_data, arr):
|
||||
tm.assert_almost_equal(arr.to_dense(), arr_data)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,shape,dtype",
|
||||
[
|
||||
([0, 0, 0, 0, 0], (5,), None),
|
||||
([], (0,), None),
|
||||
([0], (1,), None),
|
||||
(["A", "A", np.nan, "B"], (4,), object),
|
||||
],
|
||||
)
|
||||
def test_shape(self, data, shape, dtype):
|
||||
# GH 21126
|
||||
out = SparseArray(data, dtype=dtype)
|
||||
assert out.shape == shape
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"vals",
|
||||
[
|
||||
[np.nan, np.nan, np.nan, np.nan, np.nan],
|
||||
[1, np.nan, np.nan, 3, np.nan],
|
||||
[1, np.nan, 0, 3, 0],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("fill_value", [None, 0])
|
||||
def test_dense_repr(self, vals, fill_value):
|
||||
vals = np.array(vals)
|
||||
arr = SparseArray(vals, fill_value=fill_value)
|
||||
|
||||
res = arr.to_dense()
|
||||
tm.assert_numpy_array_equal(res, vals)
|
||||
|
||||
@pytest.mark.parametrize("fix", ["arr", "zarr"])
|
||||
def test_pickle(self, fix, request):
|
||||
obj = request.getfixturevalue(fix)
|
||||
unpickled = tm.round_trip_pickle(obj)
|
||||
tm.assert_sp_array_equal(unpickled, obj)
|
||||
|
||||
def test_generator_warnings(self):
|
||||
sp_arr = SparseArray([1, 2, 3])
|
||||
with tm.assert_produces_warning(None):
|
||||
for _ in sp_arr:
|
||||
pass
|
||||
|
||||
def test_where_retain_fill_value(self):
|
||||
# GH#45691 don't lose fill_value on _where
|
||||
arr = SparseArray([np.nan, 1.0], fill_value=0)
|
||||
|
||||
mask = np.array([True, False])
|
||||
|
||||
res = arr._where(~mask, 1)
|
||||
exp = SparseArray([1, 1.0], fill_value=0)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
ser = pd.Series(arr)
|
||||
res = ser.where(~mask, 1)
|
||||
tm.assert_series_equal(res, pd.Series(exp))
|
||||
|
||||
def test_fillna(self):
|
||||
s = SparseArray([1, np.nan, np.nan, 3, np.nan])
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
s = SparseArray([1, np.nan, 0, 3, 0])
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
s = SparseArray([np.nan, np.nan, np.nan, np.nan])
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
# float dtype's fill_value is np.nan, replaced by -1
|
||||
s = SparseArray([0.0, 0.0, 0.0, 0.0])
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
# int dtype shouldn't have missing. No changes.
|
||||
s = SparseArray([0, 0, 0, 0])
|
||||
assert s.dtype == SparseDtype(np.int64)
|
||||
assert s.fill_value == 0
|
||||
res = s.fillna(-1)
|
||||
tm.assert_sp_array_equal(res, s)
|
||||
|
||||
s = SparseArray([0, 0, 0, 0], fill_value=0)
|
||||
assert s.dtype == SparseDtype(np.int64)
|
||||
assert s.fill_value == 0
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([0, 0, 0, 0], fill_value=0)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
# fill_value can be nan if there is no missing hole.
|
||||
# only fill_value will be changed
|
||||
s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
|
||||
assert s.dtype == SparseDtype(np.int64, fill_value=np.nan)
|
||||
assert np.isnan(s.fill_value)
|
||||
res = s.fillna(-1)
|
||||
exp = SparseArray([0, 0, 0, 0], fill_value=-1)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
def test_fillna_overlap(self):
|
||||
s = SparseArray([1, np.nan, np.nan, 3, np.nan])
|
||||
# filling with existing value doesn't replace existing value with
|
||||
# fill_value, i.e. existing 3 remains in sp_values
|
||||
res = s.fillna(3)
|
||||
exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
|
||||
tm.assert_numpy_array_equal(res.to_dense(), exp)
|
||||
|
||||
s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
|
||||
res = s.fillna(3)
|
||||
exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
|
||||
tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
def test_nonzero(self):
|
||||
# Tests regression #21172.
|
||||
sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
|
||||
expected = np.array([2, 5, 9], dtype=np.int32)
|
||||
(result,) = sa.nonzero()
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
|
||||
(result,) = sa.nonzero()
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
|
||||
class TestSparseArrayAnalytics:
    """SparseArray tests: cumsum, ufuncs, modf, nbytes, density and npoints."""

    @pytest.mark.parametrize(
        "data,expected",
        [
            (
                np.array([1, 2, 3, 4, 5], dtype=float),  # non-null data
                SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])),
            ),
            (
                np.array([1, 2, np.nan, 4, 5], dtype=float),  # null data
                SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])),
            ),
        ],
    )
    @pytest.mark.parametrize("numpy", [True, False])
    def test_cumsum(self, data, expected, numpy):
        """cumsum via np.cumsum or the method; unsupported arguments raise ValueError."""
        if numpy:
            cumsum = np.cumsum
        else:

            def cumsum(values):
                return values.cumsum()

        # The same expectation is checked for three different input fill_values.
        for ctor_kwargs in ({}, {"fill_value": np.nan}, {"fill_value": 2}):
            out = cumsum(SparseArray(data, **ctor_kwargs))
            tm.assert_sp_array_equal(out, expected)

        if not numpy:
            axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
            msg = re.escape(f"axis(={axis}) out of bounds")
            with pytest.raises(ValueError, match=msg):
                SparseArray(data).cumsum(axis=axis)
            return

        # numpy compatibility checks.
        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.cumsum(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.cumsum(SparseArray(data), out=out)

    def test_ufunc(self):
        """abs / np.abs / np.sin act on both the stored values and the fill_value."""
        # GH 13853 make sure ufunc is applied to fill_value
        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        expected = SparseArray([1, np.nan, 2, np.nan, 2])
        for absolute in (abs, np.abs):
            tm.assert_sp_array_equal(absolute(sparse), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        expected = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1)
        for absolute in (abs, np.abs):
            tm.assert_sp_array_equal(absolute(sparse), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
        expected = SparseArray([1, 1, 2, 2], fill_value=1)
        for absolute in (abs, np.abs):
            tm.assert_sp_array_equal(absolute(sparse), expected)

        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        expected = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
        tm.assert_sp_array_equal(np.sin(sparse), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        expected = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
        tm.assert_sp_array_equal(np.sin(sparse), expected)

        sparse = SparseArray([1, -1, 0, -2], fill_value=0)
        expected = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
        tm.assert_sp_array_equal(np.sin(sparse), expected)

    def test_ufunc_args(self):
        """np.add(sparse, 1) shifts the stored values and the fill_value alike."""
        # GH 13853 make sure ufunc is applied to fill_value, including its arg
        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        expected = SparseArray([2, np.nan, 3, np.nan, -1])
        tm.assert_sp_array_equal(np.add(sparse, 1), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        expected = SparseArray([2, 0, 3, -1], fill_value=2)
        tm.assert_sp_array_equal(np.add(sparse, 1), expected)

        sparse = SparseArray([1, -1, 0, -2], fill_value=0)
        expected = SparseArray([2, 0, 1, -1], fill_value=1)
        tm.assert_sp_array_equal(np.add(sparse, 1), expected)

    @pytest.mark.parametrize("fill_value", [0.0, np.nan])
    def test_modf(self, fill_value):
        """np.modf returns two sparse results matching the dense computation."""
        # https://github.com/pandas-dev/pandas/issues/26946
        sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value)
        frac_sparse, whole_sparse = np.modf(sparse)
        frac_dense, whole_dense = np.modf(np.asarray(sparse))
        tm.assert_sp_array_equal(
            frac_sparse, SparseArray(frac_dense, fill_value=fill_value)
        )
        tm.assert_sp_array_equal(
            whole_sparse, SparseArray(whole_dense, fill_value=fill_value)
        )

    def test_nbytes_integer(self):
        """nbytes of a two-point array with kind='integer' is 24."""
        sparse = SparseArray([1, 0, 0, 0, 2], kind="integer")
        size = sparse.nbytes
        # (2 * 8) + 2 * 4
        assert size == 24

    def test_nbytes_block(self):
        """nbytes of a two-point array with kind='block' is 24."""
        sparse = SparseArray([1, 2, 0, 0, 0], kind="block")
        size = sparse.nbytes
        # (2 * 8) + 4 + 4
        # sp_values, blocs, blengths
        assert size == 24

    def test_asarray_datetime64(self):
        """np.asarray on a datetime64 SparseArray completes without raising."""
        sparse = SparseArray(pd.to_datetime(["2012", None, None, "2013"]))
        np.asarray(sparse)

    def test_density(self):
        """One stored point out of two values gives density 0.5."""
        sparse = SparseArray([0, 1])
        assert sparse.density == 0.5

    def test_npoints(self):
        """Only the non-fill value counts towards npoints."""
        sparse = SparseArray([0, 1])
        assert sparse.npoints == 1
|
||||
|
||||
|
||||
def test_setting_fill_value_fillna_still_works():
    """isna() stays correct after fill_value is switched from 0.0 to NaN."""
    # This is why letting users update fill_value / dtype is bad
    # astype has the same problem.
    sparse = SparseArray([1.0, np.nan, 1.0], fill_value=0.0)
    sparse.fill_value = np.nan

    # Can't do direct comparison, since the sp_index will be different
    # So let's convert to ndarray and check there.
    na_mask = np.asarray(sparse.isna())

    tm.assert_numpy_array_equal(na_mask, np.array([False, True, False]))
|
||||
|
||||
|
||||
def test_setting_fill_value_updates():
    """Changing fill_value to NaN yields the expected index, values and dtype."""
    sparse = SparseArray([0.0, np.nan], fill_value=0)
    sparse.fill_value = np.nan

    # use private constructor to get the index right
    # otherwise both nans would be un-stored.
    stored_values = np.array([np.nan])
    stored_index = IntIndex(2, [1])
    expected = SparseArray._simple_new(
        sparse_array=stored_values,
        sparse_index=stored_index,
        dtype=SparseDtype(float, np.nan),
    )
    tm.assert_sp_array_equal(sparse, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr,fill_value,loc",
    [
        ([None, 1, 2], None, 0),
        ([0, None, 2], None, 1),
        ([0, 1, None], None, 2),
        ([0, 1, 1, None, None], None, 3),
        ([1, 1, 1, 2], None, -1),
        ([], None, -1),
        ([None, 1, 0, 0, None, 2], None, 0),
        ([None, 1, 0, 0, None, 2], 1, 1),
        ([None, 1, 0, 0, None, 2], 2, 5),
        ([None, 1, 0, 0, None, 2], 3, -1),
        ([None, 0, 0, 1, 2, 1], 0, 1),
        ([None, 0, 0, 1, 2, 1], 1, 3),
    ],
)
def test_first_fill_value_loc(arr, fill_value, loc):
    """_first_fill_value_loc() returns the expected position for each case (-1 in some)."""
    sparse = SparseArray(arr, fill_value=fill_value)
    found = sparse._first_fill_value_loc()
    assert found == loc
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr",
    [
        [1, 2, np.nan, np.nan],
        [1, np.nan, 2, np.nan],
        [1, 2, np.nan],
        [np.nan, 1, 0, 0, np.nan, 2],
        [np.nan, 0, 0, 1, 2, 1],
    ],
)
@pytest.mark.parametrize("fill_value", [np.nan, 0, 1])
def test_unique_na_fill(arr, fill_value):
    """SparseArray.unique() stays sparse and matches Series.unique() on the data."""
    sparse_unique = SparseArray(arr, fill_value=fill_value).unique()
    dense_unique = pd.Series(arr).unique()
    assert isinstance(sparse_unique, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(sparse_unique), dense_unique)
|
||||
|
||||
|
||||
def test_unique_all_sparse():
    """unique() on an array holding only zeros returns a single zero."""
    # https://github.com/pandas-dev/pandas/issues/23168
    only_fill = SparseArray([0, 0])
    uniques = only_fill.unique()
    tm.assert_sp_array_equal(uniques, SparseArray([0]))
|
||||
|
||||
|
||||
def test_map():
    """SparseArray.map gives the same result for a dict, a Series and a callable.

    Each mapper sends 0 -> 10, 1 -> 11 and 2 -> 12, so the expected result is
    ``[10, 11, 12]`` with ``fill_value=10``.
    """
    arr = SparseArray([0, 1, 2])
    expected = SparseArray([10, 11, 12], fill_value=10)

    # dict
    result = arr.map({0: 10, 1: 11, 2: 12})
    tm.assert_sp_array_equal(result, expected)

    # series
    result = arr.map(pd.Series({0: 10, 1: 11, 2: 12}))
    tm.assert_sp_array_equal(result, expected)

    # function
    # Previously this section repeated the Series mapper, so the callable
    # code path of ``map`` was never exercised.
    result = arr.map(lambda x: x + 10)
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_missing():
    """A key missing from the mapping dict maps to a missing value."""
    source = SparseArray([0, 1, 2])
    partial_mapping = {0: 10, 1: 11}

    mapped = source.map(partial_mapping)
    tm.assert_sp_array_equal(mapped, SparseArray([10, 11, None], fill_value=10))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [np.nan, 1])
def test_dropna(fill_value):
    """dropna() removes the NaN entry on the array and on a DataFrame column."""
    # GH-28287
    sparse = SparseArray([np.nan, 1], fill_value=fill_value)
    kept = SparseArray([1.0], fill_value=fill_value)
    tm.assert_sp_array_equal(sparse.dropna(), kept)

    frame = pd.DataFrame({"a": [0, 1], "b": sparse})
    expected_frame = pd.DataFrame({"a": [1], "b": kept}, index=pd.Index([1]))
    tm.assert_equal(frame.dropna(), expected_frame)
|
||||
|
||||
|
||||
def test_drop_duplicates_fill_value():
    """drop_duplicates() on an all-zero sparse frame leaves a single row."""
    # GH 11726
    dense = pd.DataFrame(np.zeros((5, 5)))
    sparse_frame = dense.apply(lambda x: SparseArray(x, fill_value=0))

    deduplicated = sparse_frame.drop_duplicates()

    expected = pd.DataFrame({i: SparseArray([0.0], fill_value=0) for i in range(5)})
    tm.assert_frame_equal(deduplicated, expected)
|
||||
|
||||
|
||||
def test_zero_sparse_column():
    """Boolean row selection works when the sparse column holds only zeros."""
    # GH 27781
    plain = [1, 2, 3]
    all_zero = pd.DataFrame({"A": SparseArray([0, 0, 0]), "B": plain})
    one_nonzero = pd.DataFrame({"A": SparseArray([0, 1, 0]), "B": plain})

    # Dropping the B == 2 row removes the only non-zero entry of the second frame.
    result = all_zero.loc[all_zero["B"] != 2]
    tm.assert_frame_equal(result, one_nonzero.loc[one_nonzero["B"] != 2])

    explicit = pd.DataFrame({"A": SparseArray([0, 0]), "B": [1, 3]}, index=[0, 2])
    tm.assert_frame_equal(result, explicit)
|
@ -0,0 +1,133 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.sparse import IntIndex
|
||||
|
||||
from pandas import (
|
||||
SparseDtype,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestAstype:
    """Tests for SparseArray.astype with numpy, sparse and datetime targets."""

    def test_astype(self):
        """Cast to sparse float32/float64/int64; NaN data refuses an int cast."""
        # float -> float
        source = SparseArray([None, None, 0, 2])
        as_float32 = source.astype("Sparse[float32]")
        tm.assert_sp_array_equal(
            as_float32, SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
        )

        float_dtype = SparseDtype("float64", fill_value=0)
        as_float64 = source.astype(float_dtype)
        expected = SparseArray._simple_new(
            np.array([0.0, 2.0], dtype=float_dtype.subtype),
            IntIndex(4, [2, 3]),
            float_dtype,
        )
        tm.assert_sp_array_equal(as_float64, expected)

        int_dtype = SparseDtype("int64", 0)
        as_int64 = source.astype(int_dtype)
        expected = SparseArray._simple_new(
            np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), int_dtype
        )
        tm.assert_sp_array_equal(as_int64, expected)

        with_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            with_nan.astype("Sparse[i8]")

    def test_astype_bool(self):
        """astype(bool) gives an ndarray; a bool SparseDtype keeps it sparse."""
        source = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        dense_bool = source.astype(bool)
        tm.assert_numpy_array_equal(dense_bool, np.array([1, 0, 0, 1], dtype=bool))

        # update fill value
        sparse_bool = source.astype(SparseDtype(bool, False))
        expected = SparseArray(
            [True, False, False, True], dtype=SparseDtype(bool, False)
        )
        tm.assert_sp_array_equal(sparse_bool, expected)

    def test_astype_all(self, any_real_numpy_dtype):
        """Casting to the parametrized numpy dtype matches ndarray.astype."""
        vals = np.array([1, 2, 3])
        sparse = SparseArray(vals, fill_value=1)
        target = np.dtype(any_real_numpy_dtype)
        converted = sparse.astype(target)
        tm.assert_numpy_array_equal(converted, vals.astype(any_real_numpy_dtype))

    @pytest.mark.parametrize(
        "arr, dtype, expected",
        [
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            pytest.param(
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),
                ),
            ),
            (
                SparseArray([0, 1, 10]),
                str,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
            ),
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, arr, dtype, expected):
        """astype through dtype.update_dtype(...) gives the listed sparse result."""
        target = arr.dtype.update_dtype(dtype)
        converted = arr.astype(target)
        tm.assert_sp_array_equal(converted, expected)

    def test_astype_nan_raises(self):
        """A float array containing NaN cannot be cast to int."""
        with_nan = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            with_nan.astype(int)

    def test_astype_copy_false(self):
        """astype(..., copy=False) still produces correctly converted values."""
        # GH#34456 bug caused by using .view instead of .astype in astype_nansafe
        ints = SparseArray([1, 2, 3])
        target = SparseDtype(float, 0)

        converted = ints.astype(target, copy=False)
        tm.assert_sp_array_equal(
            converted, SparseArray([1.0, 2.0, 3.0], fill_value=0.0)
        )

    def test_astype_dt64_to_int64(self):
        """datetime64 sparse data casts to int64 like the plain ndarray does."""
        # GH#49631 match non-sparse behavior
        values = np.array(["NaT", "2016-01-02", "2016-01-03"], dtype="M8[ns]")
        as_ints = values.astype("int64")

        sparse = SparseArray(values)
        tm.assert_numpy_array_equal(sparse.astype("int64"), as_ints)

        # we should also be able to cast to equivalent Sparse[int64]
        dtype_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
        sparse_ints = sparse.astype(dtype_int64)
        tm.assert_numpy_array_equal(sparse_ints.to_numpy(), as_ints)

        # GH#50087 we should match the non-sparse behavior regardless of
        # if we have a fill_value other than NaT
        custom_fill = SparseDtype("datetime64[ns]", values[1])
        sparse_custom = SparseArray(values, dtype=custom_fill)
        tm.assert_numpy_array_equal(sparse_custom.astype("int64"), as_ints)
|
@ -0,0 +1,62 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestSparseArrayConcat:
    """Tests for SparseArray._concat_same_type."""

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_basic(self, kind):
        """Concatenating two arrays of one kind keeps that kind and the stored values."""
        left = SparseArray([1, 0, 0, 2], kind=kind)
        right = SparseArray([1, 0, 2, 2], kind=kind)

        combined = SparseArray._concat_same_type([left, right])
        # No assertions are made about the sparse index itself, since
        # sparse blocks are not merged across the arrays in to_concat.
        stored = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(combined.sp_values, stored)
        assert combined.kind == kind

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_uses_first_kind(self, kind):
        """With mixed kinds the result takes the kind of the first array."""
        if kind == "block":
            other = "integer"
        else:
            other = "block"
        left = SparseArray([1, 0, 0, 2], kind=kind)
        right = SparseArray([1, 0, 2, 2], kind=other)

        combined = SparseArray._concat_same_type([left, right])
        stored = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(combined.sp_values, stored)
        assert combined.kind == kind
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other, expected_dtype",
    [
        # compatible dtype -> preserve sparse
        (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)),
        # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)),
        # incompatible dtype -> Sparse[common dtype]
        (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)),
        # incompatible dtype -> Sparse[object] dtype
        (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)),
        # categorical with compatible categories -> dtype of the categories
        (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")),
        (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")),
        # categorical with incompatible categories -> object dtype
        (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)),
    ],
)
def test_concat_with_non_sparse(other, expected_dtype):
    """pd.concat of a sparse and a non-sparse Series gives the expected dtype in both orders."""
    # https://github.com/pandas-dev/pandas/issues/34336
    s_sparse = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0))

    # sparse first
    sparse_first = pd.concat([s_sparse, other], ignore_index=True)
    combined = list(s_sparse) + list(other)
    tm.assert_series_equal(sparse_first, pd.Series(combined).astype(expected_dtype))

    # sparse last
    sparse_last = pd.concat([other, s_sparse], ignore_index=True)
    combined = list(other) + list(s_sparse)
    tm.assert_series_equal(sparse_last, pd.Series(combined).astype(expected_dtype))
|
@ -0,0 +1,285 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.sparse import IntIndex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
SparseDtype,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestConstructors:
|
||||
def test_constructor_dtype(self):
|
||||
arr = SparseArray([np.nan, 1, 2, np.nan])
|
||||
assert arr.dtype == SparseDtype(np.float64, np.nan)
|
||||
assert arr.dtype.subtype == np.float64
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
|
||||
assert arr.dtype == SparseDtype(np.float64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
|
||||
assert arr.dtype == SparseDtype(np.float64, np.nan)
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], dtype=None)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
def test_constructor_dtype_str(self):
|
||||
result = SparseArray([1, 2, 3], dtype="int")
|
||||
expected = SparseArray([1, 2, 3], dtype=int)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_constructor_sparse_dtype(self):
|
||||
result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
|
||||
expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
assert result.sp_values.dtype == np.dtype("int64")
|
||||
|
||||
def test_constructor_sparse_dtype_str(self):
|
||||
result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
|
||||
expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
assert result.sp_values.dtype == np.dtype("int32")
|
||||
|
||||
def test_constructor_object_dtype(self):
|
||||
# GH#11856
|
||||
arr = SparseArray(["A", "A", np.nan, "B"], dtype=object)
|
||||
assert arr.dtype == SparseDtype(object)
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A")
|
||||
assert arr.dtype == SparseDtype(object, "A")
|
||||
assert arr.fill_value == "A"
|
||||
|
||||
def test_constructor_object_dtype_bool_fill(self):
|
||||
# GH#17574
|
||||
data = [False, 0, 100.0, 0.0]
|
||||
arr = SparseArray(data, dtype=object, fill_value=False)
|
||||
assert arr.dtype == SparseDtype(object, False)
|
||||
assert arr.fill_value is False
|
||||
arr_expected = np.array(data, dtype=object)
|
||||
it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
|
||||
assert np.fromiter(it, dtype=np.bool_).all()
|
||||
|
||||
@pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
def test_constructor_na_dtype(self, dtype):
    """Data containing NaN cannot be built with an integer dtype."""
    data_with_nan = [0, 1, np.nan]
    with pytest.raises(ValueError, match="Cannot convert"):
        SparseArray(data_with_nan, dtype=dtype)
|
||||
|
||||
def test_constructor_warns_when_losing_timezone(self):
|
||||
# GH#32501 warn when losing timezone information
|
||||
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
|
||||
|
||||
expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
|
||||
|
||||
with tm.assert_produces_warning(UserWarning):
|
||||
result = SparseArray(dti)
|
||||
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(UserWarning):
|
||||
result = SparseArray(pd.Series(dti))
|
||||
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_constructor_spindex_dtype(self):
|
||||
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
|
||||
# TODO: actionable?
|
||||
# XXX: Behavior change: specifying SparseIndex no longer changes the
|
||||
# fill_value
|
||||
expected = SparseArray([0, 1, 2, 0], kind="integer")
|
||||
tm.assert_sp_array_equal(arr, expected)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray(
|
||||
data=[1, 2, 3],
|
||||
sparse_index=IntIndex(4, [1, 2, 3]),
|
||||
dtype=np.int64,
|
||||
fill_value=0,
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray(
|
||||
data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray(
|
||||
data=[1, 2, 3],
|
||||
sparse_index=IntIndex(4, [1, 2, 3]),
|
||||
dtype=None,
|
||||
fill_value=0,
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 3], dtype=None)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
@pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
def test_constructor_spindex_dtype_scalar(self, sparse_index):
    """Scalar data warns as deprecated and builds a one-element int64 array."""
    # scalar input
    msg = "Constructing SparseArray with scalar data is deprecated"

    # First the parametrized index, then always an explicit one-element index.
    for index in (sparse_index, IntIndex(1, [0])):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            built = SparseArray(data=1, sparse_index=index, dtype=None)
        reference = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(built, reference)
        assert built.dtype == SparseDtype(np.int64)
        assert built.fill_value == 0
|
||||
|
||||
def test_constructor_spindex_dtype_scalar_broadcasts(self):
|
||||
arr = SparseArray(
|
||||
data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
@pytest.mark.parametrize(
    "data, fill_value",
    [
        (np.array([1, 2]), 0),
        (np.array([1.0, 2.0]), np.nan),
        ([True, False], False),
        ([pd.Timestamp("2017-01-01")], pd.NaT),
    ],
)
def test_constructor_inferred_fill_value(self, data, fill_value):
    """Without an explicit fill_value the constructor infers the listed one."""
    inferred = SparseArray(data).fill_value

    if not isna(fill_value):
        assert inferred == fill_value
        return
    # Missing values do not compare equal, so only missingness is checked.
    assert isna(inferred)
|
||||
|
||||
@pytest.mark.parametrize("format", ["coo", "csc", "csr"])
@pytest.mark.parametrize("size", [0, 10])
def test_from_spmatrix(self, size, format):
    """from_spmatrix on a random one-column scipy matrix matches its dense form."""
    sp_sparse = pytest.importorskip("scipy.sparse")

    matrix = sp_sparse.random(size, 1, density=0.5, format=format)
    converted = np.asarray(SparseArray.from_spmatrix(matrix))

    flattened = matrix.toarray().ravel()
    tm.assert_numpy_array_equal(converted, flattened)
|
||||
|
||||
@pytest.mark.parametrize("format", ["coo", "csc", "csr"])
def test_from_spmatrix_including_explicit_zero(self, format):
    """from_spmatrix still matches the dense form when a stored entry is zero."""
    sp_sparse = pytest.importorskip("scipy.sparse")

    matrix = sp_sparse.random(10, 1, density=0.5, format=format)
    # Overwrite one stored entry so the matrix carries an explicit zero.
    matrix.data[0] = 0
    converted = np.asarray(SparseArray.from_spmatrix(matrix))

    flattened = matrix.toarray().ravel()
    tm.assert_numpy_array_equal(converted, flattened)
|
||||
|
||||
def test_from_spmatrix_raises(self):
    """A matrix with four columns is rejected by from_spmatrix."""
    sp_sparse = pytest.importorskip("scipy.sparse")

    four_columns = sp_sparse.eye(5, 4, format="csc")

    with pytest.raises(ValueError, match="not '4'"):
        SparseArray.from_spmatrix(four_columns)
|
||||
|
||||
def test_constructor_from_too_large_array(self):
    """Two-dimensional input is rejected with a TypeError."""
    two_dimensional = np.arange(10).reshape((2, 5))

    with pytest.raises(TypeError, match="expected dimension <= 1 data"):
        SparseArray(two_dimensional)
|
||||
|
||||
def test_constructor_from_sparse(self):
|
||||
zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
|
||||
res = SparseArray(zarr)
|
||||
assert res.fill_value == 0
|
||||
tm.assert_almost_equal(res.sp_values, zarr.sp_values)
|
||||
|
||||
def test_constructor_copy(self):
|
||||
arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
|
||||
arr = SparseArray(arr_data)
|
||||
|
||||
cp = SparseArray(arr, copy=True)
|
||||
cp.sp_values[:3] = 0
|
||||
assert not (arr.sp_values[:3] == 0).any()
|
||||
|
||||
not_copy = SparseArray(arr)
|
||||
not_copy.sp_values[:3] = 0
|
||||
assert (arr.sp_values[:3] == 0).all()
|
||||
|
||||
def test_constructor_bool(self):
|
||||
# GH#10648
|
||||
data = np.array([False, False, True, True, False, False])
|
||||
arr = SparseArray(data, fill_value=False, dtype=bool)
|
||||
|
||||
assert arr.dtype == SparseDtype(bool)
|
||||
tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
|
||||
# Behavior change: np.asarray densifies.
|
||||
# tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
|
||||
tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32))
|
||||
|
||||
dense = arr.to_dense()
|
||||
assert dense.dtype == bool
|
||||
tm.assert_numpy_array_equal(dense, data)
|
||||
|
||||
def test_constructor_bool_fill_value(self):
|
||||
arr = SparseArray([True, False, True], dtype=None)
|
||||
assert arr.dtype == SparseDtype(np.bool_)
|
||||
assert not arr.fill_value
|
||||
|
||||
arr = SparseArray([True, False, True], dtype=np.bool_)
|
||||
assert arr.dtype == SparseDtype(np.bool_)
|
||||
assert not arr.fill_value
|
||||
|
||||
arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True)
|
||||
assert arr.dtype == SparseDtype(np.bool_, True)
|
||||
assert arr.fill_value
|
||||
|
||||
def test_constructor_float32(self):
|
||||
# GH#10648
|
||||
data = np.array([1.0, np.nan, 3], dtype=np.float32)
|
||||
arr = SparseArray(data, dtype=np.float32)
|
||||
|
||||
assert arr.dtype == SparseDtype(np.float32)
|
||||
tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32))
|
||||
# Behavior change: np.asarray densifies.
|
||||
# tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
|
||||
tm.assert_numpy_array_equal(
|
||||
arr.sp_index.indices, np.array([0, 2], dtype=np.int32)
|
||||
)
|
||||
|
||||
dense = arr.to_dense()
|
||||
assert dense.dtype == np.float32
|
||||
tm.assert_numpy_array_equal(dense, data)
|
@ -0,0 +1,224 @@
|
||||
import re
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", 0),
        ("float", np.nan),
        ("bool", False),
        ("object", np.nan),
        ("datetime64[ns]", np.datetime64("NaT", "ns")),
        ("timedelta64[ns]", np.timedelta64("NaT", "ns")),
    ],
)
def test_inferred_dtype(dtype, fill_value):
    """Check the default fill_value SparseDtype infers for each subtype.

    Parameters
    ----------
    dtype : str
        Subtype name handed to ``SparseDtype``.
    fill_value : scalar
        The fill value the resulting dtype is expected to report.
    """
    inferred = SparseDtype(dtype).fill_value

    if pd.isna(fill_value):
        # Missing values never compare equal, so check missingness and the
        # exact scalar class separately; two asserts show which half failed.
        assert pd.isna(inferred)
        assert type(inferred) is type(fill_value)
    else:
        assert inferred == fill_value
|
||||
|
||||
|
||||
def test_from_sparse_dtype():
    """Wrapping an existing SparseDtype keeps its fill_value."""
    rewrapped = SparseDtype(SparseDtype("float", 0))
    assert rewrapped.fill_value == 0
|
||||
|
||||
|
||||
def test_from_sparse_dtype_fill_value():
    """An explicit fill_value overrides the one carried by the wrapped dtype."""
    base = SparseDtype("int", 1)
    overridden = SparseDtype(base, fill_value=2)
    assert overridden == SparseDtype("int", 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", None),
        ("float", None),
        ("bool", None),
        ("object", None),
        ("datetime64[ns]", None),
        ("timedelta64[ns]", None),
        ("int", np.nan),
        ("float", 0),
    ],
)
def test_equal(dtype, fill_value):
    """Two dtypes built from the same arguments compare equal both ways."""
    left = SparseDtype(dtype, fill_value)
    right = SparseDtype(dtype, fill_value)

    assert left == right
    assert right == left
|
||||
|
||||
|
||||
def test_nans_equal():
    """NaN fill values compare equal whether built by Python or NumPy."""
    python_nan = SparseDtype(float, float("nan"))
    numpy_nan = SparseDtype(float, np.nan)

    assert python_nan == numpy_nan
    assert numpy_nan == python_nan
|
||||
|
||||
|
||||
# Pairs of dtypes that must NOT compare equal.  Some of them are built with a
# fill_value that triggers a FutureWarning, so that warning is silenced only
# while the list is constructed.
msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", msg, category=FutureWarning)

    _float64 = SparseDtype("float64")
    tups = [
        (_float64, SparseDtype("float32")),
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        (SparseDtype("float64"), np.dtype("float64")),
    ]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("a, b", tups)
def test_not_equal(a, b):
    """Each pair in the module-level ``tups`` list compares unequal."""
    assert a != b
|
||||
|
||||
|
||||
def test_construct_from_string_raises():
    """An unparseable string is rejected with a TypeError."""
    expected_message = "Cannot construct a 'SparseDtype' from 'not a dtype'"
    with pytest.raises(TypeError, match=expected_message):
        SparseDtype.construct_from_string("not a dtype")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (SparseDtype(int), True),
        (SparseDtype(float), True),
        (SparseDtype(bool), True),
        (SparseDtype(object), False),
        (SparseDtype(str), False),
    ],
)
def test_is_numeric(dtype, expected):
    """``_is_numeric`` is exactly True or False, matching the expectation."""
    flag = dtype._is_numeric
    assert flag is expected
|
||||
|
||||
|
||||
def test_str_uses_object():
    """A ``str`` subtype is stored as NumPy object dtype."""
    assert SparseDtype(str).subtype == np.dtype("object")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
        ("Sparse[int]", SparseDtype(np.dtype("int"))),
        ("Sparse[str]", SparseDtype(np.dtype("str"))),
        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
    ],
)
def test_construct_from_string(string, expected):
    """Each dtype string parses to the expected SparseDtype."""
    parsed = SparseDtype.construct_from_string(string)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected",
    [
        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
        (SparseDtype(int, 0), SparseDtype(int, 0), True),
        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
    ],
)
def test_hash_equal(a, b, expected):
    """Equality and hash equality agree with the expected flag."""
    dtypes_equal = a == b
    hashes_equal = hash(a) == hash(b)

    assert dtypes_equal is expected
    assert hashes_equal is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[int]", "int"),
        ("Sparse[int, 0]", "int"),
        ("Sparse[int64]", "int64"),
        ("Sparse[int64, 0]", "int64"),
        ("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
    ],
)
def test_parse_subtype(string, expected):
    """``_parse_subtype`` returns the subtype name as its first element."""
    parsed = SparseDtype._parse_subtype(string)
    # The second element of the returned pair is not checked by this test.
    subtype, _ = parsed
    assert subtype == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string",
    ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"],
)
def test_construct_from_string_fill_value_raises(string):
    """These strings, each carrying a fill value, are rejected with a TypeError."""
    expected_message = "fill_value in the string is not"
    with pytest.raises(TypeError, match=expected_message):
        SparseDtype.construct_from_string(string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype, expected",
    [
        (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
        (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
        (SparseDtype(int, 1), str, SparseDtype(object, "1")),
        (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
    ],
)
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` returns the expected dtype for each conversion."""
    converted = original.update_dtype(dtype)
    assert converted == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype, expected_error_msg",
    [
        (
            SparseDtype(float, np.nan),
            int,
            re.escape("Cannot convert non-finite values (NA or inf) to integer"),
        ),
        (
            SparseDtype(str, "abc"),
            int,
            r"invalid literal for int\(\) with base 10: ('abc'|np\.str_\('abc'\))",
        ),
    ],
)
def test_update_dtype_raises(original, dtype, expected_error_msg):
    """``update_dtype`` raises ValueError with the expected message."""
    raises_value_error = pytest.raises(ValueError, match=expected_error_msg)
    with raises_value_error:
        original.update_dtype(dtype)
|
||||
|
||||
|
||||
def test_repr():
    """``str()`` of a SparseDtype shows the subtype and the fill value's repr."""
    # GH-34352
    integer_dtype = SparseDtype("int64", fill_value=0)
    assert str(integer_dtype) == "Sparse[int64, 0]"

    object_dtype = SparseDtype(object, fill_value="0")
    assert str(object_dtype) == "Sparse[object, '0']"
|
||||
|
||||
|
||||
def test_sparse_dtype_subtype_must_be_numpy_dtype():
    """Passing "category" as the subtype raises a TypeError."""
    # GH#53160
    with pytest.raises(TypeError, match="SparseDtype subtype must be a numpy dtype"):
        SparseDtype("category", fill_value="c")
|
@ -0,0 +1,302 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
@pytest.fixture
def arr_data():
    """Ten-element float array with NaN at positions 0, 1, 5 and 8."""
    nan = np.nan
    return np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
|
||||
|
||||
|
||||
@pytest.fixture
def arr(arr_data):
    """SparseArray built from the ``arr_data`` fixture with default arguments."""
    sparse = SparseArray(arr_data)
    return sparse
|
||||
|
||||
|
||||
class TestGetitem:
    """Tests for scalar, slice and boolean-mask ``__getitem__`` on SparseArray."""

    def test_getitem(self, arr):
        """Iteration and negative indexing agree with the dense array."""
        reference = arr.to_dense()
        for position, element in enumerate(arr):
            tm.assert_almost_equal(element, reference[position])
            tm.assert_almost_equal(arr[-position], reference[-position])

    def test_getitem_arraylike_mask(self, arr):
        """A plain list of booleans selects the True positions."""
        arr = SparseArray([0, 1, 2])
        selected = arr[[True, False, True]]
        tm.assert_sp_array_equal(selected, SparseArray([0, 2]))

    @pytest.mark.parametrize(
        "slc",
        [
            np.s_[:],
            np.s_[1:10],
            np.s_[1:100],
            np.s_[10:1],
            np.s_[:-3],
            np.s_[-5:-4],
            np.s_[:-12],
            np.s_[-12:],
            np.s_[2:],
            np.s_[2::3],
            np.s_[::2],
            np.s_[::-1],
            np.s_[::-2],
            np.s_[1:6:2],
            np.s_[:-6:-2],
        ],
    )
    @pytest.mark.parametrize(
        "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
    )
    def test_getslice(self, slc, as_dense):
        """Slicing the sparse array matches sparsifying the sliced dense data."""
        as_dense = np.array(as_dense)
        sliced = SparseArray(as_dense)[slc]
        tm.assert_sp_array_equal(sliced, SparseArray(as_dense[slc]))

    def test_getslice_tuple(self):
        """A one-element tuple of slices works; a two-element one raises."""
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
        key = (slice(4, None),)

        sparse = SparseArray(dense)
        tm.assert_sp_array_equal(sparse[key], SparseArray(dense[4:]))

        sparse = SparseArray(dense, fill_value=0)
        tm.assert_sp_array_equal(sparse[key], SparseArray(dense[4:], fill_value=0))

        msg = "too many indices for array"
        with pytest.raises(IndexError, match=msg):
            sparse[4:, :]

        with pytest.raises(IndexError, match=msg):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        """An all-False mask keeps the original dtype."""
        sparse = SparseArray([0, 1, 2])
        nothing_selected = sparse[[False, False, False]]
        assert nothing_selected.dtype == sparse.dtype

    def test_getitem_bool_sparse_array(self, arr):
        """A boolean SparseArray can be used as a mask."""
        # GH 23122
        mask = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        tm.assert_sp_array_equal(arr[mask], SparseArray([np.nan, 2, np.nan, 5, 6]))

        mask = ~mask
        tm.assert_sp_array_equal(arr[mask], SparseArray([np.nan, 1, 3, 4, np.nan]))

        mask = SparseArray(
            [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan
        )
        tm.assert_sp_array_equal(arr[mask], SparseArray([np.nan, 3, 5]))

    def test_getitem_bool_sparse_array_as_comparison(self):
        """The result of a comparison can be used directly as a mask."""
        # GH 45110
        sparse = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
        above_two = sparse[sparse > 2]
        tm.assert_sp_array_equal(above_two, SparseArray([3.0, 4.0], fill_value=np.nan))

    def test_get_item(self, arr):
        """Scalar lookups return stored and fill values; bad positions raise."""
        zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

        assert np.isnan(arr[1])
        for position, expected in ((2, 1), (7, 5)):
            assert arr[position] == expected

        for position, expected in ((0, 0), (2, 1), (7, 5)):
            assert zarr[position] == expected

        errmsg = "must be an integer between -10 and 10"
        for out_of_range in (11, -11):
            with pytest.raises(IndexError, match=errmsg):
                arr[out_of_range]

        assert arr[-1] == arr[len(arr) - 1]
|
||||
|
||||
|
||||
class TestSetitem:
    """Tests for item assignment on SparseArray."""

    def test_set_item(self, arr_data):
        """Both scalar and slice assignment raise a TypeError."""
        sparse = SparseArray(arr_data).copy()
        expected_message = "assignment via setitem"

        with pytest.raises(TypeError, match=expected_message):
            sparse[5] = 3

        with pytest.raises(TypeError, match=expected_message):
            sparse[1:5] = 2
|
||||
|
||||
|
||||
class TestTake:
    """Tests for ``SparseArray.take`` with and without ``allow_fill``."""

    def test_take_scalar_raises(self, arr):
        """A scalar ``indices`` argument is rejected."""
        msg = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=msg):
            arr.take(2)

    def test_take(self, arr_data, arr):
        """Positive positions match ``np.take`` on the dense data."""
        for positions in ([2, 3], [0, 1, 2]):
            expected = SparseArray(np.take(arr_data, positions))
            tm.assert_sp_array_equal(arr.take(positions), expected)

    def test_take_all_empty(self):
        """Taking every position with allow_fill returns an equal array."""
        sparse = pd.array([0, 0], dtype=SparseDtype("int64"))
        taken = sparse.take([0, 1], allow_fill=True, fill_value=np.nan)
        tm.assert_sp_array_equal(sparse, taken)

    def test_take_different_fill_value(self):
        """The fill_value passed to take does not replace the dtype's own."""
        # Take with a different fill value shouldn't overwrite the original
        sparse = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0))
        taken = sparse.take([0, -1], allow_fill=True, fill_value=np.nan)
        expected = pd.array([0, np.nan], dtype=sparse.dtype)
        tm.assert_sp_array_equal(expected, taken)

    def test_take_fill_value(self):
        """take on a fill_value=0 array matches ``np.take`` on the dense data."""
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)

        for positions in ([0], [1, 3, 4]):
            expected = SparseArray(np.take(data, positions), fill_value=0)
            tm.assert_sp_array_equal(sparse.take(positions), expected)

    def test_take_negative(self, arr_data, arr):
        """Negative positions count from the end, as in ``np.take``."""
        for positions in ([-1], [-4, -3, -2]):
            expected = SparseArray(np.take(arr_data, positions))
            tm.assert_sp_array_equal(arr.take(positions), expected)

    def test_bad_take(self, arr):
        """A position past the end raises IndexError."""
        with pytest.raises(IndexError, match="bounds"):
            arr.take([11])

    def test_take_filling(self):
        """With allow_fill=True, -1 is a fill marker rather than the last item."""
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
        positions = np.array([1, 0, -1])

        taken = sparse.take(positions)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

        # TODO: actionable?
        # XXX: test change: fill_value=True -> allow_fill=True
        taken = sparse.take(positions, allow_fill=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, np.nan]))

        # allow_fill=False
        taken = sparse.take(positions, allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

        msg = "Invalid value in 'indices'"
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)

        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), allow_fill=True)

    def test_take_filling_fill_value(self):
        """Same checks as ``test_take_filling`` on an array with fill_value=0."""
        # same tests as GH#12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
        positions = np.array([1, 0, -1])

        taken = sparse.take(positions)
        tm.assert_sp_array_equal(taken, SparseArray([0, np.nan, 4], fill_value=0))

        # fill_value
        taken = sparse.take(positions, allow_fill=True)
        # TODO: actionable?
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(taken, expected)

        # allow_fill=False
        taken = sparse.take(positions, allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([0, np.nan, 4], fill_value=0))

        msg = "Invalid value in 'indices'."
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), fill_value=True)

    @pytest.mark.parametrize("kind", ["block", "integer"])
    def test_take_filling_all_nan(self, kind):
        """take on an all-NaN array returns all NaN for either index kind."""
        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind)
        positions = np.array([1, 0, -1])

        taken = sparse.take(positions)
        expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
        tm.assert_sp_array_equal(taken, expected)

        taken = sparse.take(positions, fill_value=True)
        expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
        tm.assert_sp_array_equal(taken, expected)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), fill_value=True)
|
||||
|
||||
|
||||
class TestWhere:
    """Tests for ``SparseArray._where`` and ``Series.where`` on sparse data."""

    def test_where_retain_fill_value(self):
        """The original fill_value survives a ``where`` replacement."""
        # GH#45691 don't lose fill_value on _where
        sparse = SparseArray([np.nan, 1.0], fill_value=0)
        replace_here = np.array([True, False])
        expected = SparseArray([1, 1.0], fill_value=0)

        from_array = sparse._where(~replace_here, 1)
        tm.assert_sp_array_equal(from_array, expected)

        from_series = pd.Series(sparse).where(~replace_here, 1)
        tm.assert_series_equal(from_series, pd.Series(expected))
|
@ -0,0 +1,551 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.sparse as splib
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import (
|
||||
BlockIndex,
|
||||
IntIndex,
|
||||
make_sparse_index,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def test_length():
    """Length passed to the index constructors by the tests using this fixture."""
    length = 20
    return length
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        # Each entry is [xloc, xlen, yloc, ylen, eloc, elen]: block starts and
        # lengths of two indexes, followed by those of the expected result.
        pytest.param(
            [[0, 7, 15], [3, 5, 5], [2, 9, 14], [2, 3, 5], [2, 9, 15], [1, 3, 4]],
            id="plain_case",
        ),
        pytest.param(
            [[0, 5], [4, 4], [1], [4], [1], [3]],
            id="delete_blocks",
        ),
        pytest.param(
            [[0], [10], [0, 5], [3, 7], [0, 5], [3, 5]],
            id="split_blocks",
        ),
        pytest.param(
            [[10], [5], [0, 12], [5, 3], [12], [3]],
            id="skip_block",
        ),
        pytest.param(
            [[0, 10], [4, 6], [5, 17], [4, 2], [], []],
            id="no_intersect",
        ),
        pytest.param(
            [[0], [5], [], [], [], []],
            id="one_empty",
        ),
    ],
)
def cases(request):
    """Yield one six-list scenario per parametrized run."""
    return request.param
|
||||
|
||||
|
||||
class TestSparseIndexUnion:
    """Tests for ``make_union`` on BlockIndex and IntIndex."""

    @pytest.mark.parametrize(
        "xloc, xlen, yloc, ylen, eloc, elen",
        [
            [[0], [5], [5], [4], [0], [9]],
            [[0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]],
            [[1], [5], [3], [5], [1], [7]],
            [[2, 10], [4, 4], [4], [8], [2], [12]],
            [[0, 5], [3, 5], [0], [7], [0], [10]],
            [[2, 10], [4, 4], [4, 13], [8, 4], [2], [15]],
            [[2], [15], [4, 9, 14], [3, 2, 2], [2], [15]],
            [[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]],
        ],
    )
    def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen, test_length):
        """Union of two block indexes, checked in block and integer form."""
        # Half-open [start, stop) spans of x, y and the expected result r:
        # Case 1  x: [0,5)          y: [5,9)               r: [0,9)
        # Case 2  x: [0,5) [10,15)  y: [2,7) [17,19)       r: [0,7) [10,15) [17,19)
        # Case 3  x: [1,6)          y: [3,8)               r: [1,8)
        # Case 4  x: [2,6) [10,14)  y: [4,12)              r: [2,14)
        # Case 5  x: [0,3) [5,10)   y: [0,7)               r: [0,10)
        # Case 6  x: [2,6) [10,14)  y: [4,12) [13,17)      r: [2,17)
        # Case 7  x: [2,17)         y: [4,7) [9,11) [14,16) r: [2,17)
        # Case 8  x: [0,3) [10,13)  y: [5,7) [15,17)       r: four separate blocks
        x_blocks = BlockIndex(test_length, xloc, xlen)
        y_blocks = BlockIndex(test_length, yloc, ylen)

        block_union = x_blocks.make_union(y_blocks)
        assert isinstance(block_union, BlockIndex)
        tm.assert_numpy_array_equal(block_union.blocs, np.array(eloc, dtype=np.int32))
        tm.assert_numpy_array_equal(
            block_union.blengths, np.array(elen, dtype=np.int32)
        )

        # The same union computed on integer indexes must give the same positions.
        int_union = x_blocks.to_int_index().make_union(y_blocks.to_int_index())
        assert isinstance(int_union, IntIndex)
        tm.assert_numpy_array_equal(
            int_union.indices, block_union.to_int_index().indices
        )

    def test_int_index_make_union(self):
        """IntIndex unions for several inputs, plus a length-mismatch error."""
        scenarios = [
            ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
            ([], [0, 2], [0, 2]),
            ([], [], []),
            ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
        ]
        for left, right, combined in scenarios:
            a = IntIndex(5, np.array(left, dtype=np.int32))
            b = IntIndex(5, np.array(right, dtype=np.int32))
            res = a.make_union(b)
            exp = IntIndex(5, np.array(combined, np.int32))
            assert res.equals(exp)

        a = IntIndex(5, np.array([0, 1], dtype=np.int32))
        b = IntIndex(4, np.array([0, 1], dtype=np.int32))

        msg = "Indices must reference same underlying length"
        with pytest.raises(ValueError, match=msg):
            a.make_union(b)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect:
    """Tests for ``intersect`` on BlockIndex and IntIndex."""

    @td.skip_if_windows
    def test_intersect(self, cases, test_length):
        """Intersections match the expected blocks; length mismatches raise."""
        xloc, xlen, yloc, ylen, eloc, elen = cases
        x_blocks = BlockIndex(test_length, xloc, xlen)
        y_blocks = BlockIndex(test_length, yloc, ylen)
        expected = BlockIndex(test_length, eloc, elen)
        # Same blocks as y but over a different total length.
        longer_index = BlockIndex(test_length + 1, yloc, ylen)

        assert x_blocks.intersect(y_blocks).equals(expected)

        int_result = x_blocks.to_int_index().intersect(y_blocks.to_int_index())
        assert int_result.equals(expected.to_int_index())

        msg = "Indices must reference same underlying length"
        with pytest.raises(Exception, match=msg):
            x_blocks.intersect(longer_index)
        with pytest.raises(Exception, match=msg):
            x_blocks.to_int_index().intersect(longer_index.to_int_index())

    def test_intersect_empty(self):
        """Intersecting with an empty index gives the empty index, either order."""
        empty = IntIndex(4, np.array([], dtype=np.int32))
        populated = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert empty.intersect(populated).equals(empty)
        assert populated.intersect(empty).equals(empty)

        empty = empty.to_block_index()
        populated = populated.to_block_index()
        assert empty.intersect(populated).equals(empty)
        assert populated.intersect(empty).equals(empty)

    @pytest.mark.parametrize(
        "case",
        [
            # Argument 2 to "IntIndex" has incompatible type "ndarray[Any,
            # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]"
            IntIndex(5, np.array([1, 2], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(0, np.array([], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(5, np.array([], dtype=np.int32)),  # type: ignore[arg-type]
        ],
    )
    def test_intersect_identical(self, case):
        """An index intersected with itself equals itself, in both index forms."""
        assert case.intersect(case).equals(case)

        as_blocks = case.to_block_index()
        assert as_blocks.intersect(as_blocks).equals(as_blocks)
|
||||
|
||||
|
||||
class TestSparseIndexCommon:
    """Tests for ``make_sparse_index`` and position lookups on both index kinds."""

    def test_int_internal(self):
        """kind="integer" yields an IntIndex holding exactly the given positions."""
        for positions, npoints in (([2, 3], 2), ([], 0), ([0, 1, 2, 3], 4)):
            idx = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="integer"
            )
            assert isinstance(idx, IntIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(
                idx.indices, np.array(positions, dtype=np.int32)
            )

    def test_block_internal(self):
        """kind="block" yields a BlockIndex with the expected starts and lengths."""
        scenarios = [
            # (positions, npoints, block starts, block lengths)
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]
        for positions, npoints, starts, lengths in scenarios:
            idx = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="block"
            )
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(idx.blocs, np.array(starts, dtype=np.int32))
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(lengths, dtype=np.int32)
            )

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_lookup(self, kind):
        """``lookup`` returns the storage slot of a position, or -1."""
        scenarios = [
            # (stored positions, expected lookup results for positions -1..4)
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for positions, answers in scenarios:
            idx = make_sparse_index(4, np.array(positions, dtype=np.int32), kind=kind)
            for position, expected in zip(range(-1, 5), answers):
                assert idx.lookup(position) == expected

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_lookup_array(self, kind):
        """``lookup_array`` is the element-wise counterpart of ``lookup``."""

        def int32(values):
            return np.array(values, dtype=np.int32)

        scenarios = [
            # (stored positions, [(queried positions, expected slots), ...])
            ([2, 3], [([-1, 0, 2], [-1, -1, 0]), ([4, 2, 1, 3], [-1, 0, -1, 1])]),
            ([], [([-1, 0, 2, 4], [-1, -1, -1, -1])]),
            (
                [0, 1, 2, 3],
                [([-1, 0, 2], [-1, 0, 2]), ([4, 2, 1, 3], [-1, 2, 1, 3])],
            ),
            (
                [0, 2, 3],
                [([2, 1, 3, 0], [1, -1, 2, 0]), ([1, 4, 2, 5], [-1, -1, 1, -1])],
            ),
        ]
        for positions, queries in scenarios:
            idx = make_sparse_index(4, int32(positions), kind=kind)
            for queried, slots in queries:
                res = idx.lookup_array(int32(queried))
                tm.assert_numpy_array_equal(res, int32(slots))

    @pytest.mark.parametrize(
        "idx, expected",
        [
            [0, -1],
            [5, 0],
            [7, 2],
            [8, -1],
            [9, -1],
            [10, -1],
            [11, -1],
            [12, 3],
            [17, 8],
            [18, -1],
        ],
    )
    def test_lookup_basics(self, idx, expected):
        """Block and integer forms of one index give the same lookup result."""
        block_form = BlockIndex(20, [5, 12], [3, 6])
        assert block_form.lookup(idx) == expected

        integer_form = block_form.to_int_index()
        assert integer_form.lookup(idx) == expected
|
||||
|
||||
|
||||
class TestBlockIndex:
    """Tests that target ``BlockIndex`` specifically."""

    def test_block_internal(self):
        """``make_sparse_index(kind="block")`` exposes the expected blocs/blengths."""
        # (stored indices, expected block starts, expected block lengths)
        layouts = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for stored, starts, lengths in layouts:
            idx = make_sparse_index(
                4, np.array(stored, dtype=np.int32), kind="block"
            )
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == len(stored)
            tm.assert_numpy_array_equal(
                idx.blocs, np.array(starts, dtype=np.int32)
            )
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(lengths, dtype=np.int32)
            )

    @pytest.mark.parametrize("i", [5, 10, 100, 101])
    def test_make_block_boundary(self, i):
        """Every-other-position input yields one length-1 block per stored point."""
        idx = make_sparse_index(
            i, np.arange(0, i, 2, dtype=np.int32), kind="block"
        )

        expected_starts = np.arange(0, i, 2, dtype=np.int32)
        expected_lengths = np.ones(len(expected_starts), dtype=np.int32)
        tm.assert_numpy_array_equal(idx.blocs, expected_starts)
        tm.assert_numpy_array_equal(idx.blengths, expected_lengths)

    def test_equals(self):
        """An index equals itself and differs from one with another block length."""
        index = BlockIndex(10, [0, 4], [2, 5])
        different = BlockIndex(10, [0, 4], [2, 6])

        assert index.equals(index)
        assert not index.equals(different)

    def test_check_integrity(self):
        """Constructor accepts empty layouts and rejects overflowing/overlapping blocks."""
        no_locs = []
        no_lengths = []

        # 0-length OK
        BlockIndex(0, no_locs, no_lengths)

        # also OK even though empty
        BlockIndex(1, no_locs, no_lengths)

        with pytest.raises(ValueError, match="Block 0 extends beyond end"):
            BlockIndex(10, [5], [10])

        with pytest.raises(ValueError, match="Block 0 overlaps"):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """Converting to an IntIndex enumerates every position covered by the blocks."""
        block = BlockIndex(20, [0, 10], [4, 6])
        dense = block.to_int_index()

        expected_positions = np.array(
            [0, 1, 2, 3, 10, 11, 12, 13, 14, 15], dtype=np.int32
        )
        tm.assert_numpy_array_equal(dense.indices, expected_positions)

    def test_to_block_index(self):
        """``to_block_index`` on a BlockIndex returns the very same object."""
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|
||||
|
||||
|
||||
class TestIntIndex:
    """Tests that target ``IntIndex`` specifically."""

    def test_check_integrity(self):
        """Constructor rejects every kind of malformed ``indices`` input."""
        # (expected message, length, indices). Each row is a distinct
        # failure case; the negative-index row used to be checked twice
        # verbatim, which added no coverage.
        invalid_inputs = [
            # More indices than specified in length.
            ("Too many indices", 1, [1, 2, 3]),
            # No index can be negative.
            ("No index can be less than zero", 5, [1, -2, 3]),
            # All indices must be less than the length.
            ("All indices must be less than the length", 5, [1, 2, 5]),
            ("All indices must be less than the length", 5, [1, 2, 6]),
            # Indices must be strictly ascending.
            ("Indices must be strictly increasing", 5, [1, 3, 2]),
            ("Indices must be strictly increasing", 5, [1, 3, 3]),
        ]

        for msg, length, indices in invalid_inputs:
            with pytest.raises(ValueError, match=msg):
                IntIndex(length=length, indices=indices)

    def test_int_internal(self):
        """``make_sparse_index(kind="integer")`` stores the given positions as-is."""
        idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))

        idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))

        idx = make_sparse_index(
            4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer"
        )
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))

    def test_equals(self):
        """An index equals itself and differs from one with fewer indices."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self, cases, test_length):
        """BlockIndex -> IntIndex -> BlockIndex must reproduce the original index."""
        xloc, xlen, yloc, ylen, _, _ = cases
        xindex = BlockIndex(test_length, xloc, xlen)
        yindex = BlockIndex(test_length, yloc, ylen)

        # see if survive the round trip
        xbindex = xindex.to_int_index().to_block_index()
        ybindex = yindex.to_int_index().to_block_index()
        assert isinstance(xbindex, BlockIndex)
        assert xbindex.equals(xindex)
        assert ybindex.equals(yindex)

    def test_to_int_index(self):
        """``to_int_index`` on an IntIndex returns the very same object."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators:
    """Compare the ``splib.sparse_<op>_float64`` routines with dense Series math."""

    @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
    def test_op(self, opname, cases, test_length):
        """Block- and int-indexed results must agree with each other and with Series."""
        xloc, xlen, yloc, ylen, _, _ = cases
        routine = getattr(splib, f"sparse_{opname}_float64")
        reference_op = getattr(operator, opname)

        x_block = BlockIndex(test_length, xloc, xlen)
        y_block = BlockIndex(test_length, yloc, ylen)
        x_int = x_block.to_int_index()
        y_int = y_block.to_int_index()

        x_vals = np.arange(x_block.npoints) * 10.0 + 1
        y_vals = np.arange(y_block.npoints) * 100.0 + 1
        x_fill, y_fill = 0, 2

        block_vals, block_index, block_fill = routine(
            x_vals, x_block, x_fill, y_vals, y_block, y_fill
        )
        int_vals, int_index, int_fill = routine(
            x_vals, x_int, x_fill, y_vals, y_int, y_fill
        )

        assert block_index.to_int_index().equals(int_index)
        tm.assert_numpy_array_equal(block_vals, int_vals)
        assert block_fill == int_fill

        # check versus Series...
        def densify(values, positions, fill):
            # Place the sparse values at their positions, fill everything else.
            placed = Series(values, positions.indices)
            return placed.reindex(np.arange(test_length)).fillna(fill)

        dense_x = densify(x_vals, x_int, x_fill)
        dense_y = densify(y_vals, y_int, y_fill)
        dense_result = reference_op(dense_x, dense_y).reindex(int_index.indices)

        tm.assert_numpy_array_equal(block_vals, dense_result.values)
        tm.assert_numpy_array_equal(int_vals, dense_result.values)
|
@ -0,0 +1,306 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NaT,
|
||||
SparseDtype,
|
||||
Timestamp,
|
||||
isna,
|
||||
)
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestReductions:
    """Reduction tests for SparseArray: all/any/sum/mean, as methods and via NumPy."""

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_all(self, data, pos, neg):
        # GH#17570
        # pytest passes parameter objects without copying them; work on a
        # private copy so the in-place edit below cannot leak into a re-run
        # of this same test item.
        data = list(data)

        out = SparseArray(data).all()
        assert out

        out = SparseArray(data, fill_value=pos).all()
        assert out

        data[1] = neg
        out = SparseArray(data).all()
        assert not out

        out = SparseArray(data, fill_value=pos).all()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_numpy_all(self, data, pos, neg):
        # GH#17570
        # Copy before mutating: the parametrized list is shared across runs.
        data = list(data)

        out = np.all(SparseArray(data))
        assert out

        out = np.all(SparseArray(data, fill_value=pos))
        assert out

        data[1] = neg
        out = np.all(SparseArray(data))
        assert not out

        out = np.all(SparseArray(data, fill_value=pos))
        assert not out

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.all(SparseArray(data), out=np.array([]))

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_any(self, data, pos, neg):
        # GH#17570
        # Copy before mutating: the parametrized list is shared across runs.
        data = list(data)

        out = SparseArray(data).any()
        assert out

        out = SparseArray(data, fill_value=pos).any()
        assert out

        data[1] = neg
        out = SparseArray(data).any()
        assert not out

        out = SparseArray(data, fill_value=pos).any()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_numpy_any(self, data, pos, neg):
        # GH#17570
        # Copy before mutating: the parametrized list is shared across runs.
        data = list(data)

        out = np.any(SparseArray(data))
        assert out

        out = np.any(SparseArray(data, fill_value=pos))
        assert out

        data[1] = neg
        out = np.any(SparseArray(data))
        assert not out

        out = np.any(SparseArray(data, fill_value=pos))
        assert not out

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.any(SparseArray(data), out=out)

    def test_sum(self):
        """``sum`` skips NaN regardless of the chosen fill value."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).sum()
        assert out == 45.0

        data[5] = np.nan
        out = SparseArray(data, fill_value=2).sum()
        assert out == 40.0

        out = SparseArray(data, fill_value=np.nan).sum()
        assert out == 40.0

    @pytest.mark.parametrize(
        "arr",
        [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
    )
    @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
    @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
    def test_sum_min_count(self, arr, fill_value, min_count, expected):
        # GH#25777
        sparray = SparseArray(arr, fill_value=fill_value)
        result = sparray.sum(min_count=min_count)
        if np.isnan(expected):
            assert np.isnan(result)
        else:
            assert result == expected

    def test_bool_sum_min_count(self):
        """Boolean sparse sum honours ``min_count`` (NA once it exceeds the length)."""
        spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        res = spar_bool.sum(min_count=1)
        assert res == 5
        res = spar_bool.sum(min_count=11)
        assert isna(res)

    def test_numpy_sum(self):
        """``np.sum`` dispatches to SparseArray and rejects ``dtype``/``out``."""
        data = np.arange(10).astype(float)
        out = np.sum(SparseArray(data))
        assert out == 45.0

        data[5] = np.nan
        out = np.sum(SparseArray(data, fill_value=2))
        assert out == 40.0

        out = np.sum(SparseArray(data, fill_value=np.nan))
        assert out == 40.0

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), out=out)

    def test_mean(self):
        """``mean`` averages over the non-NaN entries only."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).mean()
        assert out == 4.5

        data[5] = np.nan
        out = SparseArray(data).mean()
        assert out == 40.0 / 9

    def test_numpy_mean(self):
        """``np.mean`` dispatches to SparseArray and rejects ``dtype``/``out``."""
        data = np.arange(10).astype(float)
        out = np.mean(SparseArray(data))
        assert out == 4.5

        data[5] = np.nan
        out = np.mean(SparseArray(data))
        assert out == 40.0 / 9

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), out=out)
|
||||
|
||||
|
||||
class TestMinMax:
    """``min``/``max`` on SparseArray for NaN and non-NaN fill values."""

    @pytest.mark.parametrize(
        "raw_data,max_expected,min_expected",
        [
            (np.arange(5.0), [4], [0]),
            (-np.arange(5.0), [0], [-4]),
            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
            (np.array([np.nan] * 5), [np.nan], [np.nan]),
            (np.array([]), [np.nan], [np.nan]),
        ],
    )
    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
        """Default construction: check both the default and ``skipna=False`` results."""
        arr = SparseArray(raw_data)

        highest = arr.max()
        lowest = arr.min()
        assert highest in max_expected
        assert lowest in min_expected

        highest = arr.max(skipna=False)
        lowest = arr.min(skipna=False)
        if not np.isnan(raw_data).any():
            # Nothing missing: skipna=False must match the default result.
            assert highest in max_expected
            assert lowest in min_expected
        else:
            assert np.isnan(highest)
            assert np.isnan(lowest)

    @pytest.mark.parametrize(
        "fill_value,max_expected,min_expected",
        [
            (100, 100, 0),
            (-100, 1, -100),
        ],
    )
    def test_fill_value(self, fill_value, max_expected, min_expected):
        """The fill value itself takes part in the min/max computation."""
        values = np.array([fill_value, 0, 1])
        arr = SparseArray(values, dtype=SparseDtype("int", fill_value))

        assert arr.max() == max_expected
        assert arr.min() == min_expected

    def test_only_fill_value(self):
        """With no stored valid values, min/max return the fill value."""
        fv = 100
        arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
        assert len(arr._valid_sp_values) == 0

        assert arr.max() == fv
        assert arr.min() == fv
        assert arr.max(skipna=False) == fv
        assert arr.min(skipna=False) == fv

    @pytest.mark.parametrize("func", ["min", "max"])
    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (SparseDtype(np.float64, np.nan), np.nan),
            (SparseDtype(np.float64, 5.0), np.nan),
            (SparseDtype("datetime64[ns]", NaT), NaT),
            (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
        ],
    )
    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
        """Empty or all-NaN input reduces to the NA value of the dtype."""
        arr = SparseArray(data, dtype=dtype)
        reducer = getattr(arr, func)
        result = reducer()
        if expected is not NaT:
            assert np.isnan(result)
        else:
            # TODO: pin down whether we wrap datetime64("NaT")
            assert result is NaT or np.isnat(result)
|
||||
|
||||
|
||||
class TestArgmaxArgmin:
    """``argmax``/``argmin`` on SparseArray across fill values and NaN placement."""

    @pytest.mark.parametrize(
        "arr,argmax_expected,argmin_expected",
        [
            (SparseArray([1, 2, 0, 1, 2]), 1, 2),
            (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
            (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
            (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
            (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
            (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
        ],
    )
    def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
        """Positions of the extreme values match the expected indices."""
        position_of_max = arr.argmax()
        position_of_min = arr.argmin()
        assert position_of_max == argmax_expected
        assert position_of_min == argmin_expected

    @pytest.mark.parametrize(
        "arr,method",
        [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")],
    )
    def test_empty_array(self, arr, method):
        """Both reductions raise ValueError on an empty array."""
        msg = f"attempt to get {method} of an empty sequence"
        # ``method`` is only ever "argmax" or "argmin" (see parametrization).
        reducer = getattr(arr, method)
        with pytest.raises(ValueError, match=msg):
            reducer()
|
@ -0,0 +1,79 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import SparseArray
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """Unary +/- on a SparseArray equals applying the op to values and fill value."""
    dense = np.array([0, 1, np.nan, 2])
    sparse = SparseArray(dense, fill_value=fill_value)

    actual = op(sparse)
    wanted = SparseArray(op(dense), fill_value=op(fill_value))

    tm.assert_sp_array_equal(actual, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~`` flips values and fill value for the array, a Series and a DataFrame."""
    dense = np.array([True, False, False, True])
    sparse = SparseArray(dense, fill_value=fill_value)

    # Array level.
    inverted_array = SparseArray(~dense, fill_value=not fill_value)
    tm.assert_sp_array_equal(~sparse, inverted_array)

    # Series level.
    inverted_series = pd.Series(inverted_array)
    tm.assert_series_equal(~pd.Series(sparse), inverted_series)

    # DataFrame level.
    inverted_frame = pd.DataFrame({"A": inverted_series})
    tm.assert_frame_equal(~pd.DataFrame({"A": sparse}), inverted_frame)
|
||||
|
||||
|
||||
class TestUnaryMethods:
    """Unary ``-``, ``abs`` and ``~`` on SparseArray with explicit dtypes."""

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_neg_operator(self):
        """Negation applies to the stored values and to the fill value."""
        # (input values, input fill, expected values, expected fill)
        scenarios = [
            ([-1, -2, np.nan, 3], np.nan, [1, 2, np.nan, -3], np.nan),
            ([-1, -2, 1, 3], -1, [1, 2, -1, -3], 1),
        ]
        for values, fill, neg_values, neg_fill in scenarios:
            arr = SparseArray(values, fill_value=fill, dtype=np.int8)
            res = -arr
            exp = SparseArray(neg_values, fill_value=neg_fill, dtype=np.int8)
            tm.assert_sp_array_equal(exp, res)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_abs_operator(self):
        """``abs`` applies to the stored values and to the fill value."""
        # (input values, input fill, expected values, expected fill)
        scenarios = [
            ([-1, -2, np.nan, 3], np.nan, [1, 2, np.nan, 3], np.nan),
            ([-1, -2, 1, 3], -1, [1, 2, 1, 3], 1),
        ]
        for values, fill, abs_values, abs_fill in scenarios:
            arr = SparseArray(values, fill_value=fill, dtype=np.int8)
            res = abs(arr)
            exp = SparseArray(abs_values, fill_value=abs_fill, dtype=np.int8)
            tm.assert_sp_array_equal(exp, res)

    def test_invert_operator(self):
        """``~`` works on boolean and on int32 sparse arrays."""
        flags = [False, True, False, True]
        bool_arr = SparseArray(flags, fill_value=False, dtype=np.bool_)
        bool_exp = SparseArray(np.invert(flags), fill_value=True, dtype=np.bool_)
        bool_res = ~bool_arr
        tm.assert_sp_array_equal(bool_exp, bool_res)

        int_arr = SparseArray([0, 1, 0, 2, 3, 0], fill_value=0, dtype=np.int32)
        int_res = ~int_arr
        int_exp = SparseArray(
            [-1, -2, -1, -3, -4, -1], fill_value=-1, dtype=np.int32
        )
        tm.assert_sp_array_equal(int_exp, int_res)
|
Reference in New Issue
Block a user