@@ -0,0 +1,131 @@
"""
Base test suite for extension arrays.

These tests are intended for third-party libraries to subclass to validate
that their extension arrays and dtypes satisfy the interface. Moving or
renaming the tests should not be done lightly.

Libraries are expected to implement a few pytest fixtures to provide data
for the tests. The fixtures may be located in either

* The same module as your test class.
* A ``conftest.py`` in the same directory as your test class.

The full list of fixtures may be found in the ``conftest.py`` next to this
file.

.. code-block:: python

    import pytest
    from pandas.tests.extension.base import BaseDtypeTests


    @pytest.fixture
    def dtype():
        return MyDtype()


    class TestMyDtype(BaseDtypeTests):
        pass


Your class ``TestMyDtype`` will inherit all the tests defined on
``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype``
wherever the test requires it. You're free to implement additional tests.

"""
from pandas.tests.extension.base.accumulate import BaseAccumulateTests
from pandas.tests.extension.base.casting import BaseCastingTests
from pandas.tests.extension.base.constructors import BaseConstructorsTests
from pandas.tests.extension.base.dim2 import (  # noqa: F401
    Dim2CompatTests,
    NDArrayBacked2DTests,
)
from pandas.tests.extension.base.dtype import BaseDtypeTests
from pandas.tests.extension.base.getitem import BaseGetitemTests
from pandas.tests.extension.base.groupby import BaseGroupbyTests
from pandas.tests.extension.base.index import BaseIndexTests
from pandas.tests.extension.base.interface import BaseInterfaceTests
from pandas.tests.extension.base.io import BaseParsingTests
from pandas.tests.extension.base.methods import BaseMethodsTests
from pandas.tests.extension.base.missing import BaseMissingTests
from pandas.tests.extension.base.ops import (  # noqa: F401
    BaseArithmeticOpsTests,
    BaseComparisonOpsTests,
    BaseOpsUtil,
    BaseUnaryOpsTests,
)
from pandas.tests.extension.base.printing import BasePrintingTests
from pandas.tests.extension.base.reduce import BaseReduceTests
from pandas.tests.extension.base.reshaping import BaseReshapingTests
from pandas.tests.extension.base.setitem import BaseSetitemTests


# One test class that you can inherit as an alternative to inheriting all the
# test classes above.
# Note 1) this excludes Dim2CompatTests and NDArrayBacked2DTests.
# Note 2) this uses BaseReduceTests and _not_ BaseBooleanReduceTests,
# BaseNoReduceTests, or BaseNumericReduceTests
class ExtensionTests(
    BaseAccumulateTests,
    BaseCastingTests,
    BaseConstructorsTests,
    BaseDtypeTests,
    BaseGetitemTests,
    BaseGroupbyTests,
    BaseIndexTests,
    BaseInterfaceTests,
    BaseParsingTests,
    BaseMethodsTests,
    BaseMissingTests,
    BaseArithmeticOpsTests,
    BaseComparisonOpsTests,
    BaseUnaryOpsTests,
    BasePrintingTests,
    BaseReduceTests,
    BaseReshapingTests,
    BaseSetitemTests,
    Dim2CompatTests,
):
    pass
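

# A minimal usage sketch (with a hypothetical test class name): inheriting
# ExtensionTests picks up every test group above in a single subclass; the
# pytest fixtures (``data``, ``dtype``, ...) described in the module
# docstring are still required.
#
#     class TestMyExtensionArray(ExtensionTests):
#         pass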


def __getattr__(name: str):
    import warnings

    if name == "BaseNoReduceTests":
        warnings.warn(
            "BaseNoReduceTests is deprecated and will be removed in a "
            "future version. Use BaseReduceTests and override "
            "`_supports_reduction` instead.",
            FutureWarning,
        )
        from pandas.tests.extension.base.reduce import BaseNoReduceTests

        return BaseNoReduceTests

    elif name == "BaseNumericReduceTests":
        warnings.warn(
            "BaseNumericReduceTests is deprecated and will be removed in a "
            "future version. Use BaseReduceTests and override "
            "`_supports_reduction` instead.",
            FutureWarning,
        )
        from pandas.tests.extension.base.reduce import BaseNumericReduceTests

        return BaseNumericReduceTests

    elif name == "BaseBooleanReduceTests":
        warnings.warn(
            "BaseBooleanReduceTests is deprecated and will be removed in a "
            "future version. Use BaseReduceTests and override "
            "`_supports_reduction` instead.",
            FutureWarning,
        )
        from pandas.tests.extension.base.reduce import BaseBooleanReduceTests

        return BaseBooleanReduceTests

    raise AttributeError(
        f"module 'pandas.tests.extension.base' has no attribute '{name}'"
    )
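

# Accessing one of the deprecated names goes through the module-level
# ``__getattr__`` above and emits a FutureWarning; a sketch of what a
# caller sees:
#
#     import pandas.tests.extension.base as base
#     base.BaseNoReduceTests  # FutureWarning: BaseNoReduceTests is deprecated ...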
@@ -0,0 +1,39 @@
import pytest

import pandas as pd
import pandas._testing as tm


class BaseAccumulateTests:
    """
    Accumulation-specific tests. Generally these only
    make sense for numeric/boolean operations.
    """

    def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
        # Do we expect this accumulation to be supported for this dtype?
        # We default to assuming "no"; subclass authors should override here.
        return False
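
    # A sketch of a subclass override (hypothetical: an array that supports
    # only cumulative sums); authors opt tests in per operation here:
    #
    #     def _supports_accumulation(self, ser, op_name):
    #         return op_name == "cumsum"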

    def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
        try:
            alt = ser.astype("float64")
        except TypeError:
            # e.g. Period can't be cast to float64
            alt = ser.astype(object)

        result = getattr(ser, op_name)(skipna=skipna)
        expected = getattr(alt, op_name)(skipna=skipna)
        tm.assert_series_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_accumulate_series(self, data, all_numeric_accumulations, skipna):
        op_name = all_numeric_accumulations
        ser = pd.Series(data)

        if self._supports_accumulation(ser, op_name):
            self.check_accumulate(ser, op_name, skipna)
        else:
            with pytest.raises((NotImplementedError, TypeError)):
                # TODO: require TypeError for things that will _never_ work?
                getattr(ser, op_name)(skipna=skipna)
@@ -0,0 +1,2 @@
class BaseExtensionTests:
    pass
@@ -0,0 +1,87 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
from pandas.core.internals.blocks import NumpyBlock


class BaseCastingTests:
    """Casting to and from ExtensionDtypes"""

    def test_astype_object_series(self, all_data):
        ser = pd.Series(all_data, name="A")
        result = ser.astype(object)
        assert result.dtype == np.dtype(object)
        if hasattr(result._mgr, "blocks"):
            blk = result._mgr.blocks[0]
            assert isinstance(blk, NumpyBlock)
            assert blk.is_object
        assert isinstance(result._mgr.array, np.ndarray)
        assert result._mgr.array.dtype == np.dtype(object)

    def test_astype_object_frame(self, all_data):
        df = pd.DataFrame({"A": all_data})

        result = df.astype(object)
        if hasattr(result._mgr, "blocks"):
            blk = result._mgr.blocks[0]
            assert isinstance(blk, NumpyBlock), type(blk)
            assert blk.is_object
        assert isinstance(result._mgr.arrays[0], np.ndarray)
        assert result._mgr.arrays[0].dtype == np.dtype(object)

        # check that we can compare the dtypes
        comp = result.dtypes == df.dtypes
        assert not comp.any()

    def test_tolist(self, data):
        result = pd.Series(data).tolist()
        expected = list(data)
        assert result == expected

    def test_astype_str(self, data):
        result = pd.Series(data[:5]).astype(str)
        expected = pd.Series([str(x) for x in data[:5]], dtype=str)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "nullable_string_dtype",
        [
            "string[python]",
            pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
        ],
    )
    def test_astype_string(self, data, nullable_string_dtype):
        # GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
        result = pd.Series(data[:5]).astype(nullable_string_dtype)
        expected = pd.Series(
            [str(x) if not isinstance(x, bytes) else x.decode() for x in data[:5]],
            dtype=nullable_string_dtype,
        )
        tm.assert_series_equal(result, expected)

    def test_to_numpy(self, data):
        expected = np.asarray(data)

        result = data.to_numpy()
        tm.assert_equal(result, expected)

        result = pd.Series(data).to_numpy()
        tm.assert_equal(result, expected)

    def test_astype_empty_dataframe(self, dtype):
        # https://github.com/pandas-dev/pandas/issues/33113
        df = pd.DataFrame()
        result = df.astype(dtype)
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize("copy", [True, False])
    def test_astype_own_type(self, data, copy):
        # ensure that astype returns the original object for equal dtype and copy=False
        # https://github.com/pandas-dev/pandas/issues/28488
        result = data.astype(data.dtype, copy=copy)
        assert (result is data) is (not copy)
        tm.assert_extension_array_equal(result, data)
@@ -0,0 +1,142 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.api.extensions import ExtensionArray
from pandas.core.internals.blocks import EABackedBlock


class BaseConstructorsTests:
    def test_from_sequence_from_cls(self, data):
        result = type(data)._from_sequence(data, dtype=data.dtype)
        tm.assert_extension_array_equal(result, data)

        data = data[:0]
        result = type(data)._from_sequence(data, dtype=data.dtype)
        tm.assert_extension_array_equal(result, data)

    def test_array_from_scalars(self, data):
        scalars = [data[0], data[1], data[2]]
        result = data._from_sequence(scalars, dtype=data.dtype)
        assert isinstance(result, type(data))

    def test_series_constructor(self, data):
        result = pd.Series(data, copy=False)
        assert result.dtype == data.dtype
        assert len(result) == len(data)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert result._mgr.array is data

        # Series[EA] is unboxed / boxed correctly
        result2 = pd.Series(result)
        assert result2.dtype == data.dtype
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result2._mgr.blocks[0], EABackedBlock)

    def test_series_constructor_no_data_with_index(self, dtype, na_value):
        result = pd.Series(index=[1, 2, 3], dtype=dtype)
        expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        # GH 33559 - empty index
        result = pd.Series(index=[], dtype=dtype)
        expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
        result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype)
        expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_series_constructor_scalar_with_index(self, data, dtype):
        scalar = data[0]
        result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype)
        expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        result = pd.Series(scalar, index=["foo"], dtype=dtype)
        expected = pd.Series([scalar], index=["foo"], dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        if from_series:
            data = pd.Series(data)
        result = pd.DataFrame({"A": data})
        assert result.dtypes["A"] == data.dtype
        assert result.shape == (len(data), 1)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert isinstance(result._mgr.arrays[0], ExtensionArray)

    def test_dataframe_from_series(self, data):
        result = pd.DataFrame(pd.Series(data))
        assert result.dtypes[0] == data.dtype
        assert result.shape == (len(data), 1)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert isinstance(result._mgr.arrays[0], ExtensionArray)

    def test_series_given_mismatched_index_raises(self, data):
        msg = r"Length of values \(3\) does not match length of index \(5\)"
        with pytest.raises(ValueError, match=msg):
            pd.Series(data[:3], index=[0, 1, 2, 3, 4])

    def test_from_dtype(self, data):
        # construct from our dtype & string dtype
        dtype = data.dtype

        expected = pd.Series(data)
        result = pd.Series(list(data), dtype=dtype)
        tm.assert_series_equal(result, expected)

        result = pd.Series(list(data), dtype=str(dtype))
        tm.assert_series_equal(result, expected)

        # gh-30280

        expected = pd.DataFrame(data).astype(dtype)
        result = pd.DataFrame(list(data), dtype=dtype)
        tm.assert_frame_equal(result, expected)

        result = pd.DataFrame(list(data), dtype=str(dtype))
        tm.assert_frame_equal(result, expected)

    def test_pandas_array(self, data):
        # pd.array(extension_array) should be idempotent...
        result = pd.array(data)
        tm.assert_extension_array_equal(result, data)

    def test_pandas_array_dtype(self, data):
        # ... but specifying dtype will override idempotency
        result = pd.array(data, dtype=np.dtype(object))
        expected = pd.arrays.NumpyExtensionArray(np.asarray(data, dtype=object))
        tm.assert_equal(result, expected)

    def test_construct_empty_dataframe(self, dtype):
        # GH 33623
        result = pd.DataFrame(columns=["a"], dtype=dtype)
        expected = pd.DataFrame(
            {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
        )
        tm.assert_frame_equal(result, expected)

    def test_empty(self, dtype):
        cls = dtype.construct_array_type()
        result = cls._empty((4,), dtype=dtype)
        assert isinstance(result, cls)
        assert result.dtype == dtype
        assert result.shape == (4,)

        # GH#19600 method on ExtensionDtype
        result2 = dtype.empty((4,))
        assert isinstance(result2, cls)
        assert result2.dtype == dtype
        assert result2.shape == (4,)

        result2 = dtype.empty(4)
        assert isinstance(result2, cls)
        assert result2.dtype == dtype
        assert result2.shape == (4,)
@@ -0,0 +1,345 @@
"""
Tests for 2D compatibility.
"""
import numpy as np
import pytest

from pandas._libs.missing import is_matching_na

from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_integer_dtype,
)

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE


class Dim2CompatTests:
    # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays,
    # i.e. not for pyarrow-backed EAs.

    @pytest.fixture(autouse=True)
    def skip_if_doesnt_support_2d(self, dtype, request):
        if not dtype._supports_2d:
            node = request.node
            # In cases where we are mixed in to ExtensionTests, we only want to
            # skip tests that are defined in Dim2CompatTests
            test_func = node._obj
            if test_func.__qualname__.startswith("Dim2CompatTests"):
                # TODO: is there a less hacky way of checking this?
                pytest.skip(f"{dtype} does not support 2D.")

    def test_transpose(self, data):
        arr2d = data.repeat(2).reshape(-1, 2)
        shape = arr2d.shape
        assert shape[0] != shape[-1]  # otherwise the rest of the test is useless

        assert arr2d.T.shape == shape[::-1]

    def test_frame_from_2d_array(self, data):
        arr2d = data.repeat(2).reshape(-1, 2)

        df = pd.DataFrame(arr2d)
        expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
        tm.assert_frame_equal(df, expected)

    def test_swapaxes(self, data):
        arr2d = data.repeat(2).reshape(-1, 2)

        result = arr2d.swapaxes(0, 1)
        expected = arr2d.T
        tm.assert_extension_array_equal(result, expected)

    def test_delete_2d(self, data):
        arr2d = data.repeat(3).reshape(-1, 3)

        # axis = 0
        result = arr2d.delete(1, axis=0)
        expected = data.delete(1).repeat(3).reshape(-1, 3)
        tm.assert_extension_array_equal(result, expected)

        # axis = 1
        result = arr2d.delete(1, axis=1)
        expected = data.repeat(2).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

    def test_take_2d(self, data):
        arr2d = data.reshape(-1, 1)

        result = arr2d.take([0, 0, -1], axis=0)

        expected = data.take([0, 0, -1]).reshape(-1, 1)
        tm.assert_extension_array_equal(result, expected)

    def test_repr_2d(self, data):
        # this could fail in a corner case where an element contained the name
        res = repr(data.reshape(1, -1))
        assert res.count(f"<{type(data).__name__}") == 1

        res = repr(data.reshape(-1, 1))
        assert res.count(f"<{type(data).__name__}") == 1

    def test_reshape(self, data):
        arr2d = data.reshape(-1, 1)
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        arr2d = data.reshape((-1, 1))
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        with pytest.raises(ValueError):
            data.reshape((data.size, 2))
        with pytest.raises(ValueError):
            data.reshape(data.size, 2)

    def test_getitem_2d(self, data):
        arr2d = data.reshape(1, -1)

        result = arr2d[0]
        tm.assert_extension_array_equal(result, data)

        with pytest.raises(IndexError):
            arr2d[1]

        with pytest.raises(IndexError):
            arr2d[-2]

        result = arr2d[:]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, :]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, 0]
        expected = data[[0]]
        tm.assert_extension_array_equal(result, expected)

        # dimension-expanding getitem on 1D
        result = data[:, np.newaxis]
        tm.assert_extension_array_equal(result, arr2d.T)

    def test_iter_2d(self, data):
        arr2d = data.reshape(1, -1)

        objs = list(iter(arr2d))
        assert len(objs) == arr2d.shape[0]

        for obj in objs:
            assert isinstance(obj, type(data))
            assert obj.dtype == data.dtype
            assert obj.ndim == 1
            assert len(obj) == arr2d.shape[1]

    def test_tolist_2d(self, data):
        arr2d = data.reshape(1, -1)

        result = arr2d.tolist()
        expected = [data.tolist()]

        assert isinstance(result, list)
        assert all(isinstance(x, list) for x in result)

        assert result == expected

    def test_concat_2d(self, data):
        left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
        right = left.copy()

        # axis=0
        result = left._concat_same_type([left, right], axis=0)
        expected = data._concat_same_type([data] * 4).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

        # axis=1
        result = left._concat_same_type([left, right], axis=1)
        assert result.shape == (len(data), 4)
        tm.assert_extension_array_equal(result[:, :2], left)
        tm.assert_extension_array_equal(result[:, 2:], right)

        # axis > 1 -> invalid
        msg = "axis 2 is out of bounds for array of dimension 2"
        with pytest.raises(ValueError, match=msg):
            left._concat_same_type([left, right], axis=2)

    @pytest.mark.parametrize("method", ["backfill", "pad"])
    def test_fillna_2d_method(self, data_missing, method):
        # pad_or_backfill is always along axis=0
        arr = data_missing.repeat(2).reshape(2, 2)
        assert arr[0].isna().all()
        assert not arr[1].isna().any()

        result = arr._pad_or_backfill(method=method, limit=None)

        expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        tm.assert_extension_array_equal(result, expected)

        # Reverse so that backfill is not a no-op.
        arr2 = arr[::-1]
        assert not arr2[0].isna().any()
        assert arr2[1].isna().all()

        result2 = arr2._pad_or_backfill(method=method, limit=None)

        expected2 = (
            data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        )
        tm.assert_extension_array_equal(result2, expected2)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis_none(self, data, method):
        arr2d = data.reshape(1, -1)

        err_expected = None
        err_result = None
        try:
            expected = getattr(data, method)()
        except Exception as err:
            # if the 1D reduction is invalid, the 2D reduction should be as well
            err_expected = err
            try:
                result = getattr(arr2d, method)(axis=None)
            except Exception as err2:
                err_result = err2

        else:
            result = getattr(arr2d, method)(axis=None)

        if err_result is not None or err_expected is not None:
            assert type(err_result) == type(err_expected)
            return

        assert is_matching_na(result, expected) or result == expected

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    @pytest.mark.parametrize("min_count", [0, 1])
    def test_reductions_2d_axis0(self, data, method, min_count):
        if min_count == 1 and method not in ["sum", "prod"]:
            pytest.skip(f"min_count not relevant for {method}")

        arr2d = data.reshape(1, -1)

        kwargs = {}
        if method in ["std", "var"]:
            # pass ddof=0 so we get all-zero std instead of all-NA std
            kwargs["ddof"] = 0
        elif method in ["prod", "sum"]:
            kwargs["min_count"] = min_count

        try:
            result = getattr(arr2d, method)(axis=0, **kwargs)
        except Exception as err:
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        def get_reduction_result_dtype(dtype):
            # windows and 32bit builds will in some cases have int32/uint32
            # where other builds will have int64/uint64.
            if dtype.itemsize == 8:
                return dtype
            elif dtype.kind in "ib":
                return NUMPY_INT_TO_DTYPE[np.dtype(int)]
            else:
                # i.e. dtype.kind == "u"
                return NUMPY_INT_TO_DTYPE[np.dtype("uint")]

        if method in ["sum", "prod"]:
            # std and var are not dtype-preserving
            expected = data
            if data.dtype.kind in "iub":
                dtype = get_reduction_result_dtype(data.dtype)
                expected = data.astype(dtype)
                assert dtype == expected.dtype

            if min_count == 0:
                fill_value = 1 if method == "prod" else 0
                expected = expected.fillna(fill_value)

            tm.assert_extension_array_equal(result, expected)
        elif method == "median":
            # median is dtype-preserving
            expected = data
            tm.assert_extension_array_equal(result, expected)
        elif method in ["mean", "std", "var"]:
            if is_integer_dtype(data) or is_bool_dtype(data):
                data = data.astype("Float64")
            if method == "mean":
                tm.assert_extension_array_equal(result, data)
            else:
                tm.assert_extension_array_equal(result, data - data)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis1(self, data, method):
        arr2d = data.reshape(1, -1)

        try:
            result = getattr(arr2d, method)(axis=1)
        except Exception as err:
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        # not necessarily type/dtype-preserving, so weaker assertions
        assert result.shape == (1,)
        expected_scalar = getattr(data, method)()
        res = result[0]
        assert is_matching_na(res, expected_scalar) or res == expected_scalar


class NDArrayBacked2DTests(Dim2CompatTests):
    # More specific tests for NDArrayBackedExtensionArray subclasses

    def test_copy_order(self, data):
        # We should be matching numpy semantics for the "order" keyword in 'copy'
        arr2d = data.repeat(2).reshape(-1, 2)
        assert arr2d._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d.copy()
        assert res._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d[::2, ::2].copy()
        assert res._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d.copy("F")
        assert not res._ndarray.flags["C_CONTIGUOUS"]
        assert res._ndarray.flags["F_CONTIGUOUS"]

        res = arr2d.copy("K")
        assert res._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d.T.copy("K")
        assert not res._ndarray.flags["C_CONTIGUOUS"]
        assert res._ndarray.flags["F_CONTIGUOUS"]

        # order not accepted by numpy
        msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
        with pytest.raises(ValueError, match=msg):
            arr2d.copy("Q")

        # neither contiguity
        arr_nc = arr2d[::2]
        assert not arr_nc._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc._ndarray.flags["F_CONTIGUOUS"]

        assert arr_nc.copy()._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc.copy()._ndarray.flags["F_CONTIGUOUS"]

        assert arr_nc.copy("C")._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc.copy("C")._ndarray.flags["F_CONTIGUOUS"]

        assert not arr_nc.copy("F")._ndarray.flags["C_CONTIGUOUS"]
        assert arr_nc.copy("F")._ndarray.flags["F_CONTIGUOUS"]

        assert arr_nc.copy("K")._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc.copy("K")._ndarray.flags["F_CONTIGUOUS"]
@@ -0,0 +1,123 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.api.types import (
    infer_dtype,
    is_object_dtype,
    is_string_dtype,
)


class BaseDtypeTests:
    """Base class for ExtensionDtype classes"""

    def test_name(self, dtype):
        assert isinstance(dtype.name, str)

    def test_kind(self, dtype):
        valid = set("biufcmMOSUV")
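        # numpy-style kind codes: b=boolean, i=signed integer, u=unsigned
        # integer, f=floating, c=complex, m=timedelta, M=datetime, O=object,
        # S=bytes, U=str, V=void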
        assert dtype.kind in valid

    def test_is_dtype_from_name(self, dtype):
        result = type(dtype).is_dtype(dtype.name)
        assert result is True

    def test_is_dtype_unboxes_dtype(self, data, dtype):
        assert dtype.is_dtype(data) is True

    def test_is_dtype_from_self(self, dtype):
        result = type(dtype).is_dtype(dtype)
        assert result is True

    def test_is_dtype_other_input(self, dtype):
        assert dtype.is_dtype([1, 2, 3]) is False

    def test_is_not_string_type(self, dtype):
        assert not is_string_dtype(dtype)

    def test_is_not_object_type(self, dtype):
        assert not is_object_dtype(dtype)

    def test_eq_with_str(self, dtype):
        assert dtype == dtype.name
        assert dtype != dtype.name + "-suffix"

    def test_eq_with_numpy_object(self, dtype):
        assert dtype != np.dtype("object")

    def test_eq_with_self(self, dtype):
        assert dtype == dtype
        assert dtype != object()

    def test_array_type(self, data, dtype):
        assert dtype.construct_array_type() is type(data)

    def test_check_dtype(self, data):
        dtype = data.dtype

        # check equivalency for using .dtypes
        df = pd.DataFrame(
            {
                "A": pd.Series(data, dtype=dtype),
                "B": data,
                "C": pd.Series(["foo"] * len(data), dtype=object),
                "D": 1,
            }
        )
        result = df.dtypes == str(dtype)
        assert np.dtype("int64") != "Int64"

        expected = pd.Series([True, True, False, False], index=list("ABCD"))

        tm.assert_series_equal(result, expected)

        expected = pd.Series([True, True, False, False], index=list("ABCD"))
        result = df.dtypes.apply(str) == str(dtype)
        tm.assert_series_equal(result, expected)

    def test_hashable(self, dtype):
        hash(dtype)  # no error

    def test_str(self, dtype):
        assert str(dtype) == dtype.name

    def test_eq(self, dtype):
        assert dtype == dtype.name
        assert dtype != "another_type"

    def test_construct_from_string_own_name(self, dtype):
        result = dtype.construct_from_string(dtype.name)
        assert type(result) is type(dtype)

        # check OK as classmethod
        result = type(dtype).construct_from_string(dtype.name)
        assert type(result) is type(dtype)

    def test_construct_from_string_another_type_raises(self, dtype):
        msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'"
        with pytest.raises(TypeError, match=msg):
            type(dtype).construct_from_string("another_type")

    def test_construct_from_string_wrong_type_raises(self, dtype):
        with pytest.raises(
            TypeError,
            match="'construct_from_string' expects a string, got <class 'int'>",
        ):
            type(dtype).construct_from_string(0)

    def test_get_common_dtype(self, dtype):
        # in practice we will not typically call this with a 1-length list
        # (we shortcut to just use that dtype as the common dtype), but
        # still testing as good practice to have this working (and it is the
        # only case we can test in general)
        assert dtype._get_common_dtype([dtype]) == dtype

    @pytest.mark.parametrize("skipna", [True, False])
    def test_infer_dtype(self, data, data_missing, skipna):
        # only testing that this works without raising an error
        res = infer_dtype(data, skipna=skipna)
        assert isinstance(res, str)
        res = infer_dtype(data_missing, skipna=skipna)
        assert isinstance(res, str)
@@ -0,0 +1,469 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class BaseGetitemTests:
    """Tests for ExtensionArray.__getitem__."""

    def test_iloc_series(self, data):
        ser = pd.Series(data)
        result = ser.iloc[:4]
        expected = pd.Series(data[:4])
        tm.assert_series_equal(result, expected)

        result = ser.iloc[[0, 1, 2, 3]]
        tm.assert_series_equal(result, expected)

    def test_iloc_frame(self, data):
        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.iloc[:4, [0]]
        tm.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.iloc[[0, 1, 2, 3], [0]]
        tm.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name="A")

        # slice -> series
        result = df.iloc[:4, 0]
        tm.assert_series_equal(result, expected)

        # sequence -> series
        result = df.iloc[[0, 1, 2, 3], 0]
        tm.assert_series_equal(result, expected)

        # GH#32959 slice columns with step
        result = df.iloc[:, ::2]
        tm.assert_frame_equal(result, df[["A"]])
        result = df[["B", "A"]].iloc[:, ::2]
        tm.assert_frame_equal(result, df[["B"]])

    def test_iloc_frame_single_block(self, data):
        # GH#32959 null slice along index, slice along columns with single-block
        df = pd.DataFrame({"A": data})

        result = df.iloc[:, :]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, :1]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, :2]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, ::2]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, 1:2]
        tm.assert_frame_equal(result, df.iloc[:, :0])

        result = df.iloc[:, -1:]
        tm.assert_frame_equal(result, df)

    def test_loc_series(self, data):
        ser = pd.Series(data)
        result = ser.loc[:3]
        expected = pd.Series(data[:4])
        tm.assert_series_equal(result, expected)

        result = ser.loc[[0, 1, 2, 3]]
        tm.assert_series_equal(result, expected)

    def test_loc_frame(self, data):
        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.loc[:3, ["A"]]
        tm.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.loc[[0, 1, 2, 3], ["A"]]
        tm.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name="A")

        # slice -> series
        result = df.loc[:3, "A"]
        tm.assert_series_equal(result, expected)

        # sequence -> series
        result = df.loc[[0, 1, 2, 3], "A"]
        tm.assert_series_equal(result, expected)

    def test_loc_iloc_frame_single_dtype(self, data):
        # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly
        # return a scalar
        df = pd.DataFrame({"A": data})
        expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype)

        result = df.loc[2]
        tm.assert_series_equal(result, expected)

        expected = pd.Series(
            [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype
        )
        result = df.iloc[-1]
        tm.assert_series_equal(result, expected)

    def test_getitem_scalar(self, data):
        result = data[0]
        assert isinstance(result, data.dtype.type)

        result = pd.Series(data)[0]
        assert isinstance(result, data.dtype.type)

    def test_getitem_invalid(self, data):
        # TODO: box over scalar, [scalar], (scalar,)?

        msg = (
            r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis "
            r"\(`None`\) and integer or boolean arrays are valid indices"
        )
        with pytest.raises(IndexError, match=msg):
            data["foo"]
        with pytest.raises(IndexError, match=msg):
            data[2.5]

        ub = len(data)
        msg = "|".join(
            [
                "list index out of range",  # json
                "index out of bounds",  # pyarrow
                "Out of bounds access",  # Sparse
                f"loc must be an integer between -{ub} and {ub}",  # Sparse
                f"index {ub+1} is out of bounds for axis 0 with size {ub}",
                f"index -{ub+1} is out of bounds for axis 0 with size {ub}",
            ]
        )
        with pytest.raises(IndexError, match=msg):
            data[ub + 1]
        with pytest.raises(IndexError, match=msg):
            data[-ub - 1]

    def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
        result = data_missing[0]
        assert na_cmp(result, na_value)

    def test_getitem_empty(self, data):
        # Indexing with empty list
        result = data[[]]
        assert len(result) == 0
        assert isinstance(result, type(data))

        expected = data[np.array([], dtype="int64")]
        tm.assert_extension_array_equal(result, expected)

    def test_getitem_mask(self, data):
        # Empty mask, raw array
        mask = np.zeros(len(data), dtype=bool)
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        # Empty mask, in series
        mask = np.zeros(len(data), dtype=bool)
        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        # non-empty mask, raw array
        mask[0] = True
        result = data[mask]
        assert len(result) == 1
        assert isinstance(result, type(data))

        # non-empty mask, in series
        result = pd.Series(data)[mask]
        assert len(result) == 1
        assert result.dtype == data.dtype

    def test_getitem_mask_raises(self, data):
        mask = np.array([True, False])
        msg = f"Boolean index has wrong length: 2 instead of {len(data)}"
        with pytest.raises(IndexError, match=msg):
            data[mask]

        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match=msg):
            data[mask]

    def test_getitem_boolean_array_mask(self, data):
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        mask[:5] = True
        expected = data.take([0, 1, 2, 3, 4])
        result = data[mask]
        tm.assert_extension_array_equal(result, expected)

        expected = pd.Series(expected)
        result = pd.Series(data)[mask]
        tm.assert_series_equal(result, expected)

    def test_getitem_boolean_na_treated_as_false(self, data):
        # https://github.com/pandas-dev/pandas/issues/31503
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:2] = pd.NA
        mask[2:4] = True

        result = data[mask]
        expected = data[mask.fillna(False)]

        tm.assert_extension_array_equal(result, expected)

        s = pd.Series(data)

        result = s[mask]
        expected = s[mask.fillna(False)]

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_getitem_integer_array(self, data, idx):
        result = data[idx]
        assert len(result) == 3
        assert isinstance(result, type(data))
        expected = data.take([0, 1, 2])
        tm.assert_extension_array_equal(result, expected)

        expected = pd.Series(expected)
        result = pd.Series(data)[idx]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
        ids=["list", "integer-array"],
    )
    def test_getitem_integer_with_missing_raises(self, data, idx):
        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            data[idx]

    @pytest.mark.xfail(
        reason="Tries label-based and raises KeyError; "
        "in some cases raises when calling np.asarray"
    )
    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
        ids=["list", "integer-array"],
    )
    def test_getitem_series_integer_with_missing_raises(self, data, idx):
        msg = "Cannot index with an integer indexer containing NA values"
        # TODO: this raises KeyError about labels not found (it tries label-based)

        ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
        with pytest.raises(ValueError, match=msg):
            ser[idx]

    def test_getitem_slice(self, data):
        # getitem[slice] should return an array
        result = data[slice(0)]  # empty
        assert isinstance(result, type(data))

        result = data[slice(1)]  # scalar
        assert isinstance(result, type(data))

    def test_getitem_ellipsis_and_slice(self, data):
        # GH#40353 this is called from slice_block_rows
        result = data[..., :]
        tm.assert_extension_array_equal(result, data)

        result = data[:, ...]
        tm.assert_extension_array_equal(result, data)

        result = data[..., :3]
        tm.assert_extension_array_equal(result, data[:3])

        result = data[:3, ...]
        tm.assert_extension_array_equal(result, data[:3])

        result = data[..., ::2]
        tm.assert_extension_array_equal(result, data[::2])

        result = data[::2, ...]
        tm.assert_extension_array_equal(result, data[::2])

    def test_get(self, data):
        # GH 20882
        s = pd.Series(data, index=[2 * i for i in range(len(data))])
        assert s.get(4) == s.iloc[2]

        result = s.get([4, 6])
        expected = s.iloc[[2, 3]]
        tm.assert_series_equal(result, expected)

        result = s.get(slice(2))
        expected = s.iloc[[0, 1]]
        tm.assert_series_equal(result, expected)

        assert s.get(-1) is None
        assert s.get(s.index.max() + 1) is None

        s = pd.Series(data[:6], index=list("abcdef"))
        assert s.get("c") == s.iloc[2]

        result = s.get(slice("b", "d"))
        expected = s.iloc[[1, 2, 3]]
        tm.assert_series_equal(result, expected)

        result = s.get("Z")
        assert result is None

        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert s.get(4) == s.iloc[4]
            assert s.get(-1) == s.iloc[-1]
            assert s.get(len(s)) is None

        # GH 21257
        s = pd.Series(data)
        with tm.assert_produces_warning(None):
            # GH#45324 make sure we aren't giving a spurious FutureWarning
            s2 = s[::2]
        assert s2.get(1) is None

    def test_take_sequence(self, data):
        result = pd.Series(data)[[0, 1, 3]]
        assert result.iloc[0] == data[0]
        assert result.iloc[1] == data[1]
        assert result.iloc[2] == data[3]

    def test_take(self, data, na_value, na_cmp):
        result = data.take([0, -1])
        assert result.dtype == data.dtype
        assert result[0] == data[0]
        assert result[1] == data[-1]

        result = data.take([0, -1], allow_fill=True, fill_value=na_value)
        assert result[0] == data[0]
        assert na_cmp(result[1], na_value)

        with pytest.raises(IndexError, match="out of bounds"):
            data.take([len(data) + 1])

    def test_take_empty(self, data, na_value, na_cmp):
        empty = data[:0]

        result = empty.take([-1], allow_fill=True)
        assert na_cmp(result[0], na_value)

        msg = "cannot do a non-empty take from an empty axes|out of bounds"

        with pytest.raises(IndexError, match=msg):
            empty.take([-1])

        with pytest.raises(IndexError, match="cannot do a non-empty take"):
            empty.take([0, 1])

    def test_take_negative(self, data):
        # https://github.com/pandas-dev/pandas/issues/20640
        n = len(data)
        result = data.take([0, -n, n - 1, -1])
        expected = data.take([0, 0, n - 1, n - 1])
        tm.assert_extension_array_equal(result, expected)

    def test_take_non_na_fill_value(self, data_missing):
        fill_value = data_missing[1]  # valid
        na = data_missing[0]

        arr = data_missing._from_sequence(
            [na, fill_value, na], dtype=data_missing.dtype
        )
        result = arr.take([-1, 1], fill_value=fill_value, allow_fill=True)
        expected = arr.take([1, 1])
        tm.assert_extension_array_equal(result, expected)

    def test_take_pandas_style_negative_raises(self, data, na_value):
        with pytest.raises(ValueError, match=""):
            data.take([0, -2], fill_value=na_value, allow_fill=True)

    @pytest.mark.parametrize("allow_fill", [True, False])
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        arr = data[:3]

        with pytest.raises(IndexError, match="out of bounds|out-of-bounds"):
            arr.take(np.asarray([0, 3]), allow_fill=allow_fill)

    def test_take_series(self, data):
        s = pd.Series(data)
        result = s.take([0, -1])
        expected = pd.Series(
            data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
            index=[0, len(data) - 1],
        )
        tm.assert_series_equal(result, expected)

    def test_reindex(self, data, na_value):
        s = pd.Series(data)
        result = s.reindex([0, 1, 3])
        expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
        tm.assert_series_equal(result, expected)

        n = len(data)
        result = s.reindex([-1, 0, n])
        expected = pd.Series(
            data._from_sequence([na_value, data[0], na_value], dtype=s.dtype),
            index=[-1, 0, n],
        )
        tm.assert_series_equal(result, expected)

        result = s.reindex([n, n + 1])
        expected = pd.Series(
            data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1]
        )
        tm.assert_series_equal(result, expected)

    def test_reindex_non_na_fill_value(self, data_missing):
        valid = data_missing[1]
        na = data_missing[0]

        arr = data_missing._from_sequence([na, valid], dtype=data_missing.dtype)
        ser = pd.Series(arr)
        result = ser.reindex([0, 1, 2], fill_value=valid)
        expected = pd.Series(
            data_missing._from_sequence([na, valid, valid], dtype=data_missing.dtype)
        )

        tm.assert_series_equal(result, expected)

    def test_loc_len1(self, data):
        # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim
        df = pd.DataFrame({"A": data})
        res = df.loc[[0], "A"]
        assert res.ndim == 1
        assert res._mgr.arrays[0].ndim == 1
        if hasattr(res._mgr, "blocks"):
            assert res._mgr._block.ndim == 1

    def test_item(self, data):
        # https://github.com/pandas-dev/pandas/pull/30175
        s = pd.Series(data)
        result = s[:1].item()
        assert result == data[0]

        msg = "can only convert an array of size 1 to a Python scalar"
        with pytest.raises(ValueError, match=msg):
            s[:0].item()

        with pytest.raises(ValueError, match=msg):
            s.item()
@@ -0,0 +1,174 @@
import re

import pytest

from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_numeric_dtype,
    is_object_dtype,
    is_string_dtype,
)

import pandas as pd
import pandas._testing as tm


@pytest.mark.filterwarnings(
    "ignore:The default of observed=False is deprecated:FutureWarning"
)
class BaseGroupbyTests:
    """Groupby-specific tests."""

    def test_grouping_grouper(self, data_for_grouping):
        df = pd.DataFrame(
            {
                "A": pd.Series(
                    ["B", "B", None, None, "A", "A", "B", "C"], dtype=object
                ),
                "B": data_for_grouping,
            }
        )
        gr1 = df.groupby("A")._grouper.groupings[0]
        gr2 = df.groupby("B")._grouper.groupings[0]

        tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
        tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)

    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})

        is_bool = data_for_grouping.dtype._is_boolean
        if is_bool:
            # only 2 unique values, and the final entry has c==b
            # (see data_for_grouping docstring)
            df = df.iloc[:-1]

        result = df.groupby("B", as_index=as_index).A.mean()
        _, uniques = pd.factorize(data_for_grouping, sort=True)

        exp_vals = [3.0, 1.0, 4.0]
        if is_bool:
            exp_vals = exp_vals[:-1]
        if as_index:
            index = pd.Index(uniques, name="B")
            expected = pd.Series(exp_vals, index=index, name="A")
            tm.assert_series_equal(result, expected)
        else:
            expected = pd.DataFrame({"B": uniques, "A": exp_vals})
            tm.assert_frame_equal(result, expected)

    def test_groupby_agg_extension(self, data_for_grouping):
        # GH#38980 groupby agg on extension type fails for non-numeric types
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})

        expected = df.iloc[[0, 2, 4, 7]]
        expected = expected.set_index("A")

        result = df.groupby("A").agg({"B": "first"})
        tm.assert_frame_equal(result, expected)

        result = df.groupby("A").agg("first")
        tm.assert_frame_equal(result, expected)

        result = df.groupby("A").first()
        tm.assert_frame_equal(result, expected)

    def test_groupby_extension_no_sort(self, data_for_grouping):
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})

        is_bool = data_for_grouping.dtype._is_boolean
        if is_bool:
            # only 2 unique values, and the final entry has c==b
            # (see data_for_grouping docstring)
            df = df.iloc[:-1]

        result = df.groupby("B", sort=False).A.mean()
        _, index = pd.factorize(data_for_grouping, sort=False)

        index = pd.Index(index, name="B")
        exp_vals = [1.0, 3.0, 4.0]
        if is_bool:
            exp_vals = exp_vals[:-1]
        expected = pd.Series(exp_vals, index=index, name="A")
        tm.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        is_bool = data_for_grouping.dtype._is_boolean

        valid = data_for_grouping[~data_for_grouping.isna()]
        df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid})
        if is_bool:
            # only 2 unique values, and the final entry has c==b
            # (see data_for_grouping docstring)
            df = df.iloc[:-1]

        result = df.groupby("B").A.transform(len)
        expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")
        if is_bool:
            expected = expected[:-1]

        tm.assert_series_equal(result, expected)

    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        msg = "DataFrameGroupBy.apply operated on the grouping columns"
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            df.groupby("B", group_keys=False, observed=False).apply(groupby_apply_op)
        df.groupby("B", group_keys=False, observed=False).A.apply(groupby_apply_op)
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            df.groupby("A", group_keys=False, observed=False).apply(groupby_apply_op)
        df.groupby("A", group_keys=False, observed=False).B.apply(groupby_apply_op)

    def test_groupby_apply_identity(self, data_for_grouping):
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        result = df.groupby("A").B.apply(lambda x: x.array)
        expected = pd.Series(
            [
                df.B.iloc[[0, 1, 6]].array,
                df.B.iloc[[2, 3]].array,
                df.B.iloc[[4, 5]].array,
                df.B.iloc[[7]].array,
            ],
            index=pd.Index([1, 2, 3, 4], name="A"),
            name="B",
        )
        tm.assert_series_equal(result, expected)

    def test_in_numeric_groupby(self, data_for_grouping):
        df = pd.DataFrame(
            {
                "A": [1, 1, 2, 2, 3, 3, 1, 4],
                "B": data_for_grouping,
                "C": [1, 1, 1, 1, 1, 1, 1, 1],
            }
        )

        dtype = data_for_grouping.dtype
        if (
            is_numeric_dtype(dtype)
            or is_bool_dtype(dtype)
            or dtype.name == "decimal"
            or is_string_dtype(dtype)
            or is_object_dtype(dtype)
            or dtype.kind == "m"  # in particular duration[*][pyarrow]
        ):
            expected = pd.Index(["B", "C"])
            result = df.groupby("A").sum().columns
        else:
            expected = pd.Index(["C"])

            msg = "|".join(
                [
                    # period/datetime
                    "does not support sum operations",
                    # all others
                    re.escape(f"agg function failed [how->sum,dtype->{dtype}"),
                ]
            )
            with pytest.raises(TypeError, match=msg):
                df.groupby("A").sum()
            result = df.groupby("A").sum(numeric_only=True).columns
        tm.assert_index_equal(result, expected)
@@ -0,0 +1,19 @@
"""
Tests for Indexes backed by arbitrary ExtensionArrays.
"""
import pandas as pd


class BaseIndexTests:
    """Tests for Index object backed by an ExtensionArray"""

    def test_index_from_array(self, data):
        idx = pd.Index(data)
        assert data.dtype == idx.dtype

    def test_index_from_listlike_with_dtype(self, data):
        idx = pd.Index(data, dtype=data.dtype)
        assert idx.dtype == data.dtype

        idx = pd.Index(list(data), dtype=data.dtype)
        assert idx.dtype == data.dtype
@@ -0,0 +1,137 @@
import numpy as np
import pytest

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import is_extension_array_dtype
from pandas.core.dtypes.dtypes import ExtensionDtype

import pandas as pd
import pandas._testing as tm


class BaseInterfaceTests:
    """Tests that the basic interface is satisfied."""

    # ------------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------------

    def test_len(self, data):
        assert len(data) == 100

    def test_size(self, data):
        assert data.size == 100

    def test_ndim(self, data):
        assert data.ndim == 1

    def test_can_hold_na_valid(self, data):
        # GH-20761
        assert data._can_hold_na is True

    def test_contains(self, data, data_missing):
        # GH-37867
        # Tests for membership checks. Membership checks for nan-likes is tricky and
        # the settled on rule is: `nan_like in arr` is True if nan_like is
        # arr.dtype.na_value and arr.isna().any() is True. Else the check returns False.

        na_value = data.dtype.na_value
        # ensure data without missing values
        data = data[~data.isna()]

        # first elements are non-missing
        assert data[0] in data
        assert data_missing[0] in data_missing

        # check the presence of na_value
        assert na_value in data_missing
        assert na_value not in data

        # the data can never contain other nan-likes than na_value
        for na_value_obj in tm.NULL_OBJECTS:
            if na_value_obj is na_value or type(na_value_obj) == type(na_value):
                # type check for e.g. two instances of Decimal("NAN")
                continue
            assert na_value_obj not in data
            assert na_value_obj not in data_missing
|
||||
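
    # A hedged illustration of the membership rule above, using the built-in
    # nullable Int64 dtype (an assumption; any EA with an na_value behaves
    # alike):
    #
    #   >>> arr = pd.array([1, None], dtype="Int64")
    #   >>> pd.NA in arr            # na_value and arr.isna().any() -> True
    #   True
    #   >>> float("nan") in arr     # any other nan-like -> False
    #   False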
    def test_memory_usage(self, data):
        s = pd.Series(data)
        result = s.memory_usage(index=False)
        assert result == s.nbytes

    def test_array_interface(self, data):
        result = np.array(data)
        assert result[0] == data[0]

        result = np.array(data, dtype=object)
        expected = np.array(list(data), dtype=object)
        if expected.ndim > 1:
            # nested data, explicitly construct as 1D
            expected = construct_1d_object_array_from_listlike(list(data))
        tm.assert_numpy_array_equal(result, expected)

    def test_is_extension_array_dtype(self, data):
        assert is_extension_array_dtype(data)
        assert is_extension_array_dtype(data.dtype)
        assert is_extension_array_dtype(pd.Series(data))
        assert isinstance(data.dtype, ExtensionDtype)

    def test_no_values_attribute(self, data):
        # GH-20735: EA's with .values attribute give problems with internal
        # code, disallowing this for now until solved
        assert not hasattr(data, "values")
        assert not hasattr(data, "_values")

    def test_is_numeric_honored(self, data):
        result = pd.Series(data)
        if hasattr(result._mgr, "blocks"):
            assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric

    def test_isna_extension_array(self, data_missing):
        # If your `isna` returns an ExtensionArray, you must also implement
        # _reduce. At the *very* least, you must implement the ``any`` and
        # ``all`` reductions.
        na = data_missing.isna()
        if is_extension_array_dtype(na):
            assert na._reduce("any")
            assert na.any()

            assert not na._reduce("all")
            assert not na.all()

            assert na.dtype._is_boolean

    def test_copy(self, data):
        # GH#27083 removing deep keyword from EA.copy
        assert data[0] != data[1]
        result = data.copy()

        if data.dtype._is_immutable:
            pytest.skip(f"test_copy assumes mutability and {data.dtype} is immutable")

        data[1] = data[0]
        assert result[1] != result[0]

    def test_view(self, data):
        # view with no dtype should return a shallow copy, *not* the same
        # object
        assert data[1] != data[0]

        result = data.view()
        assert result is not data
        assert type(result) == type(data)

        if data.dtype._is_immutable:
            pytest.skip(f"test_view assumes mutability and {data.dtype} is immutable")

        result[1] = result[0]
        assert data[1] == data[0]

        # check specifically that the `dtype` kwarg is accepted
        data.view(dtype=None)

    def test_tolist(self, data):
        result = data.tolist()
        expected = list(data)
        assert isinstance(result, list)
        assert result == expected
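
``test_view`` above encodes the contract that ``view()`` returns a new wrapper sharing the underlying data. A hedged sketch with the nullable ``Int64`` array as an assumed stand-in:

.. code-block:: python

    import pandas as pd

    arr = pd.array([1, 2], dtype="Int64")
    alias = arr.view()
    assert alias is not arr  # a new object...
    alias[0] = 9
    assert arr[0] == 9       # ...that shares the underlying buffers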
@ -0,0 +1,39 @@
from io import StringIO

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import ExtensionArray


class BaseParsingTests:
    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        if isinstance(data.dtype, pd.CategoricalDtype):
            # in parsers.pyx _convert_with_dtype there is special-casing for
            # Categorical that pre-empts _from_sequence_of_strings
            pass
        elif isinstance(data.dtype, pd.core.dtypes.dtypes.NumpyEADtype):
            # These get unwrapped internally so are treated as numpy dtypes
            # in the parsers.pyx code
            pass
        elif (
            type(data)._from_sequence_of_strings.__func__
            is ExtensionArray._from_sequence_of_strings.__func__
        ):
            # i.e. the EA hasn't overridden _from_sequence_of_strings
            mark = pytest.mark.xfail(
                reason="_from_sequence_of_strings not implemented",
                raises=NotImplementedError,
            )
            request.node.add_marker(mark)

        df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
        csv_output = df.to_csv(index=False, na_rep=np.nan)
        result = pd.read_csv(
            StringIO(csv_output), dtype={"with_dtype": str(data.dtype)}, engine=engine
        )
        expected = df
        tm.assert_frame_equal(result, expected)
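
The round trip exercised above can be reproduced directly with a built-in extension dtype (a hedged sketch; ``Int64`` stands in for a third-party dtype):

.. code-block:: python

    from io import StringIO

    import pandas as pd

    ser = pd.Series([1, None, 3], dtype="Int64")
    csv = ser.to_frame("x").to_csv(index=False)
    # read_csv resolves the dtype string back to the extension dtype
    back = pd.read_csv(StringIO(csv), dtype={"x": "Int64"})["x"]
    assert back.dtype == ser.dtype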
@ -0,0 +1,720 @@
import inspect
import operator

import numpy as np
import pytest

from pandas._typing import Dtype

from pandas.core.dtypes.common import is_bool_dtype
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.missing import na_value_for_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core.sorting import nargsort


class BaseMethodsTests:
    """Various Series and DataFrame methods."""

    def test_hash_pandas_object(self, data):
        # _hash_pandas_object should return a uint64 ndarray of the same length
        # as the data
        from pandas.core.util.hashing import _default_hash_key

        res = data._hash_pandas_object(
            encoding="utf-8", hash_key=_default_hash_key, categorize=False
        )
        assert res.dtype == np.uint64
        assert res.shape == data.shape

    def test_value_counts_default_dropna(self, data):
        # make sure we have consistent default dropna kwarg
        if not hasattr(data, "value_counts"):
            pytest.skip(f"value_counts is not implemented for {type(data)}")
        sig = inspect.signature(data.value_counts)
        kwarg = sig.parameters["dropna"]
        assert kwarg.default is True

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        all_data = all_data[:10]
        if dropna:
            other = all_data[~all_data.isna()]
        else:
            other = all_data

        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()

        tm.assert_series_equal(result, expected)
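
    # A quick sanity check of the dropna contract above, sketched with the
    # nullable Int64 dtype (an assumption; the fixture dtype varies):
    #
    #   >>> ser = pd.Series(pd.array([1, 1, None], dtype="Int64"))
    #   >>> int(ser.value_counts(dropna=True).sum())
    #   2
    #   >>> int(ser.value_counts(dropna=False).sum())
    #   3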
    def test_value_counts_with_normalize(self, data):
        # GH 33172
        data = data[:10].unique()
        values = np.array(data[~data.isna()])
        ser = pd.Series(data, dtype=data.dtype)

        result = ser.value_counts(normalize=True).sort_index()

        if not isinstance(data, pd.Categorical):
            expected = pd.Series(
                [1 / len(values)] * len(values), index=result.index, name="proportion"
            )
        else:
            expected = pd.Series(0.0, index=result.index, name="proportion")
            expected[result > 0] = 1 / len(values)

        if getattr(data.dtype, "storage", "") == "pyarrow" or isinstance(
            data.dtype, pd.ArrowDtype
        ):
            # TODO: avoid special-casing
            expected = expected.astype("double[pyarrow]")
        elif getattr(data.dtype, "storage", "") == "pyarrow_numpy":
            # TODO: avoid special-casing
            expected = expected.astype("float64")
        elif na_value_for_dtype(data.dtype) is pd.NA:
            # TODO(GH#44692): avoid special-casing
            expected = expected.astype("Float64")

        tm.assert_series_equal(result, expected)

    def test_count(self, data_missing):
        df = pd.DataFrame({"A": data_missing})
        result = df.count(axis="columns")
        expected = pd.Series([0, 1])
        tm.assert_series_equal(result, expected)

    def test_series_count(self, data_missing):
        # GH#26835
        ser = pd.Series(data_missing)
        result = ser.count()
        expected = 1
        assert result == expected

    def test_apply_simple_series(self, data):
        result = pd.Series(data).apply(id)
        assert isinstance(result, pd.Series)

    @pytest.mark.parametrize("na_action", [None, "ignore"])
    def test_map(self, data_missing, na_action):
        result = data_missing.map(lambda x: x, na_action=na_action)
        expected = data_missing.to_numpy()
        tm.assert_numpy_array_equal(result, expected)

    def test_argsort(self, data_for_sorting):
        result = pd.Series(data_for_sorting).argsort()
        # argsort result gets passed to take, so should be np.intp
        expected = pd.Series(np.array([2, 0, 1], dtype=np.intp))
        tm.assert_series_equal(result, expected)

    def test_argsort_missing_array(self, data_missing_for_sorting):
        result = data_missing_for_sorting.argsort()
        # argsort result gets passed to take, so should be np.intp
        expected = np.array([2, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_argsort_missing(self, data_missing_for_sorting):
        msg = "The behavior of Series.argsort in the presence of NA values"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = pd.Series(data_missing_for_sorting).argsort()
        expected = pd.Series(np.array([1, -1, 0], dtype=np.intp))
        tm.assert_series_equal(result, expected)

    def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value):
        # GH 24382
        is_bool = data_for_sorting.dtype._is_boolean

        exp_argmax = 1
        exp_argmax_repeated = 3
        if is_bool:
            # See data_for_sorting docstring
            exp_argmax = 0
            exp_argmax_repeated = 1

        # data_for_sorting -> [B, C, A] with A < B < C
        assert data_for_sorting.argmax() == exp_argmax
        assert data_for_sorting.argmin() == 2

        # with repeated values -> first occurrence
        data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
        assert data.argmax() == exp_argmax_repeated
        assert data.argmin() == 0

        # with missing values
        # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
        assert data_missing_for_sorting.argmax() == 0
        assert data_missing_for_sorting.argmin() == 2

    @pytest.mark.parametrize("method", ["argmax", "argmin"])
    def test_argmin_argmax_empty_array(self, method, data):
        # GH 24382
        err_msg = "attempt to get"
        with pytest.raises(ValueError, match=err_msg):
            getattr(data[:0], method)()

    @pytest.mark.parametrize("method", ["argmax", "argmin"])
    def test_argmin_argmax_all_na(self, method, data, na_value):
        # all missing with skipna=True is the same as empty
        err_msg = "attempt to get"
        data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype)
        with pytest.raises(ValueError, match=err_msg):
            getattr(data_na, method)()

    @pytest.mark.parametrize(
        "op_name, skipna, expected",
        [
            ("idxmax", True, 0),
            ("idxmin", True, 2),
            ("argmax", True, 0),
            ("argmin", True, 2),
            ("idxmax", False, np.nan),
            ("idxmin", False, np.nan),
            ("argmax", False, -1),
            ("argmin", False, -1),
        ],
    )
    def test_argreduce_series(
        self, data_missing_for_sorting, op_name, skipna, expected
    ):
        # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
        warn = None
        msg = "The behavior of Series.argmax/argmin"
        if op_name.startswith("arg") and expected == -1:
            warn = FutureWarning
        if op_name.startswith("idx") and np.isnan(expected):
            warn = FutureWarning
            msg = f"The behavior of Series.{op_name}"
        ser = pd.Series(data_missing_for_sorting)
        with tm.assert_produces_warning(warn, match=msg):
            result = getattr(ser, op_name)(skipna=skipna)
        tm.assert_almost_equal(result, expected)

    def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting):
        # GH#38733
        data = data_missing_for_sorting

        with pytest.raises(NotImplementedError, match=""):
            data.argmin(skipna=False)

        with pytest.raises(NotImplementedError, match=""):
            data.argmax(skipna=False)

    @pytest.mark.parametrize(
        "na_position, expected",
        [
            ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
            ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
        ],
    )
    def test_nargsort(self, data_missing_for_sorting, na_position, expected):
        # GH 25439
        result = nargsort(data_missing_for_sorting, na_position=na_position)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
        ser = pd.Series(data_for_sorting)
        result = ser.sort_values(ascending=ascending, key=sort_by_key)
        expected = ser.iloc[[2, 0, 1]]
        if not ascending:
            # GH 35922. Expect stable sort
            if ser.nunique() == 2:
                expected = ser.iloc[[0, 1, 2]]
            else:
                expected = ser.iloc[[1, 0, 2]]

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_missing(
        self, data_missing_for_sorting, ascending, sort_by_key
    ):
        ser = pd.Series(data_missing_for_sorting)
        result = ser.sort_values(ascending=ascending, key=sort_by_key)
        if ascending:
            expected = ser.iloc[[2, 0, 1]]
        else:
            expected = ser.iloc[[0, 2, 1]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting})
        result = df.sort_values(["A", "B"])
        expected = pd.DataFrame(
            {"A": [1, 1, 2], "B": data_for_sorting.take([2, 0, 1])}, index=[2, 0, 1]
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("keep", ["first", "last", False])
    def test_duplicated(self, data, keep):
        arr = data.take([0, 1, 0, 1])
        result = arr.duplicated(keep=keep)
        if keep == "first":
            expected = np.array([False, False, True, True])
        elif keep == "last":
            expected = np.array([True, True, False, False])
        else:
            expected = np.array([True, True, True, True])
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
    @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
    def test_unique(self, data, box, method):
        duplicated = box(data._from_sequence([data[0], data[0]], dtype=data.dtype))

        result = method(duplicated)

        assert len(result) == 1
        assert isinstance(result, type(data))
        assert result[0] == duplicated[0]

    def test_factorize(self, data_for_grouping):
        codes, uniques = pd.factorize(data_for_grouping, use_na_sentinel=True)

        is_bool = data_for_grouping.dtype._is_boolean
        if is_bool:
            # only 2 unique values
            expected_codes = np.array([0, 0, -1, -1, 1, 1, 0, 0], dtype=np.intp)
            expected_uniques = data_for_grouping.take([0, 4])
        else:
            expected_codes = np.array([0, 0, -1, -1, 1, 1, 0, 2], dtype=np.intp)
            expected_uniques = data_for_grouping.take([0, 4, 7])

        tm.assert_numpy_array_equal(codes, expected_codes)
        tm.assert_extension_array_equal(uniques, expected_uniques)

    def test_factorize_equivalence(self, data_for_grouping):
        codes_1, uniques_1 = pd.factorize(data_for_grouping, use_na_sentinel=True)
        codes_2, uniques_2 = data_for_grouping.factorize(use_na_sentinel=True)

        tm.assert_numpy_array_equal(codes_1, codes_2)
        tm.assert_extension_array_equal(uniques_1, uniques_2)
        assert len(uniques_1) == len(pd.unique(uniques_1))
        assert uniques_1.dtype == data_for_grouping.dtype

    def test_factorize_empty(self, data):
        codes, uniques = pd.factorize(data[:0])
        expected_codes = np.array([], dtype=np.intp)
        expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype)

        tm.assert_numpy_array_equal(codes, expected_codes)
        tm.assert_extension_array_equal(uniques, expected_uniques)
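
    # The factorize contract above, sketched with the nullable Int64 dtype
    # (an assumed stand-in for the fixture):
    #
    #   >>> arr = pd.array([1, 1, None, 2], dtype="Int64")
    #   >>> codes, uniques = pd.factorize(arr, use_na_sentinel=True)
    #   >>> codes.tolist()          # missing values get the -1 sentinel
    #   [0, 0, -1, 1]
    #   >>> uniques.dtype == arr.dtype
    #   True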
    def test_fillna_copy_frame(self, data_missing):
        arr = data_missing.take([1, 1])
        df = pd.DataFrame({"A": arr})
        df_orig = df.copy()

        filled_val = df.iloc[0, 0]
        result = df.fillna(filled_val)

        result.iloc[0, 0] = filled_val

        tm.assert_frame_equal(df, df_orig)

    def test_fillna_copy_series(self, data_missing):
        arr = data_missing.take([1, 1])
        ser = pd.Series(arr, copy=False)
        ser_orig = ser.copy()

        filled_val = ser[0]
        result = ser.fillna(filled_val)
        result.iloc[0] = filled_val

        tm.assert_series_equal(ser, ser_orig)

    def test_fillna_length_mismatch(self, data_missing):
        msg = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg):
            data_missing.fillna(data_missing.take([1]))

    # Subclasses can override if we expect e.g. Sparse[bool], boolean, pyarrow[bool]
    _combine_le_expected_dtype: Dtype = NumpyEADtype("bool")

    def test_combine_le(self, data_repeated):
        # GH 20825
        # Test that combine works when doing a <= (le) comparison
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
        expected = pd.Series(
            pd.array(
                [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
                dtype=self._combine_le_expected_dtype,
            )
        )
        tm.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 <= x2)
        expected = pd.Series(
            pd.array(
                [a <= val for a in list(orig_data1)],
                dtype=self._combine_le_expected_dtype,
            )
        )
        tm.assert_series_equal(result, expected)

    def test_combine_add(self, data_repeated):
        # GH 20825
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)

        # Check if the operation is supported pointwise for our scalars. If not,
        # we will expect Series.combine to raise as well.
        try:
            with np.errstate(over="ignore"):
                expected = pd.Series(
                    orig_data1._from_sequence(
                        [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]
                    )
                )
        except TypeError:
            # If the operation is not supported pointwise for our scalars,
            # then Series.combine should also raise
            with pytest.raises(TypeError):
                s1.combine(s2, lambda x1, x2: x1 + x2)
            return

        result = s1.combine(s2, lambda x1, x2: x1 + x2)
        tm.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 + x2)
        expected = pd.Series(
            orig_data1._from_sequence([a + val for a in list(orig_data1)])
        )
        tm.assert_series_equal(result, expected)

    def test_combine_first(self, data):
        # https://github.com/pandas-dev/pandas/issues/24147
        a = pd.Series(data[:3])
        b = pd.Series(data[2:5], index=[2, 3, 4])
        result = a.combine_first(b)
        expected = pd.Series(data[:5])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("frame", [True, False])
    @pytest.mark.parametrize(
        "periods, indices",
        [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])],
    )
    def test_container_shift(self, data, frame, periods, indices):
        # https://github.com/pandas-dev/pandas/issues/22386
        subset = data[:5]
        data = pd.Series(subset, name="A")
        expected = pd.Series(subset.take(indices, allow_fill=True), name="A")

        if frame:
            result = data.to_frame(name="A").assign(B=1).shift(periods)
            expected = pd.concat(
                [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1
            )
            compare = tm.assert_frame_equal
        else:
            result = data.shift(periods)
            compare = tm.assert_series_equal

        compare(result, expected)

    def test_shift_0_periods(self, data):
        # GH#33856 shifting with periods=0 should return a copy, not same obj
        result = data.shift(0)
        assert data[0] != data[1]  # otherwise below is invalid
        data[0] = data[1]
        assert result[0] != result[1]  # i.e. not the same object/view

    @pytest.mark.parametrize("periods", [1, -2])
    def test_diff(self, data, periods):
        data = data[:5]
        if is_bool_dtype(data.dtype):
            op = operator.xor
        else:
            op = operator.sub
        try:
            # does this array implement ops?
            op(data, data)
        except Exception:
            pytest.skip(f"{type(data)} does not support diff")
        s = pd.Series(data)
        result = s.diff(periods)
        expected = pd.Series(op(data, data.shift(periods)))
        tm.assert_series_equal(result, expected)

        df = pd.DataFrame({"A": data, "B": [1.0] * 5})
        result = df.diff(periods)
        if periods == 1:
            b = [np.nan, 0, 0, 0, 0]
        else:
            b = [0, 0, 0, np.nan, np.nan]
        expected = pd.DataFrame({"A": expected, "B": b})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "periods, indices",
        [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]],
    )
    def test_shift_non_empty_array(self, data, periods, indices):
        # https://github.com/pandas-dev/pandas/issues/23911
        subset = data[:2]
        result = subset.shift(periods)
        expected = subset.take(indices, allow_fill=True)
        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4])
    def test_shift_empty_array(self, data, periods):
        # https://github.com/pandas-dev/pandas/issues/23911
        empty = data[:0]
        result = empty.shift(periods)
        expected = empty
        tm.assert_extension_array_equal(result, expected)

    def test_shift_zero_copies(self, data):
        # GH#31502
        result = data.shift(0)
        assert result is not data

        result = data[:0].shift(2)
        assert result is not data

    def test_shift_fill_value(self, data):
        arr = data[:4]
        fill_value = data[0]
        result = arr.shift(1, fill_value=fill_value)
        expected = data.take([0, 0, 1, 2])
        tm.assert_extension_array_equal(result, expected)

        result = arr.shift(-2, fill_value=fill_value)
        expected = data.take([2, 3, 0, 0])
        tm.assert_extension_array_equal(result, expected)
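
    # The shift/fill_value contract above, sketched with nullable Int64
    # (an assumed stand-in for the fixture):
    #
    #   >>> arr = pd.array([1, 2, 3], dtype="Int64")
    #   >>> arr.shift(1)[0] is pd.NA    # shifting introduces the NA value
    #   True
    #   >>> int(arr.shift(1, fill_value=0)[0])
    #   0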
    def test_not_hashable(self, data):
        # We are in general mutable, so not hashable
        with pytest.raises(TypeError, match="unhashable type"):
            hash(data)

    def test_hash_pandas_object_works(self, data, as_frame):
        # https://github.com/pandas-dev/pandas/issues/23066
        data = pd.Series(data)
        if as_frame:
            data = data.to_frame()
        a = pd.util.hash_pandas_object(data)
        b = pd.util.hash_pandas_object(data)
        tm.assert_equal(a, b)

    def test_searchsorted(self, data_for_sorting, as_series):
        if data_for_sorting.dtype._is_boolean:
            return self._test_searchsorted_bool_dtypes(data_for_sorting, as_series)

        b, c, a = data_for_sorting
        arr = data_for_sorting.take([2, 0, 1])  # to get [a, b, c]

        if as_series:
            arr = pd.Series(arr)
        assert arr.searchsorted(a) == 0
        assert arr.searchsorted(a, side="right") == 1

        assert arr.searchsorted(b) == 1
        assert arr.searchsorted(b, side="right") == 2

        assert arr.searchsorted(c) == 2
        assert arr.searchsorted(c, side="right") == 3

        result = arr.searchsorted(arr.take([0, 2]))
        expected = np.array([0, 2], dtype=np.intp)

        tm.assert_numpy_array_equal(result, expected)

        # sorter
        sorter = np.array([1, 2, 0])
        assert data_for_sorting.searchsorted(a, sorter=sorter) == 0

    def _test_searchsorted_bool_dtypes(self, data_for_sorting, as_series):
        # We call this from test_searchsorted in cases where we have a
        # boolean-like dtype. The non-bool test assumes we have more than 2
        # unique values.
        dtype = data_for_sorting.dtype
        data_for_sorting = pd.array([True, False], dtype=dtype)
        b, a = data_for_sorting
        arr = type(data_for_sorting)._from_sequence([a, b])

        if as_series:
            arr = pd.Series(arr)
        assert arr.searchsorted(a) == 0
        assert arr.searchsorted(a, side="right") == 1

        assert arr.searchsorted(b) == 1
        assert arr.searchsorted(b, side="right") == 2

        result = arr.searchsorted(arr.take([0, 1]))
        expected = np.array([0, 1], dtype=np.intp)

        tm.assert_numpy_array_equal(result, expected)

        # sorter
        sorter = np.array([1, 0])
        assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
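
    # searchsorted on a sorted EA behaves like np.searchsorted; a hedged
    # sketch with nullable Int64:
    #
    #   >>> arr = pd.array([1, 2, 3], dtype="Int64")
    #   >>> int(arr.searchsorted(2))
    #   1
    #   >>> int(arr.searchsorted(2, side="right"))
    #   2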
    def test_where_series(self, data, na_value, as_frame):
        assert data[0] != data[1]
        cls = type(data)
        a, b = data[:2]

        orig = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))
        ser = orig.copy()
        cond = np.array([True, True, False, False])

        if as_frame:
            ser = ser.to_frame(name="a")
            cond = cond.reshape(-1, 1)

        result = ser.where(cond)
        expected = pd.Series(
            cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype)
        )

        if as_frame:
            expected = expected.to_frame(name="a")
        tm.assert_equal(result, expected)

        ser.mask(~cond, inplace=True)
        tm.assert_equal(ser, expected)

        # array other
        ser = orig.copy()
        if as_frame:
            ser = ser.to_frame(name="a")
        cond = np.array([True, False, True, True])
        other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
        if as_frame:
            other = pd.DataFrame({"a": other})
            cond = pd.DataFrame({"a": cond})
        result = ser.where(cond, other)
        expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype))
        if as_frame:
            expected = expected.to_frame(name="a")
        tm.assert_equal(result, expected)

        ser.mask(~cond, other, inplace=True)
        tm.assert_equal(ser, expected)

    @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
    def test_repeat(self, data, repeats, as_series, use_numpy):
        arr = type(data)._from_sequence(data[:3], dtype=data.dtype)
        if as_series:
            arr = pd.Series(arr)

        result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats)

        repeats = [repeats] * 3 if isinstance(repeats, int) else repeats
        expected = [x for x, n in zip(arr, repeats) for _ in range(n)]
        expected = type(data)._from_sequence(expected, dtype=data.dtype)
        if as_series:
            expected = pd.Series(expected, index=arr.index.repeat(repeats))

        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "repeats, kwargs, error, msg",
        [
            (2, {"axis": 1}, ValueError, "axis"),
            (-1, {}, ValueError, "negative"),
            ([1, 2], {}, ValueError, "shape"),
            (2, {"foo": "bar"}, TypeError, "'foo'"),
        ],
    )
    def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy):
        with pytest.raises(error, match=msg):
            if use_numpy:
                np.repeat(data, repeats, **kwargs)
            else:
                data.repeat(repeats, **kwargs)

    def test_delete(self, data):
        result = data.delete(0)
        expected = data[1:]
        tm.assert_extension_array_equal(result, expected)

        result = data.delete([1, 3])
        expected = data._concat_same_type([data[[0]], data[[2]], data[4:]])
        tm.assert_extension_array_equal(result, expected)

    def test_insert(self, data):
        # insert at the beginning
        result = data[1:].insert(0, data[0])
        tm.assert_extension_array_equal(result, data)

        result = data[1:].insert(-len(data[1:]), data[0])
        tm.assert_extension_array_equal(result, data)

        # insert at the middle
        result = data[:-1].insert(4, data[-1])

        taker = np.arange(len(data))
        taker[5:] = taker[4:-1]
        taker[4] = len(data) - 1
        expected = data.take(taker)
        tm.assert_extension_array_equal(result, expected)

    def test_insert_invalid(self, data, invalid_scalar):
        item = invalid_scalar

        with pytest.raises((TypeError, ValueError)):
            data.insert(0, item)

        with pytest.raises((TypeError, ValueError)):
            data.insert(4, item)

        with pytest.raises((TypeError, ValueError)):
            data.insert(len(data) - 1, item)

    def test_insert_invalid_loc(self, data):
        ub = len(data)

        with pytest.raises(IndexError):
            data.insert(ub + 1, data[0])

        with pytest.raises(IndexError):
            data.insert(-ub - 1, data[0])

        with pytest.raises(TypeError):
            # we expect TypeError here instead of IndexError to match np.insert
            data.insert(1.5, data[0])

    @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame])
    def test_equals(self, data, na_value, as_series, box):
        data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype)
        data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype)

        data = tm.box_expected(data, box, transpose=False)
        data2 = tm.box_expected(data2, box, transpose=False)
        data_na = tm.box_expected(data_na, box, transpose=False)

        # we are asserting with `is True/False` explicitly, to test that the
        # result is an actual Python bool, and not something "truthy"

        assert data.equals(data) is True
        assert data.equals(data.copy()) is True

        # unequal other data
        assert data.equals(data2) is False
        assert data.equals(data_na) is False

        # different length
        assert data[:2].equals(data[:3]) is False

        # empty are equal
        assert data[:0].equals(data[:0]) is True

        # other types
        assert data.equals(None) is False
        assert data[[0]].equals(data[0]) is False

    def test_equals_same_data_different_object(self, data):
        # https://github.com/pandas-dev/pandas/issues/34660
        assert pd.Series(data).equals(pd.Series(data))
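
``equals`` returns an actual Python bool, as the last tests insist. A hedged sketch with the nullable ``Int64`` dtype (an assumed stand-in for the fixture):

.. code-block:: python

    import pandas as pd

    arr = pd.array([1, None], dtype="Int64")
    assert arr.equals(arr.copy()) is True   # a real bool, not just truthy
    assert arr[:0].equals(arr[:0]) is True  # empty arrays compare equal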
@ -0,0 +1,190 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class BaseMissingTests:
    def test_isna(self, data_missing):
        expected = np.array([True, False])

        result = pd.isna(data_missing)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        tm.assert_series_equal(result, expected)

        # GH 21189
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=bool)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("na_func", ["isna", "notna"])
    def test_isna_returns_copy(self, data_missing, na_func):
        result = pd.Series(data_missing)
        expected = result.copy()
        mask = getattr(result, na_func)()
        if isinstance(mask.dtype, pd.SparseDtype):
            # TODO: GH 57739
            mask = np.array(mask)
            mask.flags.writeable = True

        mask[:] = True
        tm.assert_series_equal(result, expected)

    def test_dropna_array(self, data_missing):
        result = data_missing.dropna()
        expected = data_missing[[1]]
        tm.assert_extension_array_equal(result, expected)

    def test_dropna_series(self, data_missing):
        ser = pd.Series(data_missing)
        result = ser.dropna()
        expected = ser.iloc[[1]]
        tm.assert_series_equal(result, expected)

    def test_dropna_frame(self, data_missing):
        df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object))

        # defaults
        result = df.dropna()
        expected = df.iloc[[1]]
        tm.assert_frame_equal(result, expected)

        # axis = 1
        result = df.dropna(axis="columns")
        expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
        tm.assert_frame_equal(result, expected)

        # multiple
        df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]})
        result = df.dropna()
        expected = df.iloc[:0]
        tm.assert_frame_equal(result, expected)

    def test_fillna_scalar(self, data_missing):
        valid = data_missing[1]
        result = data_missing.fillna(valid)
        expected = data_missing.fillna(valid)
        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.filterwarnings(
        "ignore:Series.fillna with 'method' is deprecated:FutureWarning"
    )
    def test_fillna_limit_pad(self, data_missing):
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).ffill(limit=2)
        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        # GH#56616
        arr = data_missing.take(input_ilocs)
        result = pd.Series(arr).ffill(limit_area=limit_area)
        expected = pd.Series(data_missing.take(expected_ilocs))
        tm.assert_series_equal(result, expected)
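
    # limit_area="inside" only fills gaps between valid values; a hedged
    # sketch with nullable Int64 (an assumed stand-in for the fixture):
    #
    #   >>> ser = pd.Series(pd.array([None, 1, None, 2, None], dtype="Int64"))
    #   >>> ser.ffill(limit_area="inside").isna().tolist()
    #   [True, False, False, False, True]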
    @pytest.mark.filterwarnings(
        "ignore:Series.fillna with 'method' is deprecated:FutureWarning"
    )
    def test_fillna_limit_backfill(self, data_missing):
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method="backfill", limit=2)
        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
        tm.assert_series_equal(result, expected)

    def test_fillna_no_op_returns_copy(self, data):
        data = data[~data.isna()]

        valid = data[0]
        result = data.fillna(valid)
        assert result is not data
        tm.assert_extension_array_equal(result, data)

        result = data._pad_or_backfill(method="backfill")
        assert result is not data
        tm.assert_extension_array_equal(result, data)

    def test_fillna_series(self, data_missing):
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        result = ser.fillna(fill_value)
        expected = pd.Series(
            data_missing._from_sequence(
                [fill_value, fill_value], dtype=data_missing.dtype
            )
        )
        tm.assert_series_equal(result, expected)

        # Fill with a series
        result = ser.fillna(expected)
        tm.assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        result = ser.fillna(ser)
        tm.assert_series_equal(result, ser)

    def test_fillna_series_method(self, data_missing, fillna_method):
        fill_value = data_missing[1]

        if fillna_method == "ffill":
            data_missing = data_missing[::-1]

        result = getattr(pd.Series(data_missing), fillna_method)()
        expected = pd.Series(
            data_missing._from_sequence(
                [fill_value, fill_value], dtype=data_missing.dtype
            )
        )

        tm.assert_series_equal(result, expected)

    def test_fillna_frame(self, data_missing):
        fill_value = data_missing[1]

        result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value)

        expected = pd.DataFrame(
            {
                "A": data_missing._from_sequence(
                    [fill_value, fill_value], dtype=data_missing.dtype
                ),
                "B": [1, 2],
            }
        )

        tm.assert_frame_equal(result, expected)

    def test_fillna_fill_other(self, data):
        result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0})

        expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})

        tm.assert_frame_equal(result, expected)

    def test_use_inf_as_na_no_effect(self, data_missing):
        ser = pd.Series(data_missing)
        expected = ser.isna()
        msg = "use_inf_as_na option is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            with pd.option_context("mode.use_inf_as_na", True):
                result = ser.isna()
        tm.assert_series_equal(result, expected)
@ -0,0 +1,299 @@
from __future__ import annotations

from typing import final

import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.core.dtypes.common import is_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core import ops


class BaseOpsUtil:
    series_scalar_exc: type[Exception] | None = TypeError
    frame_scalar_exc: type[Exception] | None = TypeError
    series_array_exc: type[Exception] | None = TypeError
    divmod_exc: type[Exception] | None = TypeError

    def _get_expected_exception(
        self, op_name: str, obj, other
    ) -> type[Exception] | None:
        # Find the Exception, if any, we expect to raise when calling
        # obj.__op_name__(other)

        # The self.obj_bar_exc pattern isn't great in part because it can depend
        # on op_name or dtypes, but we use it here for backward-compatibility.
        if op_name in ["__divmod__", "__rdivmod__"]:
            result = self.divmod_exc
        elif isinstance(obj, pd.Series) and isinstance(other, pd.Series):
            result = self.series_array_exc
        elif isinstance(obj, pd.Series):
            result = self.series_scalar_exc
        else:
            result = self.frame_scalar_exc

        if using_pyarrow_string_dtype() and result is not None:
            import pyarrow as pa

            result = (  # type: ignore[assignment]
                result,
                pa.lib.ArrowNotImplementedError,
                NotImplementedError,
            )
        return result

    def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
        # In _check_op we check that the result of a pointwise operation
        # (found via _combine) matches the result of the vectorized
        # operation obj.__op_name__(other).
        # In some cases pandas dtype inference on the scalar result may not
        # give a matching dtype even if both operations are behaving "correctly".
        # In these cases, do extra required casting here.
        return pointwise_result

    def get_op_from_name(self, op_name: str):
        return tm.get_op_from_name(op_name)

    # Subclasses are not expected to need to override check_opname, _check_op,
    # _check_divmod_op, or _combine.
    # Ideally any relevant overriding can be done in _cast_pointwise_result,
    # get_op_from_name, and the specification of `exc`. If you find a use
    # case that still requires overriding _check_op or _combine, please let
    # us know at github.com/pandas-dev/pandas/issues
    @final
    def check_opname(self, ser: pd.Series, op_name: str, other):
        exc = self._get_expected_exception(op_name, ser, other)
        op = self.get_op_from_name(op_name)

        self._check_op(ser, op, other, op_name, exc)

    # see comment on check_opname
    @final
    def _combine(self, obj, other, op):
        if isinstance(obj, pd.DataFrame):
            if len(obj.columns) != 1:
                raise NotImplementedError
            expected = obj.iloc[:, 0].combine(other, op).to_frame()
        else:
            expected = obj.combine(other, op)
        return expected

    # see comment on check_opname
    @final
    def _check_op(
        self, ser: pd.Series, op, other, op_name: str, exc=NotImplementedError
    ):
        # Check that the Series/DataFrame arithmetic/comparison method matches
        # the pointwise result from _combine.

        if exc is None:
            result = op(ser, other)
            expected = self._combine(ser, other, op)
            expected = self._cast_pointwise_result(op_name, ser, other, expected)
            assert isinstance(result, type(ser))
            tm.assert_equal(result, expected)
        else:
            with pytest.raises(exc):
                op(ser, other)

    # see comment on check_opname
    @final
    def _check_divmod_op(self, ser: pd.Series, op, other):
        # check that divmod behavior matches behavior of floordiv+mod
        if op is divmod:
            exc = self._get_expected_exception("__divmod__", ser, other)
        else:
            exc = self._get_expected_exception("__rdivmod__", ser, other)
        if exc is None:
            result_div, result_mod = op(ser, other)
            if op is divmod:
                expected_div, expected_mod = ser // other, ser % other
            else:
                expected_div, expected_mod = other // ser, other % ser
            tm.assert_series_equal(result_div, expected_div)
            tm.assert_series_equal(result_mod, expected_mod)
        else:
            with pytest.raises(exc):
                divmod(ser, other)
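
# divmod on a Series decomposes into floordiv and mod, which is exactly what
# _check_divmod_op verifies above; a hedged sketch with nullable Int64:
#
#   >>> ser = pd.Series(pd.array([7, 9], dtype="Int64"))
#   >>> div, mod = divmod(ser, 4)
#   >>> bool(div.equals(ser // 4)) and bool(mod.equals(ser % 4))
#   True
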

class BaseArithmeticOpsTests(BaseOpsUtil):
    """
    Various Series and DataFrame arithmetic ops methods.

    Subclasses supporting various ops should set the class variables
    to indicate that they support ops of that kind

    * series_scalar_exc = TypeError
    * frame_scalar_exc = TypeError
    * series_array_exc = TypeError
    * divmod_exc = TypeError
    """

    series_scalar_exc: type[Exception] | None = TypeError
    frame_scalar_exc: type[Exception] | None = TypeError
    series_array_exc: type[Exception] | None = TypeError
    divmod_exc: type[Exception] | None = TypeError

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        # series & scalar
        if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype):
            pytest.skip("Skip testing Python string formatting")

        op_name = all_arithmetic_operators
        ser = pd.Series(data)
        self.check_opname(ser, op_name, ser.iloc[0])

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        # frame & scalar
        if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype):
            pytest.skip("Skip testing Python string formatting")

        op_name = all_arithmetic_operators
        df = pd.DataFrame({"A": data})
        self.check_opname(df, op_name, data[0])

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        # ndarray & other series
        op_name = all_arithmetic_operators
        ser = pd.Series(data)
        self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)))

    def test_divmod(self, data):
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, 1)
        self._check_divmod_op(1, ops.rdivmod, ser)

    def test_divmod_series_array(self, data, data_for_twos):
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, data)

        other = data_for_twos
        self._check_divmod_op(other, ops.rdivmod, ser)

        other = pd.Series(other)
        self._check_divmod_op(other, ops.rdivmod, ser)

    def test_add_series_with_extension_array(self, data):
        # Check adding an ExtensionArray to a Series of the same dtype matches
        # the behavior of adding the arrays directly and then wrapping in a
        # Series.

        ser = pd.Series(data)

        exc = self._get_expected_exception("__add__", ser, data)
        if exc is not None:
            with pytest.raises(exc):
                ser + data
            return

        result = ser + data
        expected = pd.Series(data + data)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame, pd.Index])
    @pytest.mark.parametrize(
        "op_name",
        [
            x
            for x in tm.arithmetic_dunder_methods + tm.comparison_dunder_methods
            if not x.startswith("__r")
        ],
    )
    def test_direct_arith_with_ndframe_returns_not_implemented(
        self, data, box, op_name
    ):
        # EAs should return NotImplemented for ops with Series/DataFrame/Index.
        # Pandas takes care of unboxing the series and calling the EA's op.
        other = box(data)

        if hasattr(data, op_name):
            result = getattr(data, op_name)(other)
            assert result is NotImplemented


class BaseComparisonOpsTests(BaseOpsUtil):
    """Various Series and DataFrame comparison ops methods."""

    def _compare_other(self, ser: pd.Series, data, op, other):
        if op.__name__ in ["eq", "ne"]:
            # comparison should match point-wise comparisons
            result = op(ser, other)
            expected = ser.combine(other, op)
            expected = self._cast_pointwise_result(op.__name__, ser, other, expected)
            tm.assert_series_equal(result, expected)

        else:
            exc = None
            try:
                result = op(ser, other)
            except Exception as err:
                exc = err

            if exc is None:
                # Didn't error, then should match pointwise behavior
                expected = ser.combine(other, op)
                expected = self._cast_pointwise_result(
                    op.__name__, ser, other, expected
                )
                tm.assert_series_equal(result, expected)
            else:
                with pytest.raises(type(exc)):
                    ser.combine(other, op)

    def test_compare_scalar(self, data, comparison_op):
        ser = pd.Series(data)
        self._compare_other(ser, data, comparison_op, 0)

    def test_compare_array(self, data, comparison_op):
        ser = pd.Series(data)
        other = pd.Series([data[0]] * len(data), dtype=data.dtype)
        self._compare_other(ser, data, comparison_op, other)


class BaseUnaryOpsTests(BaseOpsUtil):
    def test_invert(self, data):
        ser = pd.Series(data, name="name")
        try:
            # 10 is an arbitrary choice here, just avoid iterating over
            # the whole array to trim test runtime
            [~x for x in data[:10]]
        except TypeError:
            # scalars don't support invert -> we don't expect the vectorized
            # operation to succeed
            with pytest.raises(TypeError):
                ~ser
            with pytest.raises(TypeError):
                ~data
        else:
            # Note we do not reuse the pointwise result to construct expected
            # because Python semantics for negating bools are weird; see GH#54569
            result = ~ser
            expected = pd.Series(~data, name="name")
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
        # the dunder __pos__ works if and only if np.positive works,
        # same for __neg__/np.negative and __abs__/np.abs
        attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[
            ufunc
        ]

        exc = None
        try:
            result = getattr(data, attr)()
        except Exception as err:
            exc = err

            # if __pos__ raised, then so should the ufunc
            with pytest.raises((type(exc), TypeError)):
                ufunc(data)
        else:
            alt = ufunc(data)
            tm.assert_extension_array_equal(result, alt)
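
For boolean-capable arrays the invert path above is exercised directly; a hedged sketch with the built-in nullable ``boolean`` dtype:

.. code-block:: python

    import pandas as pd

    arr = pd.array([True, False], dtype="boolean")
    # the vectorized ~ matches the pointwise inversion
    assert (~arr).tolist() == [False, True]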
@ -0,0 +1,41 @@
import io

import pytest

import pandas as pd


class BasePrintingTests:
    """Tests checking the formatting of your EA when printed."""

    @pytest.mark.parametrize("size", ["big", "small"])
    def test_array_repr(self, data, size):
        if size == "small":
            data = data[:5]
        else:
            data = type(data)._concat_same_type([data] * 5)

        result = repr(data)
        assert type(data).__name__ in result
        assert f"Length: {len(data)}" in result
        assert str(data.dtype) in result
        if size == "big":
            assert "..." in result

    def test_array_repr_unicode(self, data):
        result = str(data)
        assert isinstance(result, str)

    def test_series_repr(self, data):
        ser = pd.Series(data)
        assert data.dtype.name in repr(ser)

    def test_dataframe_repr(self, data):
        df = pd.DataFrame({"A": data})
        repr(df)

    def test_dtype_name_in_info(self, data):
        buf = io.StringIO()
        pd.DataFrame({"A": data}).info(buf=buf)
        result = buf.getvalue()
        assert data.dtype.name in result
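
The repr contract above (type name, length, dtype all present) looks like this for the built-in nullable integer array (a hedged illustration, not part of the diff):

.. code-block:: python

    import pandas as pd

    arr = pd.array([1, 2, None], dtype="Int64")
    text = repr(arr)
    assert "IntegerArray" in text
    assert "Length: 3" in text and "Int64" in text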
@ -0,0 +1,153 @@
from typing import final

import pytest

import pandas as pd
import pandas._testing as tm
from pandas.api.types import is_numeric_dtype


class BaseReduceTests:
    """
    Reduction specific tests. Generally these only
    make sense for numeric/boolean operations.
    """

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        # Specify if we expect this reduction to succeed.
        return False

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        # We perform the same operation on the np.float64 data and check
        # that the results match. Override if you need to cast to something
        # other than float64.
        res_op = getattr(ser, op_name)

        try:
            alt = ser.astype("float64")
        except (TypeError, ValueError):
            # e.g. Interval can't cast (TypeError), StringArray can't cast
            # (ValueError), so let's cast to object and do
            # the reduction pointwise
            alt = ser.astype(object)

        exp_op = getattr(alt, op_name)
        if op_name == "count":
            result = res_op()
            expected = exp_op()
        else:
            result = res_op(skipna=skipna)
            expected = exp_op(skipna=skipna)
        tm.assert_almost_equal(result, expected)

    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
        # Find the expected dtype when the given reduction is done on a DataFrame
        # column with this array. The default assumes float64-like behavior,
        # i.e. retains the dtype.
        return arr.dtype

    # We anticipate that authors should not need to override check_reduce_frame,
    # but should be able to do any necessary overriding in
    # _get_expected_reduction_dtype. If you have a use case where this
    # does not hold, please let us know at github.com/pandas-dev/pandas/issues.
    @final
    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
        # Check that the 2D reduction done in a DataFrame reduction "looks like"
        # a wrapped version of the 1D reduction done by Series.
        arr = ser.array
        df = pd.DataFrame({"a": arr})

        kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)

        # The DataFrame method just calls arr._reduce with keepdims=True,
        # so this first check is perfunctory.
        result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
        result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array
        tm.assert_extension_array_equal(result1, result2)

        # Check that the 2D reduction looks like a wrapped version of the
        # 1D reduction
        if not skipna and ser.isna().any():
            expected = pd.array([pd.NA], dtype=cmp_dtype)
        else:
            exp_value = getattr(ser.dropna(), op_name)()
            expected = pd.array([exp_value], dtype=cmp_dtype)

        tm.assert_extension_array_equal(result1, expected)
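
    # What "wrapped 1D reduction" means in practice, sketched with nullable
    # Int64 (a hedged assumption about the fixture dtype and pandas version):
    #
    #   >>> df = pd.DataFrame({"a": pd.array([1, 2, None], dtype="Int64")})
    #   >>> res = df.sum()          # frame reduction wraps the 1D reduction
    #   >>> int(res["a"])
    #   3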
    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        op_name = all_boolean_reductions
        ser = pd.Series(data)

        if not self._supports_reduction(ser, op_name):
            # TODO: the message being checked here isn't actually checking anything
            msg = (
                "[Cc]annot perform|Categorical is not ordered for operation|"
                "does not support reduction|"
            )

            with pytest.raises(TypeError, match=msg):
                getattr(ser, op_name)(skipna=skipna)

        else:
            self.check_reduce(ser, op_name, skipna)

    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        op_name = all_numeric_reductions
        ser = pd.Series(data)

        if not self._supports_reduction(ser, op_name):
            # TODO: the message being checked here isn't actually checking anything
            msg = (
                "[Cc]annot perform|Categorical is not ordered for operation|"
                "does not support reduction|"
            )

            with pytest.raises(TypeError, match=msg):
                getattr(ser, op_name)(skipna=skipna)

        else:
            # min/max with empty produce numpy warnings
            self.check_reduce(ser, op_name, skipna)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
        op_name = all_numeric_reductions
        ser = pd.Series(data)
        if not is_numeric_dtype(ser.dtype):
            pytest.skip(f"{ser.dtype} is not numeric dtype")

        if op_name in ["count", "kurt", "sem"]:
            pytest.skip(f"{op_name} not an array method")

        if not self._supports_reduction(ser, op_name):
            pytest.skip(f"Reduction {op_name} not supported for this dtype")

        self.check_reduce_frame(ser, op_name, skipna)


# TODO(3.0): remove BaseNoReduceTests, BaseNumericReduceTests,
# BaseBooleanReduceTests
class BaseNoReduceTests(BaseReduceTests):
    """we don't define any reductions"""


class BaseNumericReduceTests(BaseReduceTests):
    # For backward compatibility only, this only runs the numeric reductions
    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        if op_name in ["any", "all"]:
            pytest.skip("These are tested in BaseBooleanReduceTests")
        return True


class BaseBooleanReduceTests(BaseReduceTests):
    # For backward compatibility only, this only runs the boolean reductions
    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        if op_name not in ["any", "all"]:
            pytest.skip("These are tested in BaseNumericReduceTests")
        return True
@ -0,0 +1,379 @@
import itertools

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.api.extensions import ExtensionArray
from pandas.core.internals.blocks import EABackedBlock


class BaseReshapingTests:
    """Tests for reshaping and concatenation."""

    @pytest.mark.parametrize("in_frame", [True, False])
    def test_concat(self, data, in_frame):
        wrapped = pd.Series(data)
        if in_frame:
            wrapped = pd.DataFrame(wrapped)
        result = pd.concat([wrapped, wrapped], ignore_index=True)

        assert len(result) == len(data) * 2

        if in_frame:
            dtype = result.dtypes[0]
        else:
            dtype = result.dtype

        assert dtype == data.dtype
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert isinstance(result._mgr.arrays[0], ExtensionArray)
@pytest.mark.parametrize("in_frame", [True, False])
|
||||
def test_concat_all_na_block(self, data_missing, in_frame):
|
||||
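        # data_missing is the length-2 fixture [NA, valid], so take([1, 1])
        # builds an all-valid block and take([0, 0]) an all-NA block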
        valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1])
        na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3])
        if in_frame:
            valid_block = pd.DataFrame({"a": valid_block})
            na_block = pd.DataFrame({"a": na_block})
        result = pd.concat([valid_block, na_block])
        if in_frame:
            expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])})
            tm.assert_frame_equal(result, expected)
        else:
            expected = pd.Series(data_missing.take([1, 1, 0, 0]))
            tm.assert_series_equal(result, expected)

    def test_concat_mixed_dtypes(self, data):
        # https://github.com/pandas-dev/pandas/issues/20762
        df1 = pd.DataFrame({"A": data[:3]})
        df2 = pd.DataFrame({"A": [1, 2, 3]})
        df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category")
        dfs = [df1, df2, df3]

        # dataframes
        result = pd.concat(dfs)
        expected = pd.concat([x.astype(object) for x in dfs])
        tm.assert_frame_equal(result, expected)

        # series
        result = pd.concat([x["A"] for x in dfs])
        expected = pd.concat([x["A"].astype(object) for x in dfs])
        tm.assert_series_equal(result, expected)

        # simple test for just EA and one other
        result = pd.concat([df1, df2.astype(object)])
        expected = pd.concat([df1.astype("object"), df2.astype("object")])
        tm.assert_frame_equal(result, expected)

        result = pd.concat([df1["A"], df2["A"].astype(object)])
        expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")])
        tm.assert_series_equal(result, expected)

    def test_concat_columns(self, data, na_value):
        df1 = pd.DataFrame({"A": data[:3]})
        df2 = pd.DataFrame({"B": [1, 2, 3]})

        expected = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]})
        result = pd.concat([df1, df2], axis=1)
        tm.assert_frame_equal(result, expected)
        result = pd.concat([df1["A"], df2["B"]], axis=1)
        tm.assert_frame_equal(result, expected)

        # non-aligned
        df2 = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3])
        expected = pd.DataFrame(
            {
                "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype),
                "B": [np.nan, 1, 2, 3],
            }
        )

        result = pd.concat([df1, df2], axis=1)
        tm.assert_frame_equal(result, expected)
        result = pd.concat([df1["A"], df2["B"]], axis=1)
        tm.assert_frame_equal(result, expected)

    def test_concat_extension_arrays_copy_false(self, data, na_value):
        # GH 20756
        df1 = pd.DataFrame({"A": data[:3]})
        df2 = pd.DataFrame({"B": data[3:7]})
        expected = pd.DataFrame(
            {
                "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype),
                "B": data[3:7],
            }
        )
        result = pd.concat([df1, df2], axis=1, copy=False)
        tm.assert_frame_equal(result, expected)

    def test_concat_with_reindex(self, data):
        # GH-33027
        a = pd.DataFrame({"a": data[:5]})
        b = pd.DataFrame({"b": data[:5]})
        result = pd.concat([a, b], ignore_index=True)
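        # take(..., allow_fill=True) treats -1 as "fill with this dtype's NA
        # value", so each column is the original five values padded with five NAs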
        expected = pd.DataFrame(
            {
                "a": data.take(list(range(5)) + ([-1] * 5), allow_fill=True),
                "b": data.take(([-1] * 5) + list(range(5)), allow_fill=True),
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_align(self, data, na_value):
        a = data[:3]
        b = data[2:5]
        r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3]))

        # Assumes that the ctor can take a list of scalars of the type
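        # (align defaults to an outer join: indexes [0, 1, 2] and [1, 2, 3]
        # combine to [0, 1, 2, 3], and missing slots are filled with na_value)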
        e1 = pd.Series(data._from_sequence(list(a) + [na_value], dtype=data.dtype))
        e2 = pd.Series(data._from_sequence([na_value] + list(b), dtype=data.dtype))
        tm.assert_series_equal(r1, e1)
        tm.assert_series_equal(r2, e2)

    def test_align_frame(self, data, na_value):
        a = data[:3]
        b = data[2:5]
        r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3]))

        # Assumes that the ctor can take a list of scalars of the type
        e1 = pd.DataFrame(
            {"A": data._from_sequence(list(a) + [na_value], dtype=data.dtype)}
        )
        e2 = pd.DataFrame(
            {"A": data._from_sequence([na_value] + list(b), dtype=data.dtype)}
        )
        tm.assert_frame_equal(r1, e1)
        tm.assert_frame_equal(r2, e2)

    def test_align_series_frame(self, data, na_value):
        # https://github.com/pandas-dev/pandas/issues/20576
        ser = pd.Series(data, name="a")
        df = pd.DataFrame({"col": np.arange(len(ser) + 1)})
        r1, r2 = ser.align(df)

        e1 = pd.Series(
            data._from_sequence(list(data) + [na_value], dtype=data.dtype),
            name=ser.name,
        )

        tm.assert_series_equal(r1, e1)
        tm.assert_frame_equal(r2, df)

    def test_set_frame_expand_regular_with_extension(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        df["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        tm.assert_frame_equal(df, expected)

    def test_set_frame_expand_extension_with_regular(self, data):
        df = pd.DataFrame({"A": data})
        df["B"] = [1] * len(data)
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        tm.assert_frame_equal(df, expected)

    def test_set_frame_overwrite_object(self, data):
        # https://github.com/pandas-dev/pandas/issues/20555
        df = pd.DataFrame({"A": [1] * len(data)}, dtype=object)
        df["A"] = data
        assert df.dtypes["A"] == data.dtype

    def test_merge(self, data, na_value):
        # GH-20743
        df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]})
        df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]})

        res = pd.merge(df1, df2)
        exp = pd.DataFrame(
            {
                "int1": [1, 1, 2],
                "int2": [1, 2, 3],
                "key": [0, 0, 1],
                "ext": data._from_sequence(
                    [data[0], data[0], data[1]], dtype=data.dtype
                ),
            }
        )
        tm.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]])

        res = pd.merge(df1, df2, how="outer")
        exp = pd.DataFrame(
            {
                "int1": [1, 1, 2, 3, np.nan],
                "int2": [1, 2, 3, np.nan, 4],
                "key": [0, 0, 1, 2, 3],
                "ext": data._from_sequence(
                    [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype
                ),
            }
        )
        tm.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]])

    def test_merge_on_extension_array(self, data):
        # GH 23020
        a, b = data[:2]
        key = type(data)._from_sequence([a, b], dtype=data.dtype)

        df = pd.DataFrame({"key": key, "val": [1, 2]})
        result = pd.merge(df, df, on="key")
        expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]})
        tm.assert_frame_equal(result, expected)

        # order
        result = pd.merge(df.iloc[[1, 0]], df, on="key")
        expected = expected.iloc[[1, 0]].reset_index(drop=True)
        tm.assert_frame_equal(result, expected)

    def test_merge_on_extension_array_duplicates(self, data):
        # GH 23020
        a, b = data[:2]
        key = type(data)._from_sequence([a, b, a], dtype=data.dtype)
        df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]})
        df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]})

        result = pd.merge(df1, df2, on="key")
        expected = pd.DataFrame(
            {
                "key": key.take([0, 0, 1, 2, 2]),
                "val_x": [1, 1, 2, 3, 3],
                "val_y": [1, 3, 2, 1, 3],
            }
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.filterwarnings(
        "ignore:The previous implementation of stack is deprecated"
    )
    @pytest.mark.parametrize(
        "columns",
        [
            ["A", "B"],
            pd.MultiIndex.from_tuples(
                [("A", "a"), ("A", "b")], names=["outer", "inner"]
            ),
        ],
    )
    @pytest.mark.parametrize("future_stack", [True, False])
    def test_stack(self, data, columns, future_stack):
        df = pd.DataFrame({"A": data[:5], "B": data[:5]})
        df.columns = columns
        result = df.stack(future_stack=future_stack)
        expected = df.astype(object).stack(future_stack=future_stack)
        # we need a second astype(object), in case the constructor inferred
        # object -> specialized, as is done for period.
        expected = expected.astype(object)

        if isinstance(expected, pd.Series):
            assert result.dtype == df.iloc[:, 0].dtype
        else:
            assert all(result.dtypes == df.iloc[:, 0].dtype)

        result = result.astype(object)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "index",
        [
            # Two levels, uniform.
            pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
            # non-uniform
            pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
            # three levels, non-uniform
            pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
            pd.MultiIndex.from_tuples(
                [
                    ("A", "a", 1),
                    ("A", "b", 0),
                    ("A", "a", 0),
                    ("B", "a", 0),
                    ("B", "c", 1),
                ]
            ),
        ],
    )
    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, data, index, obj):
        data = data[: len(index)]
        if obj == "series":
            ser = pd.Series(data, index=index)
        else:
            ser = pd.DataFrame({"A": data, "B": data}, index=index)

        n = index.nlevels
        levels = list(range(n))
        # [0, 1, 2]
        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
        combinations = itertools.chain.from_iterable(
            itertools.permutations(levels, i) for i in range(1, n)
        )
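        # range(1, n) yields every proper, non-empty subset of the levels,
        # so all n levels are never unstacked at once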

        for level in combinations:
            result = ser.unstack(level=level)
            assert all(
                isinstance(result[col].array, type(data)) for col in result.columns
            )

            if obj == "series":
                # We should get the same result with to_frame+unstack+droplevel
                df = ser.to_frame()

                alt = df.unstack(level=level).droplevel(0, axis=1)
                tm.assert_frame_equal(result, alt)

            obj_ser = ser.astype(object)

            expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
            if obj == "series":
                assert (expected.dtypes == object).all()

            result = result.astype(object)
            tm.assert_frame_equal(result, expected)

    def test_ravel(self, data):
        # as long as EA is 1D-only, ravel is a no-op
        result = data.ravel()
        assert type(result) == type(data)

        if data.dtype._is_immutable:
            pytest.skip(f"test_ravel assumes mutability and {data.dtype} is immutable")

        # Check that we have a view, not a copy
        result[0] = result[1]
        assert data[0] == data[1]

    def test_transpose(self, data):
        result = data.transpose()
        assert type(result) == type(data)

        # check we get a new object
        assert result is not data

        # If we ever _did_ support 2D, shape should be reversed
        assert result.shape == data.shape[::-1]
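        # (today EAs are 1D-only, so shape[::-1] == shape and this always holds)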

        if data.dtype._is_immutable:
            pytest.skip(
                f"test_transpose assumes mutability and {data.dtype} is immutable"
            )

        # Check that we have a view, not a copy
        result[0] = result[1]
        assert data[0] == data[1]

    def test_transpose_frame(self, data):
        df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"])
        result = df.T
        expected = pd.DataFrame(
            {
                "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype),
                "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype),
                "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype),
                "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype),
            },
            index=["A", "B"],
        )
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(np.transpose(np.transpose(df)), df)
        tm.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]])
@ -0,0 +1,451 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class BaseSetitemTests:
    @pytest.fixture(
        params=[
            lambda x: x.index,
            lambda x: list(x.index),
            lambda x: slice(None),
            lambda x: slice(0, len(x)),
            lambda x: range(len(x)),
            lambda x: list(range(len(x))),
            lambda x: np.ones(len(x), dtype=bool),
        ],
        ids=[
            "index",
            "list[index]",
            "null_slice",
            "full_slice",
            "range",
            "list(range)",
            "mask",
        ],
    )
    def full_indexer(self, request):
        """
        Fixture for an indexer to pass to obj.loc to get/set the full length of the
        object.

        In some cases, assumes that obj.index is the default RangeIndex.
        """
        return request.param

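    # For illustration: with a length-3 object, the params above evaluate to
    # x.index, list(x.index), slice(None), slice(0, 3), range(3), [0, 1, 2]
    # and np.ones(3, dtype=bool), respectively.
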
    @pytest.fixture(autouse=True)
    def skip_if_immutable(self, dtype, request):
        if dtype._is_immutable:
            node = request.node
            if node.name.split("[")[0] == "test_is_immutable":
                # This fixture is auto-used, but we want to not-skip
                # test_is_immutable.
                return

            # When BaseSetitemTests is mixed into ExtensionTests, we only
            # want this fixture to operate on the tests defined in this
            # class/file.
            defined_in = node.function.__qualname__.split(".")[0]
            if defined_in == "BaseSetitemTests":
                pytest.skip("__setitem__ test not applicable with immutable dtype")

    def test_is_immutable(self, data):
        if data.dtype._is_immutable:
            with pytest.raises(TypeError):
                data[0] = data[0]
        else:
            data[0] = data[1]
            assert data[0] == data[1]

    def test_setitem_scalar_series(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value, dtype=data.dtype)

        xpr = "cannot set using a {} indexer with a different length"
        with pytest.raises(ValueError, match=xpr.format("list-like")):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        tm.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format("slice")):
            ser[slice(3)] = value
        tm.assert_series_equal(ser, original)

    def test_setitem_empty_indexer(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        tm.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize("setter", ["loc", "iloc"])
    def test_setitem_scalar(self, data, setter):
        arr = pd.Series(data)
        setter = getattr(arr, setter)
        setter[0] = data[1]
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, "B"] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_loc_scalar_multiple_homogeneous(self, data):
df = pd.DataFrame({"A": data, "B": data})
|
||||
df.loc[10, "B"] = data[1]
|
||||
assert df.loc[10, "B"] == data[1]
|
||||
|
||||
def test_setitem_iloc_scalar_mixed(self, data):
|
||||
df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
|
||||
df.iloc[0, 1] = data[1]
|
||||
assert df.loc[0, "B"] == data[1]
|
||||
|
||||
def test_setitem_iloc_scalar_single(self, data):
|
||||
df = pd.DataFrame({"B": data})
|
||||
df.iloc[10, 0] = data[1]
|
||||
assert df.loc[10, "B"] == data[1]
|
||||
|
||||
    def test_setitem_iloc_scalar_multiple_homogeneous(self, data):
df = pd.DataFrame({"A": data, "B": data})
|
||||
df.iloc[10, 1] = data[1]
|
||||
assert df.loc[10, "B"] == data[1]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"mask",
|
||||
[
|
||||
np.array([True, True, True, False, False]),
|
||||
pd.array([True, True, True, False, False], dtype="boolean"),
|
||||
pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
|
||||
],
|
||||
ids=["numpy-array", "boolean-array", "boolean-array-na"],
|
||||
)
|
||||
def test_setitem_mask(self, data, mask, box_in_series):
|
||||
arr = data[:5].copy()
|
||||
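        # every mask variant selects positions 0-2, so the expected result is
        # the first element broadcast into slots 0-2 with slots 3-4 untouched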
        expected = arr.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)
        arr[mask] = data[0]
        tm.assert_equal(expected, arr)

    def test_setitem_mask_raises(self, data, box_in_series):
        # wrong length
        mask = np.array([True, False])

        if box_in_series:
            data = pd.Series(data)

        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:3] = True
        mask[3:5] = pd.NA

        if box_in_series:
            data = pd.Series(data)

        data[mask] = data[0]

        assert (data[:3] == data[0]).all()

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])

        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[idx] = arr[0]
        tm.assert_equal(arr, expected)

    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
|
||||
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        arr = data.copy()

        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
        # for list of labels with Series
        if box_in_series:
            arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])

        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            arr[idx] = arr[0]

    @pytest.mark.parametrize("as_callable", [True, False])
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:
            mask2 = lambda x: mask
        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        target[mask2] = data[5:7]

        ser[mask2] = data[5:7]
        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:  # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        target[mask] = data[10]
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = 1
        tm.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        tm.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = data
        tm.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = (
            rf"Length of values \({len(data[:5])}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=xpr):
            df["B"] = data[:5]

    def test_setitem_tuple_index(self, data):
        ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=ser.index)
        ser[(0, 0)] = data[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_slice(self, data, box_in_series):
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[:3] = data[0]
        tm.assert_equal(arr, expected)

    def test_setitem_loc_iloc_slice(self, data):
        arr = data[:5].copy()
        s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)

        result = s.copy()
        result.iloc[:3] = data[0]
        tm.assert_equal(result, expected)

        result = s.copy()
        result.loc[:"c"] = data[0]
        tm.assert_equal(result, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        tm.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]

    def test_setitem_preserves_views(self, data):
        # GH#28150 setitem shouldn't swap the underlying data
        view1 = data.view()
        view2 = data[:]

        data[0] = data[1]
        assert view1[0] == data[1]
        assert view2[0] == data[1]

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        df = expected = pd.DataFrame({0: pd.Series(data)})
        result = pd.DataFrame(index=df.index)

        key = full_indexer(df)
        result.loc[key, 0] = df[0]

        tm.assert_frame_equal(result, expected)

    def test_setitem_with_expansion_row(self, data, na_value):
        df = pd.DataFrame({"data": data[:1]})

        df.loc[1, "data"] = data[1]
        expected = pd.DataFrame({"data": data[:2]})
        tm.assert_frame_equal(df, expected)

        # https://github.com/pandas-dev/pandas/issues/47284
        df.loc[2, "data"] = na_value
        expected = pd.DataFrame(
            {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_series(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        ser = pd.Series(data, name="data")
        result = pd.Series(index=ser.index, dtype=object, name="data")

        # because result has object dtype, the attempt to do setting inplace
        # is successful, and object dtype is retained
        key = full_indexer(ser)
        result.loc[key] = ser

        expected = pd.Series(
            data.astype(object), index=ser.index, name="data", dtype=object
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_frame_2d_values(self, data):
        # GH#44514
        df = pd.DataFrame({"A": data})

        # Avoiding using_array_manager fixture
        # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
        using_copy_on_write = pd.options.mode.copy_on_write

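        # keep a reference to the underlying array so we can check below
        # (GH#33457) that the assignments happened in place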
        blk_data = df._mgr.arrays[0]

        orig = df.copy()

        df.iloc[:] = df.copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:-1] = df.iloc[:-1].copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:] = df.values
        tm.assert_frame_equal(df, orig)
        if not using_array_manager and not using_copy_on_write:
            # GH#33457 Check that this setting occurred in-place
            # FIXME(ArrayManager): this should work there too
            assert df._mgr.arrays[0] is blk_data

        df.iloc[:-1] = df.values[:-1]
        tm.assert_frame_equal(df, orig)

    def test_delitem_series(self, data):
        # GH#40763
        ser = pd.Series(data, name="data")

        taker = np.arange(len(ser))
        taker = np.delete(taker, 1)

        expected = ser[taker]
        del ser[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_invalid(self, data, invalid_scalar):
        msg = ""  # messages vary by subclass, so we do not test it
        with pytest.raises((ValueError, TypeError), match=msg):
            data[0] = invalid_scalar

        with pytest.raises((ValueError, TypeError), match=msg):
            data[:] = invalid_scalar

    def test_setitem_2d_values(self, data):
        # GH50085
        original = data.copy()
        df = pd.DataFrame({"a": data, "b": data})
        df.loc[[0, 1], :] = df.loc[[1, 0], :].values
        assert (df.loc[0, :] == original[1]).all()
        assert (df.loc[1, :] == original[0]).all()