This commit is contained in:
2024-12-04 13:35:57 +05:00
parent d346bf4b2a
commit 73ce681a55
7059 changed files with 1196501 additions and 0 deletions

View File

@ -0,0 +1,383 @@
from __future__ import annotations
import pytest
import pandas as pd
from pandas import api
import pandas._testing as tm
from pandas.api import (
extensions as api_extensions,
indexers as api_indexers,
interchange as api_interchange,
types as api_types,
typing as api_typing,
)
class Base:
def check(self, namespace, expected, ignored=None):
# see which names are in the namespace, minus optional
# ignored ones
# compare vs the expected
result = sorted(
f for f in dir(namespace) if not f.startswith("__") and f != "annotations"
)
if ignored is not None:
result = sorted(set(result) - set(ignored))
expected = sorted(expected)
tm.assert_almost_equal(result, expected)
class TestPDApi(Base):
# these are optionally imported based on testing
# & need to be ignored
ignored = ["tests", "locale", "conftest", "_version_meson"]
# top-level sub-packages
public_lib = [
"api",
"arrays",
"options",
"test",
"testing",
"errors",
"plotting",
"io",
"tseries",
]
private_lib = ["compat", "core", "pandas", "util", "_built_with_meson"]
# misc
misc = ["IndexSlice", "NaT", "NA"]
# top-level classes
classes = [
"ArrowDtype",
"Categorical",
"CategoricalIndex",
"DataFrame",
"DateOffset",
"DatetimeIndex",
"ExcelFile",
"ExcelWriter",
"Flags",
"Grouper",
"HDFStore",
"Index",
"MultiIndex",
"Period",
"PeriodIndex",
"RangeIndex",
"Series",
"SparseDtype",
"StringDtype",
"Timedelta",
"TimedeltaIndex",
"Timestamp",
"Interval",
"IntervalIndex",
"CategoricalDtype",
"PeriodDtype",
"IntervalDtype",
"DatetimeTZDtype",
"BooleanDtype",
"Int8Dtype",
"Int16Dtype",
"Int32Dtype",
"Int64Dtype",
"UInt8Dtype",
"UInt16Dtype",
"UInt32Dtype",
"UInt64Dtype",
"Float32Dtype",
"Float64Dtype",
"NamedAgg",
]
# these are already deprecated; awaiting removal
deprecated_classes: list[str] = []
# external modules exposed in pandas namespace
modules: list[str] = []
# top-level functions
funcs = [
"array",
"bdate_range",
"concat",
"crosstab",
"cut",
"date_range",
"interval_range",
"eval",
"factorize",
"get_dummies",
"from_dummies",
"infer_freq",
"isna",
"isnull",
"lreshape",
"melt",
"notna",
"notnull",
"offsets",
"merge",
"merge_ordered",
"merge_asof",
"period_range",
"pivot",
"pivot_table",
"qcut",
"show_versions",
"timedelta_range",
"unique",
"value_counts",
"wide_to_long",
]
# top-level option funcs
funcs_option = [
"reset_option",
"describe_option",
"get_option",
"option_context",
"set_option",
"set_eng_float_format",
]
# top-level read_* funcs
funcs_read = [
"read_clipboard",
"read_csv",
"read_excel",
"read_fwf",
"read_gbq",
"read_hdf",
"read_html",
"read_xml",
"read_json",
"read_pickle",
"read_sas",
"read_sql",
"read_sql_query",
"read_sql_table",
"read_stata",
"read_table",
"read_feather",
"read_parquet",
"read_orc",
"read_spss",
]
# top-level json funcs
funcs_json = ["json_normalize"]
# top-level to_* funcs
funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"]
# top-level to deprecate in the future
deprecated_funcs_in_future: list[str] = []
# these are already deprecated; awaiting removal
deprecated_funcs: list[str] = []
# private modules in pandas namespace
private_modules = [
"_config",
"_libs",
"_is_numpy_dev",
"_pandas_datetime_CAPI",
"_pandas_parser_CAPI",
"_testing",
"_typing",
]
if not pd._built_with_meson:
private_modules.append("_version")
def test_api(self):
checkthese = (
self.public_lib
+ self.private_lib
+ self.misc
+ self.modules
+ self.classes
+ self.funcs
+ self.funcs_option
+ self.funcs_read
+ self.funcs_json
+ self.funcs_to
+ self.private_modules
)
self.check(namespace=pd, expected=checkthese, ignored=self.ignored)
def test_api_all(self):
expected = set(
self.public_lib
+ self.misc
+ self.modules
+ self.classes
+ self.funcs
+ self.funcs_option
+ self.funcs_read
+ self.funcs_json
+ self.funcs_to
) - set(self.deprecated_classes)
actual = set(pd.__all__)
extraneous = actual - expected
assert not extraneous
missing = expected - actual
assert not missing
def test_depr(self):
deprecated_list = (
self.deprecated_classes
+ self.deprecated_funcs
+ self.deprecated_funcs_in_future
)
for depr in deprecated_list:
with tm.assert_produces_warning(FutureWarning):
_ = getattr(pd, depr)
class TestApi(Base):
allowed_api_dirs = [
"types",
"extensions",
"indexers",
"interchange",
"typing",
]
allowed_typing = [
"DataFrameGroupBy",
"DatetimeIndexResamplerGroupby",
"Expanding",
"ExpandingGroupby",
"ExponentialMovingWindow",
"ExponentialMovingWindowGroupby",
"JsonReader",
"NaTType",
"NAType",
"PeriodIndexResamplerGroupby",
"Resampler",
"Rolling",
"RollingGroupby",
"SeriesGroupBy",
"StataReader",
"TimedeltaIndexResamplerGroupby",
"TimeGrouper",
"Window",
]
allowed_api_types = [
"is_any_real_numeric_dtype",
"is_array_like",
"is_bool",
"is_bool_dtype",
"is_categorical_dtype",
"is_complex",
"is_complex_dtype",
"is_datetime64_any_dtype",
"is_datetime64_dtype",
"is_datetime64_ns_dtype",
"is_datetime64tz_dtype",
"is_dict_like",
"is_dtype_equal",
"is_extension_array_dtype",
"is_file_like",
"is_float",
"is_float_dtype",
"is_hashable",
"is_int64_dtype",
"is_integer",
"is_integer_dtype",
"is_interval",
"is_interval_dtype",
"is_iterator",
"is_list_like",
"is_named_tuple",
"is_number",
"is_numeric_dtype",
"is_object_dtype",
"is_period_dtype",
"is_re",
"is_re_compilable",
"is_scalar",
"is_signed_integer_dtype",
"is_sparse",
"is_string_dtype",
"is_timedelta64_dtype",
"is_timedelta64_ns_dtype",
"is_unsigned_integer_dtype",
"pandas_dtype",
"infer_dtype",
"union_categoricals",
"CategoricalDtype",
"DatetimeTZDtype",
"IntervalDtype",
"PeriodDtype",
]
allowed_api_interchange = ["from_dataframe", "DataFrame"]
allowed_api_indexers = [
"check_array_indexer",
"BaseIndexer",
"FixedForwardWindowIndexer",
"VariableOffsetWindowIndexer",
]
allowed_api_extensions = [
"no_default",
"ExtensionDtype",
"register_extension_dtype",
"register_dataframe_accessor",
"register_index_accessor",
"register_series_accessor",
"take",
"ExtensionArray",
"ExtensionScalarOpsMixin",
]
def test_api(self):
self.check(api, self.allowed_api_dirs)
def test_api_typing(self):
self.check(api_typing, self.allowed_typing)
def test_api_types(self):
self.check(api_types, self.allowed_api_types)
def test_api_interchange(self):
self.check(api_interchange, self.allowed_api_interchange)
def test_api_indexers(self):
self.check(api_indexers, self.allowed_api_indexers)
def test_api_extensions(self):
self.check(api_extensions, self.allowed_api_extensions)
class TestTesting(Base):
funcs = [
"assert_frame_equal",
"assert_series_equal",
"assert_index_equal",
"assert_extension_array_equal",
]
def test_testing(self):
from pandas import testing
self.check(testing, self.funcs)
def test_util_in_top_level(self):
with pytest.raises(AttributeError, match="foo"):
pd.util.foo
def test_pandas_array_alias():
msg = "PandasArray has been renamed NumpyExtensionArray"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = pd.arrays.PandasArray
assert res is pd.arrays.NumpyExtensionArray

View File

@ -0,0 +1,62 @@
from __future__ import annotations
import pandas._testing as tm
from pandas.api import types
from pandas.tests.api.test_api import Base
class TestTypes(Base):
allowed = [
"is_any_real_numeric_dtype",
"is_bool",
"is_bool_dtype",
"is_categorical_dtype",
"is_complex",
"is_complex_dtype",
"is_datetime64_any_dtype",
"is_datetime64_dtype",
"is_datetime64_ns_dtype",
"is_datetime64tz_dtype",
"is_dtype_equal",
"is_float",
"is_float_dtype",
"is_int64_dtype",
"is_integer",
"is_integer_dtype",
"is_number",
"is_numeric_dtype",
"is_object_dtype",
"is_scalar",
"is_sparse",
"is_string_dtype",
"is_signed_integer_dtype",
"is_timedelta64_dtype",
"is_timedelta64_ns_dtype",
"is_unsigned_integer_dtype",
"is_period_dtype",
"is_interval",
"is_interval_dtype",
"is_re",
"is_re_compilable",
"is_dict_like",
"is_iterator",
"is_file_like",
"is_list_like",
"is_hashable",
"is_array_like",
"is_named_tuple",
"pandas_dtype",
"union_categoricals",
"infer_dtype",
"is_extension_array_dtype",
]
deprecated: list[str] = []
dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"]
def test_types(self):
self.check(types, self.allowed + self.dtypes + self.deprecated)
def test_deprecated_from_api_types(self):
for t in self.deprecated:
with tm.assert_produces_warning(FutureWarning):
getattr(types, t)(1)

View File

@ -0,0 +1,7 @@
from pandas.core.groupby.base import transformation_kernels
# There is no Series.cumcount or DataFrame.cumcount
series_transform_kernels = [
x for x in sorted(transformation_kernels) if x != "cumcount"
]
frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,113 @@
import numpy as np
import pytest
from pandas.compat.numpy import np_version_gte1p25
import pandas as pd
import pandas._testing as tm
def test_agg_relabel():
# GH 26513
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
# simplest case with one column, one func
result = df.agg(foo=("B", "sum"))
expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
tm.assert_frame_equal(result, expected)
# test on same column with different methods
result = df.agg(foo=("B", "sum"), bar=("B", "min"))
expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
tm.assert_frame_equal(result, expected)
def test_agg_relabel_multi_columns_multi_methods():
# GH 26513, test on multiple columns with multiple methods
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
result = df.agg(
foo=("A", "sum"),
bar=("B", "mean"),
cat=("A", "min"),
dat=("B", "max"),
f=("A", "max"),
g=("C", "min"),
)
expected = pd.DataFrame(
{
"A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
"B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
},
index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min")
def test_agg_relabel_partial_functions():
# GH 26513, test on partial, functools or more complex cases
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
msg = "using Series.[mean|min]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
expected = pd.DataFrame(
{"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
)
tm.assert_frame_equal(result, expected)
msg = "using Series.[mean|min|max|sum]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.agg(
foo=("A", min),
bar=("A", np.min),
cat=("B", max),
dat=("C", "min"),
f=("B", np.sum),
kk=("B", lambda x: min(x)),
)
expected = pd.DataFrame(
{
"A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
"B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
"C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
},
index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
)
tm.assert_frame_equal(result, expected)
def test_agg_namedtuple():
# GH 26513
df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
result = df.agg(
foo=pd.NamedAgg("B", "sum"),
bar=pd.NamedAgg("B", "min"),
cat=pd.NamedAgg(column="B", aggfunc="count"),
fft=pd.NamedAgg("B", aggfunc="max"),
)
expected = pd.DataFrame(
{"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
)
tm.assert_frame_equal(result, expected)
result = df.agg(
foo=pd.NamedAgg("A", "min"),
bar=pd.NamedAgg(column="B", aggfunc="max"),
cat=pd.NamedAgg(column="A", aggfunc="max"),
)
expected = pd.DataFrame(
{"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
index=pd.Index(["foo", "bar", "cat"]),
)
tm.assert_frame_equal(result, expected)
def test_reconstruct_func():
# GH 28472, test to ensure reconstruct_func isn't moved;
# This method is used by other libraries (e.g. dask)
result = pd.core.apply.reconstruct_func("min")
expected = (False, "min", None, None)
tm.assert_equal(result, expected)

View File

@ -0,0 +1,264 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.tests.apply.common import frame_transform_kernels
from pandas.tests.frame.common import zip_frames
def unpack_obj(obj, klass, axis):
"""
Helper to ensure we have the right type of object for a test parametrized
over frame_or_series.
"""
if klass is not DataFrame:
obj = obj["A"]
if axis != 0:
pytest.skip(f"Test is only for DataFrame with axis={axis}")
return obj
def test_transform_ufunc(axis, float_frame, frame_or_series):
# GH 35964
obj = unpack_obj(float_frame, frame_or_series, axis)
with np.errstate(all="ignore"):
f_sqrt = np.sqrt(obj)
# ufunc
result = obj.transform(np.sqrt, axis=axis)
expected = f_sqrt
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"ops, names",
[
([np.sqrt], ["sqrt"]),
([np.abs, np.sqrt], ["absolute", "sqrt"]),
(np.array([np.sqrt]), ["sqrt"]),
(np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
],
)
def test_transform_listlike(axis, float_frame, ops, names):
# GH 35964
other_axis = 1 if axis in {0, "index"} else 0
with np.errstate(all="ignore"):
expected = zip_frames([op(float_frame) for op in ops], axis=other_axis)
if axis in {0, "index"}:
expected.columns = MultiIndex.from_product([float_frame.columns, names])
else:
expected.index = MultiIndex.from_product([float_frame.index, names])
result = float_frame.transform(ops, axis=axis)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
obj = unpack_obj(float_frame, frame_or_series, 0)
with pytest.raises(ValueError, match="No transform functions were provided"):
obj.transform(ops)
def test_transform_listlike_func_with_args():
# GH 50624
df = DataFrame({"x": [1, 2, 3]})
def foo1(x, a=1, c=0):
return x + a + c
def foo2(x, b=2, c=0):
return x + b + c
msg = r"foo1\(\) got an unexpected keyword argument 'b'"
with pytest.raises(TypeError, match=msg):
df.transform([foo1, foo2], 0, 3, b=3, c=4)
result = df.transform([foo1, foo2], 0, 3, c=4)
expected = DataFrame(
[[8, 8], [9, 9], [10, 10]],
columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
# GH 35964
if axis in (0, "index"):
e = float_frame.columns[0]
expected = float_frame[[e]].transform(np.abs)
else:
e = float_frame.index[0]
expected = float_frame.iloc[[0]].transform(np.abs)
result = float_frame.transform(box({e: np.abs}), axis=axis)
tm.assert_frame_equal(result, expected)
def test_transform_dictlike_mixed():
# GH 40018 - mix of lists and non-lists in values of a dictionary
df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
expected = DataFrame(
[[1.0, 1, 1.0], [2.0, 4, 2.0]],
columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"ops",
[
{},
{"A": []},
{"A": [], "B": "cumsum"},
{"A": "cumsum", "B": []},
{"A": [], "B": ["cumsum"]},
{"A": ["cumsum"], "B": []},
],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
obj = unpack_obj(float_frame, frame_or_series, 0)
with pytest.raises(ValueError, match="No transform functions were provided"):
obj.transform(ops)
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
# GH 35964
obj = unpack_obj(float_frame, frame_or_series, axis)
# transform uses UDF either via apply or passing the entire DataFrame
def func(x):
# transform is using apply iff x is not a DataFrame
if use_apply == isinstance(x, frame_or_series):
# Force transform to fallback
raise ValueError
return x + 1
result = obj.transform(func, axis=axis)
expected = obj + 1
tm.assert_equal(result, expected)
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail]
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
# GH 35964
if op == "ngroup":
request.applymarker(
pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
)
obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms
obj = tm.get_obj(obj, frame_or_series)
error = TypeError
msg = "|".join(
[
"not supported between instances of 'type' and 'type'",
"unsupported operand type",
]
)
with pytest.raises(error, match=msg):
obj.transform(op)
with pytest.raises(error, match=msg):
obj.transform([op])
with pytest.raises(error, match=msg):
obj.transform({"A": op})
with pytest.raises(error, match=msg):
obj.transform({"A": [op]})
@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_failure_typeerror(request, op):
# GH 35964
if op == "ngroup":
request.applymarker(
pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
)
# Using object makes most transform kernels fail
df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
error = TypeError
msg = "|".join(
[
"not supported between instances of 'type' and 'type'",
"unsupported operand type",
]
)
with pytest.raises(error, match=msg):
df.transform([op])
with pytest.raises(error, match=msg):
df.transform({"A": op, "B": op})
with pytest.raises(error, match=msg):
df.transform({"A": [op], "B": [op]})
with pytest.raises(error, match=msg):
df.transform({"A": [op, "shift"], "B": [op]})
def test_transform_failure_valueerror():
# GH 40211
def op(x):
if np.sum(np.sum(x)) < 10:
raise ValueError
return x
df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
msg = "Transform function failed"
with pytest.raises(ValueError, match=msg):
df.transform([op])
with pytest.raises(ValueError, match=msg):
df.transform({"A": op, "B": op})
with pytest.raises(ValueError, match=msg):
df.transform({"A": [op], "B": [op]})
with pytest.raises(ValueError, match=msg):
df.transform({"A": [op, "shift"], "B": [op]})
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
# GH 35964
# transform uses UDF either via apply or passing the entire DataFrame
expected_args = [1, 2]
expected_kwargs = {"c": 3}
def f(x, a, b, c):
# transform is using apply iff x is not a DataFrame
if use_apply == isinstance(x, frame_or_series):
# Force transform to fallback
raise ValueError
assert [a, b] == expected_args
assert c == expected_kwargs["c"]
return x
frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs)
def test_transform_empty_dataframe():
# https://github.com/pandas-dev/pandas/issues/39636
df = DataFrame([], columns=["col1", "col2"])
result = df.transform(lambda x: x + 10)
tm.assert_frame_equal(result, df)
result = df["col1"].transform(lambda x: x + 10)
tm.assert_series_equal(result, df["col1"])

View File

@ -0,0 +1,361 @@
# Tests specifically aimed at detecting bad arguments.
# This file is organized by reason for exception.
# 1. always invalid argument values
# 2. missing column(s)
# 3. incompatible ops/dtype/args/kwargs
# 4. invalid result shape/type
# If your test does not fit into one of these categories, add to this list.
from itertools import chain
import re
import numpy as np
import pytest
from pandas.errors import SpecificationError
from pandas import (
DataFrame,
Series,
date_range,
)
import pandas._testing as tm
@pytest.mark.parametrize("result_type", ["foo", 1])
def test_result_type_error(result_type):
# allowed result_type
df = DataFrame(
np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1,
columns=["A", "B", "C"],
)
msg = (
"invalid value for result_type, must be one of "
"{None, 'reduce', 'broadcast', 'expand'}"
)
with pytest.raises(ValueError, match=msg):
df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
def test_apply_invalid_axis_value():
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
msg = "No axis named 2 for object type DataFrame"
with pytest.raises(ValueError, match=msg):
df.apply(lambda x: x, 2)
def test_agg_raises():
# GH 26513
df = DataFrame({"A": [0, 1], "B": [1, 2]})
msg = "Must provide"
with pytest.raises(TypeError, match=msg):
df.agg()
def test_map_with_invalid_na_action_raises():
# https://github.com/pandas-dev/pandas/issues/32815
s = Series([1, 2, 3])
msg = "na_action must either be 'ignore' or None"
with pytest.raises(ValueError, match=msg):
s.map(lambda x: x, na_action="____")
@pytest.mark.parametrize("input_na_action", ["____", True])
def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action):
# https://github.com/pandas-dev/pandas/issues/46588
s = Series([1, 2, 3])
msg = f"na_action must either be 'ignore' or None, {input_na_action} was passed"
with pytest.raises(ValueError, match=msg):
s.map({1: 2}, na_action=input_na_action)
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
def test_nested_renamer(frame_or_series, method, func):
# GH 35964
obj = frame_or_series({"A": [1]})
match = "nested renamer is not supported"
with pytest.raises(SpecificationError, match=match):
getattr(obj, method)(func)
@pytest.mark.parametrize(
"renamer",
[{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}],
)
def test_series_nested_renamer(renamer):
s = Series(range(6), dtype="int64", name="series")
msg = "nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
s.agg(renamer)
def test_apply_dict_depr():
tsdf = DataFrame(
np.random.default_rng(2).standard_normal((10, 3)),
columns=["A", "B", "C"],
index=date_range("1/1/2000", periods=10),
)
msg = "nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
tsdf.A.agg({"foo": ["sum", "mean"]})
@pytest.mark.parametrize("method", ["agg", "transform"])
def test_dict_nested_renaming_depr(method):
df = DataFrame({"A": range(5), "B": 5})
# nested renaming
msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}})
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
def test_missing_column(method, func):
# GH 40004
obj = DataFrame({"A": [1]})
match = re.escape("Column(s) ['B'] do not exist")
with pytest.raises(KeyError, match=match):
getattr(obj, method)(func)
def test_transform_mixed_column_name_dtypes():
# GH39025
df = DataFrame({"a": ["1"]})
msg = r"Column\(s\) \[1, 'b'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.transform({"a": int, 1: str, "b": int})
@pytest.mark.parametrize(
"how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", 1)]
)
def test_apply_str_axis_1_raises(how, args):
# GH 39211 - some ops don't support axis=1
df = DataFrame({"a": [1, 2], "b": [3, 4]})
msg = f"Operation {how} does not support axis=1"
with pytest.raises(ValueError, match=msg):
df.apply(how, axis=1, args=args)
def test_transform_axis_1_raises():
# GH 35964
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
Series([1]).transform("sum", axis=1)
def test_apply_modify_traceback():
data = DataFrame(
{
"A": [
"foo",
"foo",
"foo",
"foo",
"bar",
"bar",
"bar",
"bar",
"foo",
"foo",
"foo",
],
"B": [
"one",
"one",
"one",
"two",
"one",
"one",
"one",
"two",
"two",
"two",
"one",
],
"C": [
"dull",
"dull",
"shiny",
"dull",
"dull",
"shiny",
"shiny",
"dull",
"shiny",
"shiny",
"shiny",
],
"D": np.random.default_rng(2).standard_normal(11),
"E": np.random.default_rng(2).standard_normal(11),
"F": np.random.default_rng(2).standard_normal(11),
}
)
data.loc[4, "C"] = np.nan
def transform(row):
if row["C"].startswith("shin") and row["A"] == "foo":
row["D"] = 7
return row
msg = "'float' object has no attribute 'startswith'"
with pytest.raises(AttributeError, match=msg):
data.apply(transform, axis=1)
@pytest.mark.parametrize(
"df, func, expected",
tm.get_cython_table_params(
DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
),
)
def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
# GH 21224
if using_infer_string:
import pyarrow as pa
expected = (expected, pa.lib.ArrowNotImplementedError)
msg = "can't multiply sequence by non-int of type 'str'|has no kernel"
warn = None if isinstance(func, str) else FutureWarning
with pytest.raises(expected, match=msg):
with tm.assert_produces_warning(warn, match="using DataFrame.cumprod"):
df.agg(func, axis=axis)
@pytest.mark.parametrize(
"series, func, expected",
chain(
tm.get_cython_table_params(
Series("a b c".split()),
[
("mean", TypeError), # mean raises TypeError
("prod", TypeError),
("std", TypeError),
("var", TypeError),
("median", TypeError),
("cumprod", TypeError),
],
)
),
)
def test_agg_cython_table_raises_series(series, func, expected, using_infer_string):
# GH21224
msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"
if func == "median" or func is np.nanmedian or func is np.median:
msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
if using_infer_string:
import pyarrow as pa
expected = (expected, pa.lib.ArrowNotImplementedError)
msg = msg + "|does not support|has no kernel"
warn = None if isinstance(func, str) else FutureWarning
with pytest.raises(expected, match=msg):
# e.g. Series('a b'.split()).cumprod() will raise
with tm.assert_produces_warning(warn, match="is currently using Series.*"):
series.agg(func)
def test_agg_none_to_type():
# GH 40543
df = DataFrame({"a": [None]})
msg = re.escape("int() argument must be a string")
with pytest.raises(TypeError, match=msg):
df.agg({"a": lambda x: int(x.iloc[0])})
def test_transform_none_to_type():
# GH#34377
df = DataFrame({"a": [None]})
msg = "argument must be a"
with pytest.raises(TypeError, match=msg):
df.transform({"a": lambda x: int(x.iloc[0])})
@pytest.mark.parametrize(
"func",
[
lambda x: np.array([1, 2]).reshape(-1, 2),
lambda x: [1, 2],
lambda x: Series([1, 2]),
],
)
def test_apply_broadcast_error(func):
df = DataFrame(
np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1,
columns=["A", "B", "C"],
)
# > 1 ndim
msg = "too many dims to broadcast|cannot broadcast result"
with pytest.raises(ValueError, match=msg):
df.apply(func, axis=1, result_type="broadcast")
def test_transform_and_agg_err_agg(axis, float_frame):
# cannot both transform and agg
msg = "cannot combine transform and aggregation operations"
with pytest.raises(ValueError, match=msg):
with np.errstate(all="ignore"):
float_frame.agg(["max", "sqrt"], axis=axis)
@pytest.mark.filterwarnings("ignore::FutureWarning") # GH53325
@pytest.mark.parametrize(
"func, msg",
[
(["sqrt", "max"], "cannot combine transform and aggregation"),
(
{"foo": np.sqrt, "bar": "sum"},
"cannot perform both aggregation and transformation",
),
],
)
def test_transform_and_agg_err_series(string_series, func, msg):
# we are trying to transform with an aggregator
with pytest.raises(ValueError, match=msg):
with np.errstate(all="ignore"):
string_series.agg(func)
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])
def test_transform_wont_agg_frame(axis, float_frame, func):
# GH 35964
# cannot both transform and agg
msg = "Function did not transform"
with pytest.raises(ValueError, match=msg):
float_frame.transform(func, axis=axis)
@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]])
def test_transform_wont_agg_series(string_series, func):
# GH 35964
# we are trying to transform with an aggregator
msg = "Function did not transform"
with pytest.raises(ValueError, match=msg):
string_series.transform(func)
@pytest.mark.parametrize(
"op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
# GH 35964
op = op_wrapper(all_reductions)
obj = DataFrame({"A": [1, 2, 3]})
obj = tm.get_obj(obj, frame_or_series)
msg = "Function did not transform"
with pytest.raises(ValueError, match=msg):
obj.transform(op)

View File

@ -0,0 +1,118 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
pytestmark = [td.skip_if_no("numba"), pytest.mark.single_cpu]
@pytest.fixture(params=[0, 1])
def apply_axis(request):
return request.param
def test_numba_vs_python_noop(float_frame, apply_axis):
func = lambda x: x
result = float_frame.apply(func, engine="numba", axis=apply_axis)
expected = float_frame.apply(func, engine="python", axis=apply_axis)
tm.assert_frame_equal(result, expected)
def test_numba_vs_python_string_index():
# GH#56189
pytest.importorskip("pyarrow")
df = DataFrame(
1,
index=Index(["a", "b"], dtype="string[pyarrow_numpy]"),
columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
)
func = lambda x: x
result = df.apply(func, engine="numba", axis=0)
expected = df.apply(func, engine="python", axis=0)
tm.assert_frame_equal(
result, expected, check_column_type=False, check_index_type=False
)
def test_numba_vs_python_indexing():
frame = DataFrame(
{"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},
index=Index(["A", "B", "C"]),
)
row_func = lambda x: x["c"]
result = frame.apply(row_func, engine="numba", axis=1)
expected = frame.apply(row_func, engine="python", axis=1)
tm.assert_series_equal(result, expected)
col_func = lambda x: x["A"]
result = frame.apply(col_func, engine="numba", axis=0)
expected = frame.apply(col_func, engine="python", axis=0)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"reduction",
[lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()],
)
def test_numba_vs_python_reductions(reduction, apply_axis):
df = DataFrame(np.ones((4, 4), dtype=np.float64))
result = df.apply(reduction, engine="numba", axis=apply_axis)
expected = df.apply(reduction, engine="python", axis=apply_axis)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]])
def test_numba_numeric_colnames(colnames):
# Check that numeric column names lower properly and can be indxed on
df = DataFrame(
np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int64), columns=colnames
)
first_col = colnames[0]
f = lambda x: x[first_col] # Get the first column
result = df.apply(f, engine="numba", axis=1)
expected = df.apply(f, engine="python", axis=1)
tm.assert_series_equal(result, expected)
def test_numba_parallel_unsupported(float_frame):
f = lambda x: x
with pytest.raises(
NotImplementedError,
match="Parallel apply is not supported when raw=False and engine='numba'",
):
float_frame.apply(f, engine="numba", engine_kwargs={"parallel": True})
def test_numba_nonunique_unsupported(apply_axis):
f = lambda x: x
df = DataFrame({"a": [1, 2]}, index=Index(["a", "a"]))
with pytest.raises(
NotImplementedError,
match="The index/columns must be unique when raw=False and engine='numba'",
):
df.apply(f, engine="numba", axis=apply_axis)
def test_numba_unsupported_dtypes(apply_axis):
f = lambda x: x
df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
df["c"] = df["c"].astype("double[pyarrow]")
with pytest.raises(
ValueError,
match="Column b must have a numeric dtype. Found 'object|string' instead",
):
df.apply(f, engine="numba", axis=apply_axis)
with pytest.raises(
ValueError,
match="Column c is backed by an extension array, "
"which is not supported by the numba engine.",
):
df["c"].to_frame().apply(f, engine="numba", axis=apply_axis)

View File

@ -0,0 +1,701 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
concat,
date_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.tests.apply.common import series_transform_kernels
@pytest.fixture(params=[False, "compat"])
def by_row(request):
return request.param
def test_series_map_box_timedelta(by_row):
# GH#11349
ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h"))
def f(x):
return x.total_seconds() if by_row else x.dt.total_seconds()
result = ser.apply(f, by_row=by_row)
expected = ser.map(lambda x: x.total_seconds())
tm.assert_series_equal(result, expected)
expected = Series([86401.0, 90001.0, 93601.0])
tm.assert_series_equal(result, expected)
def test_apply(datetime_series, by_row):
result = datetime_series.apply(np.sqrt, by_row=by_row)
with np.errstate(all="ignore"):
expected = np.sqrt(datetime_series)
tm.assert_series_equal(result, expected)
# element-wise apply (ufunc)
result = datetime_series.apply(np.exp, by_row=by_row)
expected = np.exp(datetime_series)
tm.assert_series_equal(result, expected)
# empty series
s = Series(dtype=object, name="foo", index=Index([], name="bar"))
rs = s.apply(lambda x: x, by_row=by_row)
tm.assert_series_equal(s, rs)
# check all metadata (GH 9322)
assert s is not rs
assert s.index is rs.index
assert s.dtype == rs.dtype
assert s.name == rs.name
# index but no data
s = Series(index=[1, 2, 3], dtype=np.float64)
rs = s.apply(lambda x: x, by_row=by_row)
tm.assert_series_equal(s, rs)
def test_apply_map_same_length_inference_bug():
s = Series([1, 2])
def f(x):
return (x, x + 1)
result = s.apply(f, by_row="compat")
expected = s.map(f)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("convert_dtype", [True, False])
def test_apply_convert_dtype_deprecated(convert_dtype):
ser = Series(np.random.default_rng(2).standard_normal(10))
def func(x):
return x if x > 0 else np.nan
with tm.assert_produces_warning(FutureWarning):
ser.apply(func, convert_dtype=convert_dtype, by_row="compat")
def test_apply_args():
s = Series(["foo,bar"])
result = s.apply(str.split, args=(",",))
assert result[0] == ["foo", "bar"]
assert isinstance(result[0], list)
@pytest.mark.parametrize(
"args, kwargs, increment",
[((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
# GH 43357
def f(x, a=0, b=0, c=0):
return x + a + 10 * b + 100 * c
s = Series([1, 2])
msg = (
"in Series.agg cannot aggregate and has been deprecated. "
"Use Series.transform to keep behavior unchanged."
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.agg(f, 0, *args, **kwargs)
expected = s + increment
tm.assert_series_equal(result, expected)
def test_agg_mapping_func_deprecated():
# GH 53325
s = Series([1, 2, 3])
def foo1(x, a=1, c=0):
return x + a + c
def foo2(x, b=2, c=0):
return x + b + c
msg = "using .+ in Series.agg cannot aggregate and"
with tm.assert_produces_warning(FutureWarning, match=msg):
s.agg(foo1, 0, 3, c=4)
with tm.assert_produces_warning(FutureWarning, match=msg):
s.agg([foo1, foo2], 0, 3, c=4)
with tm.assert_produces_warning(FutureWarning, match=msg):
s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
def test_series_apply_map_box_timestamps(by_row):
# GH#2689, GH#2627
ser = Series(date_range("1/1/2000", periods=10))
def func(x):
return (x.hour, x.day, x.month)
if not by_row:
msg = "Series' object has no attribute 'hour'"
with pytest.raises(AttributeError, match=msg):
ser.apply(func, by_row=by_row)
return
result = ser.apply(func, by_row=by_row)
expected = ser.map(func)
tm.assert_series_equal(result, expected)
def test_apply_box_dt64():
# ufunc will not be boxed. Same test cases as the test_map_box
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
ser = Series(vals, dtype="M8[ns]")
assert ser.dtype == "datetime64[ns]"
# boxed value must be Timestamp instance
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
tm.assert_series_equal(res, exp)
def test_apply_box_dt64tz():
vals = [
pd.Timestamp("2011-01-01", tz="US/Eastern"),
pd.Timestamp("2011-01-02", tz="US/Eastern"),
]
ser = Series(vals, dtype="M8[ns, US/Eastern]")
assert ser.dtype == "datetime64[ns, US/Eastern]"
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
tm.assert_series_equal(res, exp)
def test_apply_box_td64():
# timedelta
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
ser = Series(vals)
assert ser.dtype == "timedelta64[ns]"
res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
exp = Series(["Timedelta_1", "Timedelta_2"])
tm.assert_series_equal(res, exp)
def test_apply_box_period():
# period
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
ser = Series(vals)
assert ser.dtype == "Period[M]"
res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat")
exp = Series(["Period_M", "Period_M"])
tm.assert_series_equal(res, exp)
def test_apply_datetimetz(by_row):
values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo")
s = Series(values, name="XX")
result = s.apply(lambda x: x + pd.offsets.Day(), by_row=by_row)
exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(
"Asia/Tokyo"
)
exp = Series(exp_values, name="XX")
tm.assert_series_equal(result, exp)
result = s.apply(lambda x: x.hour if by_row else x.dt.hour, by_row=by_row)
exp = Series(list(range(24)) + [0], name="XX", dtype="int64" if by_row else "int32")
tm.assert_series_equal(result, exp)
# not vectorized
def f(x):
return str(x.tz) if by_row else str(x.dt.tz)
result = s.apply(f, by_row=by_row)
if by_row:
exp = Series(["Asia/Tokyo"] * 25, name="XX")
tm.assert_series_equal(result, exp)
else:
assert result == "Asia/Tokyo"
def test_apply_categorical(by_row, using_infer_string):
values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
ser = Series(values, name="XX", index=list("abcdefg"))
if not by_row:
msg = "Series' object has no attribute 'lower"
with pytest.raises(AttributeError, match=msg):
ser.apply(lambda x: x.lower(), by_row=by_row)
assert ser.apply(lambda x: "A", by_row=by_row) == "A"
return
result = ser.apply(lambda x: x.lower(), by_row=by_row)
# should be categorical dtype when the number of categories are
# the same
values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
exp = Series(values, name="XX", index=list("abcdefg"))
tm.assert_series_equal(result, exp)
tm.assert_categorical_equal(result.values, exp.values)
result = ser.apply(lambda x: "A")
exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
tm.assert_series_equal(result, exp)
assert result.dtype == object if not using_infer_string else "string[pyarrow_numpy]"
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series, by_row):
# GH 20714 bug fixed in: GH 24275
s = Series(series, dtype="category")
if not by_row:
msg = "'Series' object has no attribute 'split'"
with pytest.raises(AttributeError, match=msg):
s.apply(lambda x: x.split("-")[0], by_row=by_row)
return
result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
result = result.astype(object)
expected = Series(["1", "1", np.nan], dtype="category")
expected = expected.astype(object)
tm.assert_series_equal(result, expected)
def test_apply_empty_integer_series_with_datetime_index(by_row):
# GH 21245
s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int)
result = s.apply(lambda x: x, by_row=by_row)
tm.assert_series_equal(result, s)
def test_apply_dataframe_iloc():
uintDF = DataFrame(np.uint64([1, 2, 3, 4, 5]), columns=["Numbers"])
indexDF = DataFrame([2, 3, 2, 1, 2], columns=["Indices"])
def retrieve(targetRow, targetDF):
val = targetDF["Numbers"].iloc[targetRow]
return val
result = indexDF["Indices"].apply(retrieve, args=(uintDF,))
expected = Series([3, 4, 3, 2, 3], name="Indices", dtype="uint64")
tm.assert_series_equal(result, expected)
def test_transform(string_series, by_row):
# transforming functions
with np.errstate(all="ignore"):
f_sqrt = np.sqrt(string_series)
f_abs = np.abs(string_series)
# ufunc
result = string_series.apply(np.sqrt, by_row=by_row)
expected = f_sqrt.copy()
tm.assert_series_equal(result, expected)
# list-like
result = string_series.apply([np.sqrt], by_row=by_row)
expected = f_sqrt.to_frame().copy()
expected.columns = ["sqrt"]
tm.assert_frame_equal(result, expected)
result = string_series.apply(["sqrt"], by_row=by_row)
tm.assert_frame_equal(result, expected)
# multiple items in list
# these are in the order as if we are applying both functions per
# series and then concatting
expected = concat([f_sqrt, f_abs], axis=1)
expected.columns = ["sqrt", "absolute"]
result = string_series.apply([np.sqrt, np.abs], by_row=by_row)
tm.assert_frame_equal(result, expected)
# dict, provide renaming
expected = concat([f_sqrt, f_abs], axis=1)
expected.columns = ["foo", "bar"]
expected = expected.unstack().rename("series")
result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row)
tm.assert_series_equal(result.reindex_like(expected), expected)
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
# GH 35964
if op in ("ffill", "bfill", "pad", "backfill", "shift"):
request.applymarker(
pytest.mark.xfail(reason=f"{op} is successful on any dtype")
)
# Using object makes most transform kernels fail
ser = Series(3 * [object])
if op in ("fillna", "ngroup"):
error = ValueError
msg = "Transform function failed"
else:
error = TypeError
msg = "|".join(
[
"not supported between instances of 'type' and 'type'",
"unsupported operand type",
]
)
with pytest.raises(error, match=msg):
ser.transform([op, "shift"])
with pytest.raises(error, match=msg):
ser.transform({"A": op, "B": "shift"})
with pytest.raises(error, match=msg):
ser.transform({"A": [op], "B": ["shift"]})
with pytest.raises(error, match=msg):
ser.transform({"A": [op, "shift"], "B": [op]})
def test_transform_partial_failure_valueerror():
# GH 40211
def noop(x):
return x
def raising_op(_):
raise ValueError
ser = Series(3 * [object])
msg = "Transform function failed"
with pytest.raises(ValueError, match=msg):
ser.transform([noop, raising_op])
with pytest.raises(ValueError, match=msg):
ser.transform({"A": raising_op, "B": noop})
with pytest.raises(ValueError, match=msg):
ser.transform({"A": [raising_op], "B": [noop]})
with pytest.raises(ValueError, match=msg):
ser.transform({"A": [noop, raising_op], "B": [noop]})
def test_demo():
# demonstration tests
s = Series(range(6), dtype="int64", name="series")
result = s.agg(["min", "max"])
expected = Series([0, 5], index=["min", "max"], name="series")
tm.assert_series_equal(result, expected)
result = s.agg({"foo": "min"})
expected = Series([0], index=["foo"], name="series")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
# test that we are evaluating row-by-row first if by_row="compat"
# else vectorized evaluation
result = string_series.apply(func, by_row=by_row)
if by_row:
expected = string_series.map(func)
tm.assert_series_equal(result, expected)
else:
assert result == str(string_series)
def test_agg_evaluate_lambdas(string_series):
# GH53325
# in the future, the result will be a Series class.
with tm.assert_produces_warning(FutureWarning):
result = string_series.agg(lambda x: type(x))
assert isinstance(result, Series) and len(result) == len(string_series)
with tm.assert_produces_warning(FutureWarning):
result = string_series.agg(type)
assert isinstance(result, Series) and len(result) == len(string_series)
@pytest.mark.parametrize("op_name", ["agg", "apply"])
def test_with_nested_series(datetime_series, op_name):
# GH 2316
# .agg with a reducer and a transform, what to do
msg = "cannot aggregate"
warning = FutureWarning if op_name == "agg" else None
with tm.assert_produces_warning(warning, match=msg):
# GH52123
result = getattr(datetime_series, op_name)(
lambda x: Series([x, x**2], index=["x", "x^2"])
)
expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
tm.assert_frame_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
tm.assert_frame_equal(result, expected)
def test_replicate_describe(string_series):
# this also tests a result set that is all scalars
expected = string_series.describe()
result = string_series.apply(
{
"count": "count",
"mean": "mean",
"std": "std",
"min": "min",
"25%": lambda x: x.quantile(0.25),
"50%": "median",
"75%": lambda x: x.quantile(0.75),
"max": "max",
},
)
tm.assert_series_equal(result, expected)
def test_reduce(string_series):
# reductions with named functions
result = string_series.agg(["sum", "mean"])
expected = Series(
[string_series.sum(), string_series.mean()],
["sum", "mean"],
name=string_series.name,
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"how, kwds",
[("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})],
)
def test_non_callable_aggregates(how, kwds):
# test agg using non-callable series attributes
# GH 39116 - expand to apply
s = Series([1, 2, None])
# Calling agg w/ just a string arg same as calling s.arg
result = getattr(s, how)("size", **kwds)
expected = s.size
assert result == expected
# test when mixed w/ callable reducers
result = getattr(s, how)(["size", "count", "mean"], **kwds)
expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
tm.assert_series_equal(result, expected)
result = getattr(s, how)({"size": "size", "count": "count", "mean": "mean"}, **kwds)
tm.assert_series_equal(result, expected)
def test_series_apply_no_suffix_index(by_row):
# GH36189
s = Series([4] * 3)
result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()], by_row=by_row)
expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"dti,exp",
[
(
Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
),
(
Series(
np.arange(10, dtype=np.float64),
index=date_range("2020-01-01", periods=10),
name="ts",
),
DataFrame(np.repeat([[1, 2]], 10, axis=0), dtype="int64"),
),
],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
# GH 25959
# Calling apply on a localized time series should not cause an error
if aware:
index = dti.tz_localize("UTC").index
else:
index = dti.index
result = Series(index).apply(lambda x: Series([1, 2]))
tm.assert_frame_equal(result, exp)
@pytest.mark.parametrize(
"by_row, expected", [("compat", Series(np.ones(10), dtype="int64")), (False, 1)]
)
def test_apply_scalar_on_date_time_index_aware_series(by_row, expected):
# GH 25959
# Calling apply on a localized time series should not cause an error
series = Series(
np.arange(10, dtype=np.float64),
index=date_range("2020-01-01", periods=10, tz="UTC"),
)
result = Series(series.index).apply(lambda x: 1, by_row=by_row)
tm.assert_equal(result, expected)
def test_apply_to_timedelta(by_row):
list_of_valid_strings = ["00:00:01", "00:00:02"]
a = pd.to_timedelta(list_of_valid_strings)
b = Series(list_of_valid_strings).apply(pd.to_timedelta, by_row=by_row)
tm.assert_series_equal(Series(a), b)
list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
a = pd.to_timedelta(list_of_strings)
ser = Series(list_of_strings)
b = ser.apply(pd.to_timedelta, by_row=by_row)
tm.assert_series_equal(Series(a), b)
@pytest.mark.parametrize(
"ops, names",
[
([np.sum], ["sum"]),
([np.sum, np.mean], ["sum", "mean"]),
(np.array([np.sum]), ["sum"]),
(np.array([np.sum, np.mean]), ["sum", "mean"]),
],
)
@pytest.mark.parametrize(
"how, kwargs",
[["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
# GH 39140
expected = Series({name: op(string_series) for name, op in zip(names, ops)})
expected.name = "series"
warn = FutureWarning if how == "agg" else None
msg = f"using Series.[{'|'.join(names)}]"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(string_series, how)(ops, **kwargs)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ops",
[
{"A": np.sum},
{"A": np.sum, "B": np.mean},
Series({"A": np.sum}),
Series({"A": np.sum, "B": np.mean}),
],
)
@pytest.mark.parametrize(
"how, kwargs",
[["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
# GH 39140
expected = Series({name: op(string_series) for name, op in ops.items()})
expected.name = string_series.name
warn = FutureWarning if how == "agg" else None
msg = "using Series.[sum|mean]"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(string_series, how)(ops, **kwargs)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ops, names",
[
([np.sqrt], ["sqrt"]),
([np.abs, np.sqrt], ["absolute", "sqrt"]),
(np.array([np.sqrt]), ["sqrt"]),
(np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
],
)
def test_apply_listlike_transformer(string_series, ops, names, by_row):
# GH 39140
with np.errstate(all="ignore"):
expected = concat([op(string_series) for op in ops], axis=1)
expected.columns = names
result = string_series.apply(ops, by_row=by_row)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"ops, expected",
[
([lambda x: x], DataFrame({"<lambda>": [1, 2, 3]})),
([lambda x: x.sum()], Series([6], index=["<lambda>"])),
],
)
def test_apply_listlike_lambda(ops, expected, by_row):
# GH53400
ser = Series([1, 2, 3])
result = ser.apply(ops, by_row=by_row)
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"ops",
[
{"A": np.sqrt},
{"A": np.sqrt, "B": np.exp},
Series({"A": np.sqrt}),
Series({"A": np.sqrt, "B": np.exp}),
],
)
def test_apply_dictlike_transformer(string_series, ops, by_row):
# GH 39140
with np.errstate(all="ignore"):
expected = concat({name: op(string_series) for name, op in ops.items()})
expected.name = string_series.name
result = string_series.apply(ops, by_row=by_row)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ops, expected",
[
(
{"a": lambda x: x},
Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])),
),
({"a": lambda x: x.sum()}, Series([6], index=["a"])),
],
)
def test_apply_dictlike_lambda(ops, by_row, expected):
# GH53400
ser = Series([1, 2, 3])
result = ser.apply(ops, by_row=by_row)
tm.assert_equal(result, expected)
def test_apply_retains_column_name(by_row):
# GH 16380
df = DataFrame({"x": range(3)}, Index(range(3), name="x"))
result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y")))
expected = DataFrame(
[[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]],
columns=Index(range(3), name="y"),
index=Index(range(3), name="x"),
)
tm.assert_frame_equal(result, expected)
def test_apply_type():
# GH 46719
s = Series([3, "string", float], index=["a", "b", "c"])
result = s.apply(type)
expected = Series([int, str, type], index=["a", "b", "c"])
tm.assert_series_equal(result, expected)
def test_series_apply_unpack_nested_data():
# GH#55189
ser = Series([[1, 2, 3], [4, 5, 6, 7]])
result = ser.apply(lambda x: Series(x))
expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,39 @@
import pandas as pd
import pandas._testing as tm
def test_relabel_no_duplicated_method():
# this is to test there is no duplicated method used in agg
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
result = df["A"].agg(foo="sum")
expected = df["A"].agg({"foo": "sum"})
tm.assert_series_equal(result, expected)
result = df["B"].agg(foo="min", bar="max")
expected = df["B"].agg({"foo": "min", "bar": "max"})
tm.assert_series_equal(result, expected)
msg = "using Series.[sum|min|max]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["B"].agg(foo=sum, bar=min, cat="max")
msg = "using Series.[sum|min|max]"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
tm.assert_series_equal(result, expected)
def test_relabel_duplicated_method():
# this is to test with nested renaming, duplicated method can be used
# if they are assigned with different new names
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
result = df["A"].agg(foo="sum", bar="sum")
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
tm.assert_series_equal(result, expected)
msg = "using Series.min"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["B"].agg(foo=min, bar="min")
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,84 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
concat,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"args, kwargs, increment",
[((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
# GH 43357
def f(x, a=0, b=0, c=0):
return x + a + 10 * b + 100 * c
s = Series([1, 2])
result = s.transform(f, 0, *args, **kwargs)
expected = s + increment
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ops, names",
[
([np.sqrt], ["sqrt"]),
([np.abs, np.sqrt], ["absolute", "sqrt"]),
(np.array([np.sqrt]), ["sqrt"]),
(np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
],
)
def test_transform_listlike(string_series, ops, names):
# GH 35964
with np.errstate(all="ignore"):
expected = concat([op(string_series) for op in ops], axis=1)
expected.columns = names
result = string_series.transform(ops)
tm.assert_frame_equal(result, expected)
def test_transform_listlike_func_with_args():
# GH 50624
s = Series([1, 2, 3])
def foo1(x, a=1, c=0):
return x + a + c
def foo2(x, b=2, c=0):
return x + b + c
msg = r"foo1\(\) got an unexpected keyword argument 'b'"
with pytest.raises(TypeError, match=msg):
s.transform([foo1, foo2], 0, 3, b=3, c=4)
result = s.transform([foo1, foo2], 0, 3, c=4)
expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
# GH 35964
with np.errstate(all="ignore"):
expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1)
expected.columns = ["foo", "bar"]
result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs}))
tm.assert_frame_equal(result, expected)
def test_transform_dictlike_mixed():
# GH 40018 - mix of lists and non-lists in values of a dictionary
df = Series([1, 4])
result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
expected = DataFrame(
[[1.0, 1, 1.0], [2.0, 4, 2.0]],
columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]),
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,326 @@
from itertools import chain
import operator
import numpy as np
import pytest
from pandas.core.dtypes.common import is_number
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.tests.apply.common import (
frame_transform_kernels,
series_transform_kernels,
)
@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
"args,kwds",
[
pytest.param([], {}, id="no_args_or_kwds"),
pytest.param([1], {}, id="axis_from_args"),
pytest.param([], {"axis": 1}, id="axis_from_kwds"),
pytest.param([], {"numeric_only": True}, id="optional_kwds"),
pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
if len(args) > 1 and how == "agg":
request.applymarker(
pytest.mark.xfail(
raises=TypeError,
reason="agg/apply signature mismatch - agg passes 2nd "
"argument to func",
)
)
result = getattr(float_frame, how)(func, *args, **kwds)
expected = getattr(float_frame, func)(*args, **kwds)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"])
def test_with_string_args(datetime_series, arg):
result = datetime_series.apply(arg)
expected = getattr(datetime_series, arg)()
assert result == expected
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(op, how):
# GH 39116
float_frame = DataFrame({"a": [1, 2], "b": [3, 4]})
result = getattr(float_frame, how)(op)
# pandas ddof defaults to 1, numpy to 0
kwargs = {"ddof": 1} if op in ("std", "var") else {}
expected = Series(
getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
# GH 39116
# float_frame will _usually_ have negative values, which will
# trigger the warning here, but let's put one in just to be sure
float_frame.iloc[0, 0] = -1.0
warn = None
if op in ["log", "sqrt"]:
warn = RuntimeWarning
with tm.assert_produces_warning(warn, check_stacklevel=False):
# float_frame fixture is defined in conftest.py, so we don't check the
# stacklevel as otherwise the test would fail.
result = getattr(float_frame, how)(op)
expected = getattr(np, op)(float_frame)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"series, func, expected",
chain(
tm.get_cython_table_params(
Series(dtype=np.float64),
[
("sum", 0),
("max", np.nan),
("min", np.nan),
("all", True),
("any", False),
("mean", np.nan),
("prod", 1),
("std", np.nan),
("var", np.nan),
("median", np.nan),
],
),
tm.get_cython_table_params(
Series([np.nan, 1, 2, 3]),
[
("sum", 6),
("max", 3),
("min", 1),
("all", True),
("any", True),
("mean", 2),
("prod", 6),
("std", 1),
("var", 1),
("median", 2),
],
),
tm.get_cython_table_params(
Series("a b c".split()),
[
("sum", "abc"),
("max", "c"),
("min", "a"),
("all", True),
("any", True),
],
),
),
)
def test_agg_cython_table_series(series, func, expected):
# GH21224
# test reducing functions in
# pandas.core.base.SelectionMixin._cython_table
warn = None if isinstance(func, str) else FutureWarning
with tm.assert_produces_warning(warn, match="is currently using Series.*"):
result = series.agg(func)
if is_number(expected):
assert np.isclose(result, expected, equal_nan=True)
else:
assert result == expected
@pytest.mark.parametrize(
"series, func, expected",
chain(
tm.get_cython_table_params(
Series(dtype=np.float64),
[
("cumprod", Series([], dtype=np.float64)),
("cumsum", Series([], dtype=np.float64)),
],
),
tm.get_cython_table_params(
Series([np.nan, 1, 2, 3]),
[
("cumprod", Series([np.nan, 1, 2, 6])),
("cumsum", Series([np.nan, 1, 3, 6])),
],
),
tm.get_cython_table_params(
Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
),
),
)
def test_agg_cython_table_transform_series(series, func, expected):
# GH21224
# test transforming functions in
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
warn = None if isinstance(func, str) else FutureWarning
with tm.assert_produces_warning(warn, match="is currently using Series.*"):
result = series.agg(func)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"df, func, expected",
chain(
tm.get_cython_table_params(
DataFrame(),
[
("sum", Series(dtype="float64")),
("max", Series(dtype="float64")),
("min", Series(dtype="float64")),
("all", Series(dtype=bool)),
("any", Series(dtype=bool)),
("mean", Series(dtype="float64")),
("prod", Series(dtype="float64")),
("std", Series(dtype="float64")),
("var", Series(dtype="float64")),
("median", Series(dtype="float64")),
],
),
tm.get_cython_table_params(
DataFrame([[np.nan, 1], [1, 2]]),
[
("sum", Series([1.0, 3])),
("max", Series([1.0, 2])),
("min", Series([1.0, 1])),
("all", Series([True, True])),
("any", Series([True, True])),
("mean", Series([1, 1.5])),
("prod", Series([1.0, 2])),
("std", Series([np.nan, 0.707107])),
("var", Series([np.nan, 0.5])),
("median", Series([1, 1.5])),
],
),
),
)
def test_agg_cython_table_frame(df, func, expected, axis):
# GH 21224
# test reducing functions in
# pandas.core.base.SelectionMixin._cython_table
warn = None if isinstance(func, str) else FutureWarning
with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"):
# GH#53425
result = df.agg(func, axis=axis)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"df, func, expected",
chain(
tm.get_cython_table_params(
DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())]
),
tm.get_cython_table_params(
DataFrame([[np.nan, 1], [1, 2]]),
[
("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
],
),
),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
# GH 21224
# test transforming functions in
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
if axis in ("columns", 1):
# operating blockwise doesn't let us preserve dtypes
expected = expected.astype("float64")
warn = None if isinstance(func, str) else FutureWarning
with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"):
# GH#53425
result = df.agg(func, axis=axis)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(request, string_series, op):
# GH 35964
if op == "ngroup":
request.applymarker(
pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
)
args = [0.0] if op == "fillna" else []
ones = np.ones(string_series.shape[0])
warn = FutureWarning if op == "fillna" else None
msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
expected = string_series.groupby(ones).transform(op, *args)
result = string_series.transform(op, 0, *args)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
if op == "ngroup":
request.applymarker(
pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
)
# GH 35964
args = [0.0] if op == "fillna" else []
if axis in (0, "index"):
ones = np.ones(float_frame.shape[0])
msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
else:
ones = np.ones(float_frame.shape[1])
msg = "DataFrame.groupby with axis=1 is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
gb = float_frame.groupby(ones, axis=axis)
warn = FutureWarning if op == "fillna" else None
op_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=op_msg):
expected = gb.transform(op, *args)
result = float_frame.transform(op, axis, *args)
tm.assert_frame_equal(result, expected)
# same thing, but ensuring we have multiple blocks
assert "E" not in float_frame.columns
float_frame["E"] = float_frame["A"].copy()
assert len(float_frame._mgr.arrays) > 1
if axis in (0, "index"):
ones = np.ones(float_frame.shape[0])
else:
ones = np.ones(float_frame.shape[1])
with tm.assert_produces_warning(FutureWarning, match=msg):
gb2 = float_frame.groupby(ones, axis=axis)
warn = FutureWarning if op == "fillna" else None
op_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=op_msg):
expected2 = gb2.transform(op, *args)
result2 = float_frame.transform(op, axis, *args)
tm.assert_frame_equal(result2, expected2)
@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
# GH 19760
df = DataFrame({"A": [-1, 2]})
result = df.transform(method)
expected = operator.methodcaller(method)(df)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,155 @@
"""
Assertion helpers for arithmetic tests.
"""
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
Series,
array,
)
import pandas._testing as tm
from pandas.core.arrays import (
BooleanArray,
NumpyExtensionArray,
)
def assert_cannot_add(left, right, msg="cannot add"):
"""
Helper to assert that left and right cannot be added.
Parameters
----------
left : object
right : object
msg : str, default "cannot add"
"""
with pytest.raises(TypeError, match=msg):
left + right
with pytest.raises(TypeError, match=msg):
right + left
def assert_invalid_addsub_type(left, right, msg=None):
"""
Helper to assert that left and right can be neither added nor subtracted.
Parameters
----------
left : object
right : object
msg : str or None, default None
"""
with pytest.raises(TypeError, match=msg):
left + right
with pytest.raises(TypeError, match=msg):
right + left
with pytest.raises(TypeError, match=msg):
left - right
with pytest.raises(TypeError, match=msg):
right - left
def get_upcast_box(left, right, is_cmp: bool = False):
"""
Get the box to use for 'expected' in an arithmetic or comparison operation.
Parameters
left : Any
right : Any
is_cmp : bool, default False
Whether the operation is a comparison method.
"""
if isinstance(left, DataFrame) or isinstance(right, DataFrame):
return DataFrame
if isinstance(left, Series) or isinstance(right, Series):
if is_cmp and isinstance(left, Index):
# Index does not defer for comparisons
return np.array
return Series
if isinstance(left, Index) or isinstance(right, Index):
if is_cmp:
return np.array
return Index
return tm.to_array
def assert_invalid_comparison(left, right, box):
"""
Assert that comparison operations with mismatched types behave correctly.
Parameters
----------
left : np.ndarray, ExtensionArray, Index, or Series
right : object
box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
"""
# Not for tznaive-tzaware comparison
# Note: not quite the same as how we do this for tm.box_expected
xbox = box if box not in [Index, array] else np.array
def xbox2(x):
# Eventually we'd like this to be tighter, but for now we'll
# just exclude NumpyExtensionArray[bool]
if isinstance(x, NumpyExtensionArray):
return x._ndarray
if isinstance(x, BooleanArray):
# NB: we are assuming no pd.NAs for now
return x.astype(bool)
return x
# rev_box: box to use for reversed comparisons
rev_box = xbox
if isinstance(right, Index) and isinstance(left, Series):
rev_box = np.array
result = xbox2(left == right)
expected = xbox(np.zeros(result.shape, dtype=np.bool_))
tm.assert_equal(result, expected)
result = xbox2(right == left)
tm.assert_equal(result, rev_box(expected))
result = xbox2(left != right)
tm.assert_equal(result, ~expected)
result = xbox2(right != left)
tm.assert_equal(result, rev_box(~expected))
msg = "|".join(
[
"Invalid comparison between",
"Cannot compare type",
"not supported between",
"invalid type promotion",
(
# GH#36706 npdev 1.20.0 2020-09-28
r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
"For example they cannot be stored in a single array unless the "
"dtype is `object`."
),
]
)
with pytest.raises(TypeError, match=msg):
left < right
with pytest.raises(TypeError, match=msg):
left <= right
with pytest.raises(TypeError, match=msg):
left > right
with pytest.raises(TypeError, match=msg):
left >= right
with pytest.raises(TypeError, match=msg):
right < left
with pytest.raises(TypeError, match=msg):
right <= left
with pytest.raises(TypeError, match=msg):
right > left
with pytest.raises(TypeError, match=msg):
right >= left

View File

@ -0,0 +1,139 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
"""
Several variants of integer value 1. The zero-dim integer array
behaves like an integer.
This fixture can be used to check that datetimelike indexes handle
addition and subtraction of integers and zero-dimensional arrays
of integers.
Examples
--------
dti = pd.date_range('2016-01-01', periods=2, freq='h')
dti
DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'],
dtype='datetime64[ns]', freq='h')
dti + one
DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'],
dtype='datetime64[ns]', freq='h')
"""
return request.param
zeros = [
box_cls([0] * 5, dtype=dtype)
for box_cls in [Index, np.array, pd.array]
for dtype in [np.int64, np.uint64, np.float64]
]
zeros.extend([box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [Index, np.array]])
zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]])
zeros.extend([np.array(-0.0, dtype=np.float64)])
zeros.extend([0, 0.0, -0.0])
@pytest.fixture(params=zeros)
def zero(request):
"""
Several types of scalar zeros and length 5 vectors of zeros.
This fixture can be used to check that numeric-dtype indexes handle
division by any zero numeric-dtype.
Uses vector of length 5 for broadcasting with `numeric_idx` fixture,
which creates numeric-dtype vectors also of length 5.
Examples
--------
arr = RangeIndex(5)
arr / zeros
Index([nan, inf, inf, inf, inf], dtype='float64')
"""
return request.param
# ------------------------------------------------------------------
# Scalar Fixtures
@pytest.fixture(
params=[
pd.Timedelta("10m7s").to_pytimedelta(),
pd.Timedelta("10m7s"),
pd.Timedelta("10m7s").to_timedelta64(),
],
ids=lambda x: type(x).__name__,
)
def scalar_td(request):
"""
Several variants of Timedelta scalars representing 10 minutes and 7 seconds.
"""
return request.param
@pytest.fixture(
params=[
pd.offsets.Day(3),
pd.offsets.Hour(72),
pd.Timedelta(days=3).to_pytimedelta(),
pd.Timedelta("72:00:00"),
np.timedelta64(3, "D"),
np.timedelta64(72, "h"),
],
ids=lambda x: type(x).__name__,
)
def three_days(request):
"""
Several timedelta-like and DateOffset objects that each represent
a 3-day timedelta
"""
return request.param
@pytest.fixture(
params=[
pd.offsets.Hour(2),
pd.offsets.Minute(120),
pd.Timedelta(hours=2).to_pytimedelta(),
pd.Timedelta(seconds=2 * 3600),
np.timedelta64(2, "h"),
np.timedelta64(120, "m"),
],
ids=lambda x: type(x).__name__,
)
def two_hours(request):
"""
Several timedelta-like and DateOffset objects that each represent
a 2-hour timedelta
"""
return request.param
_common_mismatch = [
pd.offsets.YearBegin(2),
pd.offsets.MonthBegin(1),
pd.offsets.Minute(),
]
@pytest.fixture(
params=[
np.timedelta64(4, "h"),
pd.Timedelta(hours=23).to_pytimedelta(),
pd.Timedelta("23:00:00"),
]
+ _common_mismatch
)
def not_daily(request):
"""
Several timedelta-like and DateOffset instances that are _not_
compatible with Daily frequencies.
"""
return request.param

View File

@ -0,0 +1,39 @@
import operator
import numpy as np
import pytest
import pandas._testing as tm
from pandas.core.ops.array_ops import (
comparison_op,
na_logical_op,
)
def test_na_logical_op_2d():
left = np.arange(8).reshape(4, 2)
right = left.astype(object)
right[0, 0] = np.nan
# Check that we fall back to the vec_binop branch
with pytest.raises(TypeError, match="unsupported operand type"):
operator.or_(left, right)
result = na_logical_op(left, right, operator.or_)
expected = right
tm.assert_numpy_array_equal(result, expected)
def test_object_comparison_2d():
left = np.arange(9).reshape(3, 3).astype(object)
right = left.T
result = comparison_op(left, right, operator.eq)
expected = np.eye(3).astype(bool)
tm.assert_numpy_array_equal(result, expected)
# Ensure that cython doesn't raise on non-writeable arg, which
# we can get from np.broadcast_to
right.flags.writeable = False
result = comparison_op(left, right, operator.ne)
tm.assert_numpy_array_equal(result, ~expected)

View File

@ -0,0 +1,25 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
class TestCategoricalComparisons:
def test_categorical_nan_equality(self):
cat = Series(Categorical(["a", "b", "c", np.nan]))
expected = Series([True, True, True, False])
result = cat == cat
tm.assert_series_equal(result, expected)
def test_categorical_tuple_equality(self):
# GH 18050
ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
expected = Series([True, False, True, False, False])
result = ser == (0, 0)
tm.assert_series_equal(result, expected)
result = ser.astype("category") == (0, 0)
tm.assert_series_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,306 @@
import operator
import numpy as np
import pytest
from pandas.core.dtypes.common import is_list_like
import pandas as pd
from pandas import (
Categorical,
Index,
Interval,
IntervalIndex,
Period,
Series,
Timedelta,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
BooleanArray,
IntervalArray,
)
from pandas.tests.arithmetic.common import get_upcast_box
@pytest.fixture(
params=[
(Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
(Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
(
timedelta_range("0 days", periods=3).insert(3, pd.NaT),
timedelta_range("1 day", periods=3).insert(3, pd.NaT),
),
(
date_range("20170101", periods=3).insert(3, pd.NaT),
date_range("20170102", periods=3).insert(3, pd.NaT),
),
(
date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT),
date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT),
),
],
ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
"""
Fixture for building an IntervalArray from various dtypes
"""
return request.param
@pytest.fixture
def interval_array(left_right_dtypes):
"""
Fixture to generate an IntervalArray of various dtypes containing NA if possible
"""
left, right = left_right_dtypes
return IntervalArray.from_arrays(left, right)
def create_categorical_intervals(left, right, closed="right"):
return Categorical(IntervalIndex.from_arrays(left, right, closed))
def create_series_intervals(left, right, closed="right"):
return Series(IntervalArray.from_arrays(left, right, closed))
def create_series_categorical_intervals(left, right, closed="right"):
return Series(Categorical(IntervalIndex.from_arrays(left, right, closed)))
class TestComparison:
@pytest.fixture(params=[operator.eq, operator.ne])
def op(self, request):
return request.param
@pytest.fixture(
params=[
IntervalArray.from_arrays,
IntervalIndex.from_arrays,
create_categorical_intervals,
create_series_intervals,
create_series_categorical_intervals,
],
ids=[
"IntervalArray",
"IntervalIndex",
"Categorical[Interval]",
"Series[Interval]",
"Series[Categorical[Interval]]",
],
)
def interval_constructor(self, request):
"""
Fixture for all pandas native interval constructors.
To be used as the LHS of IntervalArray comparisons.
"""
return request.param
def elementwise_comparison(self, op, interval_array, other):
"""
Helper that performs elementwise comparisons between `array` and `other`
"""
other = other if is_list_like(other) else [other] * len(interval_array)
expected = np.array([op(x, y) for x, y in zip(interval_array, other)])
if isinstance(other, Series):
return Series(expected, index=other.index)
return expected
def test_compare_scalar_interval(self, op, interval_array):
# matches first interval
other = interval_array[0]
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_numpy_array_equal(result, expected)
# matches on a single endpoint but not both
other = Interval(interval_array.left[0], interval_array.right[1])
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_numpy_array_equal(result, expected)
def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
other = Interval(0, 1, closed=other_closed)
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_numpy_array_equal(result, expected)
def test_compare_scalar_na(self, op, interval_array, nulls_fixture, box_with_array):
box = box_with_array
obj = tm.box_expected(interval_array, box)
result = op(obj, nulls_fixture)
if nulls_fixture is pd.NA:
# GH#31882
exp = np.ones(interval_array.shape, dtype=bool)
expected = BooleanArray(exp, exp)
else:
expected = self.elementwise_comparison(op, interval_array, nulls_fixture)
if not (box is Index and nulls_fixture is pd.NA):
# don't cast expected from BooleanArray to ndarray[object]
xbox = get_upcast_box(obj, nulls_fixture, True)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)
rev = op(nulls_fixture, obj)
tm.assert_equal(rev, expected)
@pytest.mark.parametrize(
"other",
[
0,
1.0,
True,
"foo",
Timestamp("2017-01-01"),
Timestamp("2017-01-01", tz="US/Eastern"),
Timedelta("0 days"),
Period("2017-01-01", "D"),
],
)
def test_compare_scalar_other(self, op, interval_array, other):
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_numpy_array_equal(result, expected)
def test_compare_list_like_interval(self, op, interval_array, interval_constructor):
# same endpoints
other = interval_constructor(interval_array.left, interval_array.right)
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_equal(result, expected)
# different endpoints
other = interval_constructor(
interval_array.left[::-1], interval_array.right[::-1]
)
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_equal(result, expected)
# all nan endpoints
other = interval_constructor([np.nan] * 4, [np.nan] * 4)
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_equal(result, expected)
def test_compare_list_like_interval_mixed_closed(
self, op, interval_constructor, closed, other_closed
):
interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
other = interval_constructor(range(2), range(1, 3), closed=other_closed)
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"other",
[
(
Interval(0, 1),
Interval(Timedelta("1 day"), Timedelta("2 days")),
Interval(4, 5, "both"),
Interval(10, 20, "neither"),
),
(0, 1.5, Timestamp("20170103"), np.nan),
(
Timestamp("20170102", tz="US/Eastern"),
Timedelta("2 days"),
"baz",
pd.NaT,
),
],
)
def test_compare_list_like_object(self, op, interval_array, other):
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_numpy_array_equal(result, expected)
def test_compare_list_like_nan(self, op, interval_array, nulls_fixture):
other = [nulls_fixture] * 4
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"other",
[
np.arange(4, dtype="int64"),
np.arange(4, dtype="float64"),
date_range("2017-01-01", periods=4),
date_range("2017-01-01", periods=4, tz="US/Eastern"),
timedelta_range("0 days", periods=4),
period_range("2017-01-01", periods=4, freq="D"),
Categorical(list("abab")),
Categorical(date_range("2017-01-01", periods=4)),
pd.array(list("abcd")),
pd.array(["foo", 3.14, None, object()], dtype=object),
],
ids=lambda x: str(x.dtype),
)
def test_compare_list_like_other(self, op, interval_array, other):
result = op(interval_array, other)
expected = self.elementwise_comparison(op, interval_array, other)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("length", [1, 3, 5])
@pytest.mark.parametrize("other_constructor", [IntervalArray, list])
def test_compare_length_mismatch_errors(self, op, other_constructor, length):
interval_array = IntervalArray.from_arrays(range(4), range(1, 5))
other = other_constructor([Interval(0, 1)] * length)
with pytest.raises(ValueError, match="Lengths must match to compare"):
op(interval_array, other)
@pytest.mark.parametrize(
"constructor, expected_type, assert_func",
[
(IntervalIndex, np.array, tm.assert_numpy_array_equal),
(Series, Series, tm.assert_series_equal),
],
)
def test_index_series_compat(self, op, constructor, expected_type, assert_func):
# IntervalIndex/Series that rely on IntervalArray for comparisons
breaks = range(4)
index = constructor(IntervalIndex.from_breaks(breaks))
# scalar comparisons
other = index[0]
result = op(index, other)
expected = expected_type(self.elementwise_comparison(op, index, other))
assert_func(result, expected)
other = breaks[0]
result = op(index, other)
expected = expected_type(self.elementwise_comparison(op, index, other))
assert_func(result, expected)
# list-like comparisons
other = IntervalArray.from_breaks(breaks)
result = op(index, other)
expected = expected_type(self.elementwise_comparison(op, index, other))
assert_func(result, expected)
other = [index[0], breaks[0], "foo"]
result = op(index, other)
expected = expected_type(self.elementwise_comparison(op, index, other))
assert_func(result, expected)
@pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
def test_comparison_operations(self, scalars):
# GH #28981
expected = Series([False, False])
s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
result = s == scalars
tm.assert_series_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,420 @@
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for object dtype
import datetime
from decimal import Decimal
import operator
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Series,
Timestamp,
option_context,
)
import pandas._testing as tm
from pandas.core import ops
# ------------------------------------------------------------------
# Comparisons
class TestObjectComparisons:
def test_comparison_object_numeric_nas(self, comparison_op):
ser = Series(np.random.default_rng(2).standard_normal(10), dtype=object)
shifted = ser.shift(2)
func = comparison_op
result = func(ser, shifted)
expected = func(ser.astype(float), shifted.astype(float))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
def test_object_comparisons(self, infer_string):
with option_context("future.infer_string", infer_string):
ser = Series(["a", "b", np.nan, "c", "a"])
result = ser == "a"
expected = Series([True, False, False, False, True])
tm.assert_series_equal(result, expected)
result = ser < "a"
expected = Series([False, False, False, False, False])
tm.assert_series_equal(result, expected)
result = ser != "a"
expected = -(ser == "a")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", [None, object])
def test_more_na_comparisons(self, dtype):
left = Series(["a", np.nan, "c"], dtype=dtype)
right = Series(["a", np.nan, "d"], dtype=dtype)
result = left == right
expected = Series([True, False, False])
tm.assert_series_equal(result, expected)
result = left != right
expected = Series([False, True, True])
tm.assert_series_equal(result, expected)
result = left == np.nan
expected = Series([False, False, False])
tm.assert_series_equal(result, expected)
result = left != np.nan
expected = Series([True, True, True])
tm.assert_series_equal(result, expected)
# ------------------------------------------------------------------
# Arithmetic
class TestArithmetic:
def test_add_period_to_array_of_offset(self):
# GH#50162
per = pd.Period("2012-1-1", freq="D")
pi = pd.period_range("2012-1-1", periods=10, freq="D")
idx = per - pi
expected = pd.Index([x + per for x in idx], dtype=object)
result = idx + per
tm.assert_index_equal(result, expected)
result = per + idx
tm.assert_index_equal(result, expected)
# TODO: parametrize
def test_pow_ops_object(self):
# GH#22922
# pow is weird with masking & 1, so testing here
a = Series([1, np.nan, 1, np.nan], dtype=object)
b = Series([1, np.nan, np.nan, 1], dtype=object)
result = a**b
expected = Series(a.values**b.values, dtype=object)
tm.assert_series_equal(result, expected)
result = b**a
expected = Series(b.values**a.values, dtype=object)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("op", [operator.add, ops.radd])
@pytest.mark.parametrize("other", ["category", "Int64"])
def test_add_extension_scalar(self, other, box_with_array, op):
# GH#22378
# Check that scalars satisfying is_extension_array_dtype(obj)
# do not incorrectly try to dispatch to an ExtensionArray operation
arr = Series(["a", "b", "c"])
expected = Series([op(x, other) for x in arr])
arr = tm.box_expected(arr, box_with_array)
expected = tm.box_expected(expected, box_with_array)
result = op(arr, other)
tm.assert_equal(result, expected)
def test_objarr_add_str(self, box_with_array):
ser = Series(["x", np.nan, "x"])
expected = Series(["xa", np.nan, "xa"])
ser = tm.box_expected(ser, box_with_array)
expected = tm.box_expected(expected, box_with_array)
result = ser + "a"
tm.assert_equal(result, expected)
def test_objarr_radd_str(self, box_with_array):
ser = Series(["x", np.nan, "x"])
expected = Series(["ax", np.nan, "ax"])
ser = tm.box_expected(ser, box_with_array)
expected = tm.box_expected(expected, box_with_array)
result = "a" + ser
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
[1, 2, 3],
[1.1, 2.2, 3.3],
[Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
["x", "y", 1],
],
)
@pytest.mark.parametrize("dtype", [None, object])
def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
ser = Series(data, dtype=dtype)
ser = tm.box_expected(ser, box_with_array)
msg = "|".join(
[
"can only concatenate str",
"did not contain a loop with signature matching types",
"unsupported operand type",
"must be str",
]
)
with pytest.raises(TypeError, match=msg):
"foo_" + ser
@pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
def test_objarr_add_invalid(self, op, box_with_array):
# invalid ops
box = box_with_array
obj_ser = Series(list("abc"), dtype=object, name="objects")
obj_ser = tm.box_expected(obj_ser, box)
msg = "|".join(
[
"can only concatenate str",
"unsupported operand type",
"must be str",
"has no kernel",
]
)
with pytest.raises(Exception, match=msg):
op(obj_ser, 1)
with pytest.raises(Exception, match=msg):
op(obj_ser, np.array(1, dtype=np.int64))
# TODO: Moved from tests.series.test_operators; needs cleanup
def test_operators_na_handling(self):
ser = Series(["foo", "bar", "baz", np.nan])
result = "prefix_" + ser
expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
tm.assert_series_equal(result, expected)
result = ser + "_suffix"
expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
tm.assert_series_equal(result, expected)
# TODO: parametrize over box
@pytest.mark.parametrize("dtype", [None, object])
def test_series_with_dtype_radd_timedelta(self, dtype):
# note this test is _not_ aimed at timedelta64-dtyped Series
# as of 2.0 we retain object dtype when ser.dtype == object
ser = Series(
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
dtype=dtype,
)
expected = Series(
[pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")],
dtype=dtype,
)
result = pd.Timedelta("3 days") + ser
tm.assert_series_equal(result, expected)
result = ser + pd.Timedelta("3 days")
tm.assert_series_equal(result, expected)
# TODO: cleanup & parametrize over box
def test_mixed_timezone_series_ops_object(self):
# GH#13043
ser = Series(
[
Timestamp("2015-01-01", tz="US/Eastern"),
Timestamp("2015-01-01", tz="Asia/Tokyo"),
],
name="xxx",
)
assert ser.dtype == object
exp = Series(
[
Timestamp("2015-01-02", tz="US/Eastern"),
Timestamp("2015-01-02", tz="Asia/Tokyo"),
],
name="xxx",
)
tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)
# object series & object series
ser2 = Series(
[
Timestamp("2015-01-03", tz="US/Eastern"),
Timestamp("2015-01-05", tz="Asia/Tokyo"),
],
name="xxx",
)
assert ser2.dtype == object
exp = Series(
[pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
)
tm.assert_series_equal(ser2 - ser, exp)
tm.assert_series_equal(ser - ser2, -exp)
ser = Series(
[pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
name="xxx",
dtype=object,
)
assert ser.dtype == object
exp = Series(
[pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
name="xxx",
dtype=object,
)
tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)
# TODO: cleanup & parametrize over box
def test_iadd_preserves_name(self):
# GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
ser = Series([1, 2, 3])
ser.index.name = "foo"
ser.index += 1
assert ser.index.name == "foo"
ser.index -= 1
assert ser.index.name == "foo"
def test_add_string(self):
# from bug report
index = pd.Index(["a", "b", "c"])
index2 = index + "foo"
assert "a" not in index2
assert "afoo" in index2
def test_iadd_string(self):
index = pd.Index(["a", "b", "c"])
# doesn't fail test unless there is a check before `+=`
assert "a" in index
index += "_x"
assert "a_x" in index
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="add doesn't work")
def test_add(self):
index = pd.Index([str(i) for i in range(10)])
expected = pd.Index(index.values * 2)
tm.assert_index_equal(index + index, expected)
tm.assert_index_equal(index + index.tolist(), expected)
tm.assert_index_equal(index.tolist() + index, expected)
# test add and radd
index = pd.Index(list("abc"))
expected = pd.Index(["a1", "b1", "c1"])
tm.assert_index_equal(index + "1", expected)
expected = pd.Index(["1a", "1b", "1c"])
tm.assert_index_equal("1" + index, expected)
def test_sub_fail(self, using_infer_string):
index = pd.Index([str(i) for i in range(10)])
if using_infer_string:
import pyarrow as pa
err = pa.lib.ArrowNotImplementedError
msg = "has no kernel"
else:
err = TypeError
msg = "unsupported operand type|Cannot broadcast"
with pytest.raises(err, match=msg):
index - "a"
with pytest.raises(err, match=msg):
index - index
with pytest.raises(err, match=msg):
index - index.tolist()
with pytest.raises(err, match=msg):
index.tolist() - index
def test_sub_object(self):
# GH#19369
index = pd.Index([Decimal(1), Decimal(2)])
expected = pd.Index([Decimal(0), Decimal(1)])
result = index - Decimal(1)
tm.assert_index_equal(result, expected)
result = index - pd.Index([Decimal(1), Decimal(1)])
tm.assert_index_equal(result, expected)
msg = "unsupported operand type"
with pytest.raises(TypeError, match=msg):
index - "foo"
with pytest.raises(TypeError, match=msg):
index - np.array([2, "foo"], dtype=object)
def test_rsub_object(self, fixed_now_ts):
# GH#19369
index = pd.Index([Decimal(1), Decimal(2)])
expected = pd.Index([Decimal(1), Decimal(0)])
result = Decimal(2) - index
tm.assert_index_equal(result, expected)
result = np.array([Decimal(2), Decimal(2)]) - index
tm.assert_index_equal(result, expected)
msg = "unsupported operand type"
with pytest.raises(TypeError, match=msg):
"foo" - index
with pytest.raises(TypeError, match=msg):
np.array([True, fixed_now_ts]) - index
class MyIndex(pd.Index):
# Simple index subclass that tracks ops calls.
_calls: int
@classmethod
def _simple_new(cls, values, name=None, dtype=None):
result = object.__new__(cls)
result._data = values
result._name = name
result._calls = 0
result._reset_identity()
return result
def __add__(self, other):
self._calls += 1
return self._simple_new(self._data)
def __radd__(self, other):
return self.__add__(other)
@pytest.mark.parametrize(
"other",
[
[datetime.timedelta(1), datetime.timedelta(2)],
[datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
[pd.Period("2000"), pd.Period("2001")],
["a", "b"],
],
ids=["timedelta", "datetime", "period", "object"],
)
def test_index_ops_defer_to_unknown_subclasses(other):
# https://github.com/pandas-dev/pandas/issues/31109
values = np.array(
[datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object
)
a = MyIndex._simple_new(values)
other = pd.Index(other)
result = other + a
assert isinstance(result, MyIndex)
assert a._calls == 1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,139 @@
import operator
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.fixture
def data():
"""Fixture returning boolean array with valid and missing values."""
return pd.array(
[True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
dtype="boolean",
)
@pytest.fixture
def left_array():
"""Fixture returning boolean array with valid and missing values."""
return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
@pytest.fixture
def right_array():
"""Fixture returning boolean array with valid and missing values."""
return pd.array([True, False, None] * 3, dtype="boolean")
# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------
@pytest.mark.parametrize(
"opname, exp",
[
("add", [True, True, None, True, False, None, None, None, None]),
("mul", [True, False, None, False, False, None, None, None, None]),
],
ids=["add", "mul"],
)
def test_add_mul(left_array, right_array, opname, exp):
op = getattr(operator, opname)
result = op(left_array, right_array)
expected = pd.array(exp, dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_sub(left_array, right_array):
msg = (
r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), "
r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\."
)
with pytest.raises(TypeError, match=msg):
left_array - right_array
def test_div(left_array, right_array):
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
# check that we are matching the non-masked Series behavior
pd.Series(left_array._data) / pd.Series(right_array._data)
with pytest.raises(NotImplementedError, match=msg):
left_array / right_array
@pytest.mark.parametrize(
"opname",
[
"floordiv",
"mod",
"pow",
],
)
def test_op_int8(left_array, right_array, opname):
op = getattr(operator, opname)
if opname != "mod":
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
result = op(left_array, right_array)
return
result = op(left_array, right_array)
expected = op(left_array.astype("Int8"), right_array.astype("Int8"))
tm.assert_extension_array_equal(result, expected)
# Test generic characteristics / errors
# -----------------------------------------------------------------------------
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
# invalid ops
if using_infer_string:
import pyarrow as pa
err = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
else:
err = TypeError
op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
# invalid scalars
msg = (
"did not contain a loop with signature matching types|"
"BooleanArray cannot perform the operation|"
"not supported for the input types, and the inputs could not be safely coerced "
"to any supported types according to the casting rule ''safe''"
)
with pytest.raises(TypeError, match=msg):
ops("foo")
msg = "|".join(
[
r"unsupported operand type\(s\) for",
"Concatenation operation is not implemented for NumPy arrays",
"has no kernel",
]
)
with pytest.raises(err, match=msg):
ops(pd.Timestamp("20180101"))
# invalid array-likes
if op not in ("__mul__", "__rmul__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
msg = "|".join(
[
r"unsupported operand type\(s\) for",
"can only concatenate str",
"not all arguments converted during string formatting",
"has no kernel",
"not implemented",
]
)
with pytest.raises(err, match=msg):
ops(pd.Series("foo", index=s.index))

View File

@ -0,0 +1,53 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_astype():
# with missing values
arr = pd.array([True, False, None], dtype="boolean")
with pytest.raises(ValueError, match="cannot convert NA to integer"):
arr.astype("int64")
with pytest.raises(ValueError, match="cannot convert float NaN to"):
arr.astype("bool")
result = arr.astype("float64")
expected = np.array([1, 0, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
result = arr.astype("str")
expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
tm.assert_numpy_array_equal(result, expected)
# no missing values
arr = pd.array([True, False, True], dtype="boolean")
result = arr.astype("int64")
expected = np.array([1, 0, 1], dtype="int64")
tm.assert_numpy_array_equal(result, expected)
result = arr.astype("bool")
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
def test_astype_to_boolean_array():
# astype to BooleanArray
arr = pd.array([True, False, None], dtype="boolean")
result = arr.astype("boolean")
tm.assert_extension_array_equal(result, arr)
result = arr.astype(pd.BooleanDtype())
tm.assert_extension_array_equal(result, arr)
def test_astype_to_integer_array():
# astype to IntegerArray
arr = pd.array([True, False, None], dtype="boolean")
result = arr.astype("Int64")
expected = pd.array([1, 0, None], dtype="Int64")
tm.assert_extension_array_equal(result, expected)

View File

@ -0,0 +1,60 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.tests.arrays.masked_shared import ComparisonOps
@pytest.fixture
def data():
"""Fixture returning boolean array with valid and missing data"""
return pd.array(
[True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
dtype="boolean",
)
@pytest.fixture
def dtype():
"""Fixture returning BooleanDtype"""
return pd.BooleanDtype()
class TestComparisonOps(ComparisonOps):
def test_compare_scalar(self, data, comparison_op):
self._compare_other(data, comparison_op, True)
def test_compare_array(self, data, comparison_op):
other = pd.array([True] * len(data), dtype="boolean")
self._compare_other(data, comparison_op, other)
other = np.array([True] * len(data))
self._compare_other(data, comparison_op, other)
other = pd.Series([True] * len(data))
self._compare_other(data, comparison_op, other)
@pytest.mark.parametrize("other", [True, False, pd.NA])
def test_scalar(self, other, comparison_op, dtype):
ComparisonOps.test_scalar(self, other, comparison_op, dtype)
def test_array(self, comparison_op):
op = comparison_op
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
b = pd.array([True, False, None] * 3, dtype="boolean")
result = op(a, b)
values = op(a._data, b._data)
mask = a._mask | b._mask
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
result[0] = None
tm.assert_extension_array_equal(
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
)
tm.assert_extension_array_equal(
b, pd.array([True, False, None] * 3, dtype="boolean")
)

View File

@ -0,0 +1,325 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.core.arrays.boolean import coerce_to_array
def test_boolean_array_constructor():
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(values, mask)
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
with pytest.raises(TypeError, match="values should be boolean numpy array"):
BooleanArray(values.tolist(), mask)
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
BooleanArray(values, mask.tolist())
with pytest.raises(TypeError, match="values should be boolean numpy array"):
BooleanArray(values.astype(int), mask)
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
BooleanArray(values, None)
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
BooleanArray(values.reshape(1, -1), mask)
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
BooleanArray(values, mask.reshape(1, -1))
def test_boolean_array_constructor_copy():
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(values, mask)
assert result._data is values
assert result._mask is mask
result = BooleanArray(values, mask, copy=True)
assert result._data is not values
assert result._mask is not mask
def test_to_boolean_array():
expected = BooleanArray(
np.array([True, False, True]), np.array([False, False, False])
)
result = pd.array([True, False, True], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([True, False, True]), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
expected = BooleanArray(
np.array([True, False, True]), np.array([False, False, True])
)
result = pd.array([True, False, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_all_none():
expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))
result = pd.array([None, None, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize(
"a, b",
[
([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
([True, np.nan], [True, None]),
([True, pd.NA], [True, None]),
([np.nan, np.nan], [None, None]),
(np.array([np.nan, np.nan], dtype=float), [None, None]),
],
)
def test_to_boolean_array_missing_indicators(a, b):
result = pd.array(a, dtype="boolean")
expected = pd.array(b, dtype="boolean")
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize(
"values",
[
["foo", "bar"],
["1", "2"],
# "foo",
[1, 2],
[1.0, 2.0],
pd.date_range("20130101", periods=2),
np.array(["foo"]),
np.array([1, 2]),
np.array([1.0, 2.0]),
[np.nan, {"a": 1}],
],
)
def test_to_boolean_array_error(values):
# error in converting existing arrays to BooleanArray
msg = "Need to pass bool-like value"
with pytest.raises(TypeError, match=msg):
pd.array(values, dtype="boolean")
def test_to_boolean_array_from_integer_array():
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_from_float_array():
result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_integer_like():
# integers of 0's and 1's
result = pd.array([1, 0, 1, 0], dtype="boolean")
expected = pd.array([True, False, True, False], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
# with missing values
result = pd.array([1, 0, 1, None], dtype="boolean")
expected = pd.array([True, False, True, None], dtype="boolean")
tm.assert_extension_array_equal(result, expected)
def test_coerce_to_array():
# TODO this is currently not public API
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(*coerce_to_array(values, mask=mask))
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
assert result._data is values
assert result._mask is mask
result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
assert result._data is not values
assert result._mask is not mask
# mixed missing from values and mask
values = [True, False, None, False]
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(*coerce_to_array(values, mask=mask))
expected = BooleanArray(
np.array([True, False, True, True]), np.array([False, False, True, True])
)
tm.assert_extension_array_equal(result, expected)
result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
tm.assert_extension_array_equal(result, expected)
result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
tm.assert_extension_array_equal(result, expected)
# raise errors for wrong dimension
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
# passing 2D values is OK as long as no mask
coerce_to_array(values.reshape(1, -1))
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
coerce_to_array(values.reshape(1, -1), mask=mask)
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
coerce_to_array(values, mask=mask.reshape(1, -1))
def test_coerce_to_array_from_boolean_array():
# passing BooleanArray to coerce_to_array
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
arr = BooleanArray(values, mask)
result = BooleanArray(*coerce_to_array(arr))
tm.assert_extension_array_equal(result, arr)
# no copy
assert result._data is arr._data
assert result._mask is arr._mask
result = BooleanArray(*coerce_to_array(arr), copy=True)
tm.assert_extension_array_equal(result, arr)
assert result._data is not arr._data
assert result._mask is not arr._mask
with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
coerce_to_array(arr, mask=mask)
def test_coerce_to_numpy_array():
# with missing values -> object dtype
arr = pd.array([True, False, None], dtype="boolean")
result = np.array(arr)
expected = np.array([True, False, pd.NA], dtype="object")
tm.assert_numpy_array_equal(result, expected)
# also with no missing values -> object dtype
arr = pd.array([True, False, True], dtype="boolean")
result = np.array(arr)
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
# force bool dtype
result = np.array(arr, dtype="bool")
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
# with missing values will raise error
arr = pd.array([True, False, None], dtype="boolean")
msg = (
"cannot convert to 'bool'-dtype NumPy array with missing values. "
"Specify an appropriate 'na_value' for this dtype."
)
with pytest.raises(ValueError, match=msg):
np.array(arr, dtype="bool")
def test_to_boolean_array_from_strings():
result = BooleanArray._from_sequence_of_strings(
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object),
dtype="boolean",
)
expected = BooleanArray(
np.array([True, False, True, True, False, False, False]),
np.array([False, False, False, False, False, False, True]),
)
tm.assert_extension_array_equal(result, expected)
def test_to_boolean_array_from_strings_invalid_string():
with pytest.raises(ValueError, match="cannot be cast"):
BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean")
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy(box):
con = pd.Series if box else pd.array
# default (with or without missing values) -> object dtype
arr = con([True, False, True], dtype="boolean")
result = arr.to_numpy()
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy()
expected = np.array([True, False, pd.NA], dtype="object")
tm.assert_numpy_array_equal(result, expected)
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype="str")
expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
tm.assert_numpy_array_equal(result, expected)
# no missing values -> can convert to bool, otherwise raises
arr = con([True, False, True], dtype="boolean")
result = arr.to_numpy(dtype="bool")
expected = np.array([True, False, True], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
arr = con([True, False, None], dtype="boolean")
with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
result = arr.to_numpy(dtype="bool")
# specify dtype and na_value
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype=object, na_value=None)
expected = np.array([True, False, None], dtype="object")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype=bool, na_value=False)
expected = np.array([True, False, False], dtype="bool")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype="int64", na_value=-99)
expected = np.array([1, 0, -99], dtype="int64")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype="float64", na_value=np.nan)
expected = np.array([1, 0, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
# converting to int or float without specifying na_value raises
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
arr.to_numpy(dtype="int64")
def test_to_numpy_copy():
# to_numpy can be zero-copy if no missing values
arr = pd.array([True, False, True], dtype="boolean")
result = arr.to_numpy(dtype=bool)
result[0] = False
tm.assert_extension_array_equal(
arr, pd.array([False, False, True], dtype="boolean")
)
arr = pd.array([True, False, True], dtype="boolean")
result = arr.to_numpy(dtype=bool, copy=True)
result[0] = False
tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))

View File

@ -0,0 +1,126 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize(
"ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor]
)
def test_ufuncs_binary(ufunc):
# two BooleanArrays
a = pd.array([True, False, None], dtype="boolean")
result = ufunc(a, a)
expected = pd.array(ufunc(a._data, a._data), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_extension_array_equal(result, expected)
s = pd.Series(a)
result = ufunc(s, a)
expected = pd.Series(ufunc(a._data, a._data), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_series_equal(result, expected)
# Boolean with numpy array
arr = np.array([True, True, False])
result = ufunc(a, arr)
expected = pd.array(ufunc(a._data, arr), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_extension_array_equal(result, expected)
result = ufunc(arr, a)
expected = pd.array(ufunc(arr, a._data), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_extension_array_equal(result, expected)
# BooleanArray with scalar
result = ufunc(a, True)
expected = pd.array(ufunc(a._data, True), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_extension_array_equal(result, expected)
result = ufunc(True, a)
expected = pd.array(ufunc(True, a._data), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_extension_array_equal(result, expected)
# not handled types
msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__"
with pytest.raises(TypeError, match=msg):
ufunc(a, "test")
@pytest.mark.parametrize("ufunc", [np.logical_not])
def test_ufuncs_unary(ufunc):
a = pd.array([True, False, None], dtype="boolean")
result = ufunc(a)
expected = pd.array(ufunc(a._data), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_extension_array_equal(result, expected)
ser = pd.Series(a)
result = ufunc(ser)
expected = pd.Series(ufunc(a._data), dtype="boolean")
expected[a._mask] = np.nan
tm.assert_series_equal(result, expected)
def test_ufunc_numeric():
# np.sqrt on np.bool_ returns float16, which we upcast to Float32
# bc we do not have Float16
arr = pd.array([True, False, None], dtype="boolean")
res = np.sqrt(arr)
expected = pd.array([1, 0, None], dtype="Float32")
tm.assert_extension_array_equal(res, expected)
@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
arr = pd.array(values, dtype="boolean")
res = np.add.reduce(arr)
if arr[-1] is pd.NA:
expected = pd.NA
else:
expected = arr._data.sum()
tm.assert_almost_equal(res, expected)
def test_value_counts_na():
arr = pd.array([True, False, pd.NA], dtype="boolean")
result = arr.value_counts(dropna=False)
expected = pd.Series([1, 1, 1], index=arr, dtype="Int64", name="count")
assert expected.index.dtype == arr.dtype
tm.assert_series_equal(result, expected)
result = arr.value_counts(dropna=True)
expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64", name="count")
assert expected.index.dtype == arr.dtype
tm.assert_series_equal(result, expected)
def test_value_counts_with_normalize():
ser = pd.Series([True, False, pd.NA], dtype="boolean")
result = ser.value_counts(normalize=True)
expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64", name="proportion") / 2
assert expected.index.dtype == "boolean"
tm.assert_series_equal(result, expected)
def test_diff():
a = pd.array(
[True, True, False, False, True, None, True, None, False], dtype="boolean"
)
result = pd.core.algorithms.diff(a, 1)
expected = pd.array(
[None, False, True, False, True, None, None, None, None], dtype="boolean"
)
tm.assert_extension_array_equal(result, expected)
ser = pd.Series(a)
result = ser.diff()
expected = pd.Series(expected)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,13 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize("na", [None, np.nan, pd.NA])
def test_setitem_missing_values(na):
arr = pd.array([True, False, None], dtype="boolean")
expected = pd.array([True, None, None], dtype="boolean")
arr[1] = na
tm.assert_extension_array_equal(arr, expected)

View File

@ -0,0 +1,254 @@
import operator
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.core.ops.mask_ops import (
kleene_and,
kleene_or,
kleene_xor,
)
from pandas.tests.extension.base import BaseOpsUtil
class TestLogicalOps(BaseOpsUtil):
def test_numpy_scalars_ok(self, all_logical_operators):
a = pd.array([True, False, None], dtype="boolean")
op = getattr(a, all_logical_operators)
tm.assert_extension_array_equal(op(True), op(np.bool_(True)))
tm.assert_extension_array_equal(op(False), op(np.bool_(False)))
def get_op_from_name(self, op_name):
short_opname = op_name.strip("_")
short_opname = short_opname if "xor" in short_opname else short_opname + "_"
try:
op = getattr(operator, short_opname)
except AttributeError:
# Assume it is the reverse operator
rop = getattr(operator, short_opname[1:])
op = lambda x, y: rop(y, x)
return op
def test_empty_ok(self, all_logical_operators):
a = pd.array([], dtype="boolean")
op_name = all_logical_operators
result = getattr(a, op_name)(True)
tm.assert_extension_array_equal(a, result)
result = getattr(a, op_name)(False)
tm.assert_extension_array_equal(a, result)
result = getattr(a, op_name)(pd.NA)
tm.assert_extension_array_equal(a, result)
@pytest.mark.parametrize(
"other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)]
)
def test_eq_mismatched_type(self, other):
# GH-44499
arr = pd.array([True, False])
result = arr == other
expected = pd.array([False, False])
tm.assert_extension_array_equal(result, expected)
result = arr != other
expected = pd.array([True, True])
tm.assert_extension_array_equal(result, expected)
def test_logical_length_mismatch_raises(self, all_logical_operators):
op_name = all_logical_operators
a = pd.array([True, False, None], dtype="boolean")
msg = "Lengths must match"
with pytest.raises(ValueError, match=msg):
getattr(a, op_name)([True, False])
with pytest.raises(ValueError, match=msg):
getattr(a, op_name)(np.array([True, False]))
with pytest.raises(ValueError, match=msg):
getattr(a, op_name)(pd.array([True, False], dtype="boolean"))
def test_logical_nan_raises(self, all_logical_operators):
op_name = all_logical_operators
a = pd.array([True, False, None], dtype="boolean")
msg = "Got float instead"
with pytest.raises(TypeError, match=msg):
getattr(a, op_name)(np.nan)
@pytest.mark.parametrize("other", ["a", 1])
def test_non_bool_or_na_other_raises(self, other, all_logical_operators):
a = pd.array([True, False], dtype="boolean")
with pytest.raises(TypeError, match=str(type(other).__name__)):
getattr(a, all_logical_operators)(other)
def test_kleene_or(self):
# A clear test of behavior.
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
b = pd.array([True, False, None] * 3, dtype="boolean")
result = a | b
expected = pd.array(
[True, True, True, True, False, None, True, None, None], dtype="boolean"
)
tm.assert_extension_array_equal(result, expected)
result = b | a
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
tm.assert_extension_array_equal(
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
)
tm.assert_extension_array_equal(
b, pd.array([True, False, None] * 3, dtype="boolean")
)
@pytest.mark.parametrize(
"other, expected",
[
(pd.NA, [True, None, None]),
(True, [True, True, True]),
(np.bool_(True), [True, True, True]),
(False, [True, False, None]),
(np.bool_(False), [True, False, None]),
],
)
def test_kleene_or_scalar(self, other, expected):
# TODO: test True & False
a = pd.array([True, False, None], dtype="boolean")
result = a | other
expected = pd.array(expected, dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = other | a
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
tm.assert_extension_array_equal(
a, pd.array([True, False, None], dtype="boolean")
)
def test_kleene_and(self):
# A clear test of behavior.
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
b = pd.array([True, False, None] * 3, dtype="boolean")
result = a & b
expected = pd.array(
[True, False, None, False, False, False, None, False, None], dtype="boolean"
)
tm.assert_extension_array_equal(result, expected)
result = b & a
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
tm.assert_extension_array_equal(
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
)
tm.assert_extension_array_equal(
b, pd.array([True, False, None] * 3, dtype="boolean")
)
@pytest.mark.parametrize(
"other, expected",
[
(pd.NA, [None, False, None]),
(True, [True, False, None]),
(False, [False, False, False]),
(np.bool_(True), [True, False, None]),
(np.bool_(False), [False, False, False]),
],
)
def test_kleene_and_scalar(self, other, expected):
a = pd.array([True, False, None], dtype="boolean")
result = a & other
expected = pd.array(expected, dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = other & a
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
tm.assert_extension_array_equal(
a, pd.array([True, False, None], dtype="boolean")
)
def test_kleene_xor(self):
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
b = pd.array([True, False, None] * 3, dtype="boolean")
result = a ^ b
expected = pd.array(
[False, True, None, True, False, None, None, None, None], dtype="boolean"
)
tm.assert_extension_array_equal(result, expected)
result = b ^ a
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
tm.assert_extension_array_equal(
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
)
tm.assert_extension_array_equal(
b, pd.array([True, False, None] * 3, dtype="boolean")
)
@pytest.mark.parametrize(
"other, expected",
[
(pd.NA, [None, None, None]),
(True, [False, True, None]),
(np.bool_(True), [False, True, None]),
(np.bool_(False), [True, False, None]),
],
)
def test_kleene_xor_scalar(self, other, expected):
a = pd.array([True, False, None], dtype="boolean")
result = a ^ other
expected = pd.array(expected, dtype="boolean")
tm.assert_extension_array_equal(result, expected)
result = other ^ a
tm.assert_extension_array_equal(result, expected)
# ensure we haven't mutated anything inplace
tm.assert_extension_array_equal(
a, pd.array([True, False, None], dtype="boolean")
)
@pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3])
def test_no_masked_assumptions(self, other, all_logical_operators):
# The logical operations should not assume that masked values are False!
a = pd.arrays.BooleanArray(
np.array([True, True, True, False, False, False, True, False, True]),
np.array([False] * 6 + [True, True, True]),
)
b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
if isinstance(other, list):
other = pd.array(other, dtype="boolean")
result = getattr(a, all_logical_operators)(other)
expected = getattr(b, all_logical_operators)(other)
tm.assert_extension_array_equal(result, expected)
if isinstance(other, BooleanArray):
other._data[other._mask] = True
a._data[a._mask] = False
result = getattr(a, all_logical_operators)(other)
expected = getattr(b, all_logical_operators)(other)
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
def test_error_both_scalar(operation):
msg = r"Either `left` or `right` need to be a np\.ndarray."
with pytest.raises(TypeError, match=msg):
# masks need to be non-None, otherwise it ends up in an infinite recursion
operation(True, True, np.zeros(1), np.zeros(1))

View File

@ -0,0 +1,27 @@
import pandas as pd
import pandas._testing as tm
class TestUnaryOps:
def test_invert(self):
a = pd.array([True, False, None], dtype="boolean")
expected = pd.array([False, True, None], dtype="boolean")
tm.assert_extension_array_equal(~a, expected)
expected = pd.Series(expected, index=["a", "b", "c"], name="name")
result = ~pd.Series(a, index=["a", "b", "c"], name="name")
tm.assert_series_equal(result, expected)
df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"])
result = ~df
expected = pd.DataFrame(
{"A": expected, "B": [False, True, True]}, index=["a", "b", "c"]
)
tm.assert_frame_equal(result, expected)
def test_abs(self):
# matching numpy behavior, abs is the identity function
arr = pd.array([True, False, None], dtype="boolean")
result = abs(arr)
tm.assert_extension_array_equal(result, arr)

View File

@ -0,0 +1,62 @@
import numpy as np
import pytest
import pandas as pd
@pytest.fixture
def data():
"""Fixture returning boolean array, with valid and missing values."""
return pd.array(
[True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
dtype="boolean",
)
@pytest.mark.parametrize(
"values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
[
([True, pd.NA], True, True, True, pd.NA),
([False, pd.NA], False, False, pd.NA, False),
([pd.NA], False, True, pd.NA, pd.NA),
([], False, True, False, True),
# GH-33253: all True / all False values buggy with skipna=False
([True, True], True, True, True, True),
([False, False], False, False, False, False),
],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
# the methods return numpy scalars
exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
for con in [pd.array, pd.Series]:
a = con(values, dtype="boolean")
assert a.any() is exp_any
assert a.all() is exp_all
assert a.any(skipna=False) is exp_any_noskip
assert a.all(skipna=False) is exp_all_noskip
assert np.any(a.any()) is exp_any
assert np.all(a.all()) is exp_all
@pytest.mark.parametrize("dropna", [True, False])
def test_reductions_return_types(dropna, data, all_numeric_reductions):
op = all_numeric_reductions
s = pd.Series(data)
if dropna:
s = s.dropna()
if op in ("sum", "prod"):
assert isinstance(getattr(s, op)(), np.int_)
elif op == "count":
# Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
assert isinstance(getattr(s, op)(), np.integer)
elif op in ("min", "max"):
assert isinstance(getattr(s, op)(), np.bool_)
else:
# "mean", "std", "var", "median", "kurt", "skew"
assert isinstance(getattr(s, op)(), np.float64)

View File

@ -0,0 +1,13 @@
import pandas as pd
def test_repr():
df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
expected = " A\n0 True\n1 False\n2 <NA>"
assert repr(df) == expected
expected = "0 True\n1 False\n2 <NA>\nName: A, dtype: boolean"
assert repr(df.A) == expected
expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
assert repr(df.A.array) == expected

Some files were not shown because too many files have changed in this diff Show More