venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
from pandas.core.groupby.base import transformation_kernels
|
||||
|
||||
# Neither Series nor DataFrame has a ``cumcount`` method, so that kernel is
# dropped from both lists. Each list holds the remaining kernel names in
# sorted order.
series_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel != "cumcount"
)
frame_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel != "cumcount"
)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,113 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_agg_relabel():
    """DataFrame.agg with keyword (column, func) pairs labels rows by keyword."""
    # GH 26513
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # simplest case with one column, one func
    single = frame.agg(foo=("B", "sum"))
    tm.assert_frame_equal(single, pd.DataFrame({"B": [10]}, index=pd.Index(["foo"])))

    # test on same column with different methods
    double = frame.agg(foo=("B", "sum"), bar=("B", "min"))
    tm.assert_frame_equal(
        double, pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
    )
|
||||
|
||||
|
||||
def test_agg_relabel_multi_columns_multi_methods():
    """Relabelled aggregation over several columns leaves NaN in unrelated cells."""
    # GH 26513, test on multiple columns with multiple methods
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # Keyword order determines the row order of the result.
    named_aggs = {
        "foo": ("A", "sum"),
        "bar": ("B", "mean"),
        "cat": ("A", "min"),
        "dat": ("B", "max"),
        "f": ("A", "max"),
        "g": ("C", "min"),
    }
    result = frame.agg(**named_aggs)

    nan = np.nan
    expected = pd.DataFrame(
        {
            "A": [6.0, nan, 1.0, nan, 2.0, nan],
            "B": [nan, 2.5, nan, 4.0, nan, nan],
            "C": [nan, nan, nan, nan, nan, 3.0],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min")
def test_agg_relabel_partial_functions():
    """
    Relabelled aggregation accepts builtins, NumPy callables, strings and lambdas.

    Each ``agg`` call is expected to emit a FutureWarning whose message names
    the Series method being used in place of the callable.
    """
    # GH 26513, test on partial, functools or more complex cases
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # Use a group with alternation; "[mean|min]" would be a character class
    # matching a single character rather than one of the method names.
    msg = r"using Series\.(mean|min)"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
    expected = pd.DataFrame(
        {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
    )
    tm.assert_frame_equal(result, expected)

    msg = r"using Series\.(mean|min|max|sum)"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.agg(
            foo=("A", min),
            bar=("A", np.min),
            cat=("B", max),
            dat=("C", "min"),
            f=("B", np.sum),
            kk=("B", lambda x: min(x)),
        )
    expected = pd.DataFrame(
        {
            "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
            "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
            "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_namedtuple():
    """DataFrame.agg accepts pd.NamedAgg values, positional or by keyword."""
    # GH 26513
    frame = pd.DataFrame({"A": [0, 1], "B": [1, 2]})

    # All aggregations target column "B".
    single_column = frame.agg(
        foo=pd.NamedAgg("B", "sum"),
        bar=pd.NamedAgg("B", "min"),
        cat=pd.NamedAgg(column="B", aggfunc="count"),
        fft=pd.NamedAgg("B", aggfunc="max"),
    )
    tm.assert_frame_equal(
        single_column,
        pd.DataFrame(
            {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
        ),
    )

    # Aggregations spread over both columns.
    two_columns = frame.agg(
        foo=pd.NamedAgg("A", "min"),
        bar=pd.NamedAgg(column="B", aggfunc="max"),
        cat=pd.NamedAgg(column="A", aggfunc="max"),
    )
    tm.assert_frame_equal(
        two_columns,
        pd.DataFrame(
            {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
            index=pd.Index(["foo", "bar", "cat"]),
        ),
    )
|
||||
|
||||
|
||||
def test_reconstruct_func():
    """pd.core.apply.reconstruct_func stays importable and returns a 4-tuple."""
    # GH 28472, test to ensure reconstruct_func isn't moved;
    # This method is used by other libraries (e.g. dask)
    outcome = pd.core.apply.reconstruct_func("min")
    tm.assert_equal(outcome, (False, "min", None, None))
|
@ -0,0 +1,264 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import frame_transform_kernels
|
||||
from pandas.tests.frame.common import zip_frames
|
||||
|
||||
|
||||
def unpack_obj(obj, klass, axis):
    """
    Return the object a frame_or_series-parametrized test should operate on.

    When ``klass`` is DataFrame, ``obj`` is returned unchanged. Otherwise
    column "A" is selected, and the test is skipped for any ``axis`` other
    than 0.
    """
    if klass is DataFrame:
        return obj

    column = obj["A"]
    if axis != 0:
        pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return column
|
||||
|
||||
|
||||
def test_transform_ufunc(axis, float_frame, frame_or_series):
    """transform(np.sqrt) gives the same result as calling np.sqrt on the object."""
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # Floating-point warnings are silenced while building the reference value.
    with np.errstate(all="ignore"):
        expected = np.sqrt(obj)

    # ufunc
    tm.assert_equal(obj.transform(np.sqrt, axis=axis), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    """Transforming with a list-like of ufuncs adds a level named after each op."""
    # GH 35964
    along_index = axis in {0, "index"}
    other_axis = 1 if along_index else 0

    with np.errstate(all="ignore"):
        pieces = [op(float_frame) for op in ops]
        expected = zip_frames(pieces, axis=other_axis)

    # The op names form the inner level on the axis opposite to ``axis``.
    if along_index:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])

    result = float_frame.transform(ops, axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    """An empty list-like of functions makes transform raise ValueError."""
    target = unpack_obj(float_frame, frame_or_series, 0)

    expected_msg = "No transform functions were provided"
    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)
|
||||
|
||||
|
||||
def test_transform_listlike_func_with_args():
    """Extra args/kwargs given to transform are forwarded to every listed function."""
    # GH 50624
    frame = DataFrame({"x": [1, 2, 3]})

    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    # foo1 has no ``b`` parameter, so forwarding b=3 to it fails.
    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
    with pytest.raises(TypeError, match=msg):
        frame.transform([foo1, foo2], 0, 3, b=3, c=4)

    result = frame.transform([foo1, foo2], 0, 3, c=4)
    expected_columns = MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")])
    expected = DataFrame([[8, 8], [9, 9], [10, 10]], columns=expected_columns)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    """A dict-like mapping one label to np.abs transforms only that label."""
    # GH 35964
    if axis in (0, "index"):
        # Keyed by the first column label.
        e = float_frame.columns[0]
        subset = float_frame[[e]]
    else:
        # Keyed by the first index label.
        e = float_frame.index[0]
        subset = float_frame.iloc[[0]]
    expected = subset.transform(np.abs)

    result = float_frame.transform(box({e: np.abs}), axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """A dict mixing list and scalar function specs yields MultiIndex columns."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    frame = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    result = frame.transform({"b": ["sqrt", "abs"], "c": "sqrt"})

    expected_columns = MultiIndex(
        [("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]
    )
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=expected_columns,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    """A dict that is empty or has an empty list value makes transform raise."""
    target = unpack_obj(float_frame, frame_or_series, 0)

    expected_msg = "No transform functions were provided"
    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    """transform with a user function returns ``obj + 1`` on either code path."""
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def func(x):
        # transform is using apply iff x is not a DataFrame
        received_whole_object = isinstance(x, frame_or_series)
        if use_apply == received_whole_object:
            # Force transform to fallback
            raise ValueError
        return x + 1

    tm.assert_equal(obj.transform(func, axis=axis), obj + 1)
|
||||
|
||||
|
||||
# Kernels excluded from the "raises" parametrizations below.
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
# Every remaining frame transform kernel, in the order of frame_transform_kernels.
frame_kernels_raise = [
    kernel for kernel in frame_transform_kernels if kernel not in wont_fail
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    """transform raises TypeError on object data for every way of passing ``op``."""
    # GH 35964
    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    obj = DataFrame({"A": 3 * [object]})  # DataFrame that will fail on most transforms
    obj = tm.get_obj(obj, frame_or_series)
    error = TypeError
    msg = "|".join(
        (
            "not supported between instances of 'type' and 'type'",
            "unsupported operand type",
        )
    )

    # Bare op, list, dict of op, dict of list: each form must raise.
    for spec in (op, [op], {"A": op}, {"A": [op]}):
        with pytest.raises(error, match=msg):
            obj.transform(spec)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_failure_typeerror(request, op):
    """List and dict transform specs raise TypeError when one column is object."""
    # GH 35964

    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    # Using object makes most transform kernels fail
    frame = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
    error = TypeError
    msg = "|".join(
        (
            "not supported between instances of 'type' and 'type'",
            "unsupported operand type",
        )
    )

    specs = (
        [op],
        {"A": op, "B": op},
        {"A": [op], "B": [op]},
        {"A": [op, "shift"], "B": [op]},
    )
    for spec in specs:
        with pytest.raises(error, match=msg):
            frame.transform(spec)
|
||||
|
||||
|
||||
def test_transform_failure_valueerror():
    """A ValueError raised inside the function surfaces as "Transform function failed"."""
    # GH 40211
    def op(x):
        # Raises for inputs whose total sum is below 10 (column "A" here).
        if np.sum(np.sum(x)) < 10:
            raise ValueError
        return x

    frame = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
    msg = "Transform function failed"

    specs = (
        [op],
        {"A": op, "B": op},
        {"A": [op], "B": [op]},
        {"A": [op, "shift"], "B": [op]},
    )
    for spec in specs:
        with pytest.raises(ValueError, match=msg):
            frame.transform(spec)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    """Positional and keyword arguments given to transform reach the function."""
    # GH 35964
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        received_whole_object = isinstance(x, frame_or_series)
        if use_apply == received_whole_object:
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    obj = frame_or_series([1])
    obj.transform(f, 0, *expected_args, **expected_kwargs)
|
||||
|
||||
|
||||
def test_transform_empty_dataframe():
    """Transforming an empty frame, or one of its columns, returns an equal object."""
    # https://github.com/pandas-dev/pandas/issues/39636
    empty = DataFrame([], columns=["col1", "col2"])

    frame_result = empty.transform(lambda x: x + 10)
    tm.assert_frame_equal(frame_result, empty)

    series_result = empty["col1"].transform(lambda x: x + 10)
    tm.assert_series_equal(series_result, empty["col1"])
|
@ -0,0 +1,361 @@
|
||||
# Tests specifically aimed at detecting bad arguments.
|
||||
# This file is organized by reason for exception.
|
||||
# 1. always invalid argument values
|
||||
# 2. missing column(s)
|
||||
# 3. incompatible ops/dtype/args/kwargs
|
||||
# 4. invalid result shape/type
|
||||
# If your test does not fit into one of these categories, add to this list.
|
||||
|
||||
from itertools import chain
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SpecificationError
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result_type", ["foo", 1])
def test_result_type_error(result_type):
    """DataFrame.apply rejects a result_type outside the allowed set."""
    # allowed result_type
    values = np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1
    frame = DataFrame(values, columns=["A", "B", "C"])

    msg = (
        "invalid value for result_type, must be one of "
        "{None, 'reduce', 'broadcast', 'expand'}"
    )
    with pytest.raises(ValueError, match=msg):
        frame.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
|
||||
|
||||
|
||||
def test_apply_invalid_axis_value():
    """DataFrame.apply with axis=2 raises ValueError."""
    frame = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
    expected_msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(lambda x: x, 2)
|
||||
|
||||
|
||||
def test_agg_raises():
    """DataFrame.agg called with no arguments raises TypeError."""
    # GH 26513
    frame = DataFrame({"A": [0, 1], "B": [1, 2]})
    expected_msg = "Must provide"

    with pytest.raises(TypeError, match=expected_msg):
        frame.agg()
|
||||
|
||||
|
||||
def test_map_with_invalid_na_action_raises():
    """Series.map with a callable rejects an unrecognised na_action."""
    # https://github.com/pandas-dev/pandas/issues/32815
    ser = Series([1, 2, 3])
    expected_msg = "na_action must either be 'ignore' or None"
    with pytest.raises(ValueError, match=expected_msg):
        ser.map(lambda x: x, na_action="____")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_na_action", ["____", True])
def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action):
    """Series.map with a dict rejects an unrecognised na_action and echoes it."""
    # https://github.com/pandas-dev/pandas/issues/46588
    ser = Series([1, 2, 3])
    expected_msg = (
        f"na_action must either be 'ignore' or None, {input_na_action} was passed"
    )
    with pytest.raises(ValueError, match=expected_msg):
        ser.map({1: 2}, na_action=input_na_action)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
def test_nested_renamer(frame_or_series, method, func):
    """apply/agg/transform raise SpecificationError for a dict nested in a dict."""
    # GH 35964
    obj = frame_or_series({"A": [1]})
    bound_method = getattr(obj, method)
    match = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=match):
        bound_method(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "renamer",
    [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}],
)
def test_series_nested_renamer(renamer):
    """Series.agg raises SpecificationError for a dict whose values are lists."""
    ser = Series(range(6), dtype="int64", name="series")
    expected_msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=expected_msg):
        ser.agg(renamer)
|
||||
|
||||
|
||||
def test_apply_dict_depr():
    """Aggregating a frame column with a dict of lists raises SpecificationError."""
    values = np.random.default_rng(2).standard_normal((10, 3))
    tsdf = DataFrame(
        values,
        columns=["A", "B", "C"],
        index=date_range("1/1/2000", periods=10),
    )
    expected_msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=expected_msg):
        tsdf.A.agg({"foo": ["sum", "mean"]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["agg", "transform"])
def test_dict_nested_renaming_depr(method):
    """DataFrame agg/transform raise SpecificationError for nested renaming dicts."""
    frame = DataFrame({"A": range(5), "B": 5})
    bound_method = getattr(frame, method)

    # nested renaming
    expected_msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=expected_msg):
        bound_method({"A": {"foo": "min"}, "B": {"bar": "max"}})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
def test_missing_column(method, func):
    """A dict keyed by a column that does not exist raises KeyError."""
    # GH 40004
    obj = DataFrame({"A": [1]})
    bound_method = getattr(obj, method)
    # The message contains regex metacharacters, hence the escape.
    match = re.escape("Column(s) ['B'] do not exist")
    with pytest.raises(KeyError, match=match):
        bound_method(func)
|
||||
|
||||
|
||||
def test_transform_mixed_column_name_dtypes():
    """Missing keys of mixed types (int and str) are all listed in the KeyError."""
    # GH39025
    frame = DataFrame({"a": ["1"]})
    expected_msg = r"Column\(s\) \[1, 'b'\] do not exist"
    with pytest.raises(KeyError, match=expected_msg):
        frame.transform({"a": int, 1: str, "b": int})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    # ``args`` is always a tuple, matching what DataFrame.apply documents.
    "how, args",
    [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", (1,))],
)
def test_apply_str_axis_1_raises(how, args):
    """Applying a string method that lacks axis=1 support raises ValueError."""
    # GH 39211 - some ops don't support axis=1
    df = DataFrame({"a": [1, 2], "b": [3, 4]})
    msg = f"Operation {how} does not support axis=1"
    with pytest.raises(ValueError, match=msg):
        df.apply(how, axis=1, args=args)
|
||||
|
||||
|
||||
def test_transform_axis_1_raises():
    """Series.transform with axis=1 raises ValueError."""
    # GH 35964
    ser = Series([1])
    expected_msg = "No axis named 1 for object type Series"
    with pytest.raises(ValueError, match=expected_msg):
        ser.transform("sum", axis=1)
|
||||
|
||||
|
||||
def test_apply_modify_traceback():
    """An AttributeError raised inside a row-wise function propagates out of apply."""
    col_a = [
        "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar", "foo", "foo", "foo",
    ]
    col_b = [
        "one", "one", "one", "two", "one", "one", "one", "two", "two", "two", "one",
    ]
    col_c = [
        "dull", "dull", "shiny", "dull", "dull", "shiny",
        "shiny", "dull", "shiny", "shiny", "shiny",
    ]
    data = DataFrame(
        {
            "A": col_a,
            "B": col_b,
            "C": col_c,
            "D": np.random.default_rng(2).standard_normal(11),
            "E": np.random.default_rng(2).standard_normal(11),
            "F": np.random.default_rng(2).standard_normal(11),
        }
    )

    # A NaN in the string column makes ``startswith`` fail on that row.
    data.loc[4, "C"] = np.nan

    def transform(row):
        if row["C"].startswith("shin") and row["A"] == "foo":
            row["D"] = 7
        return row

    msg = "'float' object has no attribute 'startswith'"
    with pytest.raises(AttributeError, match=msg):
        data.apply(transform, axis=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    tm.get_cython_table_params(
        DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
    ),
)
def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
    """Aggregating a string frame with cumprod raises the expected exception."""
    # GH 21224
    if using_infer_string:
        import pyarrow as pa

        # Also accept the pyarrow error when string inference is on.
        expected = (expected, pa.lib.ArrowNotImplementedError)

    msg = "can't multiply sequence by non-int of type 'str'|has no kernel"
    # Only non-string funcs are expected to emit the FutureWarning.
    warn = None if isinstance(func, str) else FutureWarning
    with pytest.raises(expected, match=msg), tm.assert_produces_warning(
        warn, match="using DataFrame.cumprod"
    ):
        df.agg(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("mean", TypeError),  # mean raises TypeError
                ("prod", TypeError),
                ("std", TypeError),
                ("var", TypeError),
                ("median", TypeError),
                ("cumprod", TypeError),
            ],
        )
    ),
)
def test_agg_cython_table_raises_series(series, func, expected, using_infer_string):
    """Numeric aggregations on a string Series raise the expected exception."""
    # GH21224
    is_median = func == "median" or func is np.nanmedian or func is np.median
    if is_median:
        msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
    else:
        msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"

    if using_infer_string:
        import pyarrow as pa

        # Also accept the pyarrow error when string inference is on.
        expected = (expected, pa.lib.ArrowNotImplementedError)

    msg += "|does not support|has no kernel"
    # Only non-string funcs are expected to emit the FutureWarning.
    warn = None if isinstance(func, str) else FutureWarning

    # e.g. Series('a b'.split()).cumprod() will raise
    with pytest.raises(expected, match=msg), tm.assert_produces_warning(
        warn, match="is currently using Series.*"
    ):
        series.agg(func)
|
||||
|
||||
|
||||
def test_agg_none_to_type():
    """A TypeError from int(None) inside an agg function propagates."""
    # GH 40543
    frame = DataFrame({"a": [None]})
    expected_msg = re.escape("int() argument must be a string")
    with pytest.raises(TypeError, match=expected_msg):
        frame.agg({"a": lambda x: int(x.iloc[0])})
|
||||
|
||||
|
||||
def test_transform_none_to_type():
    """A TypeError from int(None) inside a transform function propagates."""
    # GH#34377
    frame = DataFrame({"a": [None]})
    expected_msg = "argument must be a"
    with pytest.raises(TypeError, match=expected_msg):
        frame.transform({"a": lambda x: int(x.iloc[0])})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: np.array([1, 2]).reshape(-1, 2),
        lambda x: [1, 2],
        lambda x: Series([1, 2]),
    ],
)
def test_apply_broadcast_error(func):
    """result_type="broadcast" raises when the function result cannot be broadcast."""
    values = np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1
    frame = DataFrame(values, columns=["A", "B", "C"])

    # > 1 ndim
    expected_msg = "too many dims to broadcast|cannot broadcast result"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(func, axis=1, result_type="broadcast")
|
||||
|
||||
|
||||
def test_transform_and_agg_err_agg(axis, float_frame):
    """DataFrame.agg raises when a reducer and a transformer are listed together."""
    # cannot both transform and agg
    expected_msg = "cannot combine transform and aggregation operations"
    with pytest.raises(ValueError, match=expected_msg), np.errstate(all="ignore"):
        float_frame.agg(["max", "sqrt"], axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")  # GH53325
@pytest.mark.parametrize(
    "func, msg",
    [
        (["sqrt", "max"], "cannot combine transform and aggregation"),
        (
            {"foo": np.sqrt, "bar": "sum"},
            "cannot perform both aggregation and transformation",
        ),
    ],
)
def test_transform_and_agg_err_series(string_series, func, msg):
    """Series.agg raises when given both a transformer and a reducer."""
    # we are trying to transform with an aggregator
    with pytest.raises(ValueError, match=msg), np.errstate(all="ignore"):
        string_series.agg(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])
def test_transform_wont_agg_frame(axis, float_frame, func):
    """DataFrame.transform raises when the function list contains a reducer."""
    # GH 35964
    # cannot both transform and agg
    expected_msg = "Function did not transform"
    with pytest.raises(ValueError, match=expected_msg):
        float_frame.transform(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]])
def test_transform_wont_agg_series(string_series, func):
    """Series.transform raises when the function list contains a reducer."""
    # GH 35964
    # we are trying to transform with an aggregator
    expected_msg = "Function did not transform"

    with pytest.raises(ValueError, match=expected_msg):
        string_series.transform(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
    """transform raises for a reduction, however the reduction is wrapped."""
    # GH 35964
    wrapped_op = op_wrapper(all_reductions)

    obj = tm.get_obj(DataFrame({"A": [1, 2, 3]}), frame_or_series)

    expected_msg = "Function did not transform"
    with pytest.raises(ValueError, match=expected_msg):
        obj.transform(wrapped_op)
|
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Marks pytest applies to every test in this module. NOTE(review):
# td.skip_if_no("numba") presumably skips the tests when numba is not
# installed - confirm against pandas.util._test_decorators.
pytestmark = [td.skip_if_no("numba"), pytest.mark.single_cpu]
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, 1])
def apply_axis(request):
    """Fixture yielding each axis value (0 and 1) in turn."""
    axis = request.param
    return axis
|
||||
|
||||
|
||||
def test_numba_vs_python_noop(float_frame, apply_axis):
    """The numba and python engines agree for an identity function."""
    func = lambda x: x
    numba_result = float_frame.apply(func, engine="numba", axis=apply_axis)
    python_result = float_frame.apply(func, engine="python", axis=apply_axis)
    tm.assert_frame_equal(numba_result, python_result)
|
||||
|
||||
|
||||
def test_numba_vs_python_string_index():
    """Engines agree on a frame whose index and columns use a pyarrow string dtype."""
    # GH#56189
    pytest.importorskip("pyarrow")
    string_dtype = "string[pyarrow_numpy]"
    df = DataFrame(
        1,
        index=Index(["a", "b"], dtype=string_dtype),
        columns=Index(["x", "y"], dtype=string_dtype),
    )
    func = lambda x: x
    numba_result = df.apply(func, engine="numba", axis=0)
    python_result = df.apply(func, engine="python", axis=0)
    # Index and column dtypes are deliberately not compared.
    tm.assert_frame_equal(
        numba_result, python_result, check_column_type=False, check_index_type=False
    )
|
||||
|
||||
|
||||
def test_numba_vs_python_indexing():
    """Engines agree when the applied function indexes by label on either axis."""
    frame = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},
        index=Index(["A", "B", "C"]),
    )

    # Row-wise: pick column "c" out of each row.
    row_func = lambda x: x["c"]
    numba_rows = frame.apply(row_func, engine="numba", axis=1)
    python_rows = frame.apply(row_func, engine="python", axis=1)
    tm.assert_series_equal(numba_rows, python_rows)

    # Column-wise: pick index label "A" out of each column.
    col_func = lambda x: x["A"]
    numba_cols = frame.apply(col_func, engine="numba", axis=0)
    python_cols = frame.apply(col_func, engine="python", axis=0)
    tm.assert_series_equal(numba_cols, python_cols)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "reduction",
    [lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()],
)
def test_numba_vs_python_reductions(reduction, apply_axis):
    """Engines agree for mean/min/max/sum reductions on a frame of ones."""
    frame = DataFrame(np.ones((4, 4), dtype=np.float64))
    numba_result = frame.apply(reduction, engine="numba", axis=apply_axis)
    python_result = frame.apply(reduction, engine="python", axis=apply_axis)
    tm.assert_series_equal(numba_result, python_result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]])
def test_numba_numeric_colnames(colnames):
    """Engines agree when rows are indexed by an int or float column name."""
    # Check that numeric column names lower properly and can be indexed on
    values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int64)
    frame = DataFrame(values, columns=colnames)
    first_col = colnames[0]
    f = lambda x: x[first_col]  # Get the first column
    numba_result = frame.apply(f, engine="numba", axis=1)
    python_result = frame.apply(f, engine="python", axis=1)
    tm.assert_series_equal(numba_result, python_result)
|
||||
|
||||
|
||||
def test_numba_parallel_unsupported(float_frame):
    """Requesting parallel=True with the numba engine raises NotImplementedError."""
    f = lambda x: x
    expected_msg = "Parallel apply is not supported when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=expected_msg):
        float_frame.apply(f, engine="numba", engine_kwargs={"parallel": True})
|
||||
|
||||
|
||||
def test_numba_nonunique_unsupported(apply_axis):
    """A duplicated index label makes the numba engine raise NotImplementedError."""
    f = lambda x: x
    frame = DataFrame({"a": [1, 2]}, index=Index(["a", "a"]))
    expected_msg = "The index/columns must be unique when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=expected_msg):
        frame.apply(f, engine="numba", axis=apply_axis)
|
||||
|
||||
|
||||
def test_numba_unsupported_dtypes(apply_axis):
    """The numba engine rejects non-numeric and extension-array columns."""
    f = lambda x: x
    df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
    df["c"] = df["c"].astype("double[pyarrow]")

    # Group the alternation so that the whole message has to match; an
    # ungrouped "|" would split the pattern into two independent halves.
    with pytest.raises(
        ValueError,
        match=r"Column b must have a numeric dtype\. Found '(object|string)' instead",
    ):
        df.apply(f, engine="numba", axis=apply_axis)

    with pytest.raises(
        ValueError,
        match="Column c is backed by an extension array, "
        "which is not supported by the numba engine.",
    ):
        df["c"].to_frame().apply(f, engine="numba", axis=apply_axis)
|
@ -0,0 +1,701 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import series_transform_kernels
|
||||
|
||||
|
||||
@pytest.fixture(params=[False, "compat"])
def by_row(request):
    """Fixture yielding each ``by_row`` value (False and "compat") in turn."""
    mode = request.param
    return mode
|
||||
|
||||
|
||||
def test_series_map_box_timedelta(by_row):
    """Series.apply on timedeltas gives total seconds, matching Series.map."""
    # GH#11349
    ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h"))

    def f(x):
        # With by_row truthy ``x`` is treated as one element, else as the Series.
        if by_row:
            return x.total_seconds()
        return x.dt.total_seconds()

    result = ser.apply(f, by_row=by_row)

    mapped = ser.map(lambda x: x.total_seconds())
    tm.assert_series_equal(result, mapped)

    literal = Series([86401.0, 90001.0, 93601.0])
    tm.assert_series_equal(result, literal)
|
||||
|
||||
|
||||
def test_apply(datetime_series, by_row):
    """Series.apply matches direct ufunc calls and preserves empty-series metadata."""
    sqrt_result = datetime_series.apply(np.sqrt, by_row=by_row)
    with np.errstate(all="ignore"):
        sqrt_expected = np.sqrt(datetime_series)
    tm.assert_series_equal(sqrt_result, sqrt_expected)

    # element-wise apply (ufunc)
    exp_result = datetime_series.apply(np.exp, by_row=by_row)
    tm.assert_series_equal(exp_result, np.exp(datetime_series))

    # empty series
    s = Series(dtype=object, name="foo", index=Index([], name="bar"))
    rs = s.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(s, rs)

    # check all metadata (GH 9322)
    assert s is not rs
    assert s.index is rs.index
    assert s.dtype == rs.dtype
    assert s.name == rs.name

    # index but no data
    s = Series(index=[1, 2, 3], dtype=np.float64)
    rs = s.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(s, rs)
|
||||
|
||||
|
||||
def test_apply_map_same_length_inference_bug():
|
||||
s = Series([1, 2])
|
||||
|
||||
def f(x):
|
||||
return (x, x + 1)
|
||||
|
||||
result = s.apply(f, by_row="compat")
|
||||
expected = s.map(f)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("convert_dtype", [True, False])
def test_apply_convert_dtype_deprecated(convert_dtype):
    """Passing convert_dtype to Series.apply emits a FutureWarning."""
    ser = Series(np.random.default_rng(2).standard_normal(10))

    def func(x):
        if x > 0:
            return x
        return np.nan

    with tm.assert_produces_warning(FutureWarning):
        ser.apply(func, convert_dtype=convert_dtype, by_row="compat")
|
||||
|
||||
|
||||
def test_apply_args():
    """Positional ``args`` are forwarded to the applied function."""
    ser = Series(["foo,bar"])

    split = ser.apply(str.split, args=(",",))
    first = split[0]
    assert first == ["foo", "bar"]
    assert isinstance(first, list)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """Extra args/kwargs given to ``Series.agg`` reach the callable."""
    # GH 43357

    def weighted_add(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    expected = ser + increment

    msg = (
        "in Series.agg cannot aggregate and has been deprecated. "
        "Use Series.transform to keep behavior unchanged."
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.agg(weighted_add, 0, *args, **kwargs)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_mapping_func_deprecated():
|
||||
# GH 53325
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
def foo1(x, a=1, c=0):
|
||||
return x + a + c
|
||||
|
||||
def foo2(x, b=2, c=0):
|
||||
return x + b + c
|
||||
|
||||
msg = "using .+ in Series.agg cannot aggregate and"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg(foo1, 0, 3, c=4)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg([foo1, foo2], 0, 3, c=4)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
|
||||
|
||||
|
||||
def test_series_apply_map_box_timestamps(by_row):
    """Row-wise ``apply`` matches ``map``; otherwise the attribute lookup fails."""
    # GH#2689, GH#2627
    ser = Series(date_range("1/1/2000", periods=10))

    def func(x):
        return (x.hour, x.day, x.month)

    if by_row:
        result = ser.apply(func, by_row=by_row)
        tm.assert_series_equal(result, ser.map(func))
    else:
        msg = "Series' object has no attribute 'hour'"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(func, by_row=by_row)
|
||||
|
||||
|
||||
def test_apply_box_dt64():
|
||||
# ufunc will not be boxed. Same test cases as the test_map_box
|
||||
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
|
||||
ser = Series(vals, dtype="M8[ns]")
|
||||
assert ser.dtype == "datetime64[ns]"
|
||||
# boxed value must be Timestamp instance
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
|
||||
exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_dt64tz():
|
||||
vals = [
|
||||
pd.Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-02", tz="US/Eastern"),
|
||||
]
|
||||
ser = Series(vals, dtype="M8[ns, US/Eastern]")
|
||||
assert ser.dtype == "datetime64[ns, US/Eastern]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
|
||||
exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_td64():
|
||||
# timedelta
|
||||
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
|
||||
ser = Series(vals)
|
||||
assert ser.dtype == "timedelta64[ns]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
|
||||
exp = Series(["Timedelta_1", "Timedelta_2"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_period():
|
||||
# period
|
||||
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
|
||||
ser = Series(vals)
|
||||
assert ser.dtype == "Period[M]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat")
|
||||
exp = Series(["Period_M", "Period_M"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_datetimetz(by_row):
    """``Series.apply`` on tz-aware data: day shift, hour extraction and tz name."""
    tz = "Asia/Tokyo"
    values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize(tz)
    ser = Series(values, name="XX")

    shifted = ser.apply(lambda x: x + pd.offsets.Day(), by_row=by_row)
    exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(tz)
    tm.assert_series_equal(shifted, Series(exp_values, name="XX"))

    def get_hour(x):
        return x.hour if by_row else x.dt.hour

    hours = ser.apply(get_hour, by_row=by_row)
    # the row-wise and whole-Series paths yield different integer widths
    hour_dtype = "int64" if by_row else "int32"
    exp_hours = Series([*range(24), 0], name="XX", dtype=hour_dtype)
    tm.assert_series_equal(hours, exp_hours)

    # not vectorized
    def tz_name(x):
        return str(x.tz) if by_row else str(x.dt.tz)

    result = ser.apply(tz_name, by_row=by_row)
    if not by_row:
        assert result == "Asia/Tokyo"
    else:
        tm.assert_series_equal(result, Series(["Asia/Tokyo"] * 25, name="XX"))
|
||||
|
||||
|
||||
def test_apply_categorical(by_row, using_infer_string):
    """Check ``Series.apply`` on ordered categorical data.

    Without row-wise application the function receives the whole Series, so
    a string method call raises and a constant function returns a scalar.
    With row-wise application the result stays categorical when the function
    maps categories one-to-one, and falls back to a plain string dtype for a
    constant function.
    """
    values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(values, name="XX", index=list("abcdefg"))

    if not by_row:
        msg = "Series' object has no attribute 'lower"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(lambda x: x.lower(), by_row=by_row)
        assert ser.apply(lambda x: "A", by_row=by_row) == "A"
        return

    result = ser.apply(lambda x: x.lower(), by_row=by_row)

    # should be categorical dtype when the number of categories are
    # the same
    values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    exp = Series(values, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    tm.assert_categorical_equal(result.values, exp.values)

    result = ser.apply(lambda x: "A")
    exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    # Pick the expected dtype first: a bare conditional expression inside the
    # assert would reduce to asserting a non-empty string when
    # ``using_infer_string`` is set, which can never fail.
    expected_dtype = "string[pyarrow_numpy]" if using_infer_string else object
    assert result.dtype == expected_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series, by_row):
    """Row-wise ``apply`` on categorical data containing NaN keeps the NaN."""
    # GH 20714 bug fixed in: GH 24275
    ser = Series(series, dtype="category")

    def first_part(x):
        return x.split("-")[0]

    if not by_row:
        msg = "'Series' object has no attribute 'split'"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(first_part, by_row=by_row)
        return

    result = ser.apply(first_part, by_row=by_row).astype(object)
    expected = Series(["1", "1", np.nan], dtype="category").astype(object)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_empty_integer_series_with_datetime_index(by_row):
    """An identity ``apply`` on an empty int Series with a datetime index is a no-op."""
    # GH 21245
    empty_index = date_range(start="2018-01-01", periods=0)
    ser = Series([], index=empty_index, dtype=int)

    applied = ser.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(applied, ser)
|
||||
|
||||
|
||||
def test_apply_dataframe_iloc():
    """A DataFrame passed through ``args`` can be indexed per element with ``iloc``."""
    numbers = DataFrame(np.uint64([1, 2, 3, 4, 5]), columns=["Numbers"])
    positions = DataFrame([2, 3, 2, 1, 2], columns=["Indices"])

    def retrieve(targetRow, targetDF):
        return targetDF["Numbers"].iloc[targetRow]

    result = positions["Indices"].apply(retrieve, args=(numbers,))
    expected = Series([3, 4, 3, 2, 3], name="Indices", dtype="uint64")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform(string_series, by_row):
    """``Series.apply`` with transforming functions given as ufunc, list and dict."""
    # transforming functions

    with np.errstate(all="ignore"):
        f_sqrt = np.sqrt(string_series)
        f_abs = np.abs(string_series)

    # ufunc
    result = string_series.apply(np.sqrt, by_row=by_row)
    tm.assert_series_equal(result, f_sqrt.copy())

    # list-like: the callable and its string alias give the same frame
    sqrt_frame = f_sqrt.to_frame().copy()
    sqrt_frame.columns = ["sqrt"]
    for funcs in ([np.sqrt], ["sqrt"]):
        result = string_series.apply(funcs, by_row=by_row)
        tm.assert_frame_equal(result, sqrt_frame)

    # multiple items in list
    # these are in the order as if we are applying both functions per
    # series and then concatting
    both = concat([f_sqrt, f_abs], axis=1)
    both.columns = ["sqrt", "absolute"]
    result = string_series.apply([np.sqrt, np.abs], by_row=by_row)
    tm.assert_frame_equal(result, both)

    # dict, provide renaming
    renamed = concat([f_sqrt, f_abs], axis=1)
    renamed.columns = ["foo", "bar"]
    renamed = renamed.unstack().rename("series")

    result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row)
    tm.assert_series_equal(result.reindex_like(renamed), renamed)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
    """A failing kernel makes the whole list/dict ``transform`` call raise."""
    # GH 35964
    if op in ("ffill", "bfill", "pad", "backfill", "shift"):
        marker = pytest.mark.xfail(reason=f"{op} is successful on any dtype")
        request.applymarker(marker)

    # Using object makes most transform kernels fail
    ser = Series(3 * [object])

    if op in ("fillna", "ngroup"):
        error, msg = ValueError, "Transform function failed"
    else:
        error = TypeError
        alternatives = [
            "not supported between instances of 'type' and 'type'",
            "unsupported operand type",
        ]
        msg = "|".join(alternatives)

    specs = (
        [op, "shift"],
        {"A": op, "B": "shift"},
        {"A": [op], "B": ["shift"]},
        {"A": [op, "shift"], "B": [op]},
    )
    for spec in specs:
        with pytest.raises(error, match=msg):
            ser.transform(spec)
|
||||
|
||||
|
||||
def test_transform_partial_failure_valueerror():
    """A ``ValueError`` from one function surfaces as "Transform function failed"."""
    # GH 40211
    def noop(x):
        return x

    def raising_op(_):
        raise ValueError

    ser = Series(3 * [object])
    msg = "Transform function failed"

    specs = (
        [noop, raising_op],
        {"A": raising_op, "B": noop},
        {"A": [raising_op], "B": [noop]},
        {"A": [noop, raising_op], "B": [noop]},
    )
    for spec in specs:
        with pytest.raises(ValueError, match=msg):
            ser.transform(spec)
|
||||
|
||||
|
||||
def test_demo():
    """Demonstrate ``Series.agg`` with a list of names and with a renaming dict."""
    # demonstration tests
    ser = Series(range(6), dtype="int64", name="series")

    extremes = ser.agg(["min", "max"])
    tm.assert_series_equal(
        extremes, Series([0, 5], index=["min", "max"], name="series")
    )

    relabelled = ser.agg({"foo": "min"})
    tm.assert_series_equal(relabelled, Series([0], index=["foo"], name="series"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
    """``str`` and an equivalent lambda are evaluated the same way by ``apply``."""
    # test that we are evaluating row-by-row first if by_row="compat"
    # else vectorized evaluation
    result = string_series.apply(func, by_row=by_row)

    if not by_row:
        assert result == str(string_series)
        return

    tm.assert_series_equal(result, string_series.map(func))
|
||||
|
||||
|
||||
def test_agg_evaluate_lambdas(string_series):
    """``agg`` with ``type`` (or a lambda around it) warns and returns a Series."""
    # GH53325
    # in the future, the result will be a Series class.

    for func in (lambda x: type(x), type):
        with tm.assert_produces_warning(FutureWarning):
            result = string_series.agg(func)
        assert isinstance(result, Series) and len(result) == len(string_series)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op_name", ["agg", "apply"])
def test_with_nested_series(datetime_series, op_name):
    """A function returning a Series per element expands into a DataFrame."""
    # GH 2316
    # .agg with a reducer and a transform, what to do
    def expand(x):
        return Series([x, x**2], index=["x", "x^2"])

    msg = "cannot aggregate"
    warning = None if op_name != "agg" else FutureWarning
    with tm.assert_produces_warning(warning, match=msg):
        # GH52123
        result = getattr(datetime_series, op_name)(expand)
    expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
    tm.assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = datetime_series.agg(expand)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_replicate_describe(string_series):
    """A dict of reducers passed to ``apply`` reproduces ``Series.describe``."""
    # this also tests a result set that is all scalars
    expected = string_series.describe()

    describe_funcs = {
        "count": "count",
        "mean": "mean",
        "std": "std",
        "min": "min",
        "25%": lambda x: x.quantile(0.25),
        "50%": "median",
        "75%": lambda x: x.quantile(0.75),
        "max": "max",
    }
    result = string_series.apply(describe_funcs)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reduce(string_series):
    """``agg`` with a list of reducer names returns one labelled value per name."""
    # reductions with named functions
    result = string_series.agg(["sum", "mean"])

    values = [string_series.sum(), string_series.mean()]
    expected = Series(values, index=["sum", "mean"], name=string_series.name)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "how, kwds",
    [("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})],
)
def test_non_callable_aggregates(how, kwds):
    """Non-callable Series attributes such as ``size`` work as aggregation names."""
    # test agg using non-callable series attributes
    # GH 39116 - expand to apply
    ser = Series([1, 2, None])
    method = getattr(ser, how)

    # Calling agg w/ just a string arg same as calling s.arg
    assert method("size", **kwds) == ser.size

    # test when mixed w/ callable reducers
    expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
    as_list = method(["size", "count", "mean"], **kwds)
    tm.assert_series_equal(as_list, expected)

    as_dict = method({"size": "size", "count": "count", "mean": "mean"}, **kwds)
    tm.assert_series_equal(as_dict, expected)
|
||||
|
||||
|
||||
def test_series_apply_no_suffix_index(by_row):
    """Duplicate lambda labels in the result index are not given suffixes."""
    # GH36189
    ser = Series([4] * 3)
    funcs = ["sum", lambda x: x.sum(), lambda x: x.sum()]

    result = ser.apply(funcs, by_row=by_row)

    labels = ["sum", "<lambda>", "<lambda>"]
    tm.assert_series_equal(result, Series([12, 12, 12], index=labels))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dti,exp",
    [
        (
            Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
            DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
        ),
        (
            Series(
                np.arange(10, dtype=np.float64),
                index=date_range("2020-01-01", periods=10),
                name="ts",
            ),
            DataFrame(np.repeat([[1, 2]], 10, axis=0), dtype="int64"),
        ),
    ],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
    """Applying a Series-returning function over datetime values builds a frame."""
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    index = dti.tz_localize("UTC").index if aware else dti.index
    result = Series(index).apply(lambda x: Series([1, 2]))
    tm.assert_frame_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "by_row, expected", [("compat", Series(np.ones(10), dtype="int64")), (False, 1)]
)
def test_apply_scalar_on_date_time_index_aware_series(by_row, expected):
    """Applying a constant function over tz-aware datetime values does not raise."""
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    utc_index = date_range("2020-01-01", periods=10, tz="UTC")
    series = Series(np.arange(10, dtype=np.float64), index=utc_index)

    result = Series(series.index).apply(lambda x: 1, by_row=by_row)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_timedelta(by_row):
    """``apply(pd.to_timedelta)`` matches calling ``pd.to_timedelta`` on the list."""
    cases = (
        ["00:00:01", "00:00:02"],
        ["00:00:01", np.nan, pd.NaT, pd.NaT],
    )
    for strings in cases:
        converted = pd.to_timedelta(strings)
        applied = Series(strings).apply(pd.to_timedelta, by_row=by_row)
        tm.assert_series_equal(Series(converted), applied)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sum], ["sum"]),
        ([np.sum, np.mean], ["sum", "mean"]),
        (np.array([np.sum]), ["sum"]),
        (np.array([np.sum, np.mean]), ["sum", "mean"]),
    ],
)
@pytest.mark.parametrize(
    "how, kwargs",
    [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
    """List-like reducers give one labelled value per function.

    ``ops`` is the list/array of NumPy reducers, ``names`` the labels expected
    in the result index. ``how``/``kwargs`` select ``agg`` or ``apply``. Only
    the ``agg`` path is expected to emit a FutureWarning.
    """
    # GH 39140
    expected = Series({name: op(string_series) for name, op in zip(names, ops)})
    # take the name from the fixture, as the dict-like reducer test does
    expected.name = string_series.name
    warn = FutureWarning if how == "agg" else None
    # Use a group, not square brackets: ``[sum|mean]`` is a character class
    # that matches any single one of those letters.
    msg = rf"using Series\.({'|'.join(names)})"
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(string_series, how)(ops, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sum},
        {"A": np.sum, "B": np.mean},
        Series({"A": np.sum}),
        Series({"A": np.sum, "B": np.mean}),
    ],
)
@pytest.mark.parametrize(
    "how, kwargs",
    [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
    """Dict-like reducers give one value per key, labelled by that key.

    ``ops`` maps labels to NumPy reducers (as a dict or a Series).
    ``how``/``kwargs`` select ``agg`` or ``apply``. Only the ``agg`` path is
    expected to emit a FutureWarning.
    """
    # NOTE(review): ``by_row`` is requested but never used in the body; the
    # by_row value actually applied comes from ``kwargs``.
    # GH 39140
    expected = Series({name: op(string_series) for name, op in ops.items()})
    expected.name = string_series.name
    warn = FutureWarning if how == "agg" else None
    # Use a group, not square brackets: ``[sum|mean]`` is a character class
    # that matches any single one of those letters.
    msg = r"using Series\.(sum|mean)"
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(string_series, how)(ops, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_apply_listlike_transformer(string_series, ops, names, by_row):
    """List-like transformers produce one column per function, named by ``names``."""
    # GH 39140
    with np.errstate(all="ignore"):
        columns = [op(string_series) for op in ops]
        expected = concat(columns, axis=1)
        expected.columns = names
        result = string_series.apply(ops, by_row=by_row)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, expected",
    [
        ([lambda x: x], DataFrame({"<lambda>": [1, 2, 3]})),
        ([lambda x: x.sum()], Series([6], index=["<lambda>"])),
    ],
)
def test_apply_listlike_lambda(ops, expected, by_row):
    """A list holding a lambda labels its result ``<lambda>``."""
    # GH53400
    data = Series([1, 2, 3])
    tm.assert_equal(data.apply(ops, by_row=by_row), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sqrt},
        {"A": np.sqrt, "B": np.exp},
        Series({"A": np.sqrt}),
        Series({"A": np.sqrt, "B": np.exp}),
    ],
)
def test_apply_dictlike_transformer(string_series, ops, by_row):
    """Dict-like transformers give a Series keyed by the dict labels."""
    # GH 39140
    with np.errstate(all="ignore"):
        pieces = {name: op(string_series) for name, op in ops.items()}
        expected = concat(pieces)
        expected.name = string_series.name
        result = string_series.apply(ops, by_row=by_row)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, expected",
    [
        (
            {"a": lambda x: x},
            Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])),
        ),
        ({"a": lambda x: x.sum()}, Series([6], index=["a"])),
    ],
)
def test_apply_dictlike_lambda(ops, by_row, expected):
    """A dict of lambdas labels its result by the dict key."""
    # GH53400
    data = Series([1, 2, 3])
    tm.assert_equal(data.apply(ops, by_row=by_row), expected)
|
||||
|
||||
|
||||
def test_apply_retains_column_name(by_row):
    """The index name of the returned Series becomes the result's columns name."""
    # GH 16380
    df = DataFrame({"x": range(3)}, Index(range(3), name="x"))

    def expand(x):
        labels = Index(range(x + 1), name="y")
        return Series(range(x + 1), labels)

    result = df.x.apply(expand)

    rows = [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]]
    expected = DataFrame(
        rows,
        columns=Index(range(3), name="y"),
        index=Index(range(3), name="x"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_type():
    """``apply(type)`` returns the Python type of each element."""
    # GH 46719
    labels = ["a", "b", "c"]
    ser = Series([3, "string", float], index=labels)

    result = ser.apply(type)
    tm.assert_series_equal(result, Series([int, str, type], index=labels))
|
||||
|
||||
|
||||
def test_series_apply_unpack_nested_data():
    """Lists of unequal length expand into a NaN-padded DataFrame."""
    # GH#55189
    nested = Series([[1, 2, 3], [4, 5, 6, 7]])

    result = nested.apply(lambda x: Series(x))

    columns = {0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}
    tm.assert_frame_equal(result, DataFrame(columns))
|
@ -0,0 +1,39 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_relabel_no_duplicated_method():
|
||||
# this is to test there is no duplicated method used in agg
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
|
||||
|
||||
result = df["A"].agg(foo="sum")
|
||||
expected = df["A"].agg({"foo": "sum"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df["B"].agg(foo="min", bar="max")
|
||||
expected = df["B"].agg({"foo": "min", "bar": "max"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "using Series.[sum|min|max]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df["B"].agg(foo=sum, bar=min, cat="max")
|
||||
msg = "using Series.[sum|min|max]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_relabel_duplicated_method():
|
||||
# this is to test with nested renaming, duplicated method can be used
|
||||
# if they are assigned with different new names
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
|
||||
|
||||
result = df["A"].agg(foo="sum", bar="sum")
|
||||
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "using Series.min"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df["B"].agg(foo=min, bar="min")
|
||||
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """Extra args/kwargs given to ``Series.transform`` reach the callable."""
    # GH 43357
    def weighted_add(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    transformed = ser.transform(weighted_add, 0, *args, **kwargs)
    tm.assert_series_equal(transformed, ser + increment)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(string_series, ops, names):
    """``transform`` with a list of functions gives one column per function."""
    # GH 35964
    with np.errstate(all="ignore"):
        columns = [op(string_series) for op in ops]
        expected = concat(columns, axis=1)
        expected.columns = names
        result = string_series.transform(ops)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_listlike_func_with_args():
    """Args/kwargs given to a list-like ``transform`` go to every function."""
    # GH 50624

    ser = Series([1, 2, 3])

    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    funcs = [foo1, foo2]

    # ``b`` is not a parameter of foo1, so forwarding it must fail
    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
    with pytest.raises(TypeError, match=msg):
        ser.transform(funcs, 0, 3, b=3, c=4)

    result = ser.transform(funcs, 0, 3, c=4)
    expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
    """``transform`` with a dict or Series of functions gives labelled columns."""
    # GH 35964
    with np.errstate(all="ignore"):
        columns = [np.sqrt(string_series), np.abs(string_series)]
        expected = concat(columns, axis=1)
    expected.columns = ["foo", "bar"]

    funcs = box({"foo": np.sqrt, "bar": np.abs})
    result = string_series.transform(funcs)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """Dict values may mix lists of function names with single names."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    ser = Series([1, 4])

    result = ser.transform({"b": ["sqrt", "abs"], "c": "sqrt"})

    columns = MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)])
    expected = DataFrame([[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=columns)
    tm.assert_frame_equal(result, expected)
|
326
venv/lib/python3.12/site-packages/pandas/tests/apply/test_str.py
Normal file
326
venv/lib/python3.12/site-packages/pandas/tests/apply/test_str.py
Normal file
@ -0,0 +1,326 @@
|
||||
from itertools import chain
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_number
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import (
|
||||
frame_transform_kernels,
|
||||
series_transform_kernels,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
    "args,kwds",
    [
        pytest.param([], {}, id="no_args_or_kwds"),
        pytest.param([1], {}, id="axis_from_args"),
        pytest.param([], {"axis": 1}, id="axis_from_kwds"),
        pytest.param([], {"numeric_only": True}, id="optional_kwds"),
        pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
    """A reducer given by name behaves like calling the DataFrame method directly."""
    if how == "agg" and len(args) > 1:
        marker = pytest.mark.xfail(
            raises=TypeError,
            reason="agg/apply signature mismatch - agg passes 2nd "
            "argument to func",
        )
        request.applymarker(marker)

    method = getattr(float_frame, how)
    result = method(func, *args, **kwds)
    expected = getattr(float_frame, func)(*args, **kwds)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"])
def test_with_string_args(datetime_series, arg):
    """``Series.apply`` with a reducer name equals calling that method."""
    via_apply = datetime_series.apply(arg)
    via_method = getattr(datetime_series, arg)()
    assert via_apply == via_method
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(op, how):
    """A reducer given by name matches the NumPy function of the same name."""
    # GH 39116
    frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = getattr(frame, how)(op)

    # pandas ddof defaults to 1, numpy to 0
    np_kwargs = {}
    if op in ("std", "var"):
        np_kwargs["ddof"] = 1
    np_result = getattr(np, op)(frame, axis=0, **np_kwargs)

    expected = Series(np_result, index=frame.columns)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
    """A transformer given by name matches the NumPy function of the same name."""
    # GH 39116

    # float_frame will _usually_ have negative values, which will
    # trigger the warning here, but let's put one in just to be sure
    float_frame.iloc[0, 0] = -1.0
    warn = RuntimeWarning if op in ["log", "sqrt"] else None

    with tm.assert_produces_warning(warn, check_stacklevel=False):
        # float_frame fixture is defined in conftest.py, so we don't check the
        # stacklevel as otherwise the test would fail.
        method = getattr(float_frame, how)
        result = method(op)
        np_func = getattr(np, op)
        expected = np_func(float_frame)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("sum", 0),
                ("max", np.nan),
                ("min", np.nan),
                ("all", True),
                ("any", False),
                ("mean", np.nan),
                ("prod", 1),
                ("std", np.nan),
                ("var", np.nan),
                ("median", np.nan),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("sum", 6),
                ("max", 3),
                ("min", 1),
                ("all", True),
                ("any", True),
                ("mean", 2),
                ("prod", 6),
                ("std", 1),
                ("var", 1),
                ("median", 2),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("sum", "abc"),
                ("max", "c"),
                ("min", "a"),
                ("all", True),
                ("any", True),
            ],
        ),
    ),
)
def test_agg_cython_table_series(series, func, expected):
    """Reducers from the cython table give the expected scalar via ``Series.agg``."""
    # GH21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    # only non-string funcs are expected to warn
    warn = FutureWarning if not isinstance(func, str) else None
    with tm.assert_produces_warning(warn, match="is currently using Series.*"):
        result = series.agg(func)

    if not is_number(expected):
        assert result == expected
    else:
        assert np.isclose(result, expected, equal_nan=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("cumprod", Series([], dtype=np.float64)),
                ("cumsum", Series([], dtype=np.float64)),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("cumprod", Series([np.nan, 1, 2, 6])),
                ("cumsum", Series([np.nan, 1, 3, 6])),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
        ),
    ),
)
def test_agg_cython_table_transform_series(series, func, expected):
    """Cumulative functions from the cython table give the expected Series."""
    # GH21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    # only non-string funcs are expected to warn
    warn = FutureWarning if not isinstance(func, str) else None
    with tm.assert_produces_warning(warn, match="is currently using Series.*"):
        transformed = series.agg(func)
    tm.assert_series_equal(transformed, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain.from_iterable(
        [
            tm.get_cython_table_params(
                DataFrame(),
                [
                    ("sum", Series(dtype="float64")),
                    ("max", Series(dtype="float64")),
                    ("min", Series(dtype="float64")),
                    ("all", Series(dtype=bool)),
                    ("any", Series(dtype=bool)),
                    ("mean", Series(dtype="float64")),
                    ("prod", Series(dtype="float64")),
                    ("std", Series(dtype="float64")),
                    ("var", Series(dtype="float64")),
                    ("median", Series(dtype="float64")),
                ],
            ),
            tm.get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("sum", Series([1.0, 3])),
                    ("max", Series([1.0, 2])),
                    ("min", Series([1.0, 1])),
                    ("all", Series([True, True])),
                    ("any", Series([True, True])),
                    ("mean", Series([1, 1.5])),
                    ("prod", Series([1.0, 2])),
                    ("std", Series([np.nan, 0.707107])),
                    ("var", Series([np.nan, 0.5])),
                    ("median", Series([1, 1.5])),
                ],
            ),
        ]
    ),
)
def test_agg_cython_table_frame(df, func, expected, axis):
    """Check ``DataFrame.agg`` with reducing functions along ``axis``.

    GH 21224: exercises the reducing entries of
    pandas.core.base.SelectionMixin._cython_table.
    """
    # A FutureWarning is expected only when ``func`` is not given as a string.
    if isinstance(func, str):
        expected_warning = None
    else:
        expected_warning = FutureWarning

    with tm.assert_produces_warning(
        expected_warning, match="is currently using DataFrame.*"
    ):
        # GH#53425
        reduced = df.agg(func, axis=axis)

    tm.assert_series_equal(reduced, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain.from_iterable(
        [
            tm.get_cython_table_params(
                DataFrame(),
                [("cumprod", DataFrame()), ("cumsum", DataFrame())],
            ),
            tm.get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                    ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
                ],
            ),
        ]
    ),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
    """Check ``DataFrame.agg`` with the transforming functions cumprod and cumsum.

    GH 21224: exercises the transforming entries of
    pandas.core.base.SelectionMixin._cython_table.
    """
    column_wise = axis in (1, "columns")
    if column_wise:
        # operating blockwise doesn't let us preserve dtypes
        expected = expected.astype("float64")

    # A FutureWarning is expected only when ``func`` is not given as a string.
    if isinstance(func, str):
        expected_warning = None
    else:
        expected_warning = FutureWarning

    with tm.assert_produces_warning(
        expected_warning, match="is currently using DataFrame.*"
    ):
        # GH#53425
        transformed = df.agg(func, axis=axis)

    tm.assert_frame_equal(transformed, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(request, string_series, op):
    """Compare ``Series.transform(op)`` with a single-group groupby transform.

    GH 35964: every row is placed in one group, and the grouped transform
    result is used as the expected value for the plain ``Series.transform``.
    """
    if op == "ngroup":
        xfail_marker = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.applymarker(xfail_marker)

    is_fillna = op == "fillna"
    # "fillna" is the only kernel here that is called with an extra argument.
    extra_args = [0.0] if is_fillna else []
    single_group = np.ones(len(string_series))

    # Only the "fillna" kernel is expected to emit the deprecation warning.
    expected_warning = FutureWarning if is_fillna else None
    with tm.assert_produces_warning(
        expected_warning, match="SeriesGroupBy.fillna is deprecated"
    ):
        expected = string_series.groupby(single_group).transform(op, *extra_args)

    actual = string_series.transform(op, 0, *extra_args)
    tm.assert_series_equal(actual, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
    """Compare ``DataFrame.transform(op)`` with a single-group groupby transform.

    GH 35964: the comparison runs twice, first on ``float_frame`` as given and
    then after adding a column "E" so that the frame holds more than one array.
    """
    if op == "ngroup":
        xfail_marker = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.applymarker(xfail_marker)

    is_fillna = op == "fillna"
    # "fillna" is the only kernel here that is called with an extra argument.
    extra_args = [0.0] if is_fillna else []

    row_wise = axis in (0, "index")
    if row_wise:
        groupby_msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
    else:
        groupby_msg = "DataFrame.groupby with axis=1 is deprecated"

    # Only the "fillna" kernel is expected to emit the deprecation warning.
    transform_warning = FutureWarning if is_fillna else None
    transform_msg = "DataFrameGroupBy.fillna is deprecated"

    def _check_matches_single_group():
        # Reads float_frame at call time, so the second call sees column "E".
        n_labels = float_frame.shape[0] if row_wise else float_frame.shape[1]
        single_group = np.ones(n_labels)

        with tm.assert_produces_warning(FutureWarning, match=groupby_msg):
            grouped = float_frame.groupby(single_group, axis=axis)

        with tm.assert_produces_warning(transform_warning, match=transform_msg):
            expected = grouped.transform(op, *extra_args)

        actual = float_frame.transform(op, axis, *extra_args)
        tm.assert_frame_equal(actual, expected)

    _check_matches_single_group()

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    _check_matches_single_group()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
    """Check that ``DataFrame.transform`` accepts a method name given as a string.

    GH 19760: the result must equal calling the method of that name directly
    on the frame.
    """
    frame = DataFrame({"A": [-1, 2]})

    # Build the expected frame by invoking the named method with no arguments.
    call_method = operator.methodcaller(method)
    expected = call_method(frame)

    actual = frame.transform(method)
    tm.assert_frame_equal(actual, expected)
|
Reference in New Issue
Block a user