venv
Binary file not shown.
@@ -0,0 +1,146 @@
from datetime import (
    datetime,
    timedelta,
)

import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
    DataFrame,
    Series,
    bdate_range,
)


@pytest.fixture(params=[True, False])
def raw(request):
    """raw keyword argument for rolling.apply"""
    return request.param


@pytest.fixture(
    params=[
        "sum",
        "mean",
        "median",
        "max",
        "min",
        "var",
        "std",
        "kurt",
        "skew",
        "count",
        "sem",
    ]
)
def arithmetic_win_operators(request):
    return request.param


@pytest.fixture(params=[True, False])
def center(request):
    return request.param


@pytest.fixture(params=[None, 1])
def min_periods(request):
    return request.param


@pytest.fixture(params=[True, False])
def parallel(request):
    """parallel keyword argument for numba.jit"""
    return request.param


# Can parameterize nogil & nopython over True | False, but limiting per
# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472


@pytest.fixture(params=[False])
def nogil(request):
    """nogil keyword argument for numba.jit"""
    return request.param


@pytest.fixture(params=[True])
def nopython(request):
    """nopython keyword argument for numba.jit"""
    return request.param


@pytest.fixture(params=[True, False])
def adjust(request):
    """adjust keyword argument for ewm"""
    return request.param


@pytest.fixture(params=[True, False])
def ignore_na(request):
    """ignore_na keyword argument for ewm"""
    return request.param


@pytest.fixture(params=[True, False])
def numeric_only(request):
    """numeric_only keyword argument"""
    return request.param


@pytest.fixture(
    params=[
        pytest.param("numba", marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]),
        "cython",
    ]
)
def engine(request):
    """engine keyword argument for rolling.apply"""
    return request.param


@pytest.fixture(
    params=[
        pytest.param(
            ("numba", True), marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]
        ),
        ("cython", True),
        ("cython", False),
    ]
)
def engine_and_raw(request):
    """engine and raw keyword arguments for rolling.apply"""
    return request.param


@pytest.fixture(params=["1 day", timedelta(days=1), np.timedelta64(1, "D")])
def halflife_with_times(request):
    """Halflife argument for EWM when times is specified."""
    return request.param


@pytest.fixture
def series():
    """Make mocked series as fixture."""
    arr = np.random.default_rng(2).standard_normal(100)
    locs = np.arange(20, 40)
    arr[locs] = np.nan
    series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100))
    return series


@pytest.fixture
def frame():
    """Make mocked frame as fixture."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((100, 10)),
        index=bdate_range(datetime(2009, 1, 1), periods=100),
    )


@pytest.fixture(params=[None, 1, 2, 5, 10])
def step(request):
    """step keyword argument for rolling window operations."""
    return request.param
Binary file not shown.
@@ -0,0 +1,72 @@
import itertools

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    notna,
)


def create_series():
    return [
        Series(dtype=np.float64, name="a"),
        Series([np.nan] * 5),
        Series([1.0] * 5),
        Series(range(5, 0, -1)),
        Series(range(5)),
        Series([np.nan, 1.0, np.nan, 1.0, 1.0]),
        Series([np.nan, 1.0, np.nan, 2.0, 3.0]),
        Series([np.nan, 1.0, np.nan, 3.0, 2.0]),
    ]


def create_dataframes():
    return [
        DataFrame(columns=["a", "a"]),
        DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]),
    ] + [DataFrame(s) for s in create_series()]


def is_constant(x):
    values = x.values.ravel("K")
    return len(set(values[notna(values)])) == 1


@pytest.fixture(
    params=(
        obj
        for obj in itertools.chain(create_series(), create_dataframes())
        if is_constant(obj)
    ),
)
def consistent_data(request):
    return request.param


@pytest.fixture(params=create_series())
def series_data(request):
    return request.param


@pytest.fixture(params=itertools.chain(create_series(), create_dataframes()))
def all_data(request):
    """
    Test:
        - Empty Series / DataFrame
        - All NaN
        - All consistent value
        - Monotonically decreasing
        - Monotonically increasing
        - Monotonically consistent with NaNs
        - Monotonically increasing with NaNs
        - Monotonically decreasing with NaNs
    """
    return request.param


@pytest.fixture(params=[0, 2])
def min_periods(request):
    return request.param
@@ -0,0 +1,243 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    concat,
)
import pandas._testing as tm


def create_mock_weights(obj, com, adjust, ignore_na):
    if isinstance(obj, DataFrame):
        if not len(obj.columns):
            return DataFrame(index=obj.index, columns=obj.columns)
        w = concat(
            [
                create_mock_series_weights(
                    obj.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na
                )
                for i in range(len(obj.columns))
            ],
            axis=1,
        )
        w.index = obj.index
        w.columns = obj.columns
        return w
    else:
        return create_mock_series_weights(obj, com, adjust, ignore_na)


def create_mock_series_weights(s, com, adjust, ignore_na):
    w = Series(np.nan, index=s.index, name=s.name)
    alpha = 1.0 / (1.0 + com)
    if adjust:
        count = 0
        for i in range(len(s)):
            if s.iat[i] == s.iat[i]:
                w.iat[i] = pow(1.0 / (1.0 - alpha), count)
                count += 1
            elif not ignore_na:
                count += 1
    else:
        sum_wts = 0.0
        prev_i = -1
        count = 0
        for i in range(len(s)):
            if s.iat[i] == s.iat[i]:
                if prev_i == -1:
                    w.iat[i] = 1.0
                else:
                    w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i)
                sum_wts += w.iat[i]
                prev_i = count
                count += 1
            elif not ignore_na:
                count += 1
    return w


def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods):
    com = 3.0

    result = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()
    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    expected = all_data.multiply(weights).cumsum().divide(weights.cumsum()).ffill()
    expected[
        all_data.expanding().count() < (max(min_periods, 1) if min_periods else 1)
    ] = np.nan
    tm.assert_equal(result, expected.astype("float64"))


def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods):
    com = 3.0

    count_x = consistent_data.expanding().count()
    mean_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()
    # check that correlation of a series with itself is either 1 or NaN
    corr_x_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).corr(consistent_data)
    exp = (
        consistent_data.max()
        if isinstance(consistent_data, Series)
        else consistent_data.max().max()
    )

    # check mean of constant series
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = exp
    tm.assert_equal(mean_x, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)


def test_ewm_consistency_var_debiasing_factors(
    all_data, adjust, ignore_na, min_periods
):
    com = 3.0

    # check variance debiasing factors
    var_unbiased_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=False)
    var_biased_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=True)

    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    cum_sum = weights.cumsum().ffill()
    cum_sum_sq = (weights * weights).cumsum().ffill()
    numerator = cum_sum * cum_sum
    denominator = numerator - cum_sum_sq
    denominator[denominator <= 0.0] = np.nan
    var_debiasing_factors_x = numerator / denominator

    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)


@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias):
    com = 3.0

    mean_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()
    var_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    assert not (var_x < 0).any().any()

    if bias:
        # check that biased var(x) == mean(x^2) - mean(x)^2
        mean_x2 = (
            (all_data * all_data)
            .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
            .mean()
        )
        tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))


@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var_constant(
    consistent_data, adjust, ignore_na, min_periods, bias
):
    com = 3.0
    count_x = consistent_data.expanding(min_periods=min_periods).count()
    var_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)

    # check that variance of constant series is identically 0
    assert not (var_x > 0).any().any()
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = 0.0
    if not bias:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)


@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
    com = 3.0
    var_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    assert not (var_x < 0).any().any()

    std_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).std(bias=bias)
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).cov(all_data, bias=bias)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)


@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_series_cov_corr(
    series_data, adjust, ignore_na, min_periods, bias
):
    com = 3.0

    var_x_plus_y = (
        (series_data + series_data)
        .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
        .var(bias=bias)
    )
    var_x = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    var_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    cov_x_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).cov(series_data, bias=bias)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).corr(series_data)
    std_x = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).std(bias=bias)
    std_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).std(bias=bias)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if bias:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = series_data.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        ).mean()
        mean_y = series_data.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        ).mean()
        mean_x_times_y = (
            (series_data * series_data)
            .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
            .mean()
        )
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
@@ -0,0 +1,144 @@
import numpy as np
import pytest

from pandas import Series
import pandas._testing as tm


def no_nans(x):
    return x.notna().all().all()


def all_na(x):
    return x.isnull().all().all()


@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f):
    if f is np.sum:
        if not no_nans(all_data) and not (
            all_na(all_data) and not all_data.empty and min_periods > 0
        ):
            request.applymarker(
                pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
            )
    expanding_f_result = all_data.expanding(min_periods=min_periods).sum()
    expanding_apply_f_result = all_data.expanding(min_periods=min_periods).apply(
        func=f, raw=True
    )
    tm.assert_equal(expanding_f_result, expanding_apply_f_result)


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, min_periods, ddof):
    var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    if ddof == 0:
        # check that biased var(x) == mean(x^2) - mean(x)^2
        mean_x2 = (all_data * all_data).expanding(min_periods=min_periods).mean()
        mean_x = all_data.expanding(min_periods=min_periods).mean()
        tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
    count_x = consistent_data.expanding(min_periods=min_periods).count()
    var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof)

    # check that variance of constant series is identically 0
    assert not (var_x > 0).any().any()
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)


@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof):
    var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof)
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)


@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof):
    var_x_plus_y = (
        (series_data + series_data).expanding(min_periods=min_periods).var(ddof=ddof)
    )
    var_x = series_data.expanding(min_periods=min_periods).var(ddof=ddof)
    var_y = series_data.expanding(min_periods=min_periods).var(ddof=ddof)
    cov_x_y = series_data.expanding(min_periods=min_periods).cov(series_data, ddof=ddof)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = series_data.expanding(min_periods=min_periods).corr(series_data)
    std_x = series_data.expanding(min_periods=min_periods).std(ddof=ddof)
    std_y = series_data.expanding(min_periods=min_periods).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof == 0:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = series_data.expanding(min_periods=min_periods).mean()
        mean_y = series_data.expanding(min_periods=min_periods).mean()
        mean_x_times_y = (
            (series_data * series_data).expanding(min_periods=min_periods).mean()
        )
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))


def test_expanding_consistency_mean(all_data, min_periods):
    result = all_data.expanding(min_periods=min_periods).mean()
    expected = (
        all_data.expanding(min_periods=min_periods).sum()
        / all_data.expanding(min_periods=min_periods).count()
    )
    tm.assert_equal(result, expected.astype("float64"))


def test_expanding_consistency_constant(consistent_data, min_periods):
    count_x = consistent_data.expanding().count()
    mean_x = consistent_data.expanding(min_periods=min_periods).mean()
    # check that correlation of a series with itself is either 1 or NaN
    corr_x_x = consistent_data.expanding(min_periods=min_periods).corr(consistent_data)

    exp = (
        consistent_data.max()
        if isinstance(consistent_data, Series)
        else consistent_data.max().max()
    )

    # check mean of constant series
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = exp
    tm.assert_equal(mean_x, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)


def test_expanding_consistency_var_debiasing_factors(all_data, min_periods):
    # check variance debiasing factors
    var_unbiased_x = all_data.expanding(min_periods=min_periods).var()
    var_biased_x = all_data.expanding(min_periods=min_periods).var(ddof=0)
    var_debiasing_factors_x = all_data.expanding().count() / (
        all_data.expanding().count() - 1.0
    ).replace(0.0, np.nan)
    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
@@ -0,0 +1,244 @@
import numpy as np
import pytest

from pandas import Series
import pandas._testing as tm


def no_nans(x):
    return x.notna().all().all()


def all_na(x):
    return x.isnull().all().all()


@pytest.fixture(params=[(1, 0), (5, 1)])
def rolling_consistency_cases(request):
    """window, min_periods"""
    return request.param


@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_rolling_apply_consistency_sum(
    request, all_data, rolling_consistency_cases, center, f
):
    window, min_periods = rolling_consistency_cases

    if f is np.sum:
        if not no_nans(all_data) and not (
            all_na(all_data) and not all_data.empty and min_periods > 0
        ):
            request.applymarker(
                pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
            )
    rolling_f_result = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).sum()
    rolling_apply_f_result = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).apply(func=f, raw=True)
    tm.assert_equal(rolling_f_result, rolling_apply_f_result)


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof):
    window, min_periods = rolling_consistency_cases

    var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
        ddof=ddof
    )
    assert not (var_x < 0).any().any()

    if ddof == 0:
        # check that biased var(x) == mean(x^2) - mean(x)^2
        mean_x = all_data.rolling(
            window=window, min_periods=min_periods, center=center
        ).mean()
        mean_x2 = (
            (all_data * all_data)
            .rolling(window=window, min_periods=min_periods, center=center)
            .mean()
        )
        tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(
    consistent_data, rolling_consistency_cases, center, ddof
):
    window, min_periods = rolling_consistency_cases

    count_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).count()
    var_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=ddof)

    # check that variance of constant series is identically 0
    assert not (var_x > 0).any().any()
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)


@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_var_std_cov(
    all_data, rolling_consistency_cases, center, ddof
):
    window, min_periods = rolling_consistency_cases

    var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
        ddof=ddof
    )
    assert not (var_x < 0).any().any()

    std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std(
        ddof=ddof
    )
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)


@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_series_cov_corr(
    series_data, rolling_consistency_cases, center, ddof
):
    window, min_periods = rolling_consistency_cases

    var_x_plus_y = (
        (series_data + series_data)
        .rolling(window=window, min_periods=min_periods, center=center)
        .var(ddof=ddof)
    )
    var_x = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=ddof)
    var_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=ddof)
    cov_x_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).cov(series_data, ddof=ddof)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).corr(series_data)
    std_x = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).std(ddof=ddof)
    std_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof == 0:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = series_data.rolling(
            window=window, min_periods=min_periods, center=center
        ).mean()
        mean_y = series_data.rolling(
            window=window, min_periods=min_periods, center=center
        ).mean()
        mean_x_times_y = (
            (series_data * series_data)
            .rolling(window=window, min_periods=min_periods, center=center)
            .mean()
        )
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))


def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center):
    window, min_periods = rolling_consistency_cases

    result = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).mean()
    expected = (
        all_data.rolling(window=window, min_periods=min_periods, center=center)
        .sum()
        .divide(
            all_data.rolling(
                window=window, min_periods=min_periods, center=center
            ).count()
        )
    )
    tm.assert_equal(result, expected.astype("float64"))


def test_rolling_consistency_constant(
    consistent_data, rolling_consistency_cases, center
):
    window, min_periods = rolling_consistency_cases

    count_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).count()
    mean_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).mean()
    # check that correlation of a series with itself is either 1 or NaN
    corr_x_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).corr(consistent_data)

    exp = (
        consistent_data.max()
        if isinstance(consistent_data, Series)
        else consistent_data.max().max()
    )

    # check mean of constant series
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = exp
    tm.assert_equal(mean_x, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)


def test_rolling_consistency_var_debiasing_factors(
    all_data, rolling_consistency_cases, center
):
    window, min_periods = rolling_consistency_cases

    # check variance debiasing factors
    var_unbiased_x = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var()
    var_biased_x = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=0)
    var_debiasing_factors_x = (
        all_data.rolling(window=window, min_periods=min_periods, center=center)
        .count()
        .divide(
            (
                all_data.rolling(
                    window=window, min_periods=min_periods, center=center
                ).count()
                - 1.0
            ).replace(0.0, np.nan)
        )
    )
    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
@@ -0,0 +1,398 @@
import numpy as np
import pytest

from pandas.errors import (
    DataError,
    SpecificationError,
)

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Period,
    Series,
    Timestamp,
    concat,
    date_range,
    timedelta_range,
)
import pandas._testing as tm


def test_getitem(step):
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
    r = frame.rolling(window=5, step=step)
    tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns)

    r = frame.rolling(window=5, step=step)[1]
    assert r._selected_obj.name == frame[::step].columns[1]

    # technically this is allowed
    r = frame.rolling(window=5, step=step)[1, 3]
    tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])

    r = frame.rolling(window=5, step=step)[[1, 3]]
    tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])


def test_select_bad_cols():
    df = DataFrame([[1, 2]], columns=["A", "B"])
    g = df.rolling(window=5)
    with pytest.raises(KeyError, match="Columns not found: 'C'"):
        g[["C"]]
    with pytest.raises(KeyError, match="^[^A]+$"):
        # A should not be referenced as a bad column...
        # will have to rethink regex if you change message!
        g[["A", "C"]]


def test_attribute_access():
    df = DataFrame([[1, 2]], columns=["A", "B"])
    r = df.rolling(window=5)
    tm.assert_series_equal(r.A.sum(), r["A"].sum())
    msg = "'Rolling' object has no attribute 'F'"
    with pytest.raises(AttributeError, match=msg):
        r.F


def tests_skip_nuisance(step):
    df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
    r = df.rolling(window=3, step=step)
    result = r[["A", "B"]].sum()
    expected = DataFrame(
        {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
        columns=list("AB"),
    )[::step]
    tm.assert_frame_equal(result, expected)


def test_sum_object_str_raises(step):
    df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
    r = df.rolling(window=3, step=step)
    with pytest.raises(
        DataError, match="Cannot aggregate non-numeric type: object|string"
    ):
        # GH#42738, enforced in 2.0
        r.sum()


def test_agg(step):
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})

    r = df.rolling(window=3, step=step)
    a_mean = r["A"].mean()
    a_std = r["A"].std()
    a_sum = r["A"].sum()
    b_mean = r["B"].mean()
    b_std = r["B"].std()

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"):
        result = r.aggregate([np.mean, np.std])
    expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]])
    tm.assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"):
        result = r.aggregate({"A": np.mean, "B": np.std})

    expected = concat([a_mean, b_std], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)

    result = r.aggregate({"A": ["mean", "std"]})
    expected = concat([a_mean, a_std], axis=1)
    expected.columns = MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
    tm.assert_frame_equal(result, expected)

    result = r["A"].aggregate(["mean", "sum"])
    expected = concat([a_mean, a_sum], axis=1)
    expected.columns = ["mean", "sum"]
    tm.assert_frame_equal(result, expected)

    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        # using a dict with renaming
        r.aggregate({"A": {"mean": "mean", "sum": "sum"}})

    with pytest.raises(SpecificationError, match=msg):
        r.aggregate(
            {"A": {"mean": "mean", "sum": "sum"}, "B": {"mean2": "mean", "sum2": "sum"}}
        )

    result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
    expected = concat([a_mean, a_std, b_mean, b_std], axis=1)

    exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
    expected.columns = MultiIndex.from_tuples(exp_cols)
    tm.assert_frame_equal(result, expected, check_like=True)


@pytest.mark.parametrize(
    "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}]
)
def test_multi_axis_1_raises(func):
    # GH#46904
    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]})
    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        r = df.rolling(window=3, axis=1)
    with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"):
        r.agg(func)


def test_agg_apply(raw):
    # passed lambda
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})

    r = df.rolling(window=3)
    a_sum = r["A"].sum()

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|std]"):
        result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
    rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw)
    expected = concat([a_sum, rcustom], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_consistency(step):
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
    r = df.rolling(window=3, step=step)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
        result = r.agg([np.sum, np.mean]).columns
    expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]])
    tm.assert_index_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
        result = r["A"].agg([np.sum, np.mean]).columns
    expected = Index(["sum", "mean"])
    tm.assert_index_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
        result = r.agg({"A": [np.sum, np.mean]}).columns
    expected = MultiIndex.from_tuples([("A", "sum"), ("A", "mean")])
    tm.assert_index_equal(result, expected)


def test_agg_nested_dicts():
    # API change for disallowing these types of nested dicts
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
    r = df.rolling(window=3)

    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})

    expected = concat(
        [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1
    )
    expected.columns = MultiIndex.from_tuples(
        [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
    )
    with pytest.raises(SpecificationError, match=msg):
        r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})

    with pytest.raises(SpecificationError, match=msg):
        r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})


def test_count_nonnumeric_types(step):
    # GH12541
    cols = [
        "int",
        "float",
        "string",
        "datetime",
        "timedelta",
        "periods",
        "fl_inf",
        "fl_nan",
        "str_nan",
        "dt_nat",
        "periods_nat",
    ]
    dt_nat_col = [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None)]

    df = DataFrame(
        {
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string": list("abc"),
            "datetime": date_range("20170101", periods=3),
            "timedelta": timedelta_range("1 s", periods=3, freq="s"),
            "periods": [
                Period("2012-01"),
                Period("2012-02"),
                Period("2012-03"),
            ],
            "fl_inf": [1.0, 2.0, np.inf],
            "fl_nan": [1.0, 2.0, np.nan],
            "str_nan": ["aa", "bb", np.nan],
            "dt_nat": dt_nat_col,
            "periods_nat": [
                Period("2012-01"),
                Period("2012-02"),
                Period(None),
            ],
        },
        columns=cols,
    )

    expected = DataFrame(
        {
            "int": [1.0, 2.0, 2.0],
            "float": [1.0, 2.0, 2.0],
            "string": [1.0, 2.0, 2.0],
            "datetime": [1.0, 2.0, 2.0],
            "timedelta": [1.0, 2.0, 2.0],
            "periods": [1.0, 2.0, 2.0],
            "fl_inf": [1.0, 2.0, 2.0],
            "fl_nan": [1.0, 2.0, 1.0],
            "str_nan": [1.0, 2.0, 1.0],
            "dt_nat": [1.0, 2.0, 1.0],
            "periods_nat": [1.0, 2.0, 1.0],
        },
        columns=cols,
    )[::step]

    result = df.rolling(window=2, min_periods=0, step=step).count()
    tm.assert_frame_equal(result, expected)

    result = df.rolling(1, min_periods=0, step=step).count()
    expected = df.notna().astype(float)[::step]
    tm.assert_frame_equal(result, expected)


def test_preserve_metadata():
    # GH 10565
    s = Series(np.arange(100), name="foo")

    s2 = s.rolling(30).sum()
    s3 = s.rolling(20).sum()
    assert s2.name == "foo"
    assert s3.name == "foo"


@pytest.mark.parametrize(
    "func,window_size,expected_vals",
    [
        (
            "rolling",
            2,
            [
                [np.nan, np.nan, np.nan, np.nan],
                [15.0, 20.0, 25.0, 20.0],
                [25.0, 30.0, 35.0, 30.0],
                [np.nan, np.nan, np.nan, np.nan],
                [20.0, 30.0, 35.0, 30.0],
                [35.0, 40.0, 60.0, 40.0],
                [60.0, 80.0, 85.0, 80],
            ],
        ),
        (
            "expanding",
            None,
            [
                [10.0, 10.0, 20.0, 20.0],
                [15.0, 20.0, 25.0, 20.0],
                [20.0, 30.0, 30.0, 20.0],
                [10.0, 10.0, 30.0, 30.0],
                [20.0, 30.0, 35.0, 30.0],
                [26.666667, 40.0, 50.0, 30.0],
                [40.0, 80.0, 60.0, 30.0],
            ],
        ),
    ],
)
def test_multiple_agg_funcs(func, window_size, expected_vals):
    # GH 15072
    df = DataFrame(
        [
            ["A", 10, 20],
            ["A", 20, 30],
            ["A", 30, 40],
            ["B", 10, 30],
            ["B", 30, 40],
            ["B", 40, 80],
            ["B", 80, 90],
        ],
        columns=["stock", "low", "high"],
    )

    f = getattr(df.groupby("stock"), func)
    if window_size:
        window = f(window_size)
    else:
        window = f()

    index = MultiIndex.from_tuples(
        [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
        names=["stock", None],
    )
    columns = MultiIndex.from_tuples(
        [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
    )
    expected = DataFrame(expected_vals, index=index, columns=columns)

    result = window.agg({"low": ["mean", "max"], "high": ["mean", "min"]})

    tm.assert_frame_equal(result, expected)


def test_dont_modify_attributes_after_methods(
    arithmetic_win_operators, closed, center, min_periods, step
):
    # GH 39554
    roll_obj = Series(range(1)).rolling(
        1, center=center, closed=closed, min_periods=min_periods, step=step
    )
    expected = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
    getattr(roll_obj, arithmetic_win_operators)()
    result = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
    assert result == expected


def test_centered_axis_validation(step):
    # ok
    msg = "The 'axis' keyword in Series.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean()

    # bad axis
    msg = "No axis named 1 for object type Series"
    with pytest.raises(ValueError, match=msg):
        Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean()

    # ok ok
    df = DataFrame(np.ones((10, 10)))
    msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.rolling(window=3, center=True, axis=0, step=step).mean()
    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.rolling(window=3, center=True, axis=1, step=step).mean()

    # bad axis
    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        (df.rolling(window=3, center=True, axis=2, step=step).mean())


def test_rolling_min_min_periods(step):
    a = Series([1, 2, 3, 4, 5])
    result = a.rolling(window=100, min_periods=1, step=step).min()
    expected = Series(np.ones(len(a)))[::step]
    tm.assert_series_equal(result, expected)
    msg = "min_periods 5 must be <= window 3"
    with pytest.raises(ValueError, match=msg):
        Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).min()


def test_rolling_max_min_periods(step):
    a = Series([1, 2, 3, 4, 5], dtype=np.float64)
    result = a.rolling(window=100, min_periods=1, step=step).max()
    expected = a[::step]
    tm.assert_almost_equal(result, expected)
    msg = "min_periods 5 must be <= window 3"
    with pytest.raises(ValueError, match=msg):
        Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).max()
@@ -0,0 +1,328 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Series,
    Timestamp,
    concat,
    date_range,
    isna,
    notna,
)
import pandas._testing as tm

from pandas.tseries import offsets

# suppress warnings about empty slices, as we are deliberately testing
# with a 0-length Series
pytestmark = pytest.mark.filterwarnings(
    "ignore:.*(empty slice|0 for slice).*:RuntimeWarning"
)


def f(x):
    return x[np.isfinite(x)].mean()


@pytest.mark.parametrize("bad_raw", [None, 1, 0])
def test_rolling_apply_invalid_raw(bad_raw):
    with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"):
        Series(range(3)).rolling(1).apply(len, raw=bad_raw)


def test_rolling_apply_out_of_bounds(engine_and_raw):
    # gh-1850
    engine, raw = engine_and_raw

    vals = Series([1, 2, 3, 4])

    result = vals.rolling(10).apply(np.sum, engine=engine, raw=raw)
    assert result.isna().all()

    result = vals.rolling(10, min_periods=1).apply(np.sum, engine=engine, raw=raw)
    expected = Series([1, 3, 6, 10], dtype=float)
    tm.assert_almost_equal(result, expected)


@pytest.mark.parametrize("window", [2, "2s"])
def test_rolling_apply_with_pandas_objects(window):
    # 5071
    df = DataFrame(
        {
            "A": np.random.default_rng(2).standard_normal(5),
            "B": np.random.default_rng(2).integers(0, 10, size=5),
        },
        index=date_range("20130101", periods=5, freq="s"),
    )

    # we have an equal spaced timeseries index
    # so simulate removing the first period
    def f(x):
        if x.index[0] == df.index[0]:
            return np.nan
        return x.iloc[-1]

    result = df.rolling(window).apply(f, raw=False)
    expected = df.iloc[2:].reindex_like(df)
    tm.assert_frame_equal(result, expected)

    with tm.external_error_raised(AttributeError):
        df.rolling(window).apply(f, raw=True)


def test_rolling_apply(engine_and_raw, step):
    engine, raw = engine_and_raw

    expected = Series([], dtype="float64")
    result = expected.rolling(10, step=step).apply(
        lambda x: x.mean(), engine=engine, raw=raw
    )
    tm.assert_series_equal(result, expected)

    # gh-8080
    s = Series([None, None, None])
    result = s.rolling(2, min_periods=0, step=step).apply(
        lambda x: len(x), engine=engine, raw=raw
    )
    expected = Series([1.0, 2.0, 2.0])[::step]
    tm.assert_series_equal(result, expected)

    result = s.rolling(2, min_periods=0, step=step).apply(len, engine=engine, raw=raw)
    tm.assert_series_equal(result, expected)


def test_all_apply(engine_and_raw):
    engine, raw = engine_and_raw

    df = (
        DataFrame(
            {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
        ).set_index("A")
        * 2
    )
    er = df.rolling(window=1)
    r = df.rolling(window="1s")

    result = r.apply(lambda x: 1, engine=engine, raw=raw)
    expected = er.apply(lambda x: 1, engine=engine, raw=raw)
    tm.assert_frame_equal(result, expected)


def test_ragged_apply(engine_and_raw):
    engine, raw = engine_and_raw

    df = DataFrame({"B": range(5)})
    df.index = [
        Timestamp("20130101 09:00:00"),
        Timestamp("20130101 09:00:02"),
        Timestamp("20130101 09:00:03"),
        Timestamp("20130101 09:00:05"),
        Timestamp("20130101 09:00:06"),
    ]

    f = lambda x: 1
    result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw)
    expected = df.copy()
    expected["B"] = 1.0
    tm.assert_frame_equal(result, expected)

    result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw)
    expected = df.copy()
    expected["B"] = 1.0
    tm.assert_frame_equal(result, expected)

    result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw)
    expected = df.copy()
    expected["B"] = 1.0
    tm.assert_frame_equal(result, expected)


def test_invalid_engine():
    with pytest.raises(ValueError, match="engine must be either 'numba' or 'cython'"):
        Series(range(1)).rolling(1).apply(lambda x: x, engine="foo")


def test_invalid_engine_kwargs_cython():
    with pytest.raises(ValueError, match="cython engine does not accept engine_kwargs"):
        Series(range(1)).rolling(1).apply(
            lambda x: x, engine="cython", engine_kwargs={"nopython": False}
        )


def test_invalid_raw_numba():
    with pytest.raises(
        ValueError, match="raw must be `True` when using the numba engine"
    ):
        Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")


@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]])
def test_rolling_apply_args_kwargs(args_kwargs):
    # GH 33433
    def numpysum(x, par):
        return np.sum(x + par)

    df = DataFrame({"gr": [1, 1], "a": [1, 2]})

    idx = Index(["gr", "a"])
    expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx)

    result = df.rolling(1).apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
    tm.assert_frame_equal(result, expected)

    midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None])
    expected = Series([11.0, 12.0], index=midx, name="a")

    gb_rolling = df.groupby("gr")["a"].rolling(1)

    result = gb_rolling.apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
    tm.assert_series_equal(result, expected)


def test_nans(raw):
    obj = Series(np.random.default_rng(2).standard_normal(50))
    obj[:10] = np.nan
    obj[-10:] = np.nan

    result = obj.rolling(50, min_periods=30).apply(f, raw=raw)
    tm.assert_almost_equal(result.iloc[-1], np.mean(obj[10:-10]))

    # min_periods is working correctly
    result = obj.rolling(20, min_periods=15).apply(f, raw=raw)
    assert isna(result.iloc[23])
    assert not isna(result.iloc[24])

    assert not isna(result.iloc[-6])
    assert isna(result.iloc[-5])

    obj2 = Series(np.random.default_rng(2).standard_normal(20))
    result = obj2.rolling(10, min_periods=5).apply(f, raw=raw)
    assert isna(result.iloc[3])
    assert notna(result.iloc[4])

    result0 = obj.rolling(20, min_periods=0).apply(f, raw=raw)
    result1 = obj.rolling(20, min_periods=1).apply(f, raw=raw)
    tm.assert_almost_equal(result0, result1)


def test_center(raw):
    obj = Series(np.random.default_rng(2).standard_normal(50))
    obj[:10] = np.nan
    obj[-10:] = np.nan

    result = obj.rolling(20, min_periods=15, center=True).apply(f, raw=raw)
    expected = (
        concat([obj, Series([np.nan] * 9)])
        .rolling(20, min_periods=15)
        .apply(f, raw=raw)
        .iloc[9:]
        .reset_index(drop=True)
    )
    tm.assert_series_equal(result, expected)


def test_series(raw, series):
    result = series.rolling(50).apply(f, raw=raw)
    assert isinstance(result, Series)
    tm.assert_almost_equal(result.iloc[-1], np.mean(series[-50:]))


def test_frame(raw, frame):
    result = frame.rolling(50).apply(f, raw=raw)
    assert isinstance(result, DataFrame)
    tm.assert_series_equal(
        result.iloc[-1, :],
        frame.iloc[-50:, :].apply(np.mean, axis=0, raw=raw),
        check_names=False,
    )


def test_time_rule_series(raw, series):
    win = 25
    minp = 10
    ser = series[::2].resample("B").mean()
    series_result = ser.rolling(window=win, min_periods=minp).apply(f, raw=raw)
    last_date = series_result.index[-1]
    prev_date = last_date - 24 * offsets.BDay()

    trunc_series = series[::2].truncate(prev_date, last_date)
    tm.assert_almost_equal(series_result.iloc[-1], np.mean(trunc_series))


def test_time_rule_frame(raw, frame):
    win = 25
    minp = 10
    frm = frame[::2].resample("B").mean()
    frame_result = frm.rolling(window=win, min_periods=minp).apply(f, raw=raw)
    last_date = frame_result.index[-1]
    prev_date = last_date - 24 * offsets.BDay()

    trunc_frame = frame[::2].truncate(prev_date, last_date)
    tm.assert_series_equal(
        frame_result.xs(last_date),
        trunc_frame.apply(np.mean, raw=raw),
        check_names=False,
    )


@pytest.mark.parametrize("minp", [0, 99, 100])
def test_min_periods(raw, series, minp, step):
    result = series.rolling(len(series) + 1, min_periods=minp, step=step).apply(
        f, raw=raw
    )
    expected = series.rolling(len(series), min_periods=minp, step=step).apply(
        f, raw=raw
    )
    nan_mask = isna(result)
    tm.assert_series_equal(nan_mask, isna(expected))

    nan_mask = ~nan_mask
    tm.assert_almost_equal(result[nan_mask], expected[nan_mask])


def test_center_reindex_series(raw, series):
    # shifter index
    s = [f"x{x:d}" for x in range(12)]
    minp = 10

    series_xp = (
        series.reindex(list(series.index) + s)
        .rolling(window=25, min_periods=minp)
        .apply(f, raw=raw)
        .shift(-12)
        .reindex(series.index)
    )
    series_rs = series.rolling(window=25, min_periods=minp, center=True).apply(
        f, raw=raw
    )
    tm.assert_series_equal(series_xp, series_rs)


def test_center_reindex_frame(raw):
    # shifter index
    frame = DataFrame(range(100), index=date_range("2020-01-01", freq="D", periods=100))
    s = [f"x{x:d}" for x in range(12)]
    minp = 10

    frame_xp = (
        frame.reindex(list(frame.index) + s)
        .rolling(window=25, min_periods=minp)
        .apply(f, raw=raw)
        .shift(-12)
        .reindex(frame.index)
    )
    frame_rs = frame.rolling(window=25, min_periods=minp, center=True).apply(f, raw=raw)
    tm.assert_frame_equal(frame_xp, frame_rs)


def test_axis1(raw):
    # GH 45912
    df = DataFrame([1, 2])
    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.rolling(window=1, axis=1).apply(np.sum, raw=raw)
    expected = DataFrame([1.0, 2.0])
    tm.assert_frame_equal(result, expected)
@@ -0,0 +1,519 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    MultiIndex,
    Series,
    concat,
    date_range,
)
import pandas._testing as tm
from pandas.api.indexers import (
    BaseIndexer,
    FixedForwardWindowIndexer,
)
from pandas.core.indexers.objects import (
    ExpandingIndexer,
    FixedWindowIndexer,
    VariableOffsetWindowIndexer,
)

from pandas.tseries.offsets import BusinessDay


def test_bad_get_window_bounds_signature():
    class BadIndexer(BaseIndexer):
        def get_window_bounds(self):
            return None

    indexer = BadIndexer()
    with pytest.raises(ValueError, match="BadIndexer does not implement"):
        Series(range(5)).rolling(indexer)


def test_expanding_indexer():
    s = Series(range(10))
    indexer = ExpandingIndexer()
    result = s.rolling(indexer).mean()
    expected = s.expanding().mean()
    tm.assert_series_equal(result, expected)


def test_indexer_constructor_arg():
    # Example found in computation.rst
    use_expanding = [True, False, True, False, True]
    df = DataFrame({"values": range(5)})

    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            start = np.empty(num_values, dtype=np.int64)
            end = np.empty(num_values, dtype=np.int64)
            for i in range(num_values):
                if self.use_expanding[i]:
                    start[i] = 0
                    end[i] = i + 1
                else:
                    start[i] = i
                    end[i] = i + self.window_size
            return start, end

    indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
    result = df.rolling(indexer).sum()
    expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]})
    tm.assert_frame_equal(result, expected)


def test_indexer_accepts_rolling_args():
    df = DataFrame({"values": range(5)})

    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            start = np.empty(num_values, dtype=np.int64)
            end = np.empty(num_values, dtype=np.int64)
            for i in range(num_values):
                if (
                    center
                    and min_periods == 1
                    and closed == "both"
                    and step == 1
                    and i == 2
                ):
                    start[i] = 0
                    end[i] = num_values
                else:
                    start[i] = i
                    end[i] = i + self.window_size
            return start, end

    indexer = CustomIndexer(window_size=1)
    result = df.rolling(
        indexer, center=True, min_periods=1, closed="both", step=1
    ).sum()
    expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]})
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "func,np_func,expected,np_kwargs",
    [
        ("count", len, [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, np.nan], {}),
        ("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan], {}),
        (
            "max",
            np.max,
            [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan],
            {},
        ),
        (
            "std",
            np.std,
            [
                1.0,
                1.0,
                1.0,
                55.71654452,
                54.85739087,
                53.9845657,
                1.0,
                1.0,
                0.70710678,
                np.nan,
            ],
            {"ddof": 1},
        ),
        (
            "var",
            np.var,
            [
                1.0,
                1.0,
                1.0,
                3104.333333,
                3009.333333,
                2914.333333,
                1.0,
                1.0,
                0.500000,
                np.nan,
            ],
            {"ddof": 1},
        ),
        (
            "median",
            np.median,
            [1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan],
            {},
        ),
    ],
)
def test_rolling_forward_window(
    frame_or_series, func, np_func, expected, np_kwargs, step
):
    # GH 32865
    values = np.arange(10.0)
    values[5] = 100.0

    indexer = FixedForwardWindowIndexer(window_size=3)

    match = "Forward-looking windows can't have center=True"
    with pytest.raises(ValueError, match=match):
        rolling = frame_or_series(values).rolling(window=indexer, center=True)
        getattr(rolling, func)()

    match = "Forward-looking windows don't support setting the closed argument"
    with pytest.raises(ValueError, match=match):
        rolling = frame_or_series(values).rolling(window=indexer, closed="right")
        getattr(rolling, func)()

    rolling = frame_or_series(values).rolling(window=indexer, min_periods=2, step=step)
    result = getattr(rolling, func)()

    # Check that the function output matches the explicitly provided array
    expected = frame_or_series(expected)[::step]
    tm.assert_equal(result, expected)

    # Check that the rolling function output matches applying an alternative
    # function to the rolling window object
    expected2 = frame_or_series(rolling.apply(lambda x: np_func(x, **np_kwargs)))
    tm.assert_equal(result, expected2)

    # Check that the function output matches applying an alternative function
    # if min_periods isn't specified
    # GH 39604: After count-min_periods deprecation, apply(lambda x: len(x))
    # is equivalent to count after setting min_periods=0
    min_periods = 0 if func == "count" else None
    rolling3 = frame_or_series(values).rolling(window=indexer, min_periods=min_periods)
    result3 = getattr(rolling3, func)()
    expected3 = frame_or_series(rolling3.apply(lambda x: np_func(x, **np_kwargs)))
    tm.assert_equal(result3, expected3)


def test_rolling_forward_skewness(frame_or_series, step):
    values = np.arange(10.0)
    values[5] = 100.0

    indexer = FixedForwardWindowIndexer(window_size=5)
    rolling = frame_or_series(values).rolling(window=indexer, min_periods=3, step=step)
    result = rolling.skew()

    expected = frame_or_series(
        [
            0.0,
            2.232396,
            2.229508,
            2.228340,
            2.229091,
            2.231989,
            0.0,
            0.0,
            np.nan,
            np.nan,
|
||||
]
|
||||
)[::step]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,expected",
|
||||
[
|
||||
("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan]),
|
||||
(
|
||||
"corr",
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
1.0,
|
||||
0.8704775290207161,
|
||||
0.018229084250926637,
|
||||
-0.861357304646493,
|
||||
1.0,
|
||||
1.0,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_forward_cov_corr(func, expected):
|
||||
values1 = np.arange(10).reshape(-1, 1)
|
||||
values2 = values1 * 2
|
||||
values1[5, 0] = 100
|
||||
values = np.concatenate([values1, values2], axis=1)
|
||||
|
||||
indexer = FixedForwardWindowIndexer(window_size=3)
|
||||
rolling = DataFrame(values).rolling(window=indexer, min_periods=3)
|
||||
# We are interested in checking only pairwise covariance / correlation
|
||||
result = getattr(rolling, func)().loc[(slice(None), 1), 0]
|
||||
result = result.reset_index(drop=True)
|
||||
expected = Series(expected).reset_index(drop=True)
|
||||
expected.name = result.name
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"closed,expected_data",
|
||||
[
|
||||
["right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]],
|
||||
["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
|
||||
],
|
||||
)
|
||||
def test_non_fixed_variable_window_indexer(closed, expected_data):
|
||||
index = date_range("2020", periods=10)
|
||||
df = DataFrame(range(10), index=index)
|
||||
offset = BusinessDay(1)
|
||||
indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
|
||||
result = df.rolling(indexer, closed=closed).sum()
|
||||
expected = DataFrame(expected_data, index=index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_variableoffsetwindowindexer_not_dti():
|
||||
# GH 54379
|
||||
with pytest.raises(ValueError, match="index must be a DatetimeIndex."):
|
||||
VariableOffsetWindowIndexer(index="foo", offset=BusinessDay(1))
|
||||
|
||||
|
||||
def test_variableoffsetwindowindexer_not_offset():
|
||||
# GH 54379
|
||||
idx = date_range("2020", periods=10)
|
||||
with pytest.raises(ValueError, match="offset must be a DateOffset-like object."):
|
||||
VariableOffsetWindowIndexer(index=idx, offset="foo")
|
||||
|
||||
|
||||
def test_fixed_forward_indexer_count(step):
|
||||
# GH: 35579
|
||||
df = DataFrame({"b": [None, None, None, 7]})
|
||||
indexer = FixedForwardWindowIndexer(window_size=2)
|
||||
result = df.rolling(window=indexer, min_periods=0, step=step).count()
|
||||
expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})[::step]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("end_value", "values"), [(1, [0.0, 1, 1, 3, 2]), (-1, [0.0, 1, 0, 3, 1])]
|
||||
)
|
||||
@pytest.mark.parametrize(("func", "args"), [("median", []), ("quantile", [0.5])])
|
||||
def test_indexer_quantile_sum(end_value, values, func, args):
|
||||
# GH 37153
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
start = np.empty(num_values, dtype=np.int64)
|
||||
end = np.empty(num_values, dtype=np.int64)
|
||||
for i in range(num_values):
|
||||
if self.use_expanding[i]:
|
||||
start[i] = 0
|
||||
end[i] = max(i + end_value, 1)
|
||||
else:
|
||||
start[i] = i
|
||||
end[i] = i + self.window_size
|
||||
return start, end
|
||||
|
||||
use_expanding = [True, False, True, False, True]
|
||||
df = DataFrame({"values": range(5)})
|
||||
|
||||
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
|
||||
result = getattr(df.rolling(indexer), func)(*args)
|
||||
expected = DataFrame({"values": values})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer]
|
||||
)
|
||||
@pytest.mark.parametrize("window_size", [1, 2, 12])
|
||||
@pytest.mark.parametrize(
|
||||
"df_data",
|
||||
[
|
||||
{"a": [1, 1], "b": [0, 1]},
|
||||
{"a": [1, 2], "b": [0, 1]},
|
||||
{"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))},
|
||||
],
|
||||
)
|
||||
def test_indexers_are_reusable_after_groupby_rolling(
|
||||
indexer_class, window_size, df_data
|
||||
):
|
||||
# GH 43267
|
||||
df = DataFrame(df_data)
|
||||
num_trials = 3
|
||||
indexer = indexer_class(window_size=window_size)
|
||||
original_window_size = indexer.window_size
|
||||
for i in range(num_trials):
|
||||
df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
|
||||
assert indexer.window_size == original_window_size
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"window_size, num_values, expected_start, expected_end",
|
||||
[
|
||||
(1, 1, [0], [1]),
|
||||
(1, 2, [0, 1], [1, 2]),
|
||||
(2, 1, [0], [1]),
|
||||
(2, 2, [0, 1], [2, 2]),
|
||||
(5, 12, range(12), list(range(5, 12)) + [12] * 5),
|
||||
(12, 5, range(5), [5] * 5),
|
||||
(0, 0, np.array([]), np.array([])),
|
||||
(1, 0, np.array([]), np.array([])),
|
||||
(0, 1, [0], [0]),
|
||||
],
|
||||
)
|
||||
def test_fixed_forward_indexer_bounds(
|
||||
window_size, num_values, expected_start, expected_end, step
|
||||
):
|
||||
# GH 43267
|
||||
indexer = FixedForwardWindowIndexer(window_size=window_size)
|
||||
start, end = indexer.get_window_bounds(num_values=num_values, step=step)
|
||||
|
||||
tm.assert_numpy_array_equal(
|
||||
start, np.array(expected_start[::step]), check_dtype=False
|
||||
)
|
||||
tm.assert_numpy_array_equal(end, np.array(expected_end[::step]), check_dtype=False)
|
||||
assert len(start) == len(end)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"df, window_size, expected",
|
||||
[
|
||||
(
|
||||
DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}),
|
||||
2,
|
||||
Series(
|
||||
[0, 1.5, 2.0],
|
||||
index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]),
|
||||
name="b",
|
||||
dtype=np.float64,
|
||||
),
|
||||
),
|
||||
(
|
||||
DataFrame(
|
||||
{
|
||||
"b": [np.nan, 1, 2, np.nan] + list(range(4, 18)),
|
||||
"a": [1] * 7 + [2] * 11,
|
||||
"c": range(18),
|
||||
}
|
||||
),
|
||||
12,
|
||||
Series(
|
||||
[
|
||||
3.6,
|
||||
3.6,
|
||||
4.25,
|
||||
5.0,
|
||||
5.0,
|
||||
5.5,
|
||||
6.0,
|
||||
12.0,
|
||||
12.5,
|
||||
13.0,
|
||||
13.5,
|
||||
14.0,
|
||||
14.5,
|
||||
15.0,
|
||||
15.5,
|
||||
16.0,
|
||||
16.5,
|
||||
17.0,
|
||||
],
|
||||
index=MultiIndex.from_arrays(
|
||||
[[1] * 7 + [2] * 11, range(18)], names=["a", None]
|
||||
),
|
||||
name="b",
|
||||
dtype=np.float64,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected):
|
||||
# GH 43267
|
||||
indexer = FixedForwardWindowIndexer(window_size=window_size)
|
||||
result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"group_keys",
|
||||
[
|
||||
(1,),
|
||||
(1, 2),
|
||||
(2, 1),
|
||||
(1, 1, 2),
|
||||
(1, 2, 1),
|
||||
(1, 1, 2, 2),
|
||||
(1, 2, 3, 2, 3),
|
||||
(1, 1, 2) * 4,
|
||||
(1, 2, 3) * 5,
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20])
|
||||
def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
|
||||
# GH 43267
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": np.array(list(group_keys)),
|
||||
"b": np.arange(len(group_keys), dtype=np.float64) + 17,
|
||||
"c": np.arange(len(group_keys), dtype=np.int64),
|
||||
}
|
||||
)
|
||||
|
||||
indexer = FixedForwardWindowIndexer(window_size=window_size)
|
||||
result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum()
|
||||
result.index.names = ["a", "c"]
|
||||
|
||||
groups = df.groupby("a")[["a", "b", "c"]]
|
||||
manual = concat(
|
||||
[
|
||||
g.assign(
|
||||
b=[
|
||||
g["b"].iloc[i : i + window_size].sum(min_count=1)
|
||||
for i in range(len(g))
|
||||
]
|
||||
)
|
||||
for _, g in groups
|
||||
]
|
||||
)
|
||||
manual = manual.set_index(["a", "c"])["b"]
|
||||
|
||||
tm.assert_series_equal(result, manual)
|
||||
|
||||
|
||||
def test_unequal_start_end_bounds():
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
return np.array([1]), np.array([1, 2])
|
||||
|
||||
indexer = CustomIndexer()
|
||||
roll = Series(1).rolling(indexer)
|
||||
match = "start"
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.mean()
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
next(iter(roll))
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.corr(pairwise=True)
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.cov(pairwise=True)
|
||||
|
||||
|
||||
def test_unequal_bounds_to_object():
|
||||
# GH 44470
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
return np.array([1]), np.array([2])
|
||||
|
||||
indexer = CustomIndexer()
|
||||
roll = Series([1, 1]).rolling(indexer)
|
||||
match = "start and end"
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.mean()
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
next(iter(roll))
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.corr(pairwise=True)
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.cov(pairwise=True)
|
@ -0,0 +1,111 @@
|
||||
from functools import partial
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.window.aggregations as window_aggregations
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def _get_rolling_aggregations():
|
||||
# list pairs of name and function
|
||||
# each function has this signature:
|
||||
# (const float64_t[:] values, ndarray[int64_t] start,
|
||||
# ndarray[int64_t] end, int64_t minp) -> np.ndarray
|
||||
named_roll_aggs = (
|
||||
[
|
||||
("roll_sum", window_aggregations.roll_sum),
|
||||
("roll_mean", window_aggregations.roll_mean),
|
||||
]
|
||||
+ [
|
||||
(f"roll_var({ddof})", partial(window_aggregations.roll_var, ddof=ddof))
|
||||
for ddof in [0, 1]
|
||||
]
|
||||
+ [
|
||||
("roll_skew", window_aggregations.roll_skew),
|
||||
("roll_kurt", window_aggregations.roll_kurt),
|
||||
("roll_median_c", window_aggregations.roll_median_c),
|
||||
("roll_max", window_aggregations.roll_max),
|
||||
("roll_min", window_aggregations.roll_min),
|
||||
]
|
||||
+ [
|
||||
(
|
||||
f"roll_quantile({quantile},{interpolation})",
|
||||
partial(
|
||||
window_aggregations.roll_quantile,
|
||||
quantile=quantile,
|
||||
interpolation=interpolation,
|
||||
),
|
||||
)
|
||||
for quantile in [0.0001, 0.5, 0.9999]
|
||||
for interpolation in window_aggregations.interpolation_types
|
||||
]
|
||||
+ [
|
||||
(
|
||||
f"roll_rank({percentile},{method},{ascending})",
|
||||
partial(
|
||||
window_aggregations.roll_rank,
|
||||
percentile=percentile,
|
||||
method=method,
|
||||
ascending=ascending,
|
||||
),
|
||||
)
|
||||
for percentile in [True, False]
|
||||
for method in window_aggregations.rolling_rank_tiebreakers.keys()
|
||||
for ascending in [True, False]
|
||||
]
|
||||
)
|
||||
# unzip to a list of 2 tuples, names and functions
|
||||
unzipped = list(zip(*named_roll_aggs))
|
||||
return {"ids": unzipped[0], "params": unzipped[1]}
|
||||
|
||||
|
||||
_rolling_aggregations = _get_rolling_aggregations()
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=_rolling_aggregations["params"], ids=_rolling_aggregations["ids"]
|
||||
)
|
||||
def rolling_aggregation(request):
|
||||
"""Make a rolling aggregation function as fixture."""
|
||||
return request.param
|
||||
|
||||
|
||||
def test_rolling_aggregation_boundary_consistency(rolling_aggregation):
|
||||
# GH-45647
|
||||
minp, step, width, size, selection = 0, 1, 3, 11, [2, 7]
|
||||
values = np.arange(1, 1 + size, dtype=np.float64)
|
||||
end = np.arange(width, size, step, dtype=np.int64)
|
||||
start = end - width
|
||||
selarr = np.array(selection, dtype=np.int32)
|
||||
result = Series(rolling_aggregation(values, start[selarr], end[selarr], minp))
|
||||
expected = Series(rolling_aggregation(values, start, end, minp)[selarr])
|
||||
tm.assert_equal(expected, result)
|
||||
|
||||
|
||||
def test_rolling_aggregation_with_unused_elements(rolling_aggregation):
|
||||
# GH-45647
|
||||
minp, width = 0, 5 # width at least 4 for kurt
|
||||
size = 2 * width + 5
|
||||
values = np.arange(1, size + 1, dtype=np.float64)
|
||||
values[width : width + 2] = sys.float_info.min
|
||||
values[width + 2] = np.nan
|
||||
values[width + 3 : width + 5] = sys.float_info.max
|
||||
start = np.array([0, size - width], dtype=np.int64)
|
||||
end = np.array([width, size], dtype=np.int64)
|
||||
loc = np.array(
|
||||
[j for i in range(len(start)) for j in range(start[i], end[i])],
|
||||
dtype=np.int32,
|
||||
)
|
||||
result = Series(rolling_aggregation(values, start, end, minp))
|
||||
compact_values = np.array(values[loc], dtype=np.float64)
|
||||
compact_start = np.arange(0, len(start) * width, width, dtype=np.int64)
|
||||
compact_end = compact_start + width
|
||||
expected = Series(
|
||||
rolling_aggregation(compact_values, compact_start, compact_end, minp)
|
||||
)
|
||||
assert np.isfinite(expected.values).all(), "Not all expected values are finite"
|
||||
tm.assert_equal(expected, result)
|
@ -0,0 +1,173 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import DataError
|
||||
|
||||
from pandas.core.dtypes.common import pandas_dtype
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# gh-12373 : rolling functions error on float32 data
|
||||
# make sure rolling functions works for different dtypes
|
||||
#
|
||||
# further note that we are only checking rolling for fully dtype
|
||||
# compliance (though both expanding and ewm inherit)
|
||||
|
||||
|
||||
def get_dtype(dtype, coerce_int=None):
|
||||
if coerce_int is False and "int" in dtype:
|
||||
return None
|
||||
return pandas_dtype(dtype)
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
"object",
|
||||
"category",
|
||||
"int8",
|
||||
"int16",
|
||||
"int32",
|
||||
"int64",
|
||||
"uint8",
|
||||
"uint16",
|
||||
"uint32",
|
||||
"uint64",
|
||||
"float16",
|
||||
"float32",
|
||||
"float64",
|
||||
"m8[ns]",
|
||||
"M8[ns]",
|
||||
"datetime64[ns, UTC]",
|
||||
]
|
||||
)
|
||||
def dtypes(request):
|
||||
"""Dtypes for window tests"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, data, expected_data, coerce_int, min_periods",
|
||||
[
|
||||
("count", np.arange(5), [1, 2, 2, 2, 2], True, 0),
|
||||
("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0),
|
||||
("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0),
|
||||
("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None),
|
||||
("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None),
|
||||
("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None),
|
||||
("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None),
|
||||
("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None),
|
||||
("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None),
|
||||
("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None),
|
||||
("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True, None),
|
||||
("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None),
|
||||
("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
|
||||
("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
|
||||
("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None),
|
||||
("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None),
|
||||
("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None),
|
||||
(
|
||||
"std",
|
||||
[0, 1, 2, np.nan, 4],
|
||||
[np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2,
|
||||
False,
|
||||
None,
|
||||
),
|
||||
("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None),
|
||||
("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None),
|
||||
("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None),
|
||||
("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
|
||||
("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
|
||||
(
|
||||
"median",
|
||||
[0, 1, 2, np.nan, 4],
|
||||
[np.nan, 0.5, 1.5, np.nan, np.nan],
|
||||
False,
|
||||
None,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_series_dtypes(
|
||||
method, data, expected_data, coerce_int, dtypes, min_periods, step
|
||||
):
|
||||
ser = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int))
|
||||
rolled = ser.rolling(2, min_periods=min_periods, step=step)
|
||||
|
||||
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
|
||||
msg = "No numeric types to aggregate"
|
||||
with pytest.raises(DataError, match=msg):
|
||||
getattr(rolled, method)()
|
||||
else:
|
||||
result = getattr(rolled, method)()
|
||||
expected = Series(expected_data, dtype="float64")[::step]
|
||||
tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_nullable_int(any_signed_int_ea_dtype, step):
|
||||
# GH 43016
|
||||
ser = Series([0, 1, NA], dtype=any_signed_int_ea_dtype)
|
||||
result = ser.rolling(2, step=step).mean()
|
||||
expected = Series([np.nan, 0.5, np.nan])[::step]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, expected_data, min_periods",
|
||||
[
|
||||
("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0),
|
||||
(
|
||||
"max",
|
||||
{0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
|
||||
None,
|
||||
),
|
||||
(
|
||||
"min",
|
||||
{0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
|
||||
None,
|
||||
),
|
||||
(
|
||||
"sum",
|
||||
{0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])},
|
||||
None,
|
||||
),
|
||||
(
|
||||
"mean",
|
||||
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
|
||||
None,
|
||||
),
|
||||
(
|
||||
"std",
|
||||
{
|
||||
0: Series([np.nan] + [np.sqrt(2)] * 4),
|
||||
1: Series([np.nan] + [np.sqrt(2)] * 4),
|
||||
},
|
||||
None,
|
||||
),
|
||||
(
|
||||
"var",
|
||||
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
|
||||
None,
|
||||
),
|
||||
(
|
||||
"median",
|
||||
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
|
||||
None,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step):
|
||||
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
|
||||
rolled = df.rolling(2, min_periods=min_periods, step=step)
|
||||
|
||||
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
|
||||
msg = "Cannot aggregate non-numeric type"
|
||||
with pytest.raises(DataError, match=msg):
|
||||
getattr(rolled, method)()
|
||||
else:
|
||||
result = getattr(rolled, method)()
|
||||
expected = DataFrame(expected_data, dtype="float64")[::step]
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,727 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_doc_string():
|
||||
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
||||
df
|
||||
df.ewm(com=0.5).mean()
|
||||
|
||||
|
||||
def test_constructor(frame_or_series):
|
||||
c = frame_or_series(range(5)).ewm
|
||||
|
||||
# valid
|
||||
c(com=0.5)
|
||||
c(span=1.5)
|
||||
c(alpha=0.5)
|
||||
c(halflife=0.75)
|
||||
c(com=0.5, span=None)
|
||||
c(alpha=0.5, com=None)
|
||||
c(halflife=0.75, alpha=None)
|
||||
|
||||
# not valid: mutually exclusive
|
||||
msg = "comass, span, halflife, and alpha are mutually exclusive"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(com=0.5, alpha=0.5)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(span=1.5, halflife=0.75)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(alpha=0.5, span=1.5)
|
||||
|
||||
# not valid: com < 0
|
||||
msg = "comass must satisfy: comass >= 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(com=-0.5)
|
||||
|
||||
# not valid: span < 1
|
||||
msg = "span must satisfy: span >= 1"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(span=0.5)
|
||||
|
||||
# not valid: halflife <= 0
|
||||
msg = "halflife must satisfy: halflife > 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(halflife=0)
|
||||
|
||||
# not valid: alpha <= 0 or alpha > 1
|
||||
msg = "alpha must satisfy: 0 < alpha <= 1"
|
||||
for alpha in (-0.5, 1.5):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(alpha=alpha)
|
||||
|
||||
|
||||
def test_ewma_times_not_datetime_type():
|
||||
msg = r"times must be datetime64 dtype."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Series(range(5)).ewm(times=np.arange(5))
|
||||
|
||||
|
||||
def test_ewma_times_not_same_length():
|
||||
msg = "times must be the same length as the object."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Series(range(5)).ewm(times=np.arange(4).astype("datetime64[ns]"))
|
||||
|
||||
|
||||
def test_ewma_halflife_not_correct_type():
|
||||
msg = "halflife must be a timedelta convertible object"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]"))
|
||||
|
||||
|
||||
def test_ewma_halflife_without_times(halflife_with_times):
|
||||
msg = "halflife can only be a timedelta convertible argument if times is not None."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Series(range(5)).ewm(halflife=halflife_with_times)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"times",
|
||||
[
|
||||
np.arange(10).astype("datetime64[D]").astype("datetime64[ns]"),
|
||||
date_range("2000", freq="D", periods=10),
|
||||
date_range("2000", freq="D", periods=10).tz_localize("UTC"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("min_periods", [0, 2])
|
||||
def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
|
||||
halflife = halflife_with_times
|
||||
data = np.arange(10.0)
|
||||
data[::2] = np.nan
|
||||
df = DataFrame({"A": data})
|
||||
result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
|
||||
expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
|
||||
tz = tz_aware_fixture
|
||||
halflife = "23 days"
|
||||
times = (
|
||||
DatetimeIndex(["2020-01-01", "2020-01-10T00:04:05", "2020-02-23T05:00:23"])
|
||||
.tz_localize(tz)
|
||||
.as_unit(unit)
|
||||
)
|
||||
data = np.arange(3)
|
||||
df = DataFrame(data)
|
||||
result = df.ewm(halflife=halflife, times=times).mean()
|
||||
expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_ewm_with_nat_raises(halflife_with_times):
|
||||
# GH#38535
|
||||
ser = Series(range(1))
|
||||
times = DatetimeIndex(["NaT"])
|
||||
with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
|
||||
ser.ewm(com=0.1, halflife=halflife_with_times, times=times)
|
||||
|
||||
|
||||
def test_ewm_with_times_getitem(halflife_with_times):
|
||||
# GH 40164
|
||||
halflife = halflife_with_times
|
||||
data = np.arange(10.0)
|
||||
data[::2] = np.nan
|
||||
times = date_range("2000", freq="D", periods=10)
|
||||
df = DataFrame({"A": data, "B": data})
|
||||
result = df.ewm(halflife=halflife, times=times)["A"].mean()
|
||||
expected = df.ewm(halflife=1.0)["A"].mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg", ["com", "halflife", "span", "alpha"])
|
||||
def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na):
|
||||
# GH 40164
|
||||
kwargs = {arg: 1, "adjust": adjust, "ignore_na": ignore_na}
|
||||
ewm = DataFrame({"A": range(1), "B": range(1)}).ewm(**kwargs)
|
||||
expected = {attr: getattr(ewm, attr) for attr in ewm._attributes}
|
||||
ewm_slice = ewm["A"]
|
||||
result = {attr: getattr(ewm, attr) for attr in ewm_slice._attributes}
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_ewma_times_adjust_false_raises():
|
||||
# GH 40098
|
||||
with pytest.raises(
|
||||
NotImplementedError, match="times is not supported with adjust=False."
|
||||
):
|
||||
Series(range(1)).ewm(
|
||||
0.1, adjust=False, times=date_range("2000", freq="D", periods=1)
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func, expected",
|
||||
[
|
||||
[
|
||||
"mean",
|
||||
DataFrame(
|
||||
{
|
||||
0: range(5),
|
||||
1: range(4, 9),
|
||||
2: [7.428571, 9, 10.571429, 12.142857, 13.714286],
|
||||
},
|
||||
dtype=float,
|
||||
),
|
||||
],
|
||||
[
|
||||
"std",
|
||||
DataFrame(
|
||||
{
|
||||
0: [np.nan] * 5,
|
||||
1: [4.242641] * 5,
|
||||
2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788],
|
||||
}
|
||||
),
|
||||
],
|
||||
[
|
||||
"var",
|
||||
DataFrame(
|
||||
{
|
||||
0: [np.nan] * 5,
|
||||
1: [18.0] * 5,
|
||||
2: [21.428571, 27, 33.428571, 40.714286, 48.857143],
|
||||
}
|
||||
),
|
||||
],
|
||||
],
|
||||
)
|
||||
def test_float_dtype_ewma(func, expected, float_numpy_dtype):
|
||||
# GH#42452
|
||||
|
||||
df = DataFrame(
|
||||
{0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype
|
||||
)
|
||||
msg = "Support for axis=1 in DataFrame.ewm is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
e = df.ewm(alpha=0.5, axis=1)
|
||||
result = getattr(e, func)()
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_times_string_col_raises():
|
||||
# GH 43265
|
||||
df = DataFrame(
|
||||
{"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)}
|
||||
)
|
||||
with pytest.raises(ValueError, match="times must be datetime64"):
|
||||
df.ewm(halflife="1 day", min_periods=0, times="time_col")
|
||||
|
||||
|
||||
def test_ewm_sum_adjust_false_notimplemented():
|
||||
data = Series(range(1)).ewm(com=1, adjust=False)
|
||||
with pytest.raises(NotImplementedError, match="sum is not"):
|
||||
data.sum()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"expected_data, ignore",
|
||||
[[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]],
|
||||
)
|
||||
def test_ewm_sum(expected_data, ignore):
|
||||
# xref from Numbagg tests
|
||||
# https://github.com/numbagg/numbagg/blob/v0.2.1/numbagg/test/test_moving.py#L50
|
||||
data = Series([10, 0, np.nan, 10])
|
||||
result = data.ewm(alpha=0.5, ignore_na=ignore).sum()
|
||||
expected = Series(expected_data)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_ewma_adjust():
|
||||
vals = Series(np.zeros(1000))
|
||||
vals[5] = 1
|
||||
result = vals.ewm(span=100, adjust=False).mean().sum()
|
||||
assert np.abs(result - 1) < 1e-2
|
||||
|
||||
|
||||
def test_ewma_cases(adjust, ignore_na):
|
||||
# try adjust/ignore_na args matrix
|
||||
|
||||
s = Series([1.0, 2.0, 4.0, 8.0])
|
||||
|
||||
if adjust:
|
||||
expected = Series([1.0, 1.6, 2.736842, 4.923077])
|
||||
else:
|
||||
expected = Series([1.0, 1.333333, 2.222222, 4.148148])
|
||||
|
||||
result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_ewma_nan_handling():
|
||||
s = Series([1.0] + [np.nan] * 5 + [1.0])
|
||||
result = s.ewm(com=5).mean()
|
||||
tm.assert_series_equal(result, Series([1.0] * len(s)))
|
||||
|
||||
s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0])
|
||||
result = s.ewm(com=5).mean()
|
||||
tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"s, adjust, ignore_na, w",
|
||||
[
|
||||
(
|
||||
Series([np.nan, 1.0, 101.0]),
|
||||
True,
|
||||
False,
|
||||
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, 101.0]),
|
||||
True,
|
||||
True,
|
||||
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, 101.0]),
|
||||
False,
|
||||
False,
|
||||
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, 101.0]),
|
||||
False,
|
||||
True,
|
||||
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0]),
|
||||
True,
|
||||
False,
|
||||
[(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, 1.0],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0]),
|
||||
True,
|
||||
True,
|
||||
[(1.0 - (1.0 / (1.0 + 2.0))), np.nan, 1.0],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0]),
|
||||
False,
|
||||
False,
|
||||
[(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, (1.0 / (1.0 + 2.0))],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0]),
|
||||
False,
|
||||
True,
|
||||
[(1.0 - (1.0 / (1.0 + 2.0))), np.nan, (1.0 / (1.0 + 2.0))],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
|
||||
True,
|
||||
False,
|
||||
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan, 1.0, np.nan],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
|
||||
True,
|
||||
True,
|
||||
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan, 1.0, np.nan],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
|
||||
False,
|
||||
False,
|
||||
[
|
||||
np.nan,
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) ** 3,
|
||||
np.nan,
|
||||
np.nan,
|
||||
(1.0 / (1.0 + 2.0)),
|
||||
np.nan,
|
||||
],
|
||||
),
|
||||
(
|
||||
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
|
||||
False,
|
||||
True,
|
||||
[
|
||||
np.nan,
|
||||
(1.0 - (1.0 / (1.0 + 2.0))),
|
||||
np.nan,
|
||||
np.nan,
|
||||
(1.0 / (1.0 + 2.0)),
|
||||
np.nan,
|
||||
],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0, 50.0]),
|
||||
True,
|
||||
False,
|
||||
[
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) ** 3,
|
||||
np.nan,
|
||||
(1.0 - (1.0 / (1.0 + 2.0))),
|
||||
1.0,
|
||||
],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0, 50.0]),
|
||||
True,
|
||||
True,
|
||||
[
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) ** 2,
|
||||
np.nan,
|
||||
(1.0 - (1.0 / (1.0 + 2.0))),
|
||||
1.0,
|
||||
],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0, 50.0]),
|
||||
False,
|
||||
False,
|
||||
[
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) ** 3,
|
||||
np.nan,
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)),
|
||||
(1.0 / (1.0 + 2.0))
|
||||
* ((1.0 - (1.0 / (1.0 + 2.0))) ** 2 + (1.0 / (1.0 + 2.0))),
|
||||
],
|
||||
),
|
||||
(
|
||||
Series([1.0, np.nan, 101.0, 50.0]),
|
||||
False,
|
||||
True,
|
||||
[
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) ** 2,
|
||||
np.nan,
|
||||
(1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)),
|
||||
(1.0 / (1.0 + 2.0)),
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_ewma_nan_handling_cases(s, adjust, ignore_na, w):
|
||||
# GH 7603
|
||||
expected = (s.multiply(w).cumsum() / Series(w).cumsum()).ffill()
|
||||
result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
if ignore_na is False:
|
||||
# check that ignore_na defaults to False
|
||||
result = s.ewm(com=2.0, adjust=adjust).mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_ewm_alpha():
|
||||
# GH 10789
|
||||
arr = np.random.default_rng(2).standard_normal(100)
|
||||
locs = np.arange(20, 40)
|
||||
arr[locs] = np.nan
|
||||
|
||||
s = Series(arr)
|
||||
a = s.ewm(alpha=0.61722699889169674).mean()
|
||||
b = s.ewm(com=0.62014947789973052).mean()
|
||||
c = s.ewm(span=2.240298955799461).mean()
|
||||
d = s.ewm(halflife=0.721792864318).mean()
|
||||
tm.assert_series_equal(a, b)
|
||||
tm.assert_series_equal(a, c)
|
||||
tm.assert_series_equal(a, d)
|
||||
|
||||
|
||||
def test_ewm_domain_checks():
|
||||
# GH 12492
|
||||
arr = np.random.default_rng(2).standard_normal(100)
|
||||
locs = np.arange(20, 40)
|
||||
arr[locs] = np.nan
|
||||
|
||||
s = Series(arr)
|
||||
msg = "comass must satisfy: comass >= 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(com=-0.1)
|
||||
s.ewm(com=0.0)
|
||||
s.ewm(com=0.1)
|
||||
|
||||
msg = "span must satisfy: span >= 1"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(span=-0.1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(span=0.0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(span=0.9)
|
||||
s.ewm(span=1.0)
|
||||
s.ewm(span=1.1)
|
||||
|
||||
msg = "halflife must satisfy: halflife > 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(halflife=-0.1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(halflife=0.0)
|
||||
s.ewm(halflife=0.1)
|
||||
|
||||
msg = "alpha must satisfy: 0 < alpha <= 1"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(alpha=-0.1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(alpha=0.0)
|
||||
s.ewm(alpha=0.1)
|
||||
s.ewm(alpha=1.0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(alpha=1.1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["mean", "std", "var"])
|
||||
def test_ew_empty_series(method):
|
||||
vals = Series([], dtype=np.float64)
|
||||
|
||||
ewm = vals.ewm(3)
|
||||
result = getattr(ewm, method)()
|
||||
tm.assert_almost_equal(result, vals)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("min_periods", [0, 1])
|
||||
@pytest.mark.parametrize("name", ["mean", "var", "std"])
|
||||
def test_ew_min_periods(min_periods, name):
|
||||
# excluding NaNs correctly
|
||||
arr = np.random.default_rng(2).standard_normal(50)
|
||||
arr[:10] = np.nan
|
||||
arr[-10:] = np.nan
|
||||
s = Series(arr)
|
||||
|
||||
# check min_periods
|
||||
# GH 7898
|
||||
result = getattr(s.ewm(com=50, min_periods=2), name)()
|
||||
assert result[:11].isna().all()
|
||||
assert not result[11:].isna().any()
|
||||
|
||||
result = getattr(s.ewm(com=50, min_periods=min_periods), name)()
|
||||
if name == "mean":
|
||||
assert result[:10].isna().all()
|
||||
assert not result[10:].isna().any()
|
||||
else:
|
||||
# ewm.std, ewm.var (with bias=False) require at least
|
||||
# two values
|
||||
assert result[:11].isna().all()
|
||||
assert not result[11:].isna().any()
|
||||
|
||||
# check series of length 0
|
||||
result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)()
|
||||
tm.assert_series_equal(result, Series(dtype="float64"))
|
||||
|
||||
# check series of length 1
|
||||
result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)()
|
||||
if name == "mean":
|
||||
tm.assert_series_equal(result, Series([1.0]))
|
||||
else:
|
||||
# ewm.std, ewm.var with bias=False require at least
|
||||
# two values
|
||||
tm.assert_series_equal(result, Series([np.nan]))
|
||||
|
||||
# pass in ints
|
||||
result2 = getattr(Series(np.arange(50)).ewm(span=10), name)()
|
||||
assert result2.dtype == np.float64
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ["cov", "corr"])
|
||||
def test_ewm_corr_cov(name):
|
||||
A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
|
||||
B = A[2:] + np.random.default_rng(2).standard_normal(48)
|
||||
|
||||
A[:10] = np.nan
|
||||
B.iloc[-10:] = np.nan
|
||||
|
||||
result = getattr(A.ewm(com=20, min_periods=5), name)(B)
|
||||
assert np.isnan(result.values[:14]).all()
|
||||
assert not np.isnan(result.values[14:]).any()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("min_periods", [0, 1, 2])
|
||||
@pytest.mark.parametrize("name", ["cov", "corr"])
|
||||
def test_ewm_corr_cov_min_periods(name, min_periods):
|
||||
# GH 7898
|
||||
A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
|
||||
B = A[2:] + np.random.default_rng(2).standard_normal(48)
|
||||
|
||||
A[:10] = np.nan
|
||||
B.iloc[-10:] = np.nan
|
||||
|
||||
result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
|
||||
# binary functions (ewmcov, ewmcorr) with bias=False require at
|
||||
# least two values
|
||||
assert np.isnan(result.values[:11]).all()
|
||||
assert not np.isnan(result.values[11:]).any()
|
||||
|
||||
# check series of length 0
|
||||
empty = Series([], dtype=np.float64)
|
||||
result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty)
|
||||
tm.assert_series_equal(result, empty)
|
||||
|
||||
# check series of length 1
|
||||
result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)(
|
||||
Series([1.0])
|
||||
)
|
||||
tm.assert_series_equal(result, Series([np.nan]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ["cov", "corr"])
|
||||
def test_different_input_array_raise_exception(name):
|
||||
A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
|
||||
A[:10] = np.nan
|
||||
|
||||
msg = "other must be a DataFrame or Series"
|
||||
# exception raised is Exception
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(A.ewm(com=20, min_periods=5), name)(
|
||||
np.random.default_rng(2).standard_normal(50)
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ["var", "std", "mean"])
|
||||
def test_ewma_series(series, name):
|
||||
series_result = getattr(series.ewm(com=10), name)()
|
||||
assert isinstance(series_result, Series)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ["var", "std", "mean"])
|
||||
def test_ewma_frame(frame, name):
|
||||
frame_result = getattr(frame.ewm(com=10), name)()
|
||||
assert isinstance(frame_result, DataFrame)
|
||||
|
||||
|
||||
def test_ewma_span_com_args(series):
|
||||
A = series.ewm(com=9.5).mean()
|
||||
B = series.ewm(span=20).mean()
|
||||
tm.assert_almost_equal(A, B)
|
||||
msg = "comass, span, halflife, and alpha are mutually exclusive"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series.ewm(com=9.5, span=20)
|
||||
|
||||
msg = "Must pass one of comass, span, halflife, or alpha"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series.ewm().mean()
|
||||
|
||||
|
||||
def test_ewma_halflife_arg(series):
|
||||
A = series.ewm(com=13.932726172912965).mean()
|
||||
B = series.ewm(halflife=10.0).mean()
|
||||
tm.assert_almost_equal(A, B)
|
||||
msg = "comass, span, halflife, and alpha are mutually exclusive"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series.ewm(span=20, halflife=50)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series.ewm(com=9.5, halflife=50)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series.ewm(com=9.5, span=20, halflife=50)
|
||||
msg = "Must pass one of comass, span, halflife, or alpha"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series.ewm()
|
||||
|
||||
|
||||
def test_ewm_alpha_arg(series):
|
||||
# GH 10789
|
||||
s = series
|
||||
msg = "Must pass one of comass, span, halflife, or alpha"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm()
|
||||
|
||||
msg = "comass, span, halflife, and alpha are mutually exclusive"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(com=10.0, alpha=0.5)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(span=10.0, alpha=0.5)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.ewm(halflife=10.0, alpha=0.5)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["cov", "corr"])
|
||||
def test_ewm_pairwise_cov_corr(func, frame):
|
||||
result = getattr(frame.ewm(span=10, min_periods=5), func)()
|
||||
result = result.loc[(slice(None), 1), 5]
|
||||
result.index = result.index.droplevel(1)
|
||||
expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5])
|
||||
tm.assert_series_equal(result, expected, check_names=False)
|
||||
|
||||
|
||||
def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
|
||||
# GH#46560
|
||||
kernel = arithmetic_win_operators
|
||||
df = DataFrame({"a": [1], "b": 2, "c": 3})
|
||||
df["c"] = df["c"].astype(object)
|
||||
ewm = df.ewm(span=2, min_periods=1)
|
||||
op = getattr(ewm, kernel, None)
|
||||
if op is not None:
|
||||
result = op(numeric_only=numeric_only)
|
||||
|
||||
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
|
||||
expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
|
||||
assert list(expected.columns) == columns
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kernel", ["corr", "cov"])
|
||||
@pytest.mark.parametrize("use_arg", [True, False])
|
||||
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
|
||||
# GH#46560
|
||||
df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
|
||||
df["c"] = df["c"].astype(object)
|
||||
arg = (df,) if use_arg else ()
|
||||
ewm = df.ewm(span=2, min_periods=1)
|
||||
op = getattr(ewm, kernel)
|
||||
result = op(*arg, numeric_only=numeric_only)
|
||||
|
||||
# Compare result to op using float dtypes, dropping c when numeric_only is True
|
||||
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
|
||||
df2 = df[columns].astype(float)
|
||||
arg2 = (df2,) if use_arg else ()
|
||||
ewm2 = df2.ewm(span=2, min_periods=1)
|
||||
op2 = getattr(ewm2, kernel)
|
||||
expected = op2(*arg2, numeric_only=numeric_only)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, object])
|
||||
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
|
||||
# GH#46560
|
||||
kernel = arithmetic_win_operators
|
||||
ser = Series([1], dtype=dtype)
|
||||
ewm = ser.ewm(span=2, min_periods=1)
|
||||
op = getattr(ewm, kernel, None)
|
||||
if op is None:
|
||||
# Nothing to test
|
||||
pytest.skip("No op to test")
|
||||
if numeric_only and dtype is object:
|
||||
msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
op(numeric_only=numeric_only)
|
||||
else:
|
||||
result = op(numeric_only=numeric_only)
|
||||
expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kernel", ["corr", "cov"])
|
||||
@pytest.mark.parametrize("use_arg", [True, False])
|
||||
@pytest.mark.parametrize("dtype", [int, object])
|
||||
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
|
||||
# GH#46560
|
||||
ser = Series([1, 2, 3], dtype=dtype)
|
||||
arg = (ser,) if use_arg else ()
|
||||
ewm = ser.ewm(span=2, min_periods=1)
|
||||
op = getattr(ewm, kernel)
|
||||
if numeric_only and dtype is object:
|
||||
msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
op(*arg, numeric_only=numeric_only)
|
||||
else:
|
||||
result = op(*arg, numeric_only=numeric_only)
|
||||
|
||||
ser2 = ser.astype(float)
|
||||
arg2 = (ser2,) if use_arg else ()
|
||||
ewm2 = ser2.ewm(span=2, min_periods=1)
|
||||
op2 = getattr(ewm2, kernel)
|
||||
expected = op2(*arg2, numeric_only=numeric_only)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,723 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_doc_string():
|
||||
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
||||
df
|
||||
df.expanding(2).sum()
|
||||
|
||||
|
||||
def test_constructor(frame_or_series):
|
||||
# GH 12669
|
||||
|
||||
c = frame_or_series(range(5)).expanding
|
||||
|
||||
# valid
|
||||
c(min_periods=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
|
||||
def test_constructor_invalid(frame_or_series, w):
|
||||
# not valid
|
||||
|
||||
c = frame_or_series(range(5)).expanding
|
||||
msg = "min_periods must be an integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(min_periods=w)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"expander",
|
||||
[
|
||||
1,
|
||||
pytest.param(
|
||||
"ls",
|
||||
marks=pytest.mark.xfail(
|
||||
reason="GH#16425 expanding with offset not supported"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_empty_df_expanding(expander):
|
||||
# GH 15819 Verifies that datetime and integer expanding windows can be
|
||||
# applied to empty DataFrames
|
||||
|
||||
expected = DataFrame()
|
||||
result = DataFrame().expanding(expander).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Verifies that datetime and integer expanding windows can be applied
|
||||
# to empty DataFrames with datetime index
|
||||
expected = DataFrame(index=DatetimeIndex([]))
|
||||
result = DataFrame(index=DatetimeIndex([])).expanding(expander).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_missing_minp_zero():
|
||||
# https://github.com/pandas-dev/pandas/pull/18921
|
||||
# minp=0
|
||||
x = Series([np.nan])
|
||||
result = x.expanding(min_periods=0).sum()
|
||||
expected = Series([0.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# minp=1
|
||||
result = x.expanding(min_periods=1).sum()
|
||||
expected = Series([np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_axis(axis_frame):
|
||||
# see gh-23372.
|
||||
df = DataFrame(np.ones((10, 20)))
|
||||
axis = df._get_axis_number(axis_frame)
|
||||
|
||||
if axis == 0:
|
||||
msg = "The 'axis' keyword in DataFrame.expanding is deprecated"
|
||||
expected = DataFrame(
|
||||
{i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
|
||||
)
|
||||
else:
|
||||
# axis == 1
|
||||
msg = "Support for axis=1 in DataFrame.expanding is deprecated"
|
||||
expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df.expanding(3, axis=axis_frame).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_count_with_min_periods(frame_or_series):
|
||||
# GH 26996
|
||||
result = frame_or_series(range(5)).expanding(min_periods=3).count()
|
||||
expected = frame_or_series([np.nan, np.nan, 3.0, 4.0, 5.0])
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_count_default_min_periods_with_null_values(frame_or_series):
|
||||
# GH 26996
|
||||
values = [1, 2, 3, np.nan, 4, 5, 6]
|
||||
expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
|
||||
|
||||
result = frame_or_series(values).expanding().count()
|
||||
expected = frame_or_series(expected_counts)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_series):
|
||||
# GH 25857
|
||||
result = frame_or_series(range(5)).expanding(min_periods=6).count()
|
||||
expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan])
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"df,expected,min_periods",
|
||||
[
|
||||
(
|
||||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
||||
[
|
||||
({"A": [1], "B": [4]}, [0]),
|
||||
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
|
||||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
3,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
||||
[
|
||||
({"A": [1], "B": [4]}, [0]),
|
||||
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
|
||||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
2,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
||||
[
|
||||
({"A": [1], "B": [4]}, [0]),
|
||||
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
|
||||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
1,
|
||||
),
|
||||
(DataFrame({"A": [1], "B": [4]}), [], 2),
|
||||
(DataFrame(), [({}, [])], 1),
|
||||
(
|
||||
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
|
||||
[
|
||||
({"A": [1.0], "B": [np.nan]}, [0]),
|
||||
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
|
||||
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
3,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
|
||||
[
|
||||
({"A": [1.0], "B": [np.nan]}, [0]),
|
||||
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
|
||||
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
2,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
|
||||
[
|
||||
({"A": [1.0], "B": [np.nan]}, [0]),
|
||||
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
|
||||
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
1,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_iter_expanding_dataframe(df, expected, min_periods):
|
||||
# GH 11704
|
||||
expected = [DataFrame(values, index=index) for (values, index) in expected]
|
||||
|
||||
for expected, actual in zip(expected, df.expanding(min_periods)):
|
||||
tm.assert_frame_equal(actual, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser,expected,min_periods",
|
||||
[
|
||||
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
|
||||
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
|
||||
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
|
||||
(Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
|
||||
(Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
|
||||
(Series([], dtype="int64"), [], 2),
|
||||
],
|
||||
)
|
||||
def test_iter_expanding_series(ser, expected, min_periods):
|
||||
# GH 11704
|
||||
expected = [Series(values, index=index) for (values, index) in expected]
|
||||
|
||||
for expected, actual in zip(expected, ser.expanding(min_periods)):
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
|
||||
def test_center_invalid():
|
||||
# GH 20647
|
||||
df = DataFrame()
|
||||
with pytest.raises(TypeError, match=".* got an unexpected keyword"):
|
||||
df.expanding(center=True)
|
||||
|
||||
|
||||
def test_expanding_sem(frame_or_series):
|
||||
# GH: 26476
|
||||
obj = frame_or_series([0, 1, 2])
|
||||
result = obj.expanding().sem()
|
||||
if isinstance(result, DataFrame):
|
||||
result = Series(result[0].values)
|
||||
expected = Series([np.nan] + [0.707107] * 2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["skew", "kurt"])
|
||||
def test_expanding_skew_kurt_numerical_stability(method):
|
||||
# GH: 6929
|
||||
s = Series(np.random.default_rng(2).random(10))
|
||||
expected = getattr(s.expanding(3), method)()
|
||||
s = s + 5000
|
||||
result = getattr(s.expanding(3), method)()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("window", [1, 3, 10, 20])
|
||||
@pytest.mark.parametrize("method", ["min", "max", "average"])
|
||||
@pytest.mark.parametrize("pct", [True, False])
|
||||
@pytest.mark.parametrize("ascending", [True, False])
|
||||
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
|
||||
def test_rank(window, method, pct, ascending, test_data):
|
||||
length = 20
|
||||
if test_data == "default":
|
||||
ser = Series(data=np.random.default_rng(2).random(length))
|
||||
elif test_data == "duplicates":
|
||||
ser = Series(data=np.random.default_rng(2).choice(3, length))
|
||||
elif test_data == "nans":
|
||||
ser = Series(
|
||||
data=np.random.default_rng(2).choice(
|
||||
[1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
|
||||
)
|
||||
)
|
||||
|
||||
expected = ser.expanding(window).apply(
|
||||
lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
|
||||
)
|
||||
result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_corr(series):
|
||||
A = series.dropna()
|
||||
B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
|
||||
|
||||
result = A.expanding().corr(B)
|
||||
|
||||
rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
|
||||
|
||||
tm.assert_almost_equal(rolling_result, result)
|
||||
|
||||
|
||||
def test_expanding_count(series):
|
||||
result = series.expanding(min_periods=0).count()
|
||||
tm.assert_almost_equal(
|
||||
result, series.rolling(window=len(series), min_periods=0).count()
|
||||
)
|
||||
|
||||
|
||||
def test_expanding_quantile(series):
|
||||
result = series.expanding().quantile(0.5)
|
||||
|
||||
rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)
|
||||
|
||||
tm.assert_almost_equal(result, rolling_result)
|
||||
|
||||
|
||||
def test_expanding_cov(series):
|
||||
A = series
|
||||
B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
|
||||
|
||||
result = A.expanding().cov(B)
|
||||
|
||||
rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
|
||||
|
||||
tm.assert_almost_equal(rolling_result, result)
|
||||
|
||||
|
||||
def test_expanding_cov_pairwise(frame):
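# Added note: pairwise cov on a DataFrame returns a MultiIndexed frame (one
# column-by-column block per row label); with a full-length rolling window and
# min_periods=1 it should match the expanding result exactly.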
|
||||
result = frame.expanding().cov()
|
||||
|
||||
rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()
|
||||
|
||||
tm.assert_frame_equal(result, rolling_result)
|
||||
|
||||
|
||||
def test_expanding_corr_pairwise(frame):
|
||||
result = frame.expanding().corr()
|
||||
|
||||
rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
|
||||
tm.assert_frame_equal(result, rolling_result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,static_comp",
|
||||
[
|
||||
("sum", np.sum),
|
||||
("mean", lambda x: np.mean(x, axis=0)),
|
||||
("max", lambda x: np.max(x, axis=0)),
|
||||
("min", lambda x: np.min(x, axis=0)),
|
||||
],
|
||||
ids=["sum", "mean", "max", "min"],
|
||||
)
|
||||
def test_expanding_func(func, static_comp, frame_or_series):
|
||||
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
|
||||
|
||||
msg = "The 'axis' keyword in (Series|DataFrame).expanding is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
obj = data.expanding(min_periods=1, axis=0)
|
||||
result = getattr(obj, func)()
|
||||
assert isinstance(result, frame_or_series)
|
||||
|
||||
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
|
||||
warn = None
|
||||
if frame_or_series is DataFrame and static_comp is np.sum:
|
||||
warn = FutureWarning
|
||||
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
|
||||
expected = static_comp(data[:11])
|
||||
if frame_or_series is Series:
|
||||
tm.assert_almost_equal(result[10], expected)
|
||||
else:
|
||||
tm.assert_series_equal(result.iloc[10], expected, check_names=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,static_comp",
|
||||
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
|
||||
ids=["sum", "mean", "max", "min"],
|
||||
)
|
||||
def test_expanding_min_periods(func, static_comp):
|
||||
ser = Series(np.random.default_rng(2).standard_normal(50))
|
||||
|
||||
msg = "The 'axis' keyword in Series.expanding is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser.expanding(min_periods=30, axis=0), func)()
|
||||
assert result[:29].isna().all()
|
||||
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
||||
|
||||
# min_periods is working correctly
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser.expanding(min_periods=15, axis=0), func)()
|
||||
assert isna(result.iloc[13])
|
||||
assert notna(result.iloc[14])
|
||||
|
||||
ser2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser2.expanding(min_periods=5, axis=0), func)()
|
||||
assert isna(result[3])
|
||||
assert notna(result[4])
|
||||
|
||||
# min_periods=0
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result0 = getattr(ser.expanding(min_periods=0, axis=0), func)()
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result1 = getattr(ser.expanding(min_periods=1, axis=0), func)()
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser.expanding(min_periods=1, axis=0), func)()
|
||||
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
||||
|
||||
|
||||
def test_expanding_apply(engine_and_raw, frame_or_series):
|
||||
engine, raw = engine_and_raw
|
||||
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
|
||||
result = data.expanding(min_periods=1).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert isinstance(result, frame_or_series)
|
||||
|
||||
if frame_or_series is Series:
|
||||
tm.assert_almost_equal(result[9], np.mean(data[:11], axis=0))
|
||||
else:
|
||||
tm.assert_series_equal(
|
||||
result.iloc[9], np.mean(data[:11], axis=0), check_names=False
|
||||
)
|
||||
|
||||
|
||||
def test_expanding_min_periods_apply(engine_and_raw):
|
||||
engine, raw = engine_and_raw
|
||||
ser = Series(np.random.default_rng(2).standard_normal(50))
|
||||
|
||||
result = ser.expanding(min_periods=30).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert result[:29].isna().all()
|
||||
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = ser.expanding(min_periods=15).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert isna(result.iloc[13])
|
||||
assert notna(result.iloc[14])
|
||||
|
||||
ser2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = ser2.expanding(min_periods=5).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert isna(result[3])
|
||||
assert notna(result[4])
|
||||
|
||||
# min_periods=0
|
||||
result0 = ser.expanding(min_periods=0).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
result1 = ser.expanding(min_periods=1).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
result = ser.expanding(min_periods=1).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
|
||||
lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length_pairwise(f):
|
||||
df1 = DataFrame()
|
||||
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
|
||||
df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
|
||||
df2_expected = DataFrame(
|
||||
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
|
||||
columns=Index(["a"], name="foo"),
|
||||
dtype="float64",
|
||||
)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().count(),
|
||||
lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
|
||||
lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
|
||||
lambda x: x.expanding(min_periods=5).max(),
|
||||
lambda x: x.expanding(min_periods=5).min(),
|
||||
lambda x: x.expanding(min_periods=5).sum(),
|
||||
lambda x: x.expanding(min_periods=5).mean(),
|
||||
lambda x: x.expanding(min_periods=5).std(),
|
||||
lambda x: x.expanding(min_periods=5).var(),
|
||||
lambda x: x.expanding(min_periods=5).skew(),
|
||||
lambda x: x.expanding(min_periods=5).kurt(),
|
||||
lambda x: x.expanding(min_periods=5).quantile(0.5),
|
||||
lambda x: x.expanding(min_periods=5).median(),
|
||||
lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
|
||||
lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length(f):
|
||||
# GH 8056
|
||||
s = Series(dtype=np.float64)
|
||||
s_expected = s
|
||||
df1 = DataFrame()
|
||||
df1_expected = df1
|
||||
df2 = DataFrame(columns=["a"])
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
df2_expected = df2
|
||||
|
||||
s_result = f(s)
|
||||
tm.assert_series_equal(s_result, s_expected)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
||||
|
||||
|
||||
def test_expanding_apply_empty_series(engine_and_raw):
|
||||
engine, raw = engine_and_raw
|
||||
ser = Series([], dtype=np.float64)
|
||||
tm.assert_series_equal(
|
||||
ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
|
||||
)
|
||||
|
||||
|
||||
def test_expanding_apply_min_periods_0(engine_and_raw):
|
||||
# GH 8080
|
||||
engine, raw = engine_and_raw
|
||||
s = Series([None, None, None])
|
||||
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
|
||||
expected = Series([1.0, 2.0, 3.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_cov_diff_index():
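# Added note: the two Series are aligned on the union of their indexes first,
# so labels missing from either side behave like NaN and only the common
# observations feed the expanding covariance.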
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.expanding().cov(s2)
|
||||
expected = Series([None, None, 2.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.expanding().cov(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s1 = Series([7, 8, 10], index=[0, 1, 3])
|
||||
s2 = Series([7, 9, 10], index=[0, 2, 3])
|
||||
result = s1.expanding().cov(s2)
|
||||
expected = Series([None, None, None, 4.5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_corr_diff_index():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.expanding().corr(s2)
|
||||
expected = Series([None, None, 1.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.expanding().corr(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s1 = Series([7, 8, 10], index=[0, 1, 3])
|
||||
s2 = Series([7, 9, 10], index=[0, 2, 3])
|
||||
result = s1.expanding().corr(s2)
|
||||
expected = Series([None, None, None, 1.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_cov_pairwise_diff_length():
|
||||
# GH 7512
|
||||
df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo"))
|
||||
df1a = DataFrame(
|
||||
[[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo")
|
||||
)
|
||||
df2 = DataFrame(
|
||||
[[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo")
|
||||
)
|
||||
df2a = DataFrame(
|
||||
[[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo")
|
||||
)
|
||||
# TODO: xref gh-15826
|
||||
# .loc is not preserving the names
|
||||
result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
|
||||
result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
|
||||
result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
|
||||
result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
|
||||
expected = DataFrame(
|
||||
[[-3.0, -6.0], [-5.0, -10.0]],
|
||||
columns=Index(["A", "B"], name="foo"),
|
||||
index=Index(["X", "Y"], name="foo"),
|
||||
)
|
||||
tm.assert_frame_equal(result1, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
tm.assert_frame_equal(result3, expected)
|
||||
tm.assert_frame_equal(result4, expected)
|
||||
|
||||
|
||||
def test_expanding_corr_pairwise_diff_length():
|
||||
# GH 7512
|
||||
df1 = DataFrame(
|
||||
[[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
|
||||
)
|
||||
df1a = DataFrame(
|
||||
[[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
|
||||
)
|
||||
df2 = DataFrame(
|
||||
[[5, 6], [None, None], [2, 1]],
|
||||
columns=["X", "Y"],
|
||||
index=Index(range(3), name="bar"),
|
||||
)
|
||||
df2a = DataFrame(
|
||||
[[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
|
||||
)
|
||||
result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
|
||||
result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
|
||||
result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
|
||||
result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
|
||||
expected = DataFrame(
|
||||
[[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
|
||||
)
|
||||
tm.assert_frame_equal(result1, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
tm.assert_frame_equal(result3, expected)
|
||||
tm.assert_frame_equal(result4, expected)
|
||||
|
||||
|
||||
def test_expanding_apply_args_kwargs(engine_and_raw):
|
||||
def mean_w_arg(x, const):
|
||||
return np.mean(x) + const
|
||||
|
||||
engine, raw = engine_and_raw
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).random((20, 3)))
|
||||
|
||||
expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0
|
||||
|
||||
result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
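# Added note: column "c" is cast to object dtype below, so numeric_only=True is
# expected to drop it while the numeric "a" and "b" columns are kept; the
# window result is compared against a plain DataFrame.agg on the same columns.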
|
||||
# GH#46560
|
||||
kernel = arithmetic_win_operators
|
||||
df = DataFrame({"a": [1], "b": 2, "c": 3})
|
||||
df["c"] = df["c"].astype(object)
|
||||
expanding = df.expanding()
|
||||
op = getattr(expanding, kernel, None)
|
||||
if op is not None:
|
||||
result = op(numeric_only=numeric_only)
|
||||
|
||||
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
|
||||
expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
|
||||
assert list(expected.columns) == columns
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kernel", ["corr", "cov"])
|
||||
@pytest.mark.parametrize("use_arg", [True, False])
|
||||
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
|
||||
# GH#46560
|
||||
df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
|
||||
df["c"] = df["c"].astype(object)
|
||||
arg = (df,) if use_arg else ()
|
||||
expanding = df.expanding()
|
||||
op = getattr(expanding, kernel)
|
||||
result = op(*arg, numeric_only=numeric_only)
|
||||
|
||||
# Compare result to op using float dtypes, dropping c when numeric_only is True
|
||||
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
|
||||
df2 = df[columns].astype(float)
|
||||
arg2 = (df2,) if use_arg else ()
|
||||
expanding2 = df2.expanding()
|
||||
op2 = getattr(expanding2, kernel)
|
||||
expected = op2(*arg2, numeric_only=numeric_only)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, object])
|
||||
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
|
||||
# GH#46560
|
||||
kernel = arithmetic_win_operators
|
||||
ser = Series([1], dtype=dtype)
|
||||
expanding = ser.expanding()
|
||||
op = getattr(expanding, kernel)
|
||||
if numeric_only and dtype is object:
|
||||
msg = f"Expanding.{kernel} does not implement numeric_only"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
op(numeric_only=numeric_only)
|
||||
else:
|
||||
result = op(numeric_only=numeric_only)
|
||||
expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kernel", ["corr", "cov"])
|
||||
@pytest.mark.parametrize("use_arg", [True, False])
|
||||
@pytest.mark.parametrize("dtype", [int, object])
|
||||
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
|
||||
# GH#46560
|
||||
ser = Series([1, 2, 3], dtype=dtype)
|
||||
arg = (ser,) if use_arg else ()
|
||||
expanding = ser.expanding()
|
||||
op = getattr(expanding, kernel)
|
||||
if numeric_only and dtype is object:
|
||||
msg = f"Expanding.{kernel} does not implement numeric_only"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
op(*arg, numeric_only=numeric_only)
|
||||
else:
|
||||
result = op(*arg, numeric_only=numeric_only)
|
||||
|
||||
ser2 = ser.astype(float)
|
||||
arg2 = (ser2,) if use_arg else ()
|
||||
expanding2 = ser2.expanding()
|
||||
op2 = getattr(expanding2, kernel)
|
||||
expected = op2(*arg2, numeric_only=numeric_only)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_keyword_quantile_deprecated():
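# Added note: passing the value via the keyword "quantile" should emit a
# FutureWarning (GH 52550), presumably because the keyword was renamed; only
# the warning, not the result, is checked here.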
|
||||
# GH #52550
|
||||
ser = Series([1, 2, 3, 4])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
ser.expanding().quantile(quantile=0.5)
|
File diff suppressed because it is too large
@ -0,0 +1,455 @@
import numpy as np
import pytest

from pandas.errors import NumbaUtilError
import pandas.util._test_decorators as td

from pandas import (
    DataFrame,
    Series,
    option_context,
    to_datetime,
)
import pandas._testing as tm

pytestmark = pytest.mark.single_cpu


@pytest.fixture(params=["single", "table"])
def method(request):
    """method keyword in rolling/expanding/ewm constructor"""
    return request.param


@pytest.fixture(
    params=[
        ["sum", {}],
        ["mean", {}],
        ["median", {}],
        ["max", {}],
        ["min", {}],
        ["var", {}],
        ["var", {"ddof": 0}],
        ["std", {}],
        ["std", {"ddof": 0}],
    ]
)
def arithmetic_numba_supported_operators(request):
    return request.param

@td.skip_if_no("numba")
|
||||
@pytest.mark.filterwarnings("ignore")
|
||||
# Filter warnings when parallel=True and the function can't be parallelized by Numba
|
||||
class TestEngine:
|
||||
@pytest.mark.parametrize("jit", [True, False])
|
||||
def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step):
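# Added note: the same user function is evaluated with engine="numba" and
# engine="cython"; the results must match whether or not the caller has
# already wrapped the function in numba.jit.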
|
||||
def f(x, *args):
|
||||
arg_sum = 0
|
||||
for arg in args:
|
||||
arg_sum += arg
|
||||
return np.mean(x) + arg_sum
|
||||
|
||||
if jit:
|
||||
import numba
|
||||
|
||||
f = numba.jit(f)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
args = (2,)
|
||||
|
||||
s = Series(range(10))
|
||||
result = s.rolling(2, center=center, step=step).apply(
|
||||
f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = s.rolling(2, center=center, step=step).apply(
|
||||
f, engine="cython", args=args, raw=True
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
DataFrame(np.eye(5)),
|
||||
DataFrame(
|
||||
[
|
||||
[5, 7, 7, 7, np.nan, np.inf, 4, 3, 3, 3],
|
||||
[5, 7, 7, 7, np.nan, np.inf, 7, 3, 3, 3],
|
||||
[np.nan, np.nan, 5, 6, 7, 5, 5, 5, 5, 5],
|
||||
]
|
||||
).T,
|
||||
Series(range(5), name="foo"),
|
||||
Series([20, 10, 10, np.inf, 1, 1, 2, 3]),
|
||||
Series([20, 10, 10, np.nan, 10, 1, 2, 3]),
|
||||
],
|
||||
)
|
||||
def test_numba_vs_cython_rolling_methods(
|
||||
self,
|
||||
data,
|
||||
nogil,
|
||||
parallel,
|
||||
nopython,
|
||||
arithmetic_numba_supported_operators,
|
||||
step,
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
roll = data.rolling(3, step=step)
|
||||
result = getattr(roll, method)(
|
||||
engine="numba", engine_kwargs=engine_kwargs, **kwargs
|
||||
)
|
||||
expected = getattr(roll, method)(engine="cython", **kwargs)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data", [DataFrame(np.eye(5)), Series(range(5), name="foo")]
|
||||
)
|
||||
def test_numba_vs_cython_expanding_methods(
|
||||
self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
data = DataFrame(np.eye(5))
|
||||
expand = data.expanding()
|
||||
result = getattr(expand, method)(
|
||||
engine="numba", engine_kwargs=engine_kwargs, **kwargs
|
||||
)
|
||||
expected = getattr(expand, method)(engine="cython", **kwargs)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("jit", [True, False])
|
||||
def test_cache_apply(self, jit, nogil, parallel, nopython, step):
|
||||
# Test that the functions are cached correctly if we switch functions
|
||||
def func_1(x):
|
||||
return np.mean(x) + 4
|
||||
|
||||
def func_2(x):
|
||||
return np.std(x) * 5
|
||||
|
||||
if jit:
|
||||
import numba
|
||||
|
||||
func_1 = numba.jit(func_1)
|
||||
func_2 = numba.jit(func_2)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
roll = Series(range(10)).rolling(2, step=step)
|
||||
result = roll.apply(
|
||||
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = roll.apply(func_1, engine="cython", raw=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = roll.apply(
|
||||
func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = roll.apply(func_2, engine="cython", raw=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
# This run should use the cached func_1
|
||||
result = roll.apply(
|
||||
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = roll.apply(func_1, engine="cython", raw=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"window,window_kwargs",
|
||||
[
|
||||
["rolling", {"window": 3, "min_periods": 0}],
|
||||
["expanding", {}],
|
||||
],
|
||||
)
|
||||
def test_dont_cache_args(
|
||||
self, window, window_kwargs, nogil, parallel, nopython, method
|
||||
):
|
||||
# GH 42287
|
||||
|
||||
def add(values, x):
|
||||
return np.sum(values) + x
|
||||
|
||||
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
|
||||
df = DataFrame({"value": [0, 0, 0]})
|
||||
result = getattr(df, window)(method=method, **window_kwargs).apply(
|
||||
add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(1,)
|
||||
)
|
||||
expected = DataFrame({"value": [1.0, 1.0, 1.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = getattr(df, window)(method=method, **window_kwargs).apply(
|
||||
add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(2,)
|
||||
)
|
||||
expected = DataFrame({"value": [2.0, 2.0, 2.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dont_cache_engine_kwargs(self):
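# Added note: func closes over the three engine flags and returns their sum
# (True counts as 1), so getting 2.0 and then 1.0 below shows that changing
# engine_kwargs forces a re-compile instead of reusing the cached jitted
# function.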
|
||||
# If the user passes a different set of engine_kwargs don't return the same
|
||||
# jitted function
|
||||
nogil = False
|
||||
parallel = True
|
||||
nopython = True
|
||||
|
||||
def func(x):
|
||||
return nogil + parallel + nopython
|
||||
|
||||
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
|
||||
df = DataFrame({"value": [0, 0, 0]})
|
||||
result = df.rolling(1).apply(
|
||||
func, raw=True, engine="numba", engine_kwargs=engine_kwargs
|
||||
)
|
||||
expected = DataFrame({"value": [2.0, 2.0, 2.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
parallel = False
|
||||
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
|
||||
result = df.rolling(1).apply(
|
||||
func, raw=True, engine="numba", engine_kwargs=engine_kwargs
|
||||
)
|
||||
expected = DataFrame({"value": [1.0, 1.0, 1.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
class TestEWM:
|
||||
@pytest.mark.parametrize(
|
||||
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
|
||||
)
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_invalid_engine(self, grouper, method):
|
||||
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
|
||||
with pytest.raises(ValueError, match="engine must be either"):
|
||||
getattr(grouper(df).ewm(com=1.0), method)(engine="foo")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
|
||||
)
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_invalid_engine_kwargs(self, grouper, method):
|
||||
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
|
||||
with pytest.raises(ValueError, match="cython engine does not"):
|
||||
getattr(grouper(df).ewm(com=1.0), method)(
|
||||
engine="cython", engine_kwargs={"nopython": True}
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("grouper", ["None", "groupby"])
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_cython_vs_numba(
|
||||
self, grouper, method, nogil, parallel, nopython, ignore_na, adjust
|
||||
):
|
||||
df = DataFrame({"B": range(4)})
|
||||
if grouper == "None":
|
||||
grouper = lambda x: x
|
||||
else:
|
||||
df["A"] = ["a", "b", "a", "b"]
|
||||
grouper = lambda x: x.groupby("A")
|
||||
if method == "sum":
|
||||
adjust = True
|
||||
ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs)
|
||||
expected = getattr(ewm, method)(engine="cython")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("grouper", ["None", "groupby"])
|
||||
def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
|
||||
# GH 40951
|
||||
|
||||
df = DataFrame({"B": [0, 0, 1, 1, 2, 2]})
|
||||
if grouper == "None":
|
||||
grouper = lambda x: x
|
||||
else:
|
||||
grouper = lambda x: x.groupby("A")
|
||||
df["A"] = ["a", "b", "a", "b", "b", "a"]
|
||||
|
||||
halflife = "23 days"
|
||||
times = to_datetime(
|
||||
[
|
||||
"2020-01-01",
|
||||
"2020-01-01",
|
||||
"2020-01-02",
|
||||
"2020-01-10",
|
||||
"2020-02-23",
|
||||
"2020-01-03",
|
||||
]
|
||||
)
|
||||
ewm = grouper(df).ewm(
|
||||
halflife=halflife, adjust=True, ignore_na=ignore_na, times=times
|
||||
)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
|
||||
expected = ewm.mean(engine="cython")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
def test_use_global_config():
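# Added note: with the compute.use_numba option enabled, engine=None should
# resolve to the numba engine, so the explicit engine="numba" call serves as
# the reference result.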
|
||||
def f(x):
|
||||
return np.mean(x) + 2
|
||||
|
||||
s = Series(range(10))
|
||||
with option_context("compute.use_numba", True):
|
||||
result = s.rolling(2).apply(f, engine=None, raw=True)
|
||||
expected = s.rolling(2).apply(f, engine="numba", raw=True)
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
def test_invalid_kwargs_nopython():
|
||||
with pytest.raises(NumbaUtilError, match="numba does not support kwargs with"):
|
||||
Series(range(1)).rolling(1).apply(
|
||||
lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
|
||||
)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.filterwarnings("ignore")
|
||||
# Filter warnings when parallel=True and the function can't be parallelized by Numba
|
||||
class TestTableMethod:
|
||||
def test_table_series_valueerror(self):
|
||||
def f(x):
|
||||
return np.sum(x, axis=0) + 1
|
||||
|
||||
with pytest.raises(
|
||||
ValueError, match="method='table' not applicable for Series objects."
|
||||
):
|
||||
Series(range(1)).rolling(1, method="table").apply(
|
||||
f, engine="numba", raw=True
|
||||
)
|
||||
|
||||
def test_table_method_rolling_methods(
|
||||
self,
|
||||
axis,
|
||||
nogil,
|
||||
parallel,
|
||||
nopython,
|
||||
arithmetic_numba_supported_operators,
|
||||
step,
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step)
|
||||
if method in ("var", "std"):
|
||||
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
|
||||
getattr(roll_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
else:
|
||||
roll_single = df.rolling(
|
||||
2, method="single", axis=axis, min_periods=0, step=step
|
||||
)
|
||||
result = getattr(roll_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
expected = getattr(roll_single, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step):
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
def f(x):
|
||||
return np.sum(x, axis=0) + 1
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
result = df.rolling(
|
||||
2, method="table", axis=axis, min_periods=0, step=step
|
||||
).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
|
||||
expected = df.rolling(
|
||||
2, method="single", axis=axis, min_periods=0, step=step
|
||||
).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_rolling_weighted_mean(self, step):
|
||||
def weighted_mean(x):
|
||||
arr = np.ones((1, x.shape[1]))
|
||||
arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum()
|
||||
return arr
|
||||
|
||||
df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
|
||||
result = df.rolling(2, method="table", min_periods=0, step=step).apply(
|
||||
weighted_mean, raw=True, engine="numba"
|
||||
)
|
||||
expected = DataFrame(
|
||||
[
|
||||
[1.0, 2.0, 1.0],
|
||||
[1.8, 2.0, 1.0],
|
||||
[3.333333, 2.333333, 1.0],
|
||||
[1.555556, 7, 1.0],
|
||||
]
|
||||
)[::step]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython):
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
def f(x):
|
||||
return np.sum(x, axis=0) + 1
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
result = df.expanding(method="table", axis=axis).apply(
|
||||
f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
expected = df.expanding(method="single", axis=axis).apply(
|
||||
f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_expanding_methods(
|
||||
self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
expand_table = df.expanding(method="table", axis=axis)
|
||||
if method in ("var", "std"):
|
||||
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
|
||||
getattr(expand_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
else:
|
||||
expand_single = df.expanding(method="single", axis=axis)
|
||||
result = getattr(expand_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
expected = getattr(expand_single, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))])
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython):
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
df = DataFrame(data)
|
||||
|
||||
result = getattr(df.ewm(com=1, method="table", axis=axis), method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
expected = getattr(df.ewm(com=1, method="single", axis=axis), method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
def test_npfunc_no_warnings():
|
||||
df = DataFrame({"col1": [1, 2, 3, 4, 5]})
|
||||
with tm.assert_produces_warning(False):
|
||||
df.col1.rolling(2).apply(np.prod, raw=True, engine="numba")
|
@ -0,0 +1,103 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm

pytestmark = pytest.mark.single_cpu

pytest.importorskip("numba")

@pytest.mark.filterwarnings("ignore")
|
||||
# Filter warnings when parallel=True and the function can't be parallelized by Numba
|
||||
class TestEWM:
|
||||
def test_invalid_update(self):
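# Added note: an online EWM has no accumulated state until mean() has been
# called once with update=None, so passing update straight away should raise.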
|
||||
df = DataFrame({"a": range(5), "b": range(5)})
|
||||
online_ewm = df.head(2).ewm(0.5).online()
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Must call mean with update=None first before passing update",
|
||||
):
|
||||
online_ewm.mean(update=df.head(1))
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")]
|
||||
)
|
||||
def test_online_vs_non_online_mean(
|
||||
self, obj, nogil, parallel, nopython, adjust, ignore_na
|
||||
):
|
||||
expected = obj.ewm(0.5, adjust=adjust, ignore_na=ignore_na).mean()
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
online_ewm = (
|
||||
obj.head(2)
|
||||
.ewm(0.5, adjust=adjust, ignore_na=ignore_na)
|
||||
.online(engine_kwargs=engine_kwargs)
|
||||
)
|
||||
# Test resetting once
|
||||
for _ in range(2):
|
||||
result = online_ewm.mean()
|
||||
tm.assert_equal(result, expected.head(2))
|
||||
|
||||
result = online_ewm.mean(update=obj.tail(3))
|
||||
tm.assert_equal(result, expected.tail(3))
|
||||
|
||||
online_ewm.reset()
|
||||
|
||||
@pytest.mark.xfail(raises=NotImplementedError)
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")]
|
||||
)
|
||||
def test_update_times_mean(
|
||||
self, obj, nogil, parallel, nopython, adjust, ignore_na, halflife_with_times
|
||||
):
|
||||
times = Series(
|
||||
np.array(
|
||||
["2020-01-01", "2020-01-05", "2020-01-07", "2020-01-17", "2020-01-21"],
|
||||
dtype="datetime64[ns]",
|
||||
)
|
||||
)
|
||||
expected = obj.ewm(
|
||||
0.5,
|
||||
adjust=adjust,
|
||||
ignore_na=ignore_na,
|
||||
times=times,
|
||||
halflife=halflife_with_times,
|
||||
).mean()
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
online_ewm = (
|
||||
obj.head(2)
|
||||
.ewm(
|
||||
0.5,
|
||||
adjust=adjust,
|
||||
ignore_na=ignore_na,
|
||||
times=times.head(2),
|
||||
halflife=halflife_with_times,
|
||||
)
|
||||
.online(engine_kwargs=engine_kwargs)
|
||||
)
|
||||
# Test resetting once
|
||||
for _ in range(2):
|
||||
result = online_ewm.mean()
|
||||
tm.assert_equal(result, expected.head(2))
|
||||
|
||||
result = online_ewm.mean(update=obj.tail(3), update_times=times.tail(3))
|
||||
tm.assert_equal(result, expected.tail(3))
|
||||
|
||||
online_ewm.reset()
|
||||
|
||||
@pytest.mark.parametrize("method", ["aggregate", "std", "corr", "cov", "var"])
|
||||
def test_ewm_notimplementederror_raises(self, method):
|
||||
ser = Series(range(10))
|
||||
kwargs = {}
|
||||
if method == "aggregate":
|
||||
kwargs["func"] = lambda x: x
|
||||
|
||||
with pytest.raises(NotImplementedError, match=".* is not implemented."):
|
||||
getattr(ser.ewm(1).online(), method)(**kwargs)
|
@ -0,0 +1,445 @@
import numpy as np
import pytest

from pandas.compat import IS64

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort

@pytest.fixture(
|
||||
params=[
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]),
|
||||
DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]),
|
||||
DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]),
|
||||
]
|
||||
)
|
||||
def pairwise_frames(request):
|
||||
"""Pairwise frames test_pairwise"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pairwise_target_frame():
|
||||
"""Pairwise target frame for test_pairwise"""
|
||||
return DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pairwise_other_frame():
|
||||
"""Pairwise other frame for test_pairwise"""
|
||||
return DataFrame(
|
||||
[[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]],
|
||||
columns=["Y", "Z", "X"],
|
||||
)
|
||||
|
||||
|
||||
def test_rolling_cov(series):
|
||||
A = series
|
||||
B = A + np.random.default_rng(2).standard_normal(len(A))
|
||||
|
||||
result = A.rolling(window=50, min_periods=25).cov(B)
|
||||
tm.assert_almost_equal(result.iloc[-1], np.cov(A[-50:], B[-50:])[0, 1])
|
||||
|
||||
|
||||
def test_rolling_corr(series):
|
||||
A = series
|
||||
B = A + np.random.default_rng(2).standard_normal(len(A))
|
||||
|
||||
result = A.rolling(window=50, min_periods=25).corr(B)
|
||||
tm.assert_almost_equal(result.iloc[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
|
||||
|
||||
|
||||
def test_rolling_corr_bias_correction():
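# Added note: a and b share only the last 10 valid points; the full-window
# rolling corr at the final position should therefore match Series.corr,
# which likewise skips the non-overlapping NaNs.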
|
||||
# test for correct bias correction
|
||||
a = Series(
|
||||
np.arange(20, dtype=np.float64), index=date_range("2020-01-01", periods=20)
|
||||
)
|
||||
b = a.copy()
|
||||
a[:5] = np.nan
|
||||
b[:10] = np.nan
|
||||
|
||||
result = a.rolling(window=len(a), min_periods=1).corr(b)
|
||||
tm.assert_almost_equal(result.iloc[-1], a.corr(b))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["cov", "corr"])
|
||||
def test_rolling_pairwise_cov_corr(func, frame):
|
||||
result = getattr(frame.rolling(window=10, min_periods=5), func)()
|
||||
result = result.loc[(slice(None), 1), 5]
|
||||
result.index = result.index.droplevel(1)
|
||||
expected = getattr(frame[1].rolling(window=10, min_periods=5), func)(frame[5])
|
||||
tm.assert_series_equal(result, expected, check_names=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["corr", "cov"])
|
||||
def test_flex_binary_frame(method, frame):
|
||||
series = frame[1]
|
||||
|
||||
res = getattr(series.rolling(window=10), method)(frame)
|
||||
res2 = getattr(frame.rolling(window=10), method)(series)
|
||||
exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x))
|
||||
|
||||
tm.assert_frame_equal(res, exp)
|
||||
tm.assert_frame_equal(res2, exp)
|
||||
|
||||
frame2 = frame.copy()
|
||||
frame2 = DataFrame(
|
||||
np.random.default_rng(2).standard_normal(frame2.shape),
|
||||
index=frame2.index,
|
||||
columns=frame2.columns,
|
||||
)
|
||||
|
||||
res3 = getattr(frame.rolling(window=10), method)(frame2)
|
||||
exp = DataFrame(
|
||||
{k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame}
|
||||
)
|
||||
tm.assert_frame_equal(res3, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("window", range(7))
|
||||
def test_rolling_corr_with_zero_variance(window):
|
||||
# GH 18430
|
||||
s = Series(np.zeros(20))
|
||||
other = Series(np.arange(20))
|
||||
|
||||
assert s.rolling(window=window).corr(other=other).isna().all()
|
||||
|
||||
|
||||
def test_corr_sanity():
|
||||
# GH 3155
|
||||
df = DataFrame(
|
||||
np.array(
|
||||
[
|
||||
[0.87024726, 0.18505595],
|
||||
[0.64355431, 0.3091617],
|
||||
[0.92372966, 0.50552513],
|
||||
[0.00203756, 0.04520709],
|
||||
[0.84780328, 0.33394331],
|
||||
[0.78369152, 0.63919667],
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
res = df[0].rolling(5, center=True).corr(df[1])
|
||||
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).random((30, 2)))
|
||||
res = df[0].rolling(5, center=True).corr(df[1])
|
||||
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
|
||||
|
||||
|
||||
def test_rolling_cov_diff_length():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).cov(s2)
|
||||
expected = Series([None, None, 2.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).cov(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_rolling_corr_diff_length():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).corr(s2)
|
||||
expected = Series([None, None, 1.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).corr(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)),
|
||||
lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)),
|
||||
],
|
||||
)
|
||||
def test_rolling_functions_window_non_shrinkage_binary(f):
|
||||
# corr/cov return a MI DataFrame
|
||||
df = DataFrame(
|
||||
[[1, 5], [3, 2], [3, 9], [-1, 0]],
|
||||
columns=Index(["A", "B"], name="foo"),
|
||||
index=Index(range(4), name="bar"),
|
||||
)
|
||||
df_expected = DataFrame(
|
||||
columns=Index(["A", "B"], name="foo"),
|
||||
index=MultiIndex.from_product([df.index, df.columns], names=["bar", "foo"]),
|
||||
dtype="float64",
|
||||
)
|
||||
df_result = f(df)
|
||||
tm.assert_frame_equal(df_result, df_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)),
|
||||
lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length_pairwise(f):
|
||||
df1 = DataFrame()
|
||||
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
|
||||
df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
|
||||
df2_expected = DataFrame(
|
||||
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
|
||||
columns=Index(["a"], name="foo"),
|
||||
dtype="float64",
|
||||
)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
||||
|
||||
|
||||
class TestPairwise:
|
||||
# GH 7738
|
||||
@pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()])
|
||||
def test_no_flex(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame methods (which do not call flex_binary_moment())
|
||||
|
||||
result = f(pairwise_frames)
|
||||
tm.assert_index_equal(result.index, pairwise_frames.columns)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().cov(pairwise=True),
|
||||
lambda x: x.expanding().corr(pairwise=True),
|
||||
lambda x: x.rolling(window=3).cov(pairwise=True),
|
||||
lambda x: x.rolling(window=3).corr(pairwise=True),
|
||||
lambda x: x.ewm(com=3).cov(pairwise=True),
|
||||
lambda x: x.ewm(com=3).corr(pairwise=True),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame with itself, pairwise=True
|
||||
# note that we may construct the 1st level of the MI
|
||||
# in a non-monotonic way, so compare accordingly
|
||||
result = f(pairwise_frames)
|
||||
tm.assert_index_equal(
|
||||
result.index.levels[0], pairwise_frames.index, check_names=False
|
||||
)
|
||||
tm.assert_index_equal(
|
||||
safe_sort(result.index.levels[1]),
|
||||
safe_sort(pairwise_frames.columns.unique()),
|
||||
)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().cov(pairwise=False),
|
||||
lambda x: x.expanding().corr(pairwise=False),
|
||||
lambda x: x.rolling(window=3).cov(pairwise=False),
|
||||
lambda x: x.rolling(window=3).corr(pairwise=False),
|
||||
lambda x: x.ewm(com=3).cov(pairwise=False),
|
||||
lambda x: x.ewm(com=3).corr(pairwise=False),
|
||||
],
|
||||
)
|
||||
def test_no_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame with itself, pairwise=False
|
||||
result = f(pairwise_frames)
|
||||
tm.assert_index_equal(result.index, pairwise_frames.index)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y, pairwise=True),
|
||||
lambda x, y: x.expanding().corr(y, pairwise=True),
|
||||
lambda x, y: x.rolling(window=3).cov(y, pairwise=True),
|
||||
# TODO: We're missing a flag somewhere in meson
|
||||
pytest.param(
|
||||
lambda x, y: x.rolling(window=3).corr(y, pairwise=True),
|
||||
marks=pytest.mark.xfail(
|
||||
not IS64, reason="Precision issues on 32 bit", strict=False
|
||||
),
|
||||
),
|
||||
lambda x, y: x.ewm(com=3).cov(y, pairwise=True),
|
||||
lambda x, y: x.ewm(com=3).corr(y, pairwise=True),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_other(
|
||||
self, pairwise_frames, pairwise_target_frame, pairwise_other_frame, f
|
||||
):
|
||||
# DataFrame with another DataFrame, pairwise=True
|
||||
result = f(pairwise_frames, pairwise_other_frame)
|
||||
tm.assert_index_equal(
|
||||
result.index.levels[0], pairwise_frames.index, check_names=False
|
||||
)
|
||||
tm.assert_index_equal(
|
||||
safe_sort(result.index.levels[1]),
|
||||
safe_sort(pairwise_other_frame.columns.unique()),
|
||||
)
|
||||
expected = f(pairwise_target_frame, pairwise_other_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:RuntimeWarning")
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y, pairwise=False),
|
||||
lambda x, y: x.expanding().corr(y, pairwise=False),
|
||||
lambda x, y: x.rolling(window=3).cov(y, pairwise=False),
|
||||
lambda x, y: x.rolling(window=3).corr(y, pairwise=False),
|
||||
lambda x, y: x.ewm(com=3).cov(y, pairwise=False),
|
||||
lambda x, y: x.ewm(com=3).corr(y, pairwise=False),
|
||||
],
|
||||
)
|
||||
def test_no_pairwise_with_other(self, pairwise_frames, pairwise_other_frame, f):
|
||||
# DataFrame with another DataFrame, pairwise=False
|
||||
result = (
|
||||
f(pairwise_frames, pairwise_other_frame)
|
||||
if pairwise_frames.columns.is_unique
|
||||
else None
|
||||
)
|
||||
if result is not None:
|
||||
# we can have int and str columns
|
||||
expected_index = pairwise_frames.index.union(pairwise_other_frame.index)
|
||||
expected_columns = pairwise_frames.columns.union(
|
||||
pairwise_other_frame.columns
|
||||
)
|
||||
tm.assert_index_equal(result.index, expected_index)
|
||||
tm.assert_index_equal(result.columns, expected_columns)
|
||||
else:
|
||||
with pytest.raises(ValueError, match="'arg1' columns are not unique"):
|
||||
f(pairwise_frames, pairwise_other_frame)
|
||||
with pytest.raises(ValueError, match="'arg2' columns are not unique"):
|
||||
f(pairwise_other_frame, pairwise_frames)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y),
|
||||
lambda x, y: x.expanding().corr(y),
|
||||
lambda x, y: x.rolling(window=3).cov(y),
|
||||
lambda x, y: x.rolling(window=3).corr(y),
|
||||
lambda x, y: x.ewm(com=3).cov(y),
|
||||
lambda x, y: x.ewm(com=3).corr(y),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_series(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame with a Series
|
||||
result = f(pairwise_frames, Series([1, 1, 3, 8]))
|
||||
tm.assert_index_equal(result.index, pairwise_frames.index)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame, Series([1, 1, 3, 8]))
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
result = f(Series([1, 1, 3, 8]), pairwise_frames)
|
||||
tm.assert_index_equal(result.index, pairwise_frames.index)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(Series([1, 1, 3, 8]), pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
def test_corr_freq_memory_error(self):
|
||||
# GH 31789
|
||||
s = Series(range(5), index=date_range("2020", periods=5))
|
||||
result = s.rolling("12h").corr(s)
|
||||
expected = Series([np.nan] * 5, index=date_range("2020", periods=5))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_cov_mulittindex(self):
|
||||
# GH 34440
|
||||
|
||||
columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")])
|
||||
index = range(3)
|
||||
df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns)
|
||||
|
||||
result = df.ewm(alpha=0.1).cov()
|
||||
|
||||
index = MultiIndex.from_product([range(3), list("ab"), list("xy"), list("AB")])
|
||||
columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")])
|
||||
expected = DataFrame(
|
||||
np.vstack(
|
||||
(
|
||||
np.full((8, 8), np.nan),
|
||||
np.full((8, 8), 32.000000),
|
||||
np.full((8, 8), 63.881919),
|
||||
)
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multindex_columns_pairwise_func(self):
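# Added note: all-ones columns have zero variance, so every pairwise
# correlation is NaN; the point of the test is that the result MultiIndex
# (with the original "a"/"b" column level names) is constructed correctly.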
|
||||
# GH 21157
|
||||
columns = MultiIndex.from_arrays([["M", "N"], ["P", "Q"]], names=["a", "b"])
|
||||
df = DataFrame(np.ones((5, 2)), columns=columns)
|
||||
result = df.rolling(3).corr()
|
||||
expected = DataFrame(
|
||||
np.nan,
|
||||
index=MultiIndex.from_arrays(
|
||||
[
|
||||
np.repeat(np.arange(5, dtype=np.int64), 2),
|
||||
["M", "N"] * 5,
|
||||
["P", "Q"] * 5,
|
||||
],
|
||||
names=[None, "a", "b"],
|
||||
),
|
||||
columns=columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
File diff suppressed because it is too large
@ -0,0 +1,532 @@
from datetime import datetime

import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    concat,
    isna,
    notna,
)
import pandas._testing as tm

from pandas.tseries import offsets

@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs",
|
||||
[
|
||||
[np.mean, "mean", {}],
|
||||
[np.nansum, "sum", {}],
|
||||
[
|
||||
lambda x: np.isfinite(x).astype(float).sum(),
|
||||
"count",
|
||||
{},
|
||||
],
|
||||
[np.median, "median", {}],
|
||||
[np.min, "min", {}],
|
||||
[np.max, "max", {}],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
def test_series(series, compare_func, roll_func, kwargs, step):
|
||||
result = getattr(series.rolling(50, step=step), roll_func)(**kwargs)
|
||||
assert isinstance(result, Series)
|
||||
end = range(0, len(series), step or 1)[-1] + 1
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs",
|
||||
[
|
||||
[np.mean, "mean", {}],
|
||||
[np.nansum, "sum", {}],
|
||||
[
|
||||
lambda x: np.isfinite(x).astype(float).sum(),
|
||||
"count",
|
||||
{},
|
||||
],
|
||||
[np.median, "median", {}],
|
||||
[np.min, "min", {}],
|
||||
[np.max, "max", {}],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
def test_frame(raw, frame, compare_func, roll_func, kwargs, step):
|
||||
result = getattr(frame.rolling(50, step=step), roll_func)(**kwargs)
|
||||
assert isinstance(result, DataFrame)
|
||||
end = range(0, len(frame), step or 1)[-1] + 1
|
||||
tm.assert_series_equal(
|
||||
result.iloc[-1, :],
|
||||
frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs, minp",
|
||||
[
|
||||
[np.mean, "mean", {}, 10],
|
||||
[np.nansum, "sum", {}, 10],
|
||||
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0],
|
||||
[np.median, "median", {}, 10],
|
||||
[np.min, "min", {}, 10],
|
||||
[np.max, "max", {}, 10],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}, 10],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}, 10],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10],
|
||||
],
|
||||
)
|
||||
def test_time_rule_series(series, compare_func, roll_func, kwargs, minp):
|
||||
win = 25
|
||||
ser = series[::2].resample("B").mean()
|
||||
series_result = getattr(ser.rolling(window=win, min_periods=minp), roll_func)(
|
||||
**kwargs
|
||||
)
|
||||
last_date = series_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_series = series[::2].truncate(prev_date, last_date)
|
||||
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs, minp",
|
||||
[
|
||||
[np.mean, "mean", {}, 10],
|
||||
[np.nansum, "sum", {}, 10],
|
||||
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0],
|
||||
[np.median, "median", {}, 10],
|
||||
[np.min, "min", {}, 10],
|
||||
[np.max, "max", {}, 10],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}, 10],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}, 10],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10],
|
||||
],
|
||||
)
|
||||
def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp):
|
||||
win = 25
|
||||
frm = frame[::2].resample("B").mean()
|
||||
frame_result = getattr(frm.rolling(window=win, min_periods=minp), roll_func)(
|
||||
**kwargs
|
||||
)
|
||||
last_date = frame_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_frame = frame[::2].truncate(prev_date, last_date)
|
||||
tm.assert_series_equal(
|
||||
frame_result.xs(last_date),
|
||||
trunc_frame.apply(compare_func, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs",
|
||||
[
|
||||
[np.mean, "mean", {}],
|
||||
[np.nansum, "sum", {}],
|
||||
[np.median, "median", {}],
|
||||
[np.min, "min", {}],
|
||||
[np.max, "max", {}],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
def test_nans(compare_func, roll_func, kwargs):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(50, min_periods=30), roll_func)(**kwargs)
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = getattr(obj.rolling(20, min_periods=15), roll_func)(**kwargs)
|
||||
assert isna(result.iloc[23])
|
||||
assert not isna(result.iloc[24])
|
||||
|
||||
assert not isna(result.iloc[-6])
|
||||
assert isna(result.iloc[-5])
|
||||
|
||||
obj2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = getattr(obj2.rolling(10, min_periods=5), roll_func)(**kwargs)
|
||||
assert isna(result.iloc[3])
|
||||
assert notna(result.iloc[4])
|
||||
|
||||
if roll_func != "sum":
|
||||
result0 = getattr(obj.rolling(20, min_periods=0), roll_func)(**kwargs)
|
||||
result1 = getattr(obj.rolling(20, min_periods=1), roll_func)(**kwargs)
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
|
||||
def test_nans_count():
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
result = obj.rolling(50, min_periods=30).count()
|
||||
tm.assert_almost_equal(
|
||||
result.iloc[-1], np.isfinite(obj[10:-10]).astype(float).sum()
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs",
|
||||
[
|
||||
["mean", {}],
|
||||
["sum", {}],
|
||||
["median", {}],
|
||||
["min", {}],
|
||||
["max", {}],
|
||||
["std", {}],
|
||||
["std", {"ddof": 0}],
|
||||
["var", {}],
|
||||
["var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("minp", [0, 99, 100])
|
||||
def test_min_periods(series, minp, roll_func, kwargs, step):
|
||||
result = getattr(
|
||||
series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
|
||||
)(**kwargs)
|
||||
expected = getattr(
|
||||
series.rolling(len(series), min_periods=minp, step=step), roll_func
|
||||
)(**kwargs)
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
def test_min_periods_count(series, step):
|
||||
result = series.rolling(len(series) + 1, min_periods=0, step=step).count()
|
||||
expected = series.rolling(len(series), min_periods=0, step=step).count()
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs, minp",
|
||||
[
|
||||
["mean", {}, 15],
|
||||
["sum", {}, 15],
|
||||
["count", {}, 0],
|
||||
["median", {}, 15],
|
||||
["min", {}, 15],
|
||||
["max", {}, 15],
|
||||
["std", {}, 15],
|
||||
["std", {"ddof": 0}, 15],
|
||||
["var", {}, 15],
|
||||
["var", {"ddof": 0}, 15],
|
||||
],
|
||||
)
|
||||
def test_center(roll_func, kwargs, minp):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(
|
||||
**kwargs
|
||||
)
|
||||
expected = (
|
||||
getattr(
|
||||
concat([obj, Series([np.nan] * 9)]).rolling(20, min_periods=minp), roll_func
|
||||
)(**kwargs)
|
||||
.iloc[9:]
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs, minp, fill_value",
|
||||
[
|
||||
["mean", {}, 10, None],
|
||||
["sum", {}, 10, None],
|
||||
["count", {}, 0, 0],
|
||||
["median", {}, 10, None],
|
||||
["min", {}, 10, None],
|
||||
["max", {}, 10, None],
|
||||
["std", {}, 10, None],
|
||||
["std", {"ddof": 0}, 10, None],
|
||||
["var", {}, 10, None],
|
||||
["var", {"ddof": 0}, 10, None],
|
||||
],
|
||||
)
|
||||
def test_center_reindex_series(series, roll_func, kwargs, minp, fill_value):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
series_xp = (
|
||||
getattr(
|
||||
series.reindex(list(series.index) + s).rolling(window=25, min_periods=minp),
|
||||
roll_func,
|
||||
)(**kwargs)
|
||||
.shift(-12)
|
||||
.reindex(series.index)
|
||||
)
|
||||
series_rs = getattr(
|
||||
series.rolling(window=25, min_periods=minp, center=True), roll_func
|
||||
)(**kwargs)
|
||||
if fill_value is not None:
|
||||
series_xp = series_xp.fillna(fill_value)
|
||||
tm.assert_series_equal(series_xp, series_rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs, minp, fill_value",
|
||||
[
|
||||
["mean", {}, 10, None],
|
||||
["sum", {}, 10, None],
|
||||
["count", {}, 0, 0],
|
||||
["median", {}, 10, None],
|
||||
["min", {}, 10, None],
|
||||
["max", {}, 10, None],
|
||||
["std", {}, 10, None],
|
||||
["std", {"ddof": 0}, 10, None],
|
||||
["var", {}, 10, None],
|
||||
["var", {"ddof": 0}, 10, None],
|
||||
],
|
||||
)
|
||||
def test_center_reindex_frame(frame, roll_func, kwargs, minp, fill_value):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
frame_xp = (
|
||||
getattr(
|
||||
frame.reindex(list(frame.index) + s).rolling(window=25, min_periods=minp),
|
||||
roll_func,
|
||||
)(**kwargs)
|
||||
.shift(-12)
|
||||
.reindex(frame.index)
|
||||
)
|
||||
frame_rs = getattr(
|
||||
frame.rolling(window=25, min_periods=minp, center=True), roll_func
|
||||
)(**kwargs)
|
||||
if fill_value is not None:
|
||||
frame_xp = frame_xp.fillna(fill_value)
|
||||
tm.assert_frame_equal(frame_xp, frame_rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).max(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).min(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).sum(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).mean(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).std(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).var(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).skew(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).kurt(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).quantile(q=0.5),
|
||||
lambda x: x.rolling(window=10, min_periods=5).median(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True),
|
||||
pytest.param(
|
||||
lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(),
|
||||
marks=td.skip_if_no("scipy"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_functions_window_non_shrinkage(f):
|
||||
# GH 7764
|
||||
s = Series(range(4))
|
||||
s_expected = Series(np.nan, index=s.index)
|
||||
df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=["A", "B"])
|
||||
df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
|
||||
|
||||
s_result = f(s)
|
||||
tm.assert_series_equal(s_result, s_expected)
|
||||
|
||||
df_result = f(df)
|
||||
tm.assert_frame_equal(df_result, df_expected)
|
||||
|
||||
|
||||
def test_rolling_max_gh6297(step):
|
||||
"""Replicate result expected in GH #6297"""
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 2 datapoints on one of the days
|
||||
indices.append(datetime(1975, 1, 3, 6, 0))
|
||||
series = Series(range(1, 7), index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
expected = Series(
|
||||
[1.0, 2.0, 6.0, 4.0, 5.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").max().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_max_resample(step):
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
||||
indices.append(datetime(1975, 1, 5, 1))
|
||||
indices.append(datetime(1975, 1, 5, 2))
|
||||
series = Series(list(range(5)) + [10, 20], index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
# Default how should be max
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 20.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").max().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# Now specify median (10.0)
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 10.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").median().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# Now specify mean (4+10+20)/3
|
||||
v = (4.0 + 10.0 + 20.0) / 3.0
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, v],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").mean().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_min_resample(step):
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
||||
indices.append(datetime(1975, 1, 5, 1))
|
||||
indices.append(datetime(1975, 1, 5, 2))
|
||||
series = Series(list(range(5)) + [10, 20], index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
# Default how should be min
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 4.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
r = series.resample("D").min().rolling(window=1, step=step)
|
||||
tm.assert_series_equal(expected, r.min())
|
||||
|
||||
|
||||
def test_rolling_median_resample():
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
||||
indices.append(datetime(1975, 1, 5, 1))
|
||||
indices.append(datetime(1975, 1, 5, 2))
|
||||
series = Series(list(range(5)) + [10, 20], index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
# Default how should be median
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 10],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)
|
||||
x = series.resample("D").median().rolling(window=1).median()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_median_memory_error():
|
||||
# GH11722
|
||||
n = 20000
|
||||
Series(np.random.default_rng(2).standard_normal(n)).rolling(
|
||||
window=2, center=False
|
||||
).median()
|
||||
Series(np.random.default_rng(2).standard_normal(n)).rolling(
|
||||
window=2, center=False
|
||||
).median()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data_type",
|
||||
[np.dtype(f"f{width}") for width in [4, 8]]
|
||||
+ [np.dtype(f"{sign}{width}") for width in [1, 2, 4, 8] for sign in "ui"],
|
||||
)
|
||||
def test_rolling_min_max_numeric_types(data_type):
|
||||
# GH12373
|
||||
|
||||
# Just testing that these don't throw exceptions and that
|
||||
# the return type is float64. Other tests will cover quantitative
|
||||
# correctness
|
||||
result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).max()
|
||||
assert result.dtypes[0] == np.dtype("f8")
|
||||
result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).min()
|
||||
assert result.dtypes[0] == np.dtype("f8")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.rolling(window=10, min_periods=0).count(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).max(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).min(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).sum(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).mean(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).std(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).var(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).skew(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).kurt(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).quantile(0.5),
|
||||
lambda x: x.rolling(window=10, min_periods=5).median(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True),
|
||||
pytest.param(
|
||||
lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(),
|
||||
marks=td.skip_if_no("scipy"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length(f):
|
||||
# GH 8056
|
||||
s = Series(dtype=np.float64)
|
||||
s_expected = s
|
||||
df1 = DataFrame()
|
||||
df1_expected = df1
|
||||
df2 = DataFrame(columns=["a"])
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
df2_expected = df2
|
||||
|
||||
s_result = f(s)
|
||||
tm.assert_series_equal(s_result, s_expected)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
@@ -0,0 +1,182 @@
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
concat,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries import offsets
|
||||
|
||||
|
||||
def scoreatpercentile(a, per):
|
||||
values = np.sort(a, axis=0)
|
||||
|
||||
idx = int(per / 1.0 * (values.shape[0] - 1))
|
||||
|
||||
if idx == values.shape[0] - 1:
|
||||
retval = values[-1]
|
||||
|
||||
else:
|
||||
qlow = idx / (values.shape[0] - 1)
|
||||
qhig = (idx + 1) / (values.shape[0] - 1)
|
||||
vlow = values[idx]
|
||||
vhig = values[idx + 1]
|
||||
retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
|
||||
|
||||
return retval
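
# Illustrative cross-check (a sketch added for clarity, not part of the original
# module; the test name below is hypothetical): scoreatpercentile implements the
# same linear interpolation as NumPy's default percentile method, which is what
# the rolling-quantile tests below are effectively compared against.
def test_scoreatpercentile_matches_numpy_linear():
    arr = np.arange(10, dtype=float)
    for per in [0.0, 0.1, 0.5, 0.9, 1.0]:
        # np.percentile takes q in [0, 100]; the helper takes per in [0, 1]
        assert np.isclose(scoreatpercentile(arr, per), np.percentile(arr, per * 100))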
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_series(series, q, step):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
result = series.rolling(50, step=step).quantile(q)
|
||||
assert isinstance(result, Series)
|
||||
end = range(0, len(series), step or 1)[-1] + 1
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_frame(raw, frame, q, step):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
result = frame.rolling(50, step=step).quantile(q)
|
||||
assert isinstance(result, DataFrame)
|
||||
end = range(0, len(frame), step or 1)[-1] + 1
|
||||
tm.assert_series_equal(
|
||||
result.iloc[-1, :],
|
||||
frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_time_rule_series(series, q):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
win = 25
|
||||
ser = series[::2].resample("B").mean()
|
||||
series_result = ser.rolling(window=win, min_periods=10).quantile(q)
|
||||
last_date = series_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_series = series[::2].truncate(prev_date, last_date)
|
||||
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_time_rule_frame(raw, frame, q):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
win = 25
|
||||
frm = frame[::2].resample("B").mean()
|
||||
frame_result = frm.rolling(window=win, min_periods=10).quantile(q)
|
||||
last_date = frame_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_frame = frame[::2].truncate(prev_date, last_date)
|
||||
tm.assert_series_equal(
|
||||
frame_result.xs(last_date),
|
||||
trunc_frame.apply(compare_func, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_nans(q):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = obj.rolling(50, min_periods=30).quantile(q)
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = obj.rolling(20, min_periods=15).quantile(q)
|
||||
assert isna(result.iloc[23])
|
||||
assert not isna(result.iloc[24])
|
||||
|
||||
assert not isna(result.iloc[-6])
|
||||
assert isna(result.iloc[-5])
|
||||
|
||||
obj2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = obj2.rolling(10, min_periods=5).quantile(q)
|
||||
assert isna(result.iloc[3])
|
||||
assert notna(result.iloc[4])
|
||||
|
||||
result0 = obj.rolling(20, min_periods=0).quantile(q)
|
||||
result1 = obj.rolling(20, min_periods=1).quantile(q)
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("minp", [0, 99, 100])
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_min_periods(series, minp, q, step):
|
||||
result = series.rolling(len(series) + 1, min_periods=minp, step=step).quantile(q)
|
||||
expected = series.rolling(len(series), min_periods=minp, step=step).quantile(q)
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_center(q):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = obj.rolling(20, center=True).quantile(q)
|
||||
expected = (
|
||||
concat([obj, Series([np.nan] * 9)])
|
||||
.rolling(20)
|
||||
.quantile(q)
|
||||
.iloc[9:]
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_center_reindex_series(series, q):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
series_xp = (
|
||||
series.reindex(list(series.index) + s)
|
||||
.rolling(window=25)
|
||||
.quantile(q)
|
||||
.shift(-12)
|
||||
.reindex(series.index)
|
||||
)
|
||||
|
||||
series_rs = series.rolling(window=25, center=True).quantile(q)
|
||||
tm.assert_series_equal(series_xp, series_rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_center_reindex_frame(frame, q):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
frame_xp = (
|
||||
frame.reindex(list(frame.index) + s)
|
||||
.rolling(window=25)
|
||||
.quantile(q)
|
||||
.shift(-12)
|
||||
.reindex(frame.index)
|
||||
)
|
||||
frame_rs = frame.rolling(window=25, center=True).quantile(q)
|
||||
tm.assert_frame_equal(frame_xp, frame_rs)
|
||||
|
||||
|
||||
def test_keyword_quantile_deprecated():
|
||||
# GH #52550
|
||||
s = Series([1, 2, 3, 4])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.rolling(2).quantile(quantile=0.4)
|
@@ -0,0 +1,227 @@
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
concat,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries import offsets
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_series(series, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
result = getattr(series.rolling(50), roll_func)()
|
||||
assert isinstance(result, Series)
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_frame(raw, frame, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
result = getattr(frame.rolling(50), roll_func)()
|
||||
assert isinstance(result, DataFrame)
|
||||
tm.assert_series_equal(
|
||||
result.iloc[-1, :],
|
||||
frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_time_rule_series(series, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
win = 25
|
||||
ser = series[::2].resample("B").mean()
|
||||
series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)()
|
||||
last_date = series_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_series = series[::2].truncate(prev_date, last_date)
|
||||
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_time_rule_frame(raw, frame, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
win = 25
|
||||
frm = frame[::2].resample("B").mean()
|
||||
frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)()
|
||||
last_date = frame_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_frame = frame[::2].truncate(prev_date, last_date)
|
||||
tm.assert_series_equal(
|
||||
frame_result.xs(last_date),
|
||||
trunc_frame.apply(compare_func, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_nans(sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(50, min_periods=30), roll_func)()
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = getattr(obj.rolling(20, min_periods=15), roll_func)()
|
||||
assert isna(result.iloc[23])
|
||||
assert not isna(result.iloc[24])
|
||||
|
||||
assert not isna(result.iloc[-6])
|
||||
assert isna(result.iloc[-5])
|
||||
|
||||
obj2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = getattr(obj2.rolling(10, min_periods=5), roll_func)()
|
||||
assert isna(result.iloc[3])
|
||||
assert notna(result.iloc[4])
|
||||
|
||||
result0 = getattr(obj.rolling(20, min_periods=0), roll_func)()
|
||||
result1 = getattr(obj.rolling(20, min_periods=1), roll_func)()
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("minp", [0, 99, 100])
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_min_periods(series, minp, roll_func, step):
|
||||
result = getattr(
|
||||
series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
|
||||
)()
|
||||
expected = getattr(
|
||||
series.rolling(len(series), min_periods=minp, step=step), roll_func
|
||||
)()
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_center(roll_func):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(20, center=True), roll_func)()
|
||||
expected = (
|
||||
getattr(concat([obj, Series([np.nan] * 9)]).rolling(20), roll_func)()
|
||||
.iloc[9:]
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_center_reindex_series(series, roll_func):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
series_xp = (
|
||||
getattr(
|
||||
series.reindex(list(series.index) + s).rolling(window=25),
|
||||
roll_func,
|
||||
)()
|
||||
.shift(-12)
|
||||
.reindex(series.index)
|
||||
)
|
||||
series_rs = getattr(series.rolling(window=25, center=True), roll_func)()
|
||||
tm.assert_series_equal(series_xp, series_rs)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_center_reindex_frame(frame, roll_func):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
frame_xp = (
|
||||
getattr(
|
||||
frame.reindex(list(frame.index) + s).rolling(window=25),
|
||||
roll_func,
|
||||
)()
|
||||
.shift(-12)
|
||||
.reindex(frame.index)
|
||||
)
|
||||
frame_rs = getattr(frame.rolling(window=25, center=True), roll_func)()
|
||||
tm.assert_frame_equal(frame_xp, frame_rs)
|
||||
|
||||
|
||||
def test_rolling_skew_edge_cases(step):
|
||||
expected = Series([np.nan] * 4 + [0.0])[::step]
|
||||
# yields all NaN (0 variance)
|
||||
d = Series([1] * 5)
|
||||
x = d.rolling(window=5, step=step).skew()
|
||||
# index 4 should be 0 as it contains 5 identical observations
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
expected = Series([np.nan] * 5)[::step]
|
||||
# yields all NaN (window too small)
|
||||
d = Series(np.random.default_rng(2).standard_normal(5))
|
||||
x = d.rolling(window=2, step=step).skew()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# yields [NaN, NaN, NaN, 0.177994, 1.548824]
|
||||
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
|
||||
expected = Series([np.nan, np.nan, np.nan, 0.177994, 1.548824])[::step]
|
||||
x = d.rolling(window=4, step=step).skew()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_kurt_edge_cases(step):
|
||||
expected = Series([np.nan] * 4 + [-3.0])[::step]
|
||||
|
||||
# yields all NaN (0 variance)
|
||||
d = Series([1] * 5)
|
||||
x = d.rolling(window=5, step=step).kurt()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# yields all NaN (window too small)
|
||||
expected = Series([np.nan] * 5)[::step]
|
||||
d = Series(np.random.default_rng(2).standard_normal(5))
|
||||
x = d.rolling(window=3, step=step).kurt()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# yields [NaN, NaN, NaN, 1.224307, 2.671499]
|
||||
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
|
||||
expected = Series([np.nan, np.nan, np.nan, 1.224307, 2.671499])[::step]
|
||||
x = d.rolling(window=4, step=step).kurt()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_skew_eq_value_fperr(step):
|
||||
# #18804 all rolling skew for all equal values should return NaN
|
||||
# #46717 update: all equal values should return 0 instead of NaN
|
||||
a = Series([1.1] * 15).rolling(window=10, step=step).skew()
|
||||
assert (a[a.index >= 9] == 0).all()
|
||||
assert a[a.index < 9].isna().all()
|
||||
|
||||
|
||||
def test_rolling_kurt_eq_value_fperr(step):
|
||||
# #18804 all rolling kurt for all equal values should return NaN
|
||||
# #46717 update: all equal values should return -3 instead of NaN
|
||||
a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
|
||||
assert (a[a.index >= 9] == -3).all()
|
||||
assert a[a.index < 9].isna().all()
|
@@ -0,0 +1,715 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries import offsets
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def regular():
|
||||
return DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
).set_index("A")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def ragged():
|
||||
df = DataFrame({"B": range(5)})
|
||||
df.index = [
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
return df
|
||||
|
||||
|
||||
class TestRollingTS:
|
||||
# rolling time-series friendly
|
||||
# xref GH13327
|
||||
|
||||
def test_doc_string(self):
|
||||
df = DataFrame(
|
||||
{"B": [0, 1, 2, np.nan, 4]},
|
||||
index=[
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
],
|
||||
)
|
||||
df
|
||||
df.rolling("2s").sum()
|
||||
|
||||
def test_invalid_window_non_int(self, regular):
|
||||
# not a valid freq
|
||||
msg = "passed window foobar is not compatible with a datetimelike index"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window="foobar")
|
||||
# not a datetimelike index
|
||||
msg = "window must be an integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.reset_index().rolling(window="foobar")
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)])
|
||||
def test_invalid_window_nonfixed(self, freq, regular):
|
||||
# non-fixed freqs
|
||||
msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window=freq)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"])
|
||||
def test_valid_window(self, freq, regular):
|
||||
regular.rolling(window=freq)
|
||||
|
||||
@pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])])
|
||||
def test_invalid_minp(self, minp, regular):
|
||||
# non-integer min_periods
|
||||
msg = (
|
||||
r"local variable 'minp' referenced before assignment|"
|
||||
"min_periods must be an integer"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window="1D", min_periods=minp)
|
||||
|
||||
def test_on(self, regular):
|
||||
df = regular
|
||||
|
||||
# not a valid column
|
||||
msg = (
|
||||
r"invalid on specified as foobar, must be a column "
|
||||
"\\(of DataFrame\\), an Index or None"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling(window="2s", on="foobar")
|
||||
|
||||
# column is valid
|
||||
df = df.copy()
|
||||
df["C"] = date_range("20130101", periods=len(df))
|
||||
df.rolling(window="2d", on="C").sum()
|
||||
|
||||
# invalid columns
|
||||
msg = "window must be an integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling(window="2d", on="B")
|
||||
|
||||
# ok even though on non-selected
|
||||
df.rolling(window="2d", on="C").B.sum()
|
||||
|
||||
def test_monotonic_on(self):
|
||||
# on/index must be monotonic
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
)
|
||||
|
||||
assert df.A.is_monotonic_increasing
|
||||
df.rolling("2s", on="A").sum()
|
||||
|
||||
df = df.set_index("A")
|
||||
assert df.index.is_monotonic_increasing
|
||||
df.rolling("2s").sum()
|
||||
|
||||
def test_non_monotonic_on(self):
|
||||
# GH 19248
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
)
|
||||
df = df.set_index("A")
|
||||
non_monotonic_index = df.index.to_list()
|
||||
non_monotonic_index[0] = non_monotonic_index[3]
|
||||
df.index = non_monotonic_index
|
||||
|
||||
assert not df.index.is_monotonic_increasing
|
||||
|
||||
msg = "index values must be monotonic"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling("2s").sum()
|
||||
|
||||
df = df.reset_index()
|
||||
|
||||
msg = (
|
||||
r"invalid on specified as A, must be a column "
|
||||
"\\(of DataFrame\\), an Index or None"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling("2s", on="A").sum()
|
||||
|
||||
def test_frame_on(self):
|
||||
df = DataFrame(
|
||||
{"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
|
||||
)
|
||||
|
||||
df["A"] = [
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
|
||||
# rolling with 'on' should match setting the index first
|
||||
expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)
|
||||
|
||||
result = df.rolling("2s", on="A").B.sum()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# test as a frame
|
||||
# we should be ignoring the 'on' as an aggregation column
|
||||
# note that the expected is built by setting the index, computing,
# and then resetting it, so its columns must be reordered to match
# the actual result, which keeps the original column ordering
|
||||
expected = (
|
||||
df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
|
||||
)
|
||||
|
||||
result = df.rolling("2s", on="A")[["B"]].sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_on2(self, unit):
|
||||
# using multiple aggregation columns
|
||||
dti = DatetimeIndex(
|
||||
[
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
).as_unit(unit)
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [0, 1, 2, 3, 4],
|
||||
"B": [0, 1, 2, np.nan, 4],
|
||||
"C": dti,
|
||||
},
|
||||
columns=["A", "C", "B"],
|
||||
)
|
||||
|
||||
expected1 = DataFrame(
|
||||
{"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
|
||||
columns=["A", "C", "B"],
|
||||
)
|
||||
|
||||
result = df.rolling("2s", on="C").sum()
|
||||
expected = expected1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([0, 1, 3, np.nan, 4], name="B")
|
||||
result = df.rolling("2s", on="C").B.sum()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = expected1[["A", "B", "C"]]
|
||||
result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_basic_regular(self, regular):
|
||||
df = regular.copy()
|
||||
|
||||
df.index = date_range("20130101", periods=5, freq="D")
|
||||
expected = df.rolling(window=1, min_periods=1).sum()
|
||||
result = df.rolling(window="1D").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df.index = date_range("20130101", periods=5, freq="2D")
|
||||
expected = df.rolling(window=1, min_periods=1).sum()
|
||||
result = df.rolling(window="2D", min_periods=1).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.rolling(window=1, min_periods=1).sum()
|
||||
result = df.rolling(window="2D", min_periods=1).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.rolling(window=1).sum()
|
||||
result = df.rolling(window="2D").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_min_periods(self, regular):
|
||||
# compare for min_periods
|
||||
df = regular
|
||||
|
||||
# these are slightly different specifications, but give the same result
|
||||
expected = df.rolling(2, min_periods=1).sum()
|
||||
result = df.rolling("2s").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.rolling(2, min_periods=1).sum()
|
||||
result = df.rolling("2s", min_periods=1).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_closed(self, regular, unit):
|
||||
# xref GH13965
|
||||
|
||||
dti = DatetimeIndex(
|
||||
[
|
||||
Timestamp("20130101 09:00:01"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:04"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
).as_unit(unit)
|
||||
|
||||
df = DataFrame(
|
||||
{"A": [1] * 5},
|
||||
index=dti,
|
||||
)
|
||||
|
||||
# closed must be 'right', 'left', 'both', 'neither'
|
||||
msg = "closed must be 'right', 'left', 'both' or 'neither'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window="2s", closed="blabla")
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [1.0, 2, 2, 2, 1]
|
||||
result = df.rolling("2s", closed="right").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# default should be 'right'
|
||||
result = df.rolling("2s").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [1.0, 2, 3, 3, 2]
|
||||
result = df.rolling("2s", closed="both").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [np.nan, 1.0, 2, 2, 1]
|
||||
result = df.rolling("2s", closed="left").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
|
||||
result = df.rolling("2s", closed="neither").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_sum(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 3, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=2).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 5, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s").sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 5, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="4s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 6, 9]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="4s", min_periods=3).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, np.nan, 3, 6, 9]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 6, 10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_mean(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).mean()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).mean()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_median(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).median()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).median()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_quantile(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).quantile(0.5)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).quantile(0.5)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_std(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).std(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).std(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s", min_periods=1).std(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] + [0.5] * 4
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).std(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_var(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).var(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).var(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s", min_periods=1).var(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] + [0.25] * 4
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).var(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_skew(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="3s", min_periods=1).skew()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).skew()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_kurt(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="3s", min_periods=1).kurt()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).kurt()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 4 + [-1.2]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_count(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).count()
|
||||
expected = df.copy()
|
||||
expected["B"] = [1.0, 1, 1, 1, 1]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = ragged
|
||||
result = df.rolling(window="1s").count()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).count()
|
||||
expected = df.copy()
|
||||
expected["B"] = [1.0, 1, 2, 1, 2]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=2).count()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_regular_min(self):
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
|
||||
).set_index("A")
|
||||
result = df.rolling("1s").min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
|
||||
).set_index("A")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.rolling("2s").min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [5.0, 4, 3, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling("5s").min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [5.0, 4, 3, 3, 3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_min(self, ragged):
|
||||
df = ragged
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1, 3, 3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 0, 0, 1, 1]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_perf_min(self):
|
||||
N = 10000
|
||||
|
||||
dfp = DataFrame(
|
||||
{"B": np.random.default_rng(2).standard_normal(N)},
|
||||
index=date_range("20130101", periods=N, freq="s"),
|
||||
)
|
||||
expected = dfp.rolling(2, min_periods=1).min()
|
||||
result = dfp.rolling("2s").min()
|
||||
assert ((result - expected) < 0.01).all().all()
|
||||
|
||||
expected = dfp.rolling(200, min_periods=1).min()
|
||||
result = dfp.rolling("200s").min()
|
||||
assert ((result - expected) < 0.01).all().all()
|
||||
|
||||
def test_ragged_max(self, ragged):
|
||||
df = ragged
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).max()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).max()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).max()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"freq, op, result_data",
|
||||
[
|
||||
("ms", "min", [0.0] * 10),
|
||||
("ms", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("ms", "max", [0.0] * 9 + [2.0]),
|
||||
("s", "min", [0.0] * 10),
|
||||
("s", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("s", "max", [0.0] * 9 + [2.0]),
|
||||
("min", "min", [0.0] * 10),
|
||||
("min", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("min", "max", [0.0] * 9 + [2.0]),
|
||||
("h", "min", [0.0] * 10),
|
||||
("h", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("h", "max", [0.0] * 9 + [2.0]),
|
||||
("D", "min", [0.0] * 10),
|
||||
("D", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("D", "max", [0.0] * 9 + [2.0]),
|
||||
],
|
||||
)
|
||||
def test_freqs_ops(self, freq, op, result_data):
|
||||
# GH 21096
|
||||
index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10)
|
||||
# Explicit cast to float to avoid implicit cast when setting nan
|
||||
s = Series(data=0, index=index, dtype="float")
|
||||
s.iloc[1] = np.nan
|
||||
s.iloc[-1] = 2
|
||||
result = getattr(s.rolling(window=f"10{freq}"), op)()
|
||||
expected = Series(data=result_data, index=index)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
"sum",
|
||||
"mean",
|
||||
"count",
|
||||
"median",
|
||||
"std",
|
||||
"var",
|
||||
"kurt",
|
||||
"skew",
|
||||
"min",
|
||||
"max",
|
||||
],
|
||||
)
|
||||
def test_all(self, f, regular):
|
||||
# simple comparison of integer vs time-based windowing
|
||||
df = regular * 2
|
||||
er = df.rolling(window=1)
|
||||
r = df.rolling(window="1s")
|
||||
|
||||
result = getattr(r, f)()
|
||||
expected = getattr(er, f)()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = r.quantile(0.5)
|
||||
expected = er.quantile(0.5)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_all2(self, arithmetic_win_operators):
|
||||
f = arithmetic_win_operators
|
||||
# more sophisticated comparison of integer vs.
|
||||
# time-based windowing
|
||||
df = DataFrame(
|
||||
{"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="h")
|
||||
)
|
||||
# in-range data
|
||||
dft = df.between_time("09:00", "16:00")
|
||||
|
||||
r = dft.rolling(window="5h")
|
||||
|
||||
result = getattr(r, f)()
|
||||
|
||||
# we need to roll each day separately to compare with a time-based roll;
# groupby-apply returns a MultiIndex, so the day level is dropped afterwards
|
||||
def agg_by_day(x):
|
||||
x = x.between_time("09:00", "16:00")
|
||||
return getattr(x.rolling(5, min_periods=1), f)()
|
||||
|
||||
expected = (
|
||||
df.groupby(df.index.day).apply(agg_by_day).reset_index(level=0, drop=True)
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_cov_offset(self):
|
||||
# GH16058
|
||||
|
||||
idx = date_range("2017-01-01", periods=24, freq="1h")
|
||||
ss = Series(np.arange(len(idx)), index=idx)
|
||||
|
||||
result = ss.rolling("2h").cov()
|
||||
expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected2 = ss.rolling(2, min_periods=1).cov()
|
||||
tm.assert_series_equal(result, expected2)
|
||||
|
||||
result = ss.rolling("3h").cov()
|
||||
expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected2 = ss.rolling(3, min_periods=1).cov()
|
||||
tm.assert_series_equal(result, expected2)
|
||||
|
||||
def test_rolling_on_decreasing_index(self, unit):
|
||||
# GH-19248, GH-32385
|
||||
index = DatetimeIndex(
|
||||
[
|
||||
Timestamp("20190101 09:00:30"),
|
||||
Timestamp("20190101 09:00:27"),
|
||||
Timestamp("20190101 09:00:20"),
|
||||
Timestamp("20190101 09:00:18"),
|
||||
Timestamp("20190101 09:00:10"),
|
||||
]
|
||||
).as_unit(unit)
|
||||
|
||||
df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
|
||||
result = df.rolling("5s").min()
|
||||
expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_on_empty(self):
|
||||
# GH-32385
|
||||
df = DataFrame({"column": []}, index=[])
|
||||
result = df.rolling("5s").min()
|
||||
expected = DataFrame({"column": []}, index=[])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_on_multi_index_level(self):
|
||||
# GH-15584
|
||||
df = DataFrame(
|
||||
{"column": range(6)},
|
||||
index=MultiIndex.from_product(
|
||||
[date_range("20190101", periods=3), range(2)], names=["date", "seq"]
|
||||
),
|
||||
)
|
||||
result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
|
||||
expected = DataFrame(
|
||||
{"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
|
||||
def test_nat_axis_error(msg, axis):
|
||||
idx = [Timestamp("2020"), NaT]
|
||||
kwargs = {"columns" if axis == 1 else "index": idx}
|
||||
df = DataFrame(np.eye(2), **kwargs)
|
||||
warn_msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
|
||||
if axis == 1:
|
||||
warn_msg = "Support for axis=1 in DataFrame.rolling is deprecated"
|
||||
with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
|
||||
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
|
||||
df.rolling("D", axis=axis).mean()
|
||||
|
||||
|
||||
@td.skip_if_no("pyarrow")
|
||||
def test_arrow_datetime_axis():
|
||||
# GH 55849
|
||||
expected = Series(
|
||||
np.arange(5, dtype=np.float64),
|
||||
index=Index(
|
||||
date_range("2020-01-01", periods=5), dtype="timestamp[ns][pyarrow]"
|
||||
),
|
||||
)
|
||||
result = expected.rolling("1D").sum()
|
||||
tm.assert_series_equal(result, expected)
|
@@ -0,0 +1,688 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timedelta,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.indexers import BaseIndexer
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
"triang",
|
||||
"blackman",
|
||||
"hamming",
|
||||
"bartlett",
|
||||
"bohman",
|
||||
"blackmanharris",
|
||||
"nuttall",
|
||||
"barthann",
|
||||
]
|
||||
)
|
||||
def win_types(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"])
|
||||
def win_types_special(request):
|
||||
return request.param
|
||||
|
||||
|
||||
def test_constructor(frame_or_series):
|
||||
# GH 12669
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
|
||||
# valid
|
||||
c(win_type="boxcar", window=2, min_periods=1)
|
||||
c(win_type="boxcar", window=2, min_periods=1, center=True)
|
||||
c(win_type="boxcar", window=2, min_periods=1, center=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
|
||||
def test_invalid_constructor(frame_or_series, w):
|
||||
# not valid
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
with pytest.raises(ValueError, match="min_periods must be an integer"):
|
||||
c(win_type="boxcar", window=2, min_periods=w)
|
||||
with pytest.raises(ValueError, match="center must be a boolean"):
|
||||
c(win_type="boxcar", window=2, min_periods=1, center=w)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("wt", ["foobar", 1])
|
||||
def test_invalid_constructor_wintype(frame_or_series, wt):
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
with pytest.raises(ValueError, match="Invalid win_type"):
|
||||
c(win_type=wt, window=2)
|
||||
|
||||
|
||||
def test_constructor_with_win_type(frame_or_series, win_types):
|
||||
# GH 12669
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
c(win_type=win_types, window=2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg", ["median", "kurt", "skew"])
|
||||
def test_agg_function_support(arg):
|
||||
pytest.importorskip("scipy")
|
||||
df = DataFrame({"A": np.arange(5)})
|
||||
roll = df.rolling(2, win_type="triang")
|
||||
|
||||
msg = f"'{arg}' is not a valid function for 'Window' object"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
roll.agg(arg)
|
||||
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
roll.agg([arg])
|
||||
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
roll.agg({"A": arg})
|
||||
|
||||
|
||||
def test_invalid_scipy_arg():
    # This error is raised by scipy
    pytest.importorskip("scipy")
    msg = r"boxcar\(\) got an unexpected"
    with pytest.raises(TypeError, match=msg):
        Series(range(3)).rolling(1, win_type="boxcar").mean(foo="bar")


def test_constructor_with_win_type_invalid(frame_or_series):
    # GH 13383
    pytest.importorskip("scipy")
    c = frame_or_series(range(5)).rolling

    msg = "window must be an integer 0 or greater"

    with pytest.raises(ValueError, match=msg):
        c(-1, win_type="boxcar")


def test_window_with_args(step):
    # make sure that we are aggregating window functions correctly with arg
    pytest.importorskip("scipy")
    r = Series(np.random.default_rng(2).standard_normal(100)).rolling(
        window=10, min_periods=1, win_type="gaussian", step=step
    )
    expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
    expected.columns = ["<lambda>", "<lambda>"]
    result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)])
    tm.assert_frame_equal(result, expected)

    def a(x):
        return x.mean(std=10)

    def b(x):
        return x.mean(std=0.01)

    expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
    expected.columns = ["a", "b"]
    result = r.aggregate([a, b])
    tm.assert_frame_equal(result, expected)


def test_win_type_with_method_invalid():
    pytest.importorskip("scipy")
    with pytest.raises(
        NotImplementedError, match="'single' is the only supported method type."
    ):
        Series(range(1)).rolling(1, win_type="triang", method="table")


@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")])
def test_consistent_win_type_freq(arg):
    # GH 15969
    pytest.importorskip("scipy")
    s = Series(range(1))
    with pytest.raises(ValueError, match="Invalid win_type freq"):
        s.rolling(arg, win_type="freq")


def test_win_type_freq_return_none():
    # GH 48838
    freq_roll = Series(range(2), index=date_range("2020", periods=2)).rolling("2s")
    assert freq_roll.win_type is None


def test_win_type_not_implemented():
    pytest.importorskip("scipy")

    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            return np.array([0, 1]), np.array([1, 2])

    df = DataFrame({"values": range(2)})
    indexer = CustomIndexer()
    with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
        df.rolling(indexer, win_type="boxcar")

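
# Related sketch (helper name is hypothetical, not from the pandas test
# suite): the same indexer is accepted once win_type is dropped, i.e. the
# NotImplementedError above only concerns weighted (win_type) windows.
def _sketch_baseindexer_allowed_without_win_type():
    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            return np.array([0, 1]), np.array([1, 2])

    df = DataFrame({"values": range(2)})
    result = df.rolling(CustomIndexer(), min_periods=1).sum()
    tm.assert_frame_equal(result, DataFrame({"values": [0.0, 1.0]}))

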
def test_cmov_mean(step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    result = Series(vals).rolling(5, center=True, step=step).mean()
    expected_values = [
        np.nan,
        np.nan,
        9.962,
        11.27,
        11.564,
        12.516,
        12.818,
        12.952,
        np.nan,
        np.nan,
    ]
    expected = Series(expected_values)[::step]
    tm.assert_series_equal(expected, result)


def test_cmov_window(step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    result = Series(vals).rolling(5, win_type="boxcar", center=True, step=step).mean()
    expected_values = [
        np.nan,
        np.nan,
        9.962,
        11.27,
        11.564,
        12.516,
        12.818,
        12.952,
        np.nan,
        np.nan,
    ]
    expected = Series(expected_values)[::step]
    tm.assert_series_equal(expected, result)

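
# Follow-up sketch (helper name is hypothetical, not from the pandas test
# suite): "boxcar" weights are uniform, which is why the expected values above
# are identical to those of the unweighted test_cmov_mean.
def _sketch_boxcar_matches_unweighted_mean():
    pytest.importorskip("scipy")
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    weighted = Series(vals).rolling(5, win_type="boxcar", center=True).mean()
    plain = Series(vals).rolling(5, center=True).mean()
    tm.assert_series_equal(weighted, plain)

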
def test_cmov_window_corner(step):
    # GH 8238
    # all nan
    pytest.importorskip("scipy")
    vals = Series([np.nan] * 10)
    result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
    assert np.isnan(result).all()

    # empty
    vals = Series([], dtype=object)
    result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
    assert len(result) == 0

    # shorter than window
    vals = Series(np.random.default_rng(2).standard_normal(5))
    result = vals.rolling(10, win_type="boxcar", step=step).mean()
    assert np.isnan(result).all()
    assert len(result) == len(range(0, 5, step or 1))

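
# Sketch of the step keyword used throughout these tests (helper name is
# hypothetical, not from the pandas test suite): step=k evaluates the window
# at every k-th row, matching a [::k] slice of the step=1 result.
def _sketch_step_equals_slicing():
    s = Series(np.arange(10, dtype=float))
    full = s.rolling(3).mean()
    stepped = s.rolling(3, step=2).mean()
    tm.assert_series_equal(stepped, full[::2])

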
@pytest.mark.parametrize(
    "f,xp",
    [
        (
            "mean",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [9.252, 9.392],
                [8.644, 9.906],
                [8.87, 10.208],
                [6.81, 8.588],
                [7.792, 8.644],
                [9.05, 7.824],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "std",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [3.789706, 4.068313],
                [3.429232, 3.237411],
                [3.589269, 3.220810],
                [3.405195, 2.380655],
                [3.281839, 2.369869],
                [3.676846, 1.801799],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "var",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [14.36187, 16.55117],
                [11.75963, 10.48083],
                [12.88285, 10.37362],
                [11.59535, 5.66752],
                [10.77047, 5.61628],
                [13.51920, 3.24648],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "sum",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [46.26, 46.96],
                [43.22, 49.53],
                [44.35, 51.04],
                [34.05, 42.94],
                [38.96, 43.22],
                [45.25, 39.12],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
    ],
)
def test_cmov_window_frame(f, xp, step):
    # GH 8238
pytest.importorskip("scipy")
|
||||
df = DataFrame(
|
||||
np.array(
|
||||
[
|
||||
[12.18, 3.64],
|
||||
[10.18, 9.16],
|
||||
[13.24, 14.61],
|
||||
[4.51, 8.11],
|
||||
[6.15, 11.44],
|
||||
[9.14, 6.21],
|
||||
[11.31, 10.67],
|
||||
[2.94, 6.51],
|
||||
[9.42, 8.39],
|
||||
[12.44, 7.34],
|
||||
]
|
||||
)
|
||||
)
|
||||
xp = DataFrame(np.array(xp))[::step]
|
||||
|
||||
roll = df.rolling(5, win_type="boxcar", center=True, step=step)
|
||||
rs = getattr(roll, f)()
|
||||
|
||||
tm.assert_frame_equal(xp, rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4, 5])
|
||||
def test_cmov_window_na_min_periods(step, min_periods):
|
||||
pytest.importorskip("scipy")
|
||||
vals = Series(np.random.default_rng(2).standard_normal(10))
|
||||
vals[4] = np.nan
|
||||
vals[8] = np.nan
|
||||
|
||||
xp = vals.rolling(5, min_periods=min_periods, center=True, step=step).mean()
|
||||
rs = vals.rolling(
|
||||
5, win_type="boxcar", min_periods=min_periods, center=True, step=step
|
||||
).mean()
|
||||
tm.assert_series_equal(xp, rs)
|
||||
|
||||
|
||||
def test_cmov_window_regular(win_types, step):
|
||||
# GH 8238
|
||||
pytest.importorskip("scipy")
|
||||
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
|
||||
xps = {
|
||||
"hamming": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.71384,
|
||||
9.56348,
|
||||
12.38009,
|
||||
14.03687,
|
||||
13.8567,
|
||||
11.81473,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"triang": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.28667,
|
||||
10.34667,
|
||||
12.00556,
|
||||
13.33889,
|
||||
13.38,
|
||||
12.33667,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"barthann": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.4425,
|
||||
9.1925,
|
||||
12.5575,
|
||||
14.3675,
|
||||
14.0825,
|
||||
11.5675,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"bohman": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
7.61599,
|
||||
9.1764,
|
||||
12.83559,
|
||||
14.17267,
|
||||
14.65923,
|
||||
11.10401,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"blackmanharris": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
6.97691,
|
||||
9.16438,
|
||||
13.05052,
|
||||
14.02156,
|
||||
15.10512,
|
||||
10.74574,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"nuttall": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
7.04618,
|
||||
9.16786,
|
||||
13.02671,
|
||||
14.03559,
|
||||
15.05657,
|
||||
10.78514,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"blackman": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
7.73345,
|
||||
9.17869,
|
||||
12.79607,
|
||||
14.20036,
|
||||
14.57726,
|
||||
11.16988,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"bartlett": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.4425,
|
||||
9.1925,
|
||||
12.5575,
|
||||
14.3675,
|
||||
14.0825,
|
||||
11.5675,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
}
|
||||
|
||||
xp = Series(xps[win_types])[::step]
|
||||
rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
|
||||
tm.assert_series_equal(xp, rs)
|
||||
|
||||
|
||||
def test_cmov_window_regular_linear_range(win_types, step):
|
||||
# GH 8238
|
||||
pytest.importorskip("scipy")
|
||||
vals = np.array(range(10), dtype=float)
|
||||
xp = vals.copy()
|
||||
xp[:2] = np.nan
|
||||
xp[-2:] = np.nan
|
||||
xp = Series(xp)[::step]
|
||||
|
||||
rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
|
||||
tm.assert_series_equal(xp, rs)
|
||||
|
||||
|
||||
def test_cmov_window_regular_missing_data(win_types, step):
|
||||
# GH 8238
|
||||
pytest.importorskip("scipy")
|
||||
vals = np.array(
|
||||
[6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48]
|
||||
)
|
||||
xps = {
|
||||
"bartlett": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.70333,
|
||||
10.5225,
|
||||
8.4425,
|
||||
9.1925,
|
||||
12.5575,
|
||||
14.3675,
|
||||
15.61667,
|
||||
13.655,
|
||||
],
|
||||
"blackman": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.04582,
|
||||
11.41536,
|
||||
7.73345,
|
||||
9.17869,
|
||||
12.79607,
|
||||
14.20036,
|
||||
15.8706,
|
||||
13.655,
|
||||
],
|
||||
"barthann": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.70333,
|
||||
10.5225,
|
||||
8.4425,
|
||||
9.1925,
|
||||
12.5575,
|
||||
14.3675,
|
||||
15.61667,
|
||||
13.655,
|
||||
],
|
||||
"bohman": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.9444,
|
||||
11.56327,
|
||||
7.61599,
|
||||
9.1764,
|
||||
12.83559,
|
||||
14.17267,
|
||||
15.90976,
|
||||
13.655,
|
||||
],
|
||||
"hamming": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.59321,
|
||||
10.29694,
|
||||
8.71384,
|
||||
9.56348,
|
||||
12.38009,
|
||||
14.20565,
|
||||
15.24694,
|
||||
13.69758,
|
||||
],
|
||||
"nuttall": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.47693,
|
||||
12.2821,
|
||||
7.04618,
|
||||
9.16786,
|
||||
13.02671,
|
||||
14.03673,
|
||||
16.08759,
|
||||
13.65553,
|
||||
],
|
||||
"triang": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.33167,
|
||||
9.76125,
|
||||
9.28667,
|
||||
10.34667,
|
||||
12.00556,
|
||||
13.82125,
|
||||
14.49429,
|
||||
13.765,
|
||||
],
|
||||
"blackmanharris": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.42526,
|
||||
12.36824,
|
||||
6.97691,
|
||||
9.16438,
|
||||
13.05052,
|
||||
14.02175,
|
||||
16.1098,
|
||||
13.65509,
|
||||
],
|
||||
}
|
||||
|
||||
xp = Series(xps[win_types])[::step]
|
||||
rs = Series(vals).rolling(5, win_type=win_types, min_periods=3, step=step).mean()
|
||||
tm.assert_series_equal(xp, rs)
|
||||
|
||||
|
||||
def test_cmov_window_special(win_types_special, step):
|
||||
# GH 8238
|
||||
pytest.importorskip("scipy")
|
||||
kwds = {
|
||||
"kaiser": {"beta": 1.0},
|
||||
"gaussian": {"std": 1.0},
|
||||
"general_gaussian": {"p": 2.0, "sig": 2.0},
|
||||
"exponential": {"tau": 10},
|
||||
}
|
||||
|
||||
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
|
||||
|
||||
xps = {
|
||||
"gaussian": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
8.97297,
|
||||
9.76077,
|
||||
12.24763,
|
||||
13.89053,
|
||||
13.65671,
|
||||
12.01002,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"general_gaussian": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.85011,
|
||||
10.71589,
|
||||
11.73161,
|
||||
13.08516,
|
||||
12.95111,
|
||||
12.74577,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"kaiser": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.86851,
|
||||
11.02969,
|
||||
11.65161,
|
||||
12.75129,
|
||||
12.90702,
|
||||
12.83757,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"exponential": [
|
||||
np.nan,
|
||||
np.nan,
|
||||
9.83364,
|
||||
11.10472,
|
||||
11.64551,
|
||||
12.66138,
|
||||
12.92379,
|
||||
12.83770,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
}
|
||||
|
||||
xp = Series(xps[win_types_special])[::step]
|
||||
rs = (
|
||||
Series(vals)
|
||||
.rolling(5, win_type=win_types_special, center=True, step=step)
|
||||
.mean(**kwds[win_types_special])
|
||||
)
|
||||
tm.assert_series_equal(xp, rs)
|
||||
|
||||
|
||||
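
# Companion sketch (helper name is hypothetical, not from the pandas test
# suite): the aggregation keyword (here std=1.0) appears to be handed to the
# scipy window constructor, so a manual weighted mean built from
# scipy.signal.get_window with the same parameter agrees with pandas.
def _sketch_special_window_kwarg_forwarding():
    from scipy.signal import get_window

    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81])
    weights = get_window(("gaussian", 1.0), 5, fftbins=False)
    manual = np.dot(weights, vals) / weights.sum()
    result = Series(vals).rolling(5, win_type="gaussian", center=True).mean(std=1.0)
    assert np.isclose(result.iloc[2], manual)

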
def test_cmov_window_special_linear_range(win_types_special, step):
    # GH 8238
    pytest.importorskip("scipy")
    kwds = {
        "kaiser": {"beta": 1.0},
        "gaussian": {"std": 1.0},
        "general_gaussian": {"p": 2.0, "sig": 2.0},
        "slepian": {"width": 0.5},
        "exponential": {"tau": 10},
    }

    vals = np.array(range(10), dtype=float)
    xp = vals.copy()
    xp[:2] = np.nan
    xp[-2:] = np.nan
    xp = Series(xp)[::step]

    rs = (
        Series(vals)
        .rolling(5, win_type=win_types_special, center=True, step=step)
        .mean(**kwds[win_types_special])
    )
    tm.assert_series_equal(xp, rs)


def test_weighted_var_big_window_no_segfault(win_types, center):
    # GitHub Issue #46772
    pytest.importorskip("scipy")
    x = Series(0)
    result = x.rolling(window=16, center=center, win_type=win_types).var()
    expected = Series(np.nan)

    tm.assert_series_equal(result, expected)


def test_rolling_center_axis_1():
    pytest.importorskip("scipy")
    df = DataFrame(
        {"a": [1, 1, 0, 0, 0, 1], "b": [1, 0, 0, 1, 0, 0], "c": [1, 0, 0, 1, 0, 1]}
    )

    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.rolling(window=3, axis=1, win_type="boxcar", center=True).sum()

    expected = DataFrame(
        {"a": [np.nan] * 6, "b": [3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "c": [np.nan] * 6}
    )

    tm.assert_frame_equal(result, expected, check_dtype=True)