2024-12-04 13:35:57 +05:00
parent d346bf4b2a
commit 73ce681a55
7059 changed files with 1196501 additions and 0 deletions


@@ -0,0 +1,146 @@
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
Series,
bdate_range,
)
@pytest.fixture(params=[True, False])
def raw(request):
"""raw keyword argument for rolling.apply"""
return request.param
@pytest.fixture(
params=[
"sum",
"mean",
"median",
"max",
"min",
"var",
"std",
"kurt",
"skew",
"count",
"sem",
]
)
def arithmetic_win_operators(request):
    """Names of the arithmetic reductions available on rolling/expanding windows."""
    return request.param
@pytest.fixture(params=[True, False])
def center(request):
    """center keyword argument for rolling and expanding windows."""
    return request.param
@pytest.fixture(params=[None, 1])
def min_periods(request):
    """min_periods keyword argument for rolling and expanding windows."""
    return request.param
@pytest.fixture(params=[True, False])
def parallel(request):
"""parallel keyword argument for numba.jit"""
return request.param
# Can parameterize nogil & nopython over True | False, but limiting per
# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472
@pytest.fixture(params=[False])
def nogil(request):
"""nogil keyword argument for numba.jit"""
return request.param
@pytest.fixture(params=[True])
def nopython(request):
"""nopython keyword argument for numba.jit"""
return request.param
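# A hedged sketch (illustrative only, not a fixture) of how the numba fixtures
# above are typically combined: nopython/nogil/parallel are passed together as
# engine_kwargs to a windowed apply. Assumes numba is installed.
#
#   engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
#   Series(np.arange(10.0)).rolling(3).apply(
#       lambda x: x.sum(), raw=True, engine="numba", engine_kwargs=engine_kwargs
#   )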
@pytest.fixture(params=[True, False])
def adjust(request):
"""adjust keyword argument for ewm"""
return request.param
@pytest.fixture(params=[True, False])
def ignore_na(request):
"""ignore_na keyword argument for ewm"""
return request.param
@pytest.fixture(params=[True, False])
def numeric_only(request):
"""numeric_only keyword argument"""
return request.param
@pytest.fixture(
params=[
pytest.param("numba", marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]),
"cython",
]
)
def engine(request):
"""engine keyword argument for rolling.apply"""
return request.param
@pytest.fixture(
params=[
pytest.param(
("numba", True), marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]
),
("cython", True),
("cython", False),
]
)
def engine_and_raw(request):
"""engine and raw keyword arguments for rolling.apply"""
return request.param
@pytest.fixture(params=["1 day", timedelta(days=1), np.timedelta64(1, "D")])
def halflife_with_times(request):
"""Halflife argument for EWM when times is specified."""
return request.param
@pytest.fixture
def series():
"""Make mocked series as fixture."""
arr = np.random.default_rng(2).standard_normal(100)
locs = np.arange(20, 40)
arr[locs] = np.nan
series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100))
return series
@pytest.fixture
def frame():
"""Make mocked frame as fixture."""
return DataFrame(
np.random.default_rng(2).standard_normal((100, 10)),
index=bdate_range(datetime(2009, 1, 1), periods=100),
)
@pytest.fixture(params=[None, 1, 2, 5, 10])
def step(request):
"""step keyword argument for rolling window operations."""
return request.param
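# Hedged sketch (hypothetical test, shown only to illustrate how the fixtures
# above are consumed): pytest injects each fixture by argument name and runs the
# test once per parameter combination.
import pandas._testing as tm

def test_rolling_mean_matches_apply(series, step, center):
    result = series.rolling(10, step=step, center=center).mean()
    expected = series.rolling(10, step=step, center=center).apply(np.mean, raw=True)
    tm.assert_series_equal(result, expected)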


@@ -0,0 +1,72 @@
import itertools
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
notna,
)
def create_series():
return [
Series(dtype=np.float64, name="a"),
Series([np.nan] * 5),
Series([1.0] * 5),
Series(range(5, 0, -1)),
Series(range(5)),
Series([np.nan, 1.0, np.nan, 1.0, 1.0]),
Series([np.nan, 1.0, np.nan, 2.0, 3.0]),
Series([np.nan, 1.0, np.nan, 3.0, 2.0]),
]
def create_dataframes():
return [
DataFrame(columns=["a", "a"]),
DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]),
] + [DataFrame(s) for s in create_series()]
def is_constant(x):
    # a Series/DataFrame counts as "constant" if all of its non-NaN values are equal
    values = x.values.ravel("K")
    return len(set(values[notna(values)])) == 1
@pytest.fixture(
params=(
obj
for obj in itertools.chain(create_series(), create_dataframes())
if is_constant(obj)
),
)
def consistent_data(request):
    """Series/DataFrame cases from above whose non-NaN values are all equal."""
    return request.param
@pytest.fixture(params=create_series())
def series_data(request):
    """Series cases: empty, all-NaN, constant, and monotonic (with and without NaNs)."""
    return request.param
@pytest.fixture(params=itertools.chain(create_series(), create_dataframes()))
def all_data(request):
"""
Test:
- Empty Series / DataFrame
- All NaN
- All consistent value
- Monotonically decreasing
- Monotonically increasing
- Monotonically consistent with NaNs
- Monotonically increasing with NaNs
- Monotonically decreasing with NaNs
"""
return request.param
@pytest.fixture(params=[0, 2])
def min_periods(request):
return request.param


@@ -0,0 +1,243 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
concat,
)
import pandas._testing as tm
def create_mock_weights(obj, com, adjust, ignore_na):
if isinstance(obj, DataFrame):
if not len(obj.columns):
return DataFrame(index=obj.index, columns=obj.columns)
w = concat(
[
create_mock_series_weights(
obj.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na
)
for i in range(len(obj.columns))
],
axis=1,
)
w.index = obj.index
w.columns = obj.columns
return w
else:
return create_mock_series_weights(obj, com, adjust, ignore_na)
def create_mock_series_weights(s, com, adjust, ignore_na):
w = Series(np.nan, index=s.index, name=s.name)
alpha = 1.0 / (1.0 + com)
if adjust:
count = 0
for i in range(len(s)):
if s.iat[i] == s.iat[i]:
w.iat[i] = pow(1.0 / (1.0 - alpha), count)
count += 1
elif not ignore_na:
count += 1
else:
sum_wts = 0.0
prev_i = -1
count = 0
for i in range(len(s)):
if s.iat[i] == s.iat[i]:
if prev_i == -1:
w.iat[i] = 1.0
else:
w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i)
sum_wts += w.iat[i]
prev_i = count
count += 1
elif not ignore_na:
count += 1
return w
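# Worked example (a sketch, not one of the tests): for com=3.0, alpha is 0.25 and
# with adjust=True each successive non-NaN observation receives weight
# (1 / (1 - alpha)) ** i, so the most recent observation always carries the
# largest weight.
_s = Series([1.0, 2.0, 3.0, 4.0])
_w = create_mock_series_weights(_s, com=3.0, adjust=True, ignore_na=False)
# -> [1.0, 1.3333..., 1.7777..., 2.3703...] == (1 / 0.75) ** [0, 1, 2, 3]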
def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods):
com = 3.0
result = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).mean()
weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
expected = all_data.multiply(weights).cumsum().divide(weights.cumsum()).ffill()
expected[
all_data.expanding().count() < (max(min_periods, 1) if min_periods else 1)
] = np.nan
tm.assert_equal(result, expected.astype("float64"))
def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods):
com = 3.0
count_x = consistent_data.expanding().count()
mean_x = consistent_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).mean()
# check that correlation of a series with itself is either 1 or NaN
corr_x_x = consistent_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).corr(consistent_data)
exp = (
consistent_data.max()
if isinstance(consistent_data, Series)
else consistent_data.max().max()
)
# check mean of constant series
expected = consistent_data * np.nan
expected[count_x >= max(min_periods, 1)] = exp
tm.assert_equal(mean_x, expected)
# check correlation of constant series with itself is NaN
expected[:] = np.nan
tm.assert_equal(corr_x_x, expected)
def test_ewm_consistency_var_debiasing_factors(
all_data, adjust, ignore_na, min_periods
):
com = 3.0
# check variance debiasing factors
var_unbiased_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=False)
var_biased_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=True)
weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
cum_sum = weights.cumsum().ffill()
cum_sum_sq = (weights * weights).cumsum().ffill()
numerator = cum_sum * cum_sum
denominator = numerator - cum_sum_sq
denominator[denominator <= 0.0] = np.nan
var_debiasing_factors_x = numerator / denominator
tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
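# For reference, a sketch of the identity exercised above (plain numpy): with
# reliability weights w, the unbiased weighted variance equals the biased one
# times sum(w)**2 / (sum(w)**2 - sum(w**2)); for equal weights this reduces to
# the familiar n / (n - 1).
_wts = np.ones(5)
_factor = _wts.sum() ** 2 / (_wts.sum() ** 2 - (_wts**2).sum())
# -> 25 / (25 - 5) = 1.25 == 5 / 4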
@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias):
com = 3.0
mean_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).mean()
var_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=bias)
assert not (var_x < 0).any().any()
if bias:
# check that biased var(x) == mean(x^2) - mean(x)^2
mean_x2 = (
(all_data * all_data)
.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
.mean()
)
tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
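# Quick numeric check of the biased-variance identity used above, independent of
# pandas: with ddof=0, var(x) equals mean(x**2) - mean(x)**2.
_x = np.array([1.0, 2.0, 4.0, 8.0])
# np.var(_x) == np.mean(_x * _x) - np.mean(_x) ** 2 == 7.1875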
@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var_constant(
consistent_data, adjust, ignore_na, min_periods, bias
):
com = 3.0
count_x = consistent_data.expanding(min_periods=min_periods).count()
var_x = consistent_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=bias)
# check that variance of constant series is identically 0
assert not (var_x > 0).any().any()
expected = consistent_data * np.nan
expected[count_x >= max(min_periods, 1)] = 0.0
if not bias:
expected[count_x < 2] = np.nan
tm.assert_equal(var_x, expected)
@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
com = 3.0
var_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=bias)
assert not (var_x < 0).any().any()
std_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).std(bias=bias)
assert not (std_x < 0).any().any()
# check that var(x) == std(x)^2
tm.assert_equal(var_x, std_x * std_x)
cov_x_x = all_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).cov(all_data, bias=bias)
assert not (cov_x_x < 0).any().any()
# check that var(x) == cov(x, x)
tm.assert_equal(var_x, cov_x_x)
@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_series_cov_corr(
series_data, adjust, ignore_na, min_periods, bias
):
com = 3.0
var_x_plus_y = (
(series_data + series_data)
.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
.var(bias=bias)
)
var_x = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=bias)
var_y = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).var(bias=bias)
cov_x_y = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).cov(series_data, bias=bias)
# check that cov(x, y) == (var(x+y) - var(x) -
# var(y)) / 2
tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))
# check that corr(x, y) == cov(x, y) / (std(x) *
# std(y))
corr_x_y = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).corr(series_data)
std_x = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).std(bias=bias)
std_y = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).std(bias=bias)
tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))
if bias:
# check that biased cov(x, y) == mean(x*y) -
# mean(x)*mean(y)
mean_x = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).mean()
mean_y = series_data.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).mean()
mean_x_times_y = (
(series_data * series_data)
.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
.mean()
)
tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
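# The covariance identity exercised above can be checked directly with numpy:
# for ddof=0 estimators, var(x + y) = var(x) + var(y) + 2 * cov(x, y), hence
# cov(x, y) == 0.5 * (var(x + y) - var(x) - var(y)).
_rng = np.random.default_rng(0)
_a = _rng.standard_normal(50)
_b = _rng.standard_normal(50)
# np.cov(_a, _b, ddof=0)[0, 1] == 0.5 * (np.var(_a + _b) - np.var(_a) - np.var(_b))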


@@ -0,0 +1,144 @@
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
def no_nans(x):
return x.notna().all().all()
def all_na(x):
return x.isnull().all().all()
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f):
if f is np.sum:
if not no_nans(all_data) and not (
all_na(all_data) and not all_data.empty and min_periods > 0
):
request.applymarker(
pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
)
expanding_f_result = all_data.expanding(min_periods=min_periods).sum()
expanding_apply_f_result = all_data.expanding(min_periods=min_periods).apply(
func=f, raw=True
)
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
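# The xfail above reflects plain numpy behavior: np.sum propagates NaN while
# np.nansum skips it, e.g. np.sum([1.0, np.nan, 3.0]) is nan whereas
# np.nansum([1.0, np.nan, 3.0]) is 4.0, so applying np.sum differs from the
# builtin expanding sum whenever a window contains NaN.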
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, min_periods, ddof):
var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
assert not (var_x < 0).any().any()
if ddof == 0:
# check that biased var(x) == mean(x^2) - mean(x)^2
mean_x2 = (all_data * all_data).expanding(min_periods=min_periods).mean()
mean_x = all_data.expanding(min_periods=min_periods).mean()
tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
count_x = consistent_data.expanding(min_periods=min_periods).count()
var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof)
# check that variance of constant series is identically 0
assert not (var_x > 0).any().any()
expected = consistent_data * np.nan
expected[count_x >= max(min_periods, 1)] = 0.0
if ddof == 1:
expected[count_x < 2] = np.nan
tm.assert_equal(var_x, expected)
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof):
var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
assert not (var_x < 0).any().any()
std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof)
assert not (std_x < 0).any().any()
# check that var(x) == std(x)^2
tm.assert_equal(var_x, std_x * std_x)
cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof)
assert not (cov_x_x < 0).any().any()
# check that var(x) == cov(x, x)
tm.assert_equal(var_x, cov_x_x)
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof):
var_x_plus_y = (
(series_data + series_data).expanding(min_periods=min_periods).var(ddof=ddof)
)
var_x = series_data.expanding(min_periods=min_periods).var(ddof=ddof)
var_y = series_data.expanding(min_periods=min_periods).var(ddof=ddof)
cov_x_y = series_data.expanding(min_periods=min_periods).cov(series_data, ddof=ddof)
# check that cov(x, y) == (var(x+y) - var(x) -
# var(y)) / 2
tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))
# check that corr(x, y) == cov(x, y) / (std(x) *
# std(y))
corr_x_y = series_data.expanding(min_periods=min_periods).corr(series_data)
std_x = series_data.expanding(min_periods=min_periods).std(ddof=ddof)
std_y = series_data.expanding(min_periods=min_periods).std(ddof=ddof)
tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))
if ddof == 0:
# check that biased cov(x, y) == mean(x*y) -
# mean(x)*mean(y)
mean_x = series_data.expanding(min_periods=min_periods).mean()
mean_y = series_data.expanding(min_periods=min_periods).mean()
mean_x_times_y = (
(series_data * series_data).expanding(min_periods=min_periods).mean()
)
tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
def test_expanding_consistency_mean(all_data, min_periods):
result = all_data.expanding(min_periods=min_periods).mean()
expected = (
all_data.expanding(min_periods=min_periods).sum()
/ all_data.expanding(min_periods=min_periods).count()
)
tm.assert_equal(result, expected.astype("float64"))
def test_expanding_consistency_constant(consistent_data, min_periods):
count_x = consistent_data.expanding().count()
mean_x = consistent_data.expanding(min_periods=min_periods).mean()
# check that correlation of a series with itself is either 1 or NaN
corr_x_x = consistent_data.expanding(min_periods=min_periods).corr(consistent_data)
exp = (
consistent_data.max()
if isinstance(consistent_data, Series)
else consistent_data.max().max()
)
# check mean of constant series
expected = consistent_data * np.nan
expected[count_x >= max(min_periods, 1)] = exp
tm.assert_equal(mean_x, expected)
# check correlation of constant series with itself is NaN
expected[:] = np.nan
tm.assert_equal(corr_x_x, expected)
def test_expanding_consistency_var_debiasing_factors(all_data, min_periods):
# check variance debiasing factors
var_unbiased_x = all_data.expanding(min_periods=min_periods).var()
var_biased_x = all_data.expanding(min_periods=min_periods).var(ddof=0)
var_debiasing_factors_x = all_data.expanding().count() / (
all_data.expanding().count() - 1.0
).replace(0.0, np.nan)
tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)


@@ -0,0 +1,244 @@
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
def no_nans(x):
return x.notna().all().all()
def all_na(x):
return x.isnull().all().all()
@pytest.fixture(params=[(1, 0), (5, 1)])
def rolling_consistency_cases(request):
"""window, min_periods"""
return request.param
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_rolling_apply_consistency_sum(
request, all_data, rolling_consistency_cases, center, f
):
window, min_periods = rolling_consistency_cases
if f is np.sum:
if not no_nans(all_data) and not (
all_na(all_data) and not all_data.empty and min_periods > 0
):
request.applymarker(
pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
)
rolling_f_result = all_data.rolling(
window=window, min_periods=min_periods, center=center
).sum()
rolling_apply_f_result = all_data.rolling(
window=window, min_periods=min_periods, center=center
).apply(func=f, raw=True)
tm.assert_equal(rolling_f_result, rolling_apply_f_result)
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof):
window, min_periods = rolling_consistency_cases
var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
ddof=ddof
)
assert not (var_x < 0).any().any()
if ddof == 0:
# check that biased var(x) == mean(x^2) - mean(x)^2
mean_x = all_data.rolling(
window=window, min_periods=min_periods, center=center
).mean()
mean_x2 = (
(all_data * all_data)
.rolling(window=window, min_periods=min_periods, center=center)
.mean()
)
tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(
consistent_data, rolling_consistency_cases, center, ddof
):
window, min_periods = rolling_consistency_cases
count_x = consistent_data.rolling(
window=window, min_periods=min_periods, center=center
).count()
var_x = consistent_data.rolling(
window=window, min_periods=min_periods, center=center
).var(ddof=ddof)
# check that variance of constant series is identically 0
assert not (var_x > 0).any().any()
expected = consistent_data * np.nan
expected[count_x >= max(min_periods, 1)] = 0.0
if ddof == 1:
expected[count_x < 2] = np.nan
tm.assert_equal(var_x, expected)
@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_var_std_cov(
all_data, rolling_consistency_cases, center, ddof
):
window, min_periods = rolling_consistency_cases
var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
ddof=ddof
)
assert not (var_x < 0).any().any()
std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std(
ddof=ddof
)
assert not (std_x < 0).any().any()
# check that var(x) == std(x)^2
tm.assert_equal(var_x, std_x * std_x)
cov_x_x = all_data.rolling(
window=window, min_periods=min_periods, center=center
).cov(all_data, ddof=ddof)
assert not (cov_x_x < 0).any().any()
# check that var(x) == cov(x, x)
tm.assert_equal(var_x, cov_x_x)
@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_series_cov_corr(
series_data, rolling_consistency_cases, center, ddof
):
window, min_periods = rolling_consistency_cases
var_x_plus_y = (
(series_data + series_data)
.rolling(window=window, min_periods=min_periods, center=center)
.var(ddof=ddof)
)
var_x = series_data.rolling(
window=window, min_periods=min_periods, center=center
).var(ddof=ddof)
var_y = series_data.rolling(
window=window, min_periods=min_periods, center=center
).var(ddof=ddof)
cov_x_y = series_data.rolling(
window=window, min_periods=min_periods, center=center
).cov(series_data, ddof=ddof)
# check that cov(x, y) == (var(x+y) - var(x) -
# var(y)) / 2
tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))
# check that corr(x, y) == cov(x, y) / (std(x) *
# std(y))
corr_x_y = series_data.rolling(
window=window, min_periods=min_periods, center=center
).corr(series_data)
std_x = series_data.rolling(
window=window, min_periods=min_periods, center=center
).std(ddof=ddof)
std_y = series_data.rolling(
window=window, min_periods=min_periods, center=center
).std(ddof=ddof)
tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))
if ddof == 0:
# check that biased cov(x, y) == mean(x*y) -
# mean(x)*mean(y)
mean_x = series_data.rolling(
window=window, min_periods=min_periods, center=center
).mean()
mean_y = series_data.rolling(
window=window, min_periods=min_periods, center=center
).mean()
mean_x_times_y = (
(series_data * series_data)
.rolling(window=window, min_periods=min_periods, center=center)
.mean()
)
tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center):
window, min_periods = rolling_consistency_cases
result = all_data.rolling(
window=window, min_periods=min_periods, center=center
).mean()
expected = (
all_data.rolling(window=window, min_periods=min_periods, center=center)
.sum()
.divide(
all_data.rolling(
window=window, min_periods=min_periods, center=center
).count()
)
)
tm.assert_equal(result, expected.astype("float64"))
def test_rolling_consistency_constant(
consistent_data, rolling_consistency_cases, center
):
window, min_periods = rolling_consistency_cases
count_x = consistent_data.rolling(
window=window, min_periods=min_periods, center=center
).count()
mean_x = consistent_data.rolling(
window=window, min_periods=min_periods, center=center
).mean()
# check that correlation of a series with itself is either 1 or NaN
corr_x_x = consistent_data.rolling(
window=window, min_periods=min_periods, center=center
).corr(consistent_data)
exp = (
consistent_data.max()
if isinstance(consistent_data, Series)
else consistent_data.max().max()
)
# check mean of constant series
expected = consistent_data * np.nan
expected[count_x >= max(min_periods, 1)] = exp
tm.assert_equal(mean_x, expected)
# check correlation of constant series with itself is NaN
expected[:] = np.nan
tm.assert_equal(corr_x_x, expected)
def test_rolling_consistency_var_debiasing_factors(
all_data, rolling_consistency_cases, center
):
window, min_periods = rolling_consistency_cases
# check variance debiasing factors
var_unbiased_x = all_data.rolling(
window=window, min_periods=min_periods, center=center
).var()
var_biased_x = all_data.rolling(
window=window, min_periods=min_periods, center=center
).var(ddof=0)
var_debiasing_factors_x = (
all_data.rolling(window=window, min_periods=min_periods, center=center)
.count()
.divide(
(
all_data.rolling(
window=window, min_periods=min_periods, center=center
).count()
- 1.0
).replace(0.0, np.nan)
)
)
tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)


@@ -0,0 +1,398 @@
import numpy as np
import pytest
from pandas.errors import (
DataError,
SpecificationError,
)
from pandas import (
DataFrame,
Index,
MultiIndex,
Period,
Series,
Timestamp,
concat,
date_range,
timedelta_range,
)
import pandas._testing as tm
def test_getitem(step):
frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
r = frame.rolling(window=5, step=step)
tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns)
r = frame.rolling(window=5, step=step)[1]
assert r._selected_obj.name == frame[::step].columns[1]
# technically this is allowed
r = frame.rolling(window=5, step=step)[1, 3]
tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])
r = frame.rolling(window=5, step=step)[[1, 3]]
tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])
def test_select_bad_cols():
df = DataFrame([[1, 2]], columns=["A", "B"])
g = df.rolling(window=5)
with pytest.raises(KeyError, match="Columns not found: 'C'"):
g[["C"]]
with pytest.raises(KeyError, match="^[^A]+$"):
# A should not be referenced as a bad column...
# will have to rethink regex if you change message!
g[["A", "C"]]
def test_attribute_access():
df = DataFrame([[1, 2]], columns=["A", "B"])
r = df.rolling(window=5)
tm.assert_series_equal(r.A.sum(), r["A"].sum())
msg = "'Rolling' object has no attribute 'F'"
with pytest.raises(AttributeError, match=msg):
r.F
def tests_skip_nuisance(step):
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
r = df.rolling(window=3, step=step)
result = r[["A", "B"]].sum()
expected = DataFrame(
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
columns=list("AB"),
)[::step]
tm.assert_frame_equal(result, expected)
def test_sum_object_str_raises(step):
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
r = df.rolling(window=3, step=step)
with pytest.raises(
DataError, match="Cannot aggregate non-numeric type: object|string"
):
# GH#42738, enforced in 2.0
r.sum()
def test_agg(step):
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3, step=step)
a_mean = r["A"].mean()
a_std = r["A"].std()
a_sum = r["A"].sum()
b_mean = r["B"].mean()
b_std = r["B"].std()
with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"):
result = r.aggregate([np.mean, np.std])
expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]])
tm.assert_frame_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"):
result = r.aggregate({"A": np.mean, "B": np.std})
expected = concat([a_mean, b_std], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)
result = r.aggregate({"A": ["mean", "std"]})
expected = concat([a_mean, a_std], axis=1)
expected.columns = MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
tm.assert_frame_equal(result, expected)
result = r["A"].aggregate(["mean", "sum"])
expected = concat([a_mean, a_sum], axis=1)
expected.columns = ["mean", "sum"]
tm.assert_frame_equal(result, expected)
msg = "nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
# using a dict with renaming
r.aggregate({"A": {"mean": "mean", "sum": "sum"}})
with pytest.raises(SpecificationError, match=msg):
r.aggregate(
{"A": {"mean": "mean", "sum": "sum"}, "B": {"mean2": "mean", "sum2": "sum"}}
)
result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
expected.columns = MultiIndex.from_tuples(exp_cols)
tm.assert_frame_equal(result, expected, check_like=True)
@pytest.mark.parametrize(
"func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}]
)
def test_multi_axis_1_raises(func):
# GH#46904
df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]})
msg = "Support for axis=1 in DataFrame.rolling is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
r = df.rolling(window=3, axis=1)
with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"):
r.agg(func)
def test_agg_apply(raw):
# passed lambda
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3)
a_sum = r["A"].sum()
with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|std]"):
result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw)
expected = concat([a_sum, rcustom], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)
def test_agg_consistency(step):
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3, step=step)
with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
result = r.agg([np.sum, np.mean]).columns
expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]])
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
result = r["A"].agg([np.sum, np.mean]).columns
expected = Index(["sum", "mean"])
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
result = r.agg({"A": [np.sum, np.mean]}).columns
expected = MultiIndex.from_tuples([("A", "sum"), ("A", "mean")])
tm.assert_index_equal(result, expected)
def test_agg_nested_dicts():
# API change for disallowing these types of nested dicts
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3)
msg = "nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})
expected = concat(
[r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1
)
expected.columns = MultiIndex.from_tuples(
[("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
)
with pytest.raises(SpecificationError, match=msg):
r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
with pytest.raises(SpecificationError, match=msg):
r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
def test_count_nonnumeric_types(step):
# GH12541
cols = [
"int",
"float",
"string",
"datetime",
"timedelta",
"periods",
"fl_inf",
"fl_nan",
"str_nan",
"dt_nat",
"periods_nat",
]
dt_nat_col = [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None)]
df = DataFrame(
{
"int": [1, 2, 3],
"float": [4.0, 5.0, 6.0],
"string": list("abc"),
"datetime": date_range("20170101", periods=3),
"timedelta": timedelta_range("1 s", periods=3, freq="s"),
"periods": [
Period("2012-01"),
Period("2012-02"),
Period("2012-03"),
],
"fl_inf": [1.0, 2.0, np.inf],
"fl_nan": [1.0, 2.0, np.nan],
"str_nan": ["aa", "bb", np.nan],
"dt_nat": dt_nat_col,
"periods_nat": [
Period("2012-01"),
Period("2012-02"),
Period(None),
],
},
columns=cols,
)
expected = DataFrame(
{
"int": [1.0, 2.0, 2.0],
"float": [1.0, 2.0, 2.0],
"string": [1.0, 2.0, 2.0],
"datetime": [1.0, 2.0, 2.0],
"timedelta": [1.0, 2.0, 2.0],
"periods": [1.0, 2.0, 2.0],
"fl_inf": [1.0, 2.0, 2.0],
"fl_nan": [1.0, 2.0, 1.0],
"str_nan": [1.0, 2.0, 1.0],
"dt_nat": [1.0, 2.0, 1.0],
"periods_nat": [1.0, 2.0, 1.0],
},
columns=cols,
)[::step]
result = df.rolling(window=2, min_periods=0, step=step).count()
tm.assert_frame_equal(result, expected)
result = df.rolling(1, min_periods=0, step=step).count()
expected = df.notna().astype(float)[::step]
tm.assert_frame_equal(result, expected)
def test_preserve_metadata():
# GH 10565
s = Series(np.arange(100), name="foo")
s2 = s.rolling(30).sum()
s3 = s.rolling(20).sum()
assert s2.name == "foo"
assert s3.name == "foo"
@pytest.mark.parametrize(
"func,window_size,expected_vals",
[
(
"rolling",
2,
[
[np.nan, np.nan, np.nan, np.nan],
[15.0, 20.0, 25.0, 20.0],
[25.0, 30.0, 35.0, 30.0],
[np.nan, np.nan, np.nan, np.nan],
[20.0, 30.0, 35.0, 30.0],
[35.0, 40.0, 60.0, 40.0],
[60.0, 80.0, 85.0, 80],
],
),
(
"expanding",
None,
[
[10.0, 10.0, 20.0, 20.0],
[15.0, 20.0, 25.0, 20.0],
[20.0, 30.0, 30.0, 20.0],
[10.0, 10.0, 30.0, 30.0],
[20.0, 30.0, 35.0, 30.0],
[26.666667, 40.0, 50.0, 30.0],
[40.0, 80.0, 60.0, 30.0],
],
),
],
)
def test_multiple_agg_funcs(func, window_size, expected_vals):
# GH 15072
df = DataFrame(
[
["A", 10, 20],
["A", 20, 30],
["A", 30, 40],
["B", 10, 30],
["B", 30, 40],
["B", 40, 80],
["B", 80, 90],
],
columns=["stock", "low", "high"],
)
f = getattr(df.groupby("stock"), func)
if window_size:
window = f(window_size)
else:
window = f()
index = MultiIndex.from_tuples(
[("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
names=["stock", None],
)
columns = MultiIndex.from_tuples(
[("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
)
expected = DataFrame(expected_vals, index=index, columns=columns)
result = window.agg({"low": ["mean", "max"], "high": ["mean", "min"]})
tm.assert_frame_equal(result, expected)
def test_dont_modify_attributes_after_methods(
arithmetic_win_operators, closed, center, min_periods, step
):
# GH 39554
roll_obj = Series(range(1)).rolling(
1, center=center, closed=closed, min_periods=min_periods, step=step
)
expected = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
getattr(roll_obj, arithmetic_win_operators)()
result = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
assert result == expected
def test_centered_axis_validation(step):
# ok
msg = "The 'axis' keyword in Series.rolling is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean()
# bad axis
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean()
    # ok: axis=0 and axis=1 both work for a DataFrame (each with a deprecation warning)
df = DataFrame(np.ones((10, 10)))
msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.rolling(window=3, center=True, axis=0, step=step).mean()
msg = "Support for axis=1 in DataFrame.rolling is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.rolling(window=3, center=True, axis=1, step=step).mean()
# bad axis
msg = "No axis named 2 for object type DataFrame"
with pytest.raises(ValueError, match=msg):
(df.rolling(window=3, center=True, axis=2, step=step).mean())
def test_rolling_min_min_periods(step):
a = Series([1, 2, 3, 4, 5])
result = a.rolling(window=100, min_periods=1, step=step).min()
expected = Series(np.ones(len(a)))[::step]
tm.assert_series_equal(result, expected)
msg = "min_periods 5 must be <= window 3"
with pytest.raises(ValueError, match=msg):
Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).min()
def test_rolling_max_min_periods(step):
a = Series([1, 2, 3, 4, 5], dtype=np.float64)
result = a.rolling(window=100, min_periods=1, step=step).max()
expected = a[::step]
tm.assert_almost_equal(result, expected)
msg = "min_periods 5 must be <= window 3"
with pytest.raises(ValueError, match=msg):
Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).max()


@@ -0,0 +1,328 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
Timestamp,
concat,
date_range,
isna,
notna,
)
import pandas._testing as tm
from pandas.tseries import offsets
# suppress warnings about empty slices, as we are deliberately testing
# with a 0-length Series
pytestmark = pytest.mark.filterwarnings(
"ignore:.*(empty slice|0 for slice).*:RuntimeWarning"
)
def f(x):
    # helper used throughout this module: mean of the finite values in each window
    return x[np.isfinite(x)].mean()
@pytest.mark.parametrize("bad_raw", [None, 1, 0])
def test_rolling_apply_invalid_raw(bad_raw):
with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"):
Series(range(3)).rolling(1).apply(len, raw=bad_raw)
def test_rolling_apply_out_of_bounds(engine_and_raw):
# gh-1850
engine, raw = engine_and_raw
vals = Series([1, 2, 3, 4])
result = vals.rolling(10).apply(np.sum, engine=engine, raw=raw)
assert result.isna().all()
result = vals.rolling(10, min_periods=1).apply(np.sum, engine=engine, raw=raw)
expected = Series([1, 3, 6, 10], dtype=float)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize("window", [2, "2s"])
def test_rolling_apply_with_pandas_objects(window):
# 5071
df = DataFrame(
{
"A": np.random.default_rng(2).standard_normal(5),
"B": np.random.default_rng(2).integers(0, 10, size=5),
},
index=date_range("20130101", periods=5, freq="s"),
)
    # we have an equally spaced timeseries index
# so simulate removing the first period
def f(x):
if x.index[0] == df.index[0]:
return np.nan
return x.iloc[-1]
result = df.rolling(window).apply(f, raw=False)
expected = df.iloc[2:].reindex_like(df)
tm.assert_frame_equal(result, expected)
with tm.external_error_raised(AttributeError):
df.rolling(window).apply(f, raw=True)
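# Hedged sketch of what the raw keyword changes: with raw=False the applied
# function receives each window as a Series (so f above can look at x.index),
# while raw=True passes a bare ndarray, which is why the same f raises
# AttributeError under raw=True.
#
#   s = Series(range(3), dtype="float64")
#   s.rolling(2).apply(lambda x: isinstance(x, Series), raw=False)     # 1.0 where evaluated
#   s.rolling(2).apply(lambda x: isinstance(x, np.ndarray), raw=True)  # 1.0 where evaluated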
def test_rolling_apply(engine_and_raw, step):
engine, raw = engine_and_raw
expected = Series([], dtype="float64")
result = expected.rolling(10, step=step).apply(
lambda x: x.mean(), engine=engine, raw=raw
)
tm.assert_series_equal(result, expected)
# gh-8080
s = Series([None, None, None])
result = s.rolling(2, min_periods=0, step=step).apply(
lambda x: len(x), engine=engine, raw=raw
)
expected = Series([1.0, 2.0, 2.0])[::step]
tm.assert_series_equal(result, expected)
result = s.rolling(2, min_periods=0, step=step).apply(len, engine=engine, raw=raw)
tm.assert_series_equal(result, expected)
def test_all_apply(engine_and_raw):
engine, raw = engine_and_raw
df = (
DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
).set_index("A")
* 2
)
er = df.rolling(window=1)
r = df.rolling(window="1s")
result = r.apply(lambda x: 1, engine=engine, raw=raw)
expected = er.apply(lambda x: 1, engine=engine, raw=raw)
tm.assert_frame_equal(result, expected)
def test_ragged_apply(engine_and_raw):
engine, raw = engine_and_raw
df = DataFrame({"B": range(5)})
df.index = [
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
f = lambda x: 1
result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw)
expected = df.copy()
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw)
expected = df.copy()
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw)
expected = df.copy()
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
def test_invalid_engine():
with pytest.raises(ValueError, match="engine must be either 'numba' or 'cython'"):
Series(range(1)).rolling(1).apply(lambda x: x, engine="foo")
def test_invalid_engine_kwargs_cython():
with pytest.raises(ValueError, match="cython engine does not accept engine_kwargs"):
Series(range(1)).rolling(1).apply(
lambda x: x, engine="cython", engine_kwargs={"nopython": False}
)
def test_invalid_raw_numba():
with pytest.raises(
ValueError, match="raw must be `True` when using the numba engine"
):
Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")
@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]])
def test_rolling_apply_args_kwargs(args_kwargs):
# GH 33433
def numpysum(x, par):
return np.sum(x + par)
df = DataFrame({"gr": [1, 1], "a": [1, 2]})
idx = Index(["gr", "a"])
expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx)
result = df.rolling(1).apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
tm.assert_frame_equal(result, expected)
midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None])
expected = Series([11.0, 12.0], index=midx, name="a")
gb_rolling = df.groupby("gr")["a"].rolling(1)
result = gb_rolling.apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
tm.assert_series_equal(result, expected)
def test_nans(raw):
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = obj.rolling(50, min_periods=30).apply(f, raw=raw)
tm.assert_almost_equal(result.iloc[-1], np.mean(obj[10:-10]))
# min_periods is working correctly
result = obj.rolling(20, min_periods=15).apply(f, raw=raw)
assert isna(result.iloc[23])
assert not isna(result.iloc[24])
assert not isna(result.iloc[-6])
assert isna(result.iloc[-5])
obj2 = Series(np.random.default_rng(2).standard_normal(20))
result = obj2.rolling(10, min_periods=5).apply(f, raw=raw)
assert isna(result.iloc[3])
assert notna(result.iloc[4])
result0 = obj.rolling(20, min_periods=0).apply(f, raw=raw)
result1 = obj.rolling(20, min_periods=1).apply(f, raw=raw)
tm.assert_almost_equal(result0, result1)
def test_center(raw):
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = obj.rolling(20, min_periods=15, center=True).apply(f, raw=raw)
expected = (
concat([obj, Series([np.nan] * 9)])
.rolling(20, min_periods=15)
.apply(f, raw=raw)
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)
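# (The nine padding NaNs above mirror the centering offset: for window=20,
# center=True shifts each window forward by (20 - 1) // 2 = 9 positions, so a
# trailing window over the padded series reproduces the centered result.)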
def test_series(raw, series):
result = series.rolling(50).apply(f, raw=raw)
assert isinstance(result, Series)
tm.assert_almost_equal(result.iloc[-1], np.mean(series[-50:]))
def test_frame(raw, frame):
result = frame.rolling(50).apply(f, raw=raw)
assert isinstance(result, DataFrame)
tm.assert_series_equal(
result.iloc[-1, :],
frame.iloc[-50:, :].apply(np.mean, axis=0, raw=raw),
check_names=False,
)
def test_time_rule_series(raw, series):
win = 25
minp = 10
ser = series[::2].resample("B").mean()
series_result = ser.rolling(window=win, min_periods=minp).apply(f, raw=raw)
last_date = series_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_series = series[::2].truncate(prev_date, last_date)
tm.assert_almost_equal(series_result.iloc[-1], np.mean(trunc_series))
def test_time_rule_frame(raw, frame):
win = 25
minp = 10
frm = frame[::2].resample("B").mean()
frame_result = frm.rolling(window=win, min_periods=minp).apply(f, raw=raw)
last_date = frame_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_frame = frame[::2].truncate(prev_date, last_date)
tm.assert_series_equal(
frame_result.xs(last_date),
trunc_frame.apply(np.mean, raw=raw),
check_names=False,
)
@pytest.mark.parametrize("minp", [0, 99, 100])
def test_min_periods(raw, series, minp, step):
result = series.rolling(len(series) + 1, min_periods=minp, step=step).apply(
f, raw=raw
)
expected = series.rolling(len(series), min_periods=minp, step=step).apply(
f, raw=raw
)
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
nan_mask = ~nan_mask
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
def test_center_reindex_series(raw, series):
# shifter index
s = [f"x{x:d}" for x in range(12)]
minp = 10
series_xp = (
series.reindex(list(series.index) + s)
.rolling(window=25, min_periods=minp)
.apply(f, raw=raw)
.shift(-12)
.reindex(series.index)
)
series_rs = series.rolling(window=25, min_periods=minp, center=True).apply(
f, raw=raw
)
tm.assert_series_equal(series_xp, series_rs)
def test_center_reindex_frame(raw):
# shifter index
frame = DataFrame(range(100), index=date_range("2020-01-01", freq="D", periods=100))
s = [f"x{x:d}" for x in range(12)]
minp = 10
frame_xp = (
frame.reindex(list(frame.index) + s)
.rolling(window=25, min_periods=minp)
.apply(f, raw=raw)
.shift(-12)
.reindex(frame.index)
)
frame_rs = frame.rolling(window=25, min_periods=minp, center=True).apply(f, raw=raw)
tm.assert_frame_equal(frame_xp, frame_rs)
def test_axis1(raw):
# GH 45912
df = DataFrame([1, 2])
msg = "Support for axis=1 in DataFrame.rolling is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.rolling(window=1, axis=1).apply(np.sum, raw=raw)
expected = DataFrame([1.0, 2.0])
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,519 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
concat,
date_range,
)
import pandas._testing as tm
from pandas.api.indexers import (
BaseIndexer,
FixedForwardWindowIndexer,
)
from pandas.core.indexers.objects import (
ExpandingIndexer,
FixedWindowIndexer,
VariableOffsetWindowIndexer,
)
from pandas.tseries.offsets import BusinessDay
def test_bad_get_window_bounds_signature():
class BadIndexer(BaseIndexer):
def get_window_bounds(self):
return None
indexer = BadIndexer()
with pytest.raises(ValueError, match="BadIndexer does not implement"):
Series(range(5)).rolling(indexer)
def test_expanding_indexer():
s = Series(range(10))
indexer = ExpandingIndexer()
result = s.rolling(indexer).mean()
expected = s.expanding().mean()
tm.assert_series_equal(result, expected)
def test_indexer_constructor_arg():
# Example found in computation.rst
use_expanding = [True, False, True, False, True]
df = DataFrame({"values": range(5)})
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
if self.use_expanding[i]:
start[i] = 0
end[i] = i + 1
else:
start[i] = i
end[i] = i + self.window_size
return start, end
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
result = df.rolling(indexer).sum()
expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]})
tm.assert_frame_equal(result, expected)
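# For the use_expanding mask above, get_window_bounds yields
# start = [0, 1, 0, 3, 0] and end = [1, 2, 3, 4, 5], i.e. the summed windows are
# values[0:1], [1:2], [0:3], [3:4], [0:5] -> 0, 1, 3, 3, 10, matching `expected`.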
def test_indexer_accepts_rolling_args():
df = DataFrame({"values": range(5)})
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
if (
center
and min_periods == 1
and closed == "both"
and step == 1
and i == 2
):
start[i] = 0
end[i] = num_values
else:
start[i] = i
end[i] = i + self.window_size
return start, end
indexer = CustomIndexer(window_size=1)
result = df.rolling(
indexer, center=True, min_periods=1, closed="both", step=1
).sum()
expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"func,np_func,expected,np_kwargs",
[
("count", len, [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, np.nan], {}),
("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan], {}),
(
"max",
np.max,
[2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan],
{},
),
(
"std",
np.std,
[
1.0,
1.0,
1.0,
55.71654452,
54.85739087,
53.9845657,
1.0,
1.0,
0.70710678,
np.nan,
],
{"ddof": 1},
),
(
"var",
np.var,
[
1.0,
1.0,
1.0,
3104.333333,
3009.333333,
2914.333333,
1.0,
1.0,
0.500000,
np.nan,
],
{"ddof": 1},
),
(
"median",
np.median,
[1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan],
{},
),
],
)
def test_rolling_forward_window(
frame_or_series, func, np_func, expected, np_kwargs, step
):
# GH 32865
values = np.arange(10.0)
values[5] = 100.0
indexer = FixedForwardWindowIndexer(window_size=3)
match = "Forward-looking windows can't have center=True"
with pytest.raises(ValueError, match=match):
rolling = frame_or_series(values).rolling(window=indexer, center=True)
getattr(rolling, func)()
match = "Forward-looking windows don't support setting the closed argument"
with pytest.raises(ValueError, match=match):
rolling = frame_or_series(values).rolling(window=indexer, closed="right")
getattr(rolling, func)()
rolling = frame_or_series(values).rolling(window=indexer, min_periods=2, step=step)
result = getattr(rolling, func)()
# Check that the function output matches the explicitly provided array
expected = frame_or_series(expected)[::step]
tm.assert_equal(result, expected)
# Check that the rolling function output matches applying an alternative
# function to the rolling window object
expected2 = frame_or_series(rolling.apply(lambda x: np_func(x, **np_kwargs)))
tm.assert_equal(result, expected2)
# Check that the function output matches applying an alternative function
# if min_periods isn't specified
# GH 39604: After count-min_periods deprecation, apply(lambda x: len(x))
# is equivalent to count after setting min_periods=0
min_periods = 0 if func == "count" else None
rolling3 = frame_or_series(values).rolling(window=indexer, min_periods=min_periods)
result3 = getattr(rolling3, func)()
expected3 = frame_or_series(rolling3.apply(lambda x: np_func(x, **np_kwargs)))
tm.assert_equal(result3, expected3)
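# Sketch of the forward-window semantics exercised above: with
# FixedForwardWindowIndexer(window_size=3) the window anchored at position i is
# values[i:i + 3], so the last rows run short and become NaN once fewer than
# min_periods values remain, e.g.
#
#   Series(np.arange(5.0)).rolling(
#       FixedForwardWindowIndexer(window_size=3), min_periods=1
#   ).sum()
#   -> [3.0, 6.0, 9.0, 7.0, 4.0]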
def test_rolling_forward_skewness(frame_or_series, step):
values = np.arange(10.0)
values[5] = 100.0
indexer = FixedForwardWindowIndexer(window_size=5)
rolling = frame_or_series(values).rolling(window=indexer, min_periods=3, step=step)
result = rolling.skew()
expected = frame_or_series(
[
0.0,
2.232396,
2.229508,
2.228340,
2.229091,
2.231989,
0.0,
0.0,
np.nan,
np.nan,
]
)[::step]
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"func,expected",
[
("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan]),
(
"corr",
[
1.0,
1.0,
1.0,
0.8704775290207161,
0.018229084250926637,
-0.861357304646493,
1.0,
1.0,
np.nan,
np.nan,
],
),
],
)
def test_rolling_forward_cov_corr(func, expected):
values1 = np.arange(10).reshape(-1, 1)
values2 = values1 * 2
values1[5, 0] = 100
values = np.concatenate([values1, values2], axis=1)
indexer = FixedForwardWindowIndexer(window_size=3)
rolling = DataFrame(values).rolling(window=indexer, min_periods=3)
# We are interested in checking only pairwise covariance / correlation
result = getattr(rolling, func)().loc[(slice(None), 1), 0]
result = result.reset_index(drop=True)
expected = Series(expected).reset_index(drop=True)
expected.name = result.name
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"closed,expected_data",
[
["right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]],
["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
],
)
def test_non_fixed_variable_window_indexer(closed, expected_data):
index = date_range("2020", periods=10)
df = DataFrame(range(10), index=index)
offset = BusinessDay(1)
indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
result = df.rolling(indexer, closed=closed).sum()
expected = DataFrame(expected_data, index=index)
tm.assert_frame_equal(result, expected)
def test_variableoffsetwindowindexer_not_dti():
# GH 54379
with pytest.raises(ValueError, match="index must be a DatetimeIndex."):
VariableOffsetWindowIndexer(index="foo", offset=BusinessDay(1))
def test_variableoffsetwindowindexer_not_offset():
# GH 54379
idx = date_range("2020", periods=10)
with pytest.raises(ValueError, match="offset must be a DateOffset-like object."):
VariableOffsetWindowIndexer(index=idx, offset="foo")
def test_fixed_forward_indexer_count(step):
# GH: 35579
df = DataFrame({"b": [None, None, None, 7]})
indexer = FixedForwardWindowIndexer(window_size=2)
result = df.rolling(window=indexer, min_periods=0, step=step).count()
expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})[::step]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
("end_value", "values"), [(1, [0.0, 1, 1, 3, 2]), (-1, [0.0, 1, 0, 3, 1])]
)
@pytest.mark.parametrize(("func", "args"), [("median", []), ("quantile", [0.5])])
def test_indexer_quantile_sum(end_value, values, func, args):
# GH 37153
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
if self.use_expanding[i]:
start[i] = 0
end[i] = max(i + end_value, 1)
else:
start[i] = i
end[i] = i + self.window_size
return start, end
use_expanding = [True, False, True, False, True]
df = DataFrame({"values": range(5)})
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
result = getattr(df.rolling(indexer), func)(*args)
expected = DataFrame({"values": values})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer]
)
@pytest.mark.parametrize("window_size", [1, 2, 12])
@pytest.mark.parametrize(
"df_data",
[
{"a": [1, 1], "b": [0, 1]},
{"a": [1, 2], "b": [0, 1]},
{"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))},
],
)
def test_indexers_are_reusable_after_groupby_rolling(
indexer_class, window_size, df_data
):
# GH 43267
df = DataFrame(df_data)
num_trials = 3
indexer = indexer_class(window_size=window_size)
original_window_size = indexer.window_size
for i in range(num_trials):
df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
assert indexer.window_size == original_window_size
@pytest.mark.parametrize(
"window_size, num_values, expected_start, expected_end",
[
(1, 1, [0], [1]),
(1, 2, [0, 1], [1, 2]),
(2, 1, [0], [1]),
(2, 2, [0, 1], [2, 2]),
(5, 12, range(12), list(range(5, 12)) + [12] * 5),
(12, 5, range(5), [5] * 5),
(0, 0, np.array([]), np.array([])),
(1, 0, np.array([]), np.array([])),
(0, 1, [0], [0]),
],
)
def test_fixed_forward_indexer_bounds(
window_size, num_values, expected_start, expected_end, step
):
# GH 43267
indexer = FixedForwardWindowIndexer(window_size=window_size)
start, end = indexer.get_window_bounds(num_values=num_values, step=step)
tm.assert_numpy_array_equal(
start, np.array(expected_start[::step]), check_dtype=False
)
tm.assert_numpy_array_equal(end, np.array(expected_end[::step]), check_dtype=False)
assert len(start) == len(end)
@pytest.mark.parametrize(
"df, window_size, expected",
[
(
DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}),
2,
Series(
[0, 1.5, 2.0],
index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]),
name="b",
dtype=np.float64,
),
),
(
DataFrame(
{
"b": [np.nan, 1, 2, np.nan] + list(range(4, 18)),
"a": [1] * 7 + [2] * 11,
"c": range(18),
}
),
12,
Series(
[
3.6,
3.6,
4.25,
5.0,
5.0,
5.5,
6.0,
12.0,
12.5,
13.0,
13.5,
14.0,
14.5,
15.0,
15.5,
16.0,
16.5,
17.0,
],
index=MultiIndex.from_arrays(
[[1] * 7 + [2] * 11, range(18)], names=["a", None]
),
name="b",
dtype=np.float64,
),
),
],
)
def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected):
# GH 43267
indexer = FixedForwardWindowIndexer(window_size=window_size)
result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"group_keys",
[
(1,),
(1, 2),
(2, 1),
(1, 1, 2),
(1, 2, 1),
(1, 1, 2, 2),
(1, 2, 3, 2, 3),
(1, 1, 2) * 4,
(1, 2, 3) * 5,
],
)
@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20])
def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
# GH 43267
df = DataFrame(
{
"a": np.array(list(group_keys)),
"b": np.arange(len(group_keys), dtype=np.float64) + 17,
"c": np.arange(len(group_keys), dtype=np.int64),
}
)
indexer = FixedForwardWindowIndexer(window_size=window_size)
result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum()
result.index.names = ["a", "c"]
groups = df.groupby("a")[["a", "b", "c"]]
manual = concat(
[
g.assign(
b=[
g["b"].iloc[i : i + window_size].sum(min_count=1)
for i in range(len(g))
]
)
for _, g in groups
]
)
manual = manual.set_index(["a", "c"])["b"]
tm.assert_series_equal(result, manual)
def test_unequal_start_end_bounds():
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed, step):
return np.array([1]), np.array([1, 2])
indexer = CustomIndexer()
roll = Series(1).rolling(indexer)
match = "start"
with pytest.raises(ValueError, match=match):
roll.mean()
with pytest.raises(ValueError, match=match):
next(iter(roll))
with pytest.raises(ValueError, match=match):
roll.corr(pairwise=True)
with pytest.raises(ValueError, match=match):
roll.cov(pairwise=True)
def test_unequal_bounds_to_object():
# GH 44470
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed, step):
return np.array([1]), np.array([2])
indexer = CustomIndexer()
roll = Series([1, 1]).rolling(indexer)
match = "start and end"
with pytest.raises(ValueError, match=match):
roll.mean()
with pytest.raises(ValueError, match=match):
next(iter(roll))
with pytest.raises(ValueError, match=match):
roll.corr(pairwise=True)
with pytest.raises(ValueError, match=match):
roll.cov(pairwise=True)


@@ -0,0 +1,111 @@
from functools import partial
import sys
import numpy as np
import pytest
import pandas._libs.window.aggregations as window_aggregations
from pandas import Series
import pandas._testing as tm
def _get_rolling_aggregations():
# list pairs of name and function
# each function has this signature:
# (const float64_t[:] values, ndarray[int64_t] start,
# ndarray[int64_t] end, int64_t minp) -> np.ndarray
named_roll_aggs = (
[
("roll_sum", window_aggregations.roll_sum),
("roll_mean", window_aggregations.roll_mean),
]
+ [
(f"roll_var({ddof})", partial(window_aggregations.roll_var, ddof=ddof))
for ddof in [0, 1]
]
+ [
("roll_skew", window_aggregations.roll_skew),
("roll_kurt", window_aggregations.roll_kurt),
("roll_median_c", window_aggregations.roll_median_c),
("roll_max", window_aggregations.roll_max),
("roll_min", window_aggregations.roll_min),
]
+ [
(
f"roll_quantile({quantile},{interpolation})",
partial(
window_aggregations.roll_quantile,
quantile=quantile,
interpolation=interpolation,
),
)
for quantile in [0.0001, 0.5, 0.9999]
for interpolation in window_aggregations.interpolation_types
]
+ [
(
f"roll_rank({percentile},{method},{ascending})",
partial(
window_aggregations.roll_rank,
percentile=percentile,
method=method,
ascending=ascending,
),
)
for percentile in [True, False]
for method in window_aggregations.rolling_rank_tiebreakers.keys()
for ascending in [True, False]
]
)
# unzip to a list of 2 tuples, names and functions
unzipped = list(zip(*named_roll_aggs))
return {"ids": unzipped[0], "params": unzipped[1]}
_rolling_aggregations = _get_rolling_aggregations()
@pytest.fixture(
params=_rolling_aggregations["params"], ids=_rolling_aggregations["ids"]
)
def rolling_aggregation(request):
"""Make a rolling aggregation function as fixture."""
return request.param
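# Minimal sketch (illustrative only) of calling one of these low-level
# aggregations directly, following the signature documented above
# (values, start, end, minp):
#
#   vals = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float64)
#   start = np.array([0, 1, 2], dtype=np.int64)
#   end = np.array([2, 3, 4], dtype=np.int64)
#   window_aggregations.roll_sum(vals, start, end, 0)  # -> [3.0, 5.0, 7.0]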
def test_rolling_aggregation_boundary_consistency(rolling_aggregation):
# GH-45647
minp, step, width, size, selection = 0, 1, 3, 11, [2, 7]
values = np.arange(1, 1 + size, dtype=np.float64)
end = np.arange(width, size, step, dtype=np.int64)
start = end - width
selarr = np.array(selection, dtype=np.int32)
result = Series(rolling_aggregation(values, start[selarr], end[selarr], minp))
expected = Series(rolling_aggregation(values, start, end, minp)[selarr])
tm.assert_equal(expected, result)
def test_rolling_aggregation_with_unused_elements(rolling_aggregation):
# GH-45647
minp, width = 0, 5 # width at least 4 for kurt
size = 2 * width + 5
values = np.arange(1, size + 1, dtype=np.float64)
values[width : width + 2] = sys.float_info.min
values[width + 2] = np.nan
values[width + 3 : width + 5] = sys.float_info.max
start = np.array([0, size - width], dtype=np.int64)
end = np.array([width, size], dtype=np.int64)
loc = np.array(
[j for i in range(len(start)) for j in range(start[i], end[i])],
dtype=np.int32,
)
result = Series(rolling_aggregation(values, start, end, minp))
compact_values = np.array(values[loc], dtype=np.float64)
compact_start = np.arange(0, len(start) * width, width, dtype=np.int64)
compact_end = compact_start + width
expected = Series(
rolling_aggregation(compact_values, compact_start, compact_end, minp)
)
assert np.isfinite(expected.values).all(), "Not all expected values are finite"
tm.assert_equal(expected, result)

View File

@ -0,0 +1,173 @@
import numpy as np
import pytest
from pandas.errors import DataError
from pandas.core.dtypes.common import pandas_dtype
from pandas import (
NA,
DataFrame,
Series,
)
import pandas._testing as tm
# gh-12373 : rolling functions error on float32 data
# make sure rolling functions work for different dtypes
#
# further note that we are only checking rolling for full dtype
# compliance (though both expanding and ewm inherit)
def get_dtype(dtype, coerce_int=None):
if coerce_int is False and "int" in dtype:
return None
return pandas_dtype(dtype)
@pytest.fixture(
params=[
"object",
"category",
"int8",
"int16",
"int32",
"int64",
"uint8",
"uint16",
"uint32",
"uint64",
"float16",
"float32",
"float64",
"m8[ns]",
"M8[ns]",
"datetime64[ns, UTC]",
]
)
def dtypes(request):
"""Dtypes for window tests"""
return request.param
@pytest.mark.parametrize(
"method, data, expected_data, coerce_int, min_periods",
[
("count", np.arange(5), [1, 2, 2, 2, 2], True, 0),
("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0),
("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0),
("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None),
("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None),
("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None),
("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None),
("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None),
("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None),
("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None),
("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True, None),
("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None),
("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None),
("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None),
("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None),
(
"std",
[0, 1, 2, np.nan, 4],
[np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2,
False,
None,
),
("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None),
("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None),
("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None),
("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
(
"median",
[0, 1, 2, np.nan, 4],
[np.nan, 0.5, 1.5, np.nan, np.nan],
False,
None,
),
],
)
def test_series_dtypes(
method, data, expected_data, coerce_int, dtypes, min_periods, step
):
ser = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int))
rolled = ser.rolling(2, min_periods=min_periods, step=step)
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
msg = "No numeric types to aggregate"
with pytest.raises(DataError, match=msg):
getattr(rolled, method)()
else:
result = getattr(rolled, method)()
expected = Series(expected_data, dtype="float64")[::step]
tm.assert_almost_equal(result, expected)
def test_series_nullable_int(any_signed_int_ea_dtype, step):
# GH 43016
ser = Series([0, 1, NA], dtype=any_signed_int_ea_dtype)
result = ser.rolling(2, step=step).mean()
expected = Series([np.nan, 0.5, np.nan])[::step]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"method, expected_data, min_periods",
[
("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0),
(
"max",
{0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
None,
),
(
"min",
{0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
None,
),
(
"sum",
{0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])},
None,
),
(
"mean",
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
None,
),
(
"std",
{
0: Series([np.nan] + [np.sqrt(2)] * 4),
1: Series([np.nan] + [np.sqrt(2)] * 4),
},
None,
),
(
"var",
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
None,
),
(
"median",
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
None,
),
],
)
def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step):
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
rolled = df.rolling(2, min_periods=min_periods, step=step)
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
msg = "Cannot aggregate non-numeric type"
with pytest.raises(DataError, match=msg):
getattr(rolled, method)()
else:
result = getattr(rolled, method)()
expected = DataFrame(expected_data, dtype="float64")[::step]
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,727 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Series,
date_range,
)
import pandas._testing as tm
def test_doc_string():
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
df
df.ewm(com=0.5).mean()
def test_constructor(frame_or_series):
c = frame_or_series(range(5)).ewm
# valid
c(com=0.5)
c(span=1.5)
c(alpha=0.5)
c(halflife=0.75)
c(com=0.5, span=None)
c(alpha=0.5, com=None)
c(halflife=0.75, alpha=None)
# not valid: mutually exclusive
msg = "comass, span, halflife, and alpha are mutually exclusive"
with pytest.raises(ValueError, match=msg):
c(com=0.5, alpha=0.5)
with pytest.raises(ValueError, match=msg):
c(span=1.5, halflife=0.75)
with pytest.raises(ValueError, match=msg):
c(alpha=0.5, span=1.5)
# not valid: com < 0
msg = "comass must satisfy: comass >= 0"
with pytest.raises(ValueError, match=msg):
c(com=-0.5)
# not valid: span < 1
msg = "span must satisfy: span >= 1"
with pytest.raises(ValueError, match=msg):
c(span=0.5)
# not valid: halflife <= 0
msg = "halflife must satisfy: halflife > 0"
with pytest.raises(ValueError, match=msg):
c(halflife=0)
# not valid: alpha <= 0 or alpha > 1
msg = "alpha must satisfy: 0 < alpha <= 1"
for alpha in (-0.5, 1.5):
with pytest.raises(ValueError, match=msg):
c(alpha=alpha)
def test_ewma_times_not_datetime_type():
msg = r"times must be datetime64 dtype."
with pytest.raises(ValueError, match=msg):
Series(range(5)).ewm(times=np.arange(5))
def test_ewma_times_not_same_length():
msg = "times must be the same length as the object."
with pytest.raises(ValueError, match=msg):
Series(range(5)).ewm(times=np.arange(4).astype("datetime64[ns]"))
def test_ewma_halflife_not_correct_type():
msg = "halflife must be a timedelta convertible object"
with pytest.raises(ValueError, match=msg):
Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]"))
def test_ewma_halflife_without_times(halflife_with_times):
msg = "halflife can only be a timedelta convertible argument if times is not None."
with pytest.raises(ValueError, match=msg):
Series(range(5)).ewm(halflife=halflife_with_times)
@pytest.mark.parametrize(
"times",
[
np.arange(10).astype("datetime64[D]").astype("datetime64[ns]"),
date_range("2000", freq="D", periods=10),
date_range("2000", freq="D", periods=10).tz_localize("UTC"),
],
)
@pytest.mark.parametrize("min_periods", [0, 2])
def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
halflife = halflife_with_times
data = np.arange(10.0)
data[::2] = np.nan
df = DataFrame({"A": data})
result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
tm.assert_frame_equal(result, expected)
def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
tz = tz_aware_fixture
halflife = "23 days"
times = (
DatetimeIndex(["2020-01-01", "2020-01-10T00:04:05", "2020-02-23T05:00:23"])
.tz_localize(tz)
.as_unit(unit)
)
data = np.arange(3)
df = DataFrame(data)
result = df.ewm(halflife=halflife, times=times).mean()
expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
tm.assert_frame_equal(result, expected)
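# Illustrative check (not part of the original test): with irregular times and
# adjust=True, each past observation is down-weighted by 0.5 ** (elapsed / halflife)
# relative to the current timestamp, so the second expected value above is
#
#   w0 = 0.5 ** ((9 days + 4 min 5 s) / (23 days))   # ~0.7624
#   (0 * w0 + 1 * 1) / (w0 + 1)                      # ~0.567416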
def test_ewm_with_nat_raises(halflife_with_times):
# GH#38535
ser = Series(range(1))
times = DatetimeIndex(["NaT"])
with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
ser.ewm(com=0.1, halflife=halflife_with_times, times=times)
def test_ewm_with_times_getitem(halflife_with_times):
# GH 40164
halflife = halflife_with_times
data = np.arange(10.0)
data[::2] = np.nan
times = date_range("2000", freq="D", periods=10)
df = DataFrame({"A": data, "B": data})
result = df.ewm(halflife=halflife, times=times)["A"].mean()
expected = df.ewm(halflife=1.0)["A"].mean()
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("arg", ["com", "halflife", "span", "alpha"])
def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na):
# GH 40164
kwargs = {arg: 1, "adjust": adjust, "ignore_na": ignore_na}
ewm = DataFrame({"A": range(1), "B": range(1)}).ewm(**kwargs)
expected = {attr: getattr(ewm, attr) for attr in ewm._attributes}
ewm_slice = ewm["A"]
result = {attr: getattr(ewm, attr) for attr in ewm_slice._attributes}
assert result == expected
def test_ewma_times_adjust_false_raises():
# GH 40098
with pytest.raises(
NotImplementedError, match="times is not supported with adjust=False."
):
Series(range(1)).ewm(
0.1, adjust=False, times=date_range("2000", freq="D", periods=1)
)
@pytest.mark.parametrize(
"func, expected",
[
[
"mean",
DataFrame(
{
0: range(5),
1: range(4, 9),
2: [7.428571, 9, 10.571429, 12.142857, 13.714286],
},
dtype=float,
),
],
[
"std",
DataFrame(
{
0: [np.nan] * 5,
1: [4.242641] * 5,
2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788],
}
),
],
[
"var",
DataFrame(
{
0: [np.nan] * 5,
1: [18.0] * 5,
2: [21.428571, 27, 33.428571, 40.714286, 48.857143],
}
),
],
],
)
def test_float_dtype_ewma(func, expected, float_numpy_dtype):
# GH#42452
df = DataFrame(
{0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype
)
msg = "Support for axis=1 in DataFrame.ewm is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
e = df.ewm(alpha=0.5, axis=1)
result = getattr(e, func)()
tm.assert_frame_equal(result, expected)
def test_times_string_col_raises():
# GH 43265
df = DataFrame(
{"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)}
)
with pytest.raises(ValueError, match="times must be datetime64"):
df.ewm(halflife="1 day", min_periods=0, times="time_col")
def test_ewm_sum_adjust_false_notimplemented():
data = Series(range(1)).ewm(com=1, adjust=False)
with pytest.raises(NotImplementedError, match="sum is not"):
data.sum()
@pytest.mark.parametrize(
"expected_data, ignore",
[[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]],
)
def test_ewm_sum(expected_data, ignore):
# xref from Numbagg tests
# https://github.com/numbagg/numbagg/blob/v0.2.1/numbagg/test/test_moving.py#L50
data = Series([10, 0, np.nan, 10])
result = data.ewm(alpha=0.5, ignore_na=ignore).sum()
expected = Series(expected_data)
tm.assert_series_equal(result, expected)
def test_ewma_adjust():
vals = Series(np.zeros(1000))
vals[5] = 1
result = vals.ewm(span=100, adjust=False).mean().sum()
assert np.abs(result - 1) < 1e-2
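# Illustrative note (not part of the original test): with adjust=False and
# alpha = 2 / (span + 1) ~ 0.0198, the single impulse at position 5 contributes
# alpha * (1 - alpha) ** k to each later output, and those weights sum to ~1
# over the remaining ~995 points, hence the 1e-2 tolerance.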
def test_ewma_cases(adjust, ignore_na):
# try adjust/ignore_na args matrix
s = Series([1.0, 2.0, 4.0, 8.0])
if adjust:
expected = Series([1.0, 1.6, 2.736842, 4.923077])
else:
expected = Series([1.0, 1.333333, 2.222222, 4.148148])
result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
tm.assert_series_equal(result, expected)
def test_ewma_nan_handling():
s = Series([1.0] + [np.nan] * 5 + [1.0])
result = s.ewm(com=5).mean()
tm.assert_series_equal(result, Series([1.0] * len(s)))
s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0])
result = s.ewm(com=5).mean()
tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4))
@pytest.mark.parametrize(
"s, adjust, ignore_na, w",
[
(
Series([np.nan, 1.0, 101.0]),
True,
False,
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
),
(
Series([np.nan, 1.0, 101.0]),
True,
True,
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
),
(
Series([np.nan, 1.0, 101.0]),
False,
False,
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))],
),
(
Series([np.nan, 1.0, 101.0]),
False,
True,
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))],
),
(
Series([1.0, np.nan, 101.0]),
True,
False,
[(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, 1.0],
),
(
Series([1.0, np.nan, 101.0]),
True,
True,
[(1.0 - (1.0 / (1.0 + 2.0))), np.nan, 1.0],
),
(
Series([1.0, np.nan, 101.0]),
False,
False,
[(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, (1.0 / (1.0 + 2.0))],
),
(
Series([1.0, np.nan, 101.0]),
False,
True,
[(1.0 - (1.0 / (1.0 + 2.0))), np.nan, (1.0 / (1.0 + 2.0))],
),
(
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
True,
False,
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan, 1.0, np.nan],
),
(
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
True,
True,
[np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan, 1.0, np.nan],
),
(
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
False,
False,
[
np.nan,
(1.0 - (1.0 / (1.0 + 2.0))) ** 3,
np.nan,
np.nan,
(1.0 / (1.0 + 2.0)),
np.nan,
],
),
(
Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
False,
True,
[
np.nan,
(1.0 - (1.0 / (1.0 + 2.0))),
np.nan,
np.nan,
(1.0 / (1.0 + 2.0)),
np.nan,
],
),
(
Series([1.0, np.nan, 101.0, 50.0]),
True,
False,
[
(1.0 - (1.0 / (1.0 + 2.0))) ** 3,
np.nan,
(1.0 - (1.0 / (1.0 + 2.0))),
1.0,
],
),
(
Series([1.0, np.nan, 101.0, 50.0]),
True,
True,
[
(1.0 - (1.0 / (1.0 + 2.0))) ** 2,
np.nan,
(1.0 - (1.0 / (1.0 + 2.0))),
1.0,
],
),
(
Series([1.0, np.nan, 101.0, 50.0]),
False,
False,
[
(1.0 - (1.0 / (1.0 + 2.0))) ** 3,
np.nan,
(1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)),
(1.0 / (1.0 + 2.0))
* ((1.0 - (1.0 / (1.0 + 2.0))) ** 2 + (1.0 / (1.0 + 2.0))),
],
),
(
Series([1.0, np.nan, 101.0, 50.0]),
False,
True,
[
(1.0 - (1.0 / (1.0 + 2.0))) ** 2,
np.nan,
(1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)),
(1.0 / (1.0 + 2.0)),
],
),
],
)
def test_ewma_nan_handling_cases(s, adjust, ignore_na, w):
# GH 7603
expected = (s.multiply(w).cumsum() / Series(w).cumsum()).ffill()
result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
tm.assert_series_equal(result, expected)
if ignore_na is False:
# check that ignore_na defaults to False
result = s.ewm(com=2.0, adjust=adjust).mean()
tm.assert_series_equal(result, expected)
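# Illustrative note (not part of the original test): each w above gives the
# relative weight of the corresponding observation, with alpha = 1 / (1 + com)
# = 1/3, so the expected value is the cumulative weighted mean
# sum(w_i * x_i) / sum(w_i). For adjust=True an observation k steps back is
# weighted by (1 - alpha) ** k, where k counts all intervening periods when
# ignore_na=False but only non-NaN observations when ignore_na=True; for
# adjust=False the newest observation instead carries weight alpha.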
def test_ewm_alpha():
# GH 10789
arr = np.random.default_rng(2).standard_normal(100)
locs = np.arange(20, 40)
arr[locs] = np.nan
s = Series(arr)
a = s.ewm(alpha=0.61722699889169674).mean()
b = s.ewm(com=0.62014947789973052).mean()
c = s.ewm(span=2.240298955799461).mean()
d = s.ewm(halflife=0.721792864318).mean()
tm.assert_series_equal(a, b)
tm.assert_series_equal(a, c)
tm.assert_series_equal(a, d)
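# Illustrative check (not part of the original test): the four specifications
# above reduce to the same smoothing factor,
#
#   alpha = 1 / (1 + com) = 2 / (span + 1) = 1 - exp(-ln(2) / halflife)
#
# so com = (1 - alpha) / alpha ~ 0.620149, span = 2 / alpha - 1 ~ 2.240299 and
# halflife = ln(2) / -ln(1 - alpha) ~ 0.721793 for alpha ~ 0.617227.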
def test_ewm_domain_checks():
# GH 12492
arr = np.random.default_rng(2).standard_normal(100)
locs = np.arange(20, 40)
arr[locs] = np.nan
s = Series(arr)
msg = "comass must satisfy: comass >= 0"
with pytest.raises(ValueError, match=msg):
s.ewm(com=-0.1)
s.ewm(com=0.0)
s.ewm(com=0.1)
msg = "span must satisfy: span >= 1"
with pytest.raises(ValueError, match=msg):
s.ewm(span=-0.1)
with pytest.raises(ValueError, match=msg):
s.ewm(span=0.0)
with pytest.raises(ValueError, match=msg):
s.ewm(span=0.9)
s.ewm(span=1.0)
s.ewm(span=1.1)
msg = "halflife must satisfy: halflife > 0"
with pytest.raises(ValueError, match=msg):
s.ewm(halflife=-0.1)
with pytest.raises(ValueError, match=msg):
s.ewm(halflife=0.0)
s.ewm(halflife=0.1)
msg = "alpha must satisfy: 0 < alpha <= 1"
with pytest.raises(ValueError, match=msg):
s.ewm(alpha=-0.1)
with pytest.raises(ValueError, match=msg):
s.ewm(alpha=0.0)
s.ewm(alpha=0.1)
s.ewm(alpha=1.0)
with pytest.raises(ValueError, match=msg):
s.ewm(alpha=1.1)
@pytest.mark.parametrize("method", ["mean", "std", "var"])
def test_ew_empty_series(method):
vals = Series([], dtype=np.float64)
ewm = vals.ewm(3)
result = getattr(ewm, method)()
tm.assert_almost_equal(result, vals)
@pytest.mark.parametrize("min_periods", [0, 1])
@pytest.mark.parametrize("name", ["mean", "var", "std"])
def test_ew_min_periods(min_periods, name):
# excluding NaNs correctly
arr = np.random.default_rng(2).standard_normal(50)
arr[:10] = np.nan
arr[-10:] = np.nan
s = Series(arr)
# check min_periods
# GH 7898
result = getattr(s.ewm(com=50, min_periods=2), name)()
assert result[:11].isna().all()
assert not result[11:].isna().any()
result = getattr(s.ewm(com=50, min_periods=min_periods), name)()
if name == "mean":
assert result[:10].isna().all()
assert not result[10:].isna().any()
else:
# ewm.std, ewm.var (with bias=False) require at least
# two values
assert result[:11].isna().all()
assert not result[11:].isna().any()
# check series of length 0
result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)()
tm.assert_series_equal(result, Series(dtype="float64"))
# check series of length 1
result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)()
if name == "mean":
tm.assert_series_equal(result, Series([1.0]))
else:
# ewm.std, ewm.var with bias=False require at least
# two values
tm.assert_series_equal(result, Series([np.nan]))
# pass in ints
result2 = getattr(Series(np.arange(50)).ewm(span=10), name)()
assert result2.dtype == np.float64
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov(name):
A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
B = A[2:] + np.random.default_rng(2).standard_normal(48)
A[:10] = np.nan
B.iloc[-10:] = np.nan
result = getattr(A.ewm(com=20, min_periods=5), name)(B)
assert np.isnan(result.values[:14]).all()
assert not np.isnan(result.values[14:]).any()
@pytest.mark.parametrize("min_periods", [0, 1, 2])
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov_min_periods(name, min_periods):
# GH 7898
A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
B = A[2:] + np.random.default_rng(2).standard_normal(48)
A[:10] = np.nan
B.iloc[-10:] = np.nan
result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
# binary functions (ewmcov, ewmcorr) with bias=False require at
# least two values
assert np.isnan(result.values[:11]).all()
assert not np.isnan(result.values[11:]).any()
# check series of length 0
empty = Series([], dtype=np.float64)
result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty)
tm.assert_series_equal(result, empty)
# check series of length 1
result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)(
Series([1.0])
)
tm.assert_series_equal(result, Series([np.nan]))
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_different_input_array_raise_exception(name):
A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
A[:10] = np.nan
msg = "other must be a DataFrame or Series"
    # the mismatched input triggers a plain ValueError with the message above
with pytest.raises(ValueError, match=msg):
getattr(A.ewm(com=20, min_periods=5), name)(
np.random.default_rng(2).standard_normal(50)
)
@pytest.mark.parametrize("name", ["var", "std", "mean"])
def test_ewma_series(series, name):
series_result = getattr(series.ewm(com=10), name)()
assert isinstance(series_result, Series)
@pytest.mark.parametrize("name", ["var", "std", "mean"])
def test_ewma_frame(frame, name):
frame_result = getattr(frame.ewm(com=10), name)()
assert isinstance(frame_result, DataFrame)
def test_ewma_span_com_args(series):
A = series.ewm(com=9.5).mean()
B = series.ewm(span=20).mean()
tm.assert_almost_equal(A, B)
msg = "comass, span, halflife, and alpha are mutually exclusive"
with pytest.raises(ValueError, match=msg):
series.ewm(com=9.5, span=20)
msg = "Must pass one of comass, span, halflife, or alpha"
with pytest.raises(ValueError, match=msg):
series.ewm().mean()
def test_ewma_halflife_arg(series):
A = series.ewm(com=13.932726172912965).mean()
B = series.ewm(halflife=10.0).mean()
tm.assert_almost_equal(A, B)
msg = "comass, span, halflife, and alpha are mutually exclusive"
with pytest.raises(ValueError, match=msg):
series.ewm(span=20, halflife=50)
with pytest.raises(ValueError, match=msg):
series.ewm(com=9.5, halflife=50)
with pytest.raises(ValueError, match=msg):
series.ewm(com=9.5, span=20, halflife=50)
msg = "Must pass one of comass, span, halflife, or alpha"
with pytest.raises(ValueError, match=msg):
series.ewm()
def test_ewm_alpha_arg(series):
# GH 10789
s = series
msg = "Must pass one of comass, span, halflife, or alpha"
with pytest.raises(ValueError, match=msg):
s.ewm()
msg = "comass, span, halflife, and alpha are mutually exclusive"
with pytest.raises(ValueError, match=msg):
s.ewm(com=10.0, alpha=0.5)
with pytest.raises(ValueError, match=msg):
s.ewm(span=10.0, alpha=0.5)
with pytest.raises(ValueError, match=msg):
s.ewm(halflife=10.0, alpha=0.5)
@pytest.mark.parametrize("func", ["cov", "corr"])
def test_ewm_pairwise_cov_corr(func, frame):
result = getattr(frame.ewm(span=10, min_periods=5), func)()
result = result.loc[(slice(None), 1), 5]
result.index = result.index.droplevel(1)
expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5])
tm.assert_series_equal(result, expected, check_names=False)
def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
# GH#46560
kernel = arithmetic_win_operators
df = DataFrame({"a": [1], "b": 2, "c": 3})
df["c"] = df["c"].astype(object)
ewm = df.ewm(span=2, min_periods=1)
op = getattr(ewm, kernel, None)
if op is not None:
result = op(numeric_only=numeric_only)
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
assert list(expected.columns) == columns
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
# GH#46560
df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
df["c"] = df["c"].astype(object)
arg = (df,) if use_arg else ()
ewm = df.ewm(span=2, min_periods=1)
op = getattr(ewm, kernel)
result = op(*arg, numeric_only=numeric_only)
# Compare result to op using float dtypes, dropping c when numeric_only is True
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
df2 = df[columns].astype(float)
arg2 = (df2,) if use_arg else ()
ewm2 = df2.ewm(span=2, min_periods=1)
op2 = getattr(ewm2, kernel)
expected = op2(*arg2, numeric_only=numeric_only)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
# GH#46560
kernel = arithmetic_win_operators
ser = Series([1], dtype=dtype)
ewm = ser.ewm(span=2, min_periods=1)
op = getattr(ewm, kernel, None)
if op is None:
# Nothing to test
pytest.skip("No op to test")
if numeric_only and dtype is object:
msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
with pytest.raises(NotImplementedError, match=msg):
op(numeric_only=numeric_only)
else:
result = op(numeric_only=numeric_only)
expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
# GH#46560
ser = Series([1, 2, 3], dtype=dtype)
arg = (ser,) if use_arg else ()
ewm = ser.ewm(span=2, min_periods=1)
op = getattr(ewm, kernel)
if numeric_only and dtype is object:
msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
with pytest.raises(NotImplementedError, match=msg):
op(*arg, numeric_only=numeric_only)
else:
result = op(*arg, numeric_only=numeric_only)
ser2 = ser.astype(float)
arg2 = (ser2,) if use_arg else ()
ewm2 = ser2.ewm(span=2, min_periods=1)
op2 = getattr(ewm2, kernel)
expected = op2(*arg2, numeric_only=numeric_only)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,723 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
isna,
notna,
)
import pandas._testing as tm
def test_doc_string():
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
df
df.expanding(2).sum()
def test_constructor(frame_or_series):
# GH 12669
c = frame_or_series(range(5)).expanding
# valid
c(min_periods=1)
@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
def test_constructor_invalid(frame_or_series, w):
# not valid
c = frame_or_series(range(5)).expanding
msg = "min_periods must be an integer"
with pytest.raises(ValueError, match=msg):
c(min_periods=w)
@pytest.mark.parametrize(
"expander",
[
1,
pytest.param(
"ls",
marks=pytest.mark.xfail(
reason="GH#16425 expanding with offset not supported"
),
),
],
)
def test_empty_df_expanding(expander):
# GH 15819 Verifies that datetime and integer expanding windows can be
# applied to empty DataFrames
expected = DataFrame()
result = DataFrame().expanding(expander).sum()
tm.assert_frame_equal(result, expected)
# Verifies that datetime and integer expanding windows can be applied
# to empty DataFrames with datetime index
expected = DataFrame(index=DatetimeIndex([]))
result = DataFrame(index=DatetimeIndex([])).expanding(expander).sum()
tm.assert_frame_equal(result, expected)
def test_missing_minp_zero():
# https://github.com/pandas-dev/pandas/pull/18921
# minp=0
x = Series([np.nan])
result = x.expanding(min_periods=0).sum()
expected = Series([0.0])
tm.assert_series_equal(result, expected)
# minp=1
result = x.expanding(min_periods=1).sum()
expected = Series([np.nan])
tm.assert_series_equal(result, expected)
def test_expanding_axis(axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
axis = df._get_axis_number(axis_frame)
if axis == 0:
msg = "The 'axis' keyword in DataFrame.expanding is deprecated"
expected = DataFrame(
{i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
)
else:
# axis == 1
msg = "Support for axis=1 in DataFrame.expanding is deprecated"
expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.expanding(3, axis=axis_frame).sum()
tm.assert_frame_equal(result, expected)
def test_expanding_count_with_min_periods(frame_or_series):
# GH 26996
result = frame_or_series(range(5)).expanding(min_periods=3).count()
expected = frame_or_series([np.nan, np.nan, 3.0, 4.0, 5.0])
tm.assert_equal(result, expected)
def test_expanding_count_default_min_periods_with_null_values(frame_or_series):
# GH 26996
values = [1, 2, 3, np.nan, 4, 5, 6]
expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
result = frame_or_series(values).expanding().count()
expected = frame_or_series(expected_counts)
tm.assert_equal(result, expected)
def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_series):
# GH 25857
result = frame_or_series(range(5)).expanding(min_periods=6).count()
expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan])
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"df,expected,min_periods",
[
(
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
[
({"A": [1], "B": [4]}, [0]),
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
],
3,
),
(
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
[
({"A": [1], "B": [4]}, [0]),
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
],
2,
),
(
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
[
({"A": [1], "B": [4]}, [0]),
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
],
1,
),
(DataFrame({"A": [1], "B": [4]}), [], 2),
(DataFrame(), [({}, [])], 1),
(
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
[
({"A": [1.0], "B": [np.nan]}, [0]),
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
],
3,
),
(
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
[
({"A": [1.0], "B": [np.nan]}, [0]),
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
],
2,
),
(
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
[
({"A": [1.0], "B": [np.nan]}, [0]),
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
],
1,
),
],
)
def test_iter_expanding_dataframe(df, expected, min_periods):
# GH 11704
expected = [DataFrame(values, index=index) for (values, index) in expected]
for expected, actual in zip(expected, df.expanding(min_periods)):
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize(
"ser,expected,min_periods",
[
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
(Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
(Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
(Series([], dtype="int64"), [], 2),
],
)
def test_iter_expanding_series(ser, expected, min_periods):
# GH 11704
expected = [Series(values, index=index) for (values, index) in expected]
for expected, actual in zip(expected, ser.expanding(min_periods)):
tm.assert_series_equal(actual, expected)
def test_center_invalid():
# GH 20647
df = DataFrame()
with pytest.raises(TypeError, match=".* got an unexpected keyword"):
df.expanding(center=True)
def test_expanding_sem(frame_or_series):
# GH: 26476
obj = frame_or_series([0, 1, 2])
result = obj.expanding().sem()
if isinstance(result, DataFrame):
result = Series(result[0].values)
expected = Series([np.nan] + [0.707107] * 2)
tm.assert_series_equal(result, expected)
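# Illustrative note (not part of the original test): the expected values follow
# from sem being computed here as std(ddof=1) / sqrt(count - 1), e.g. for the
# prefix [0, 1]: 0.707107 / sqrt(2 - 1) = 0.707107.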
@pytest.mark.parametrize("method", ["skew", "kurt"])
def test_expanding_skew_kurt_numerical_stability(method):
# GH: 6929
s = Series(np.random.default_rng(2).random(10))
expected = getattr(s.expanding(3), method)()
s = s + 5000
result = getattr(s.expanding(3), method)()
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("window", [1, 3, 10, 20])
@pytest.mark.parametrize("method", ["min", "max", "average"])
@pytest.mark.parametrize("pct", [True, False])
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
def test_rank(window, method, pct, ascending, test_data):
length = 20
if test_data == "default":
ser = Series(data=np.random.default_rng(2).random(length))
elif test_data == "duplicates":
ser = Series(data=np.random.default_rng(2).choice(3, length))
elif test_data == "nans":
ser = Series(
data=np.random.default_rng(2).choice(
[1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
)
)
expected = ser.expanding(window).apply(
lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
)
result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending)
tm.assert_series_equal(result, expected)
def test_expanding_corr(series):
A = series.dropna()
B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
result = A.expanding().corr(B)
rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
tm.assert_almost_equal(rolling_result, result)
def test_expanding_count(series):
result = series.expanding(min_periods=0).count()
tm.assert_almost_equal(
result, series.rolling(window=len(series), min_periods=0).count()
)
def test_expanding_quantile(series):
result = series.expanding().quantile(0.5)
rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)
tm.assert_almost_equal(result, rolling_result)
def test_expanding_cov(series):
A = series
B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
result = A.expanding().cov(B)
rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
tm.assert_almost_equal(rolling_result, result)
def test_expanding_cov_pairwise(frame):
result = frame.expanding().cov()
rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()
tm.assert_frame_equal(result, rolling_result)
def test_expanding_corr_pairwise(frame):
result = frame.expanding().corr()
rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
tm.assert_frame_equal(result, rolling_result)
@pytest.mark.parametrize(
"func,static_comp",
[
("sum", np.sum),
("mean", lambda x: np.mean(x, axis=0)),
("max", lambda x: np.max(x, axis=0)),
("min", lambda x: np.min(x, axis=0)),
],
ids=["sum", "mean", "max", "min"],
)
def test_expanding_func(func, static_comp, frame_or_series):
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
msg = "The 'axis' keyword in (Series|DataFrame).expanding is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
obj = data.expanding(min_periods=1, axis=0)
result = getattr(obj, func)()
assert isinstance(result, frame_or_series)
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
warn = None
if frame_or_series is DataFrame and static_comp is np.sum:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
expected = static_comp(data[:11])
if frame_or_series is Series:
tm.assert_almost_equal(result[10], expected)
else:
tm.assert_series_equal(result.iloc[10], expected, check_names=False)
@pytest.mark.parametrize(
"func,static_comp",
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
ids=["sum", "mean", "max", "min"],
)
def test_expanding_min_periods(func, static_comp):
ser = Series(np.random.default_rng(2).standard_normal(50))
msg = "The 'axis' keyword in Series.expanding is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = getattr(ser.expanding(min_periods=30, axis=0), func)()
assert result[:29].isna().all()
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
# min_periods is working correctly
with tm.assert_produces_warning(FutureWarning, match=msg):
result = getattr(ser.expanding(min_periods=15, axis=0), func)()
assert isna(result.iloc[13])
assert notna(result.iloc[14])
ser2 = Series(np.random.default_rng(2).standard_normal(20))
with tm.assert_produces_warning(FutureWarning, match=msg):
result = getattr(ser2.expanding(min_periods=5, axis=0), func)()
assert isna(result[3])
assert notna(result[4])
# min_periods=0
with tm.assert_produces_warning(FutureWarning, match=msg):
result0 = getattr(ser.expanding(min_periods=0, axis=0), func)()
with tm.assert_produces_warning(FutureWarning, match=msg):
result1 = getattr(ser.expanding(min_periods=1, axis=0), func)()
tm.assert_almost_equal(result0, result1)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = getattr(ser.expanding(min_periods=1, axis=0), func)()
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
def test_expanding_apply(engine_and_raw, frame_or_series):
engine, raw = engine_and_raw
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
result = data.expanding(min_periods=1).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert isinstance(result, frame_or_series)
if frame_or_series is Series:
tm.assert_almost_equal(result[9], np.mean(data[:11], axis=0))
else:
tm.assert_series_equal(
result.iloc[9], np.mean(data[:11], axis=0), check_names=False
)
def test_expanding_min_periods_apply(engine_and_raw):
engine, raw = engine_and_raw
ser = Series(np.random.default_rng(2).standard_normal(50))
result = ser.expanding(min_periods=30).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert result[:29].isna().all()
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
# min_periods is working correctly
result = ser.expanding(min_periods=15).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert isna(result.iloc[13])
assert notna(result.iloc[14])
ser2 = Series(np.random.default_rng(2).standard_normal(20))
result = ser2.expanding(min_periods=5).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
assert isna(result[3])
assert notna(result[4])
# min_periods=0
result0 = ser.expanding(min_periods=0).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
result1 = ser.expanding(min_periods=1).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
tm.assert_almost_equal(result0, result1)
result = ser.expanding(min_periods=1).apply(
lambda x: x.mean(), raw=raw, engine=engine
)
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
@pytest.mark.parametrize(
"f",
[
lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
],
)
def test_moment_functions_zero_length_pairwise(f):
df1 = DataFrame()
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
df2["a"] = df2["a"].astype("float64")
df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
df2_expected = DataFrame(
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
columns=Index(["a"], name="foo"),
dtype="float64",
)
df1_result = f(df1)
tm.assert_frame_equal(df1_result, df1_expected)
df2_result = f(df2)
tm.assert_frame_equal(df2_result, df2_expected)
@pytest.mark.parametrize(
"f",
[
lambda x: x.expanding().count(),
lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
lambda x: x.expanding(min_periods=5).max(),
lambda x: x.expanding(min_periods=5).min(),
lambda x: x.expanding(min_periods=5).sum(),
lambda x: x.expanding(min_periods=5).mean(),
lambda x: x.expanding(min_periods=5).std(),
lambda x: x.expanding(min_periods=5).var(),
lambda x: x.expanding(min_periods=5).skew(),
lambda x: x.expanding(min_periods=5).kurt(),
lambda x: x.expanding(min_periods=5).quantile(0.5),
lambda x: x.expanding(min_periods=5).median(),
lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
],
)
def test_moment_functions_zero_length(f):
# GH 8056
s = Series(dtype=np.float64)
s_expected = s
df1 = DataFrame()
df1_expected = df1
df2 = DataFrame(columns=["a"])
df2["a"] = df2["a"].astype("float64")
df2_expected = df2
s_result = f(s)
tm.assert_series_equal(s_result, s_expected)
df1_result = f(df1)
tm.assert_frame_equal(df1_result, df1_expected)
df2_result = f(df2)
tm.assert_frame_equal(df2_result, df2_expected)
def test_expanding_apply_empty_series(engine_and_raw):
engine, raw = engine_and_raw
ser = Series([], dtype=np.float64)
tm.assert_series_equal(
ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
)
def test_expanding_apply_min_periods_0(engine_and_raw):
# GH 8080
engine, raw = engine_and_raw
s = Series([None, None, None])
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
expected = Series([1.0, 2.0, 3.0])
tm.assert_series_equal(result, expected)
def test_expanding_cov_diff_index():
# GH 7512
s1 = Series([1, 2, 3], index=[0, 1, 2])
s2 = Series([1, 3], index=[0, 2])
result = s1.expanding().cov(s2)
expected = Series([None, None, 2.0])
tm.assert_series_equal(result, expected)
s2a = Series([1, None, 3], index=[0, 1, 2])
result = s1.expanding().cov(s2a)
tm.assert_series_equal(result, expected)
s1 = Series([7, 8, 10], index=[0, 1, 3])
s2 = Series([7, 9, 10], index=[0, 2, 3])
result = s1.expanding().cov(s2)
expected = Series([None, None, None, 4.5])
tm.assert_series_equal(result, expected)
def test_expanding_corr_diff_index():
# GH 7512
s1 = Series([1, 2, 3], index=[0, 1, 2])
s2 = Series([1, 3], index=[0, 2])
result = s1.expanding().corr(s2)
expected = Series([None, None, 1.0])
tm.assert_series_equal(result, expected)
s2a = Series([1, None, 3], index=[0, 1, 2])
result = s1.expanding().corr(s2a)
tm.assert_series_equal(result, expected)
s1 = Series([7, 8, 10], index=[0, 1, 3])
s2 = Series([7, 9, 10], index=[0, 2, 3])
result = s1.expanding().corr(s2)
expected = Series([None, None, None, 1.0])
tm.assert_series_equal(result, expected)
def test_expanding_cov_pairwise_diff_length():
# GH 7512
df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo"))
df1a = DataFrame(
[[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo")
)
df2 = DataFrame(
[[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo")
)
df2a = DataFrame(
[[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo")
)
# TODO: xref gh-15826
# .loc is not preserving the names
result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
expected = DataFrame(
[[-3.0, -6.0], [-5.0, -10.0]],
columns=Index(["A", "B"], name="foo"),
index=Index(["X", "Y"], name="foo"),
)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)
tm.assert_frame_equal(result4, expected)
def test_expanding_corr_pairwise_diff_length():
# GH 7512
df1 = DataFrame(
[[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
)
df1a = DataFrame(
[[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
)
df2 = DataFrame(
[[5, 6], [None, None], [2, 1]],
columns=["X", "Y"],
index=Index(range(3), name="bar"),
)
df2a = DataFrame(
[[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
)
result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
expected = DataFrame(
[[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)
tm.assert_frame_equal(result4, expected)
def test_expanding_apply_args_kwargs(engine_and_raw):
def mean_w_arg(x, const):
return np.mean(x) + const
engine, raw = engine_and_raw
df = DataFrame(np.random.default_rng(2).random((20, 3)))
expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0
result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
tm.assert_frame_equal(result, expected)
result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
tm.assert_frame_equal(result, expected)
def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
# GH#46560
kernel = arithmetic_win_operators
df = DataFrame({"a": [1], "b": 2, "c": 3})
df["c"] = df["c"].astype(object)
expanding = df.expanding()
op = getattr(expanding, kernel, None)
if op is not None:
result = op(numeric_only=numeric_only)
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
assert list(expected.columns) == columns
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
# GH#46560
df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
df["c"] = df["c"].astype(object)
arg = (df,) if use_arg else ()
expanding = df.expanding()
op = getattr(expanding, kernel)
result = op(*arg, numeric_only=numeric_only)
# Compare result to op using float dtypes, dropping c when numeric_only is True
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
df2 = df[columns].astype(float)
arg2 = (df2,) if use_arg else ()
expanding2 = df2.expanding()
op2 = getattr(expanding2, kernel)
expected = op2(*arg2, numeric_only=numeric_only)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
# GH#46560
kernel = arithmetic_win_operators
ser = Series([1], dtype=dtype)
expanding = ser.expanding()
op = getattr(expanding, kernel)
if numeric_only and dtype is object:
msg = f"Expanding.{kernel} does not implement numeric_only"
with pytest.raises(NotImplementedError, match=msg):
op(numeric_only=numeric_only)
else:
result = op(numeric_only=numeric_only)
expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
# GH#46560
ser = Series([1, 2, 3], dtype=dtype)
arg = (ser,) if use_arg else ()
expanding = ser.expanding()
op = getattr(expanding, kernel)
if numeric_only and dtype is object:
msg = f"Expanding.{kernel} does not implement numeric_only"
with pytest.raises(NotImplementedError, match=msg):
op(*arg, numeric_only=numeric_only)
else:
result = op(*arg, numeric_only=numeric_only)
ser2 = ser.astype(float)
arg2 = (ser2,) if use_arg else ()
expanding2 = ser2.expanding()
op2 = getattr(expanding2, kernel)
expected = op2(*arg2, numeric_only=numeric_only)
tm.assert_series_equal(result, expected)
def test_keyword_quantile_deprecated():
# GH #52550
ser = Series([1, 2, 3, 4])
with tm.assert_produces_warning(FutureWarning):
ser.expanding().quantile(quantile=0.5)

File diff suppressed because it is too large

View File

@ -0,0 +1,455 @@
import numpy as np
import pytest
from pandas.errors import NumbaUtilError
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
Series,
option_context,
to_datetime,
)
import pandas._testing as tm
pytestmark = pytest.mark.single_cpu
@pytest.fixture(params=["single", "table"])
def method(request):
"""method keyword in rolling/expanding/ewm constructor"""
return request.param
@pytest.fixture(
params=[
["sum", {}],
["mean", {}],
["median", {}],
["max", {}],
["min", {}],
["var", {}],
["var", {"ddof": 0}],
["std", {}],
["std", {"ddof": 0}],
]
)
def arithmetic_numba_supported_operators(request):
return request.param
@td.skip_if_no("numba")
@pytest.mark.filterwarnings("ignore")
# Filter warnings when parallel=True and the function can't be parallelized by Numba
class TestEngine:
@pytest.mark.parametrize("jit", [True, False])
def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step):
def f(x, *args):
arg_sum = 0
for arg in args:
arg_sum += arg
return np.mean(x) + arg_sum
if jit:
import numba
f = numba.jit(f)
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
args = (2,)
s = Series(range(10))
result = s.rolling(2, center=center, step=step).apply(
f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True
)
expected = s.rolling(2, center=center, step=step).apply(
f, engine="cython", args=args, raw=True
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
DataFrame(np.eye(5)),
DataFrame(
[
[5, 7, 7, 7, np.nan, np.inf, 4, 3, 3, 3],
[5, 7, 7, 7, np.nan, np.inf, 7, 3, 3, 3],
[np.nan, np.nan, 5, 6, 7, 5, 5, 5, 5, 5],
]
).T,
Series(range(5), name="foo"),
Series([20, 10, 10, np.inf, 1, 1, 2, 3]),
Series([20, 10, 10, np.nan, 10, 1, 2, 3]),
],
)
def test_numba_vs_cython_rolling_methods(
self,
data,
nogil,
parallel,
nopython,
arithmetic_numba_supported_operators,
step,
):
method, kwargs = arithmetic_numba_supported_operators
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
roll = data.rolling(3, step=step)
result = getattr(roll, method)(
engine="numba", engine_kwargs=engine_kwargs, **kwargs
)
expected = getattr(roll, method)(engine="cython", **kwargs)
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"data", [DataFrame(np.eye(5)), Series(range(5), name="foo")]
)
def test_numba_vs_cython_expanding_methods(
self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators
):
method, kwargs = arithmetic_numba_supported_operators
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
data = DataFrame(np.eye(5))
expand = data.expanding()
result = getattr(expand, method)(
engine="numba", engine_kwargs=engine_kwargs, **kwargs
)
expected = getattr(expand, method)(engine="cython", **kwargs)
tm.assert_equal(result, expected)
@pytest.mark.parametrize("jit", [True, False])
def test_cache_apply(self, jit, nogil, parallel, nopython, step):
# Test that the functions are cached correctly if we switch functions
def func_1(x):
return np.mean(x) + 4
def func_2(x):
return np.std(x) * 5
if jit:
import numba
func_1 = numba.jit(func_1)
func_2 = numba.jit(func_2)
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
roll = Series(range(10)).rolling(2, step=step)
result = roll.apply(
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
)
expected = roll.apply(func_1, engine="cython", raw=True)
tm.assert_series_equal(result, expected)
result = roll.apply(
func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True
)
expected = roll.apply(func_2, engine="cython", raw=True)
tm.assert_series_equal(result, expected)
# This run should use the cached func_1
result = roll.apply(
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
)
expected = roll.apply(func_1, engine="cython", raw=True)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"window,window_kwargs",
[
["rolling", {"window": 3, "min_periods": 0}],
["expanding", {}],
],
)
def test_dont_cache_args(
self, window, window_kwargs, nogil, parallel, nopython, method
):
# GH 42287
def add(values, x):
return np.sum(values) + x
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
df = DataFrame({"value": [0, 0, 0]})
result = getattr(df, window)(method=method, **window_kwargs).apply(
add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(1,)
)
expected = DataFrame({"value": [1.0, 1.0, 1.0]})
tm.assert_frame_equal(result, expected)
result = getattr(df, window)(method=method, **window_kwargs).apply(
add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(2,)
)
expected = DataFrame({"value": [2.0, 2.0, 2.0]})
tm.assert_frame_equal(result, expected)
def test_dont_cache_engine_kwargs(self):
        # If the user passes a different set of engine_kwargs, don't return the
        # same jitted function
nogil = False
parallel = True
nopython = True
def func(x):
return nogil + parallel + nopython
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
df = DataFrame({"value": [0, 0, 0]})
result = df.rolling(1).apply(
func, raw=True, engine="numba", engine_kwargs=engine_kwargs
)
expected = DataFrame({"value": [2.0, 2.0, 2.0]})
tm.assert_frame_equal(result, expected)
parallel = False
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
result = df.rolling(1).apply(
func, raw=True, engine="numba", engine_kwargs=engine_kwargs
)
expected = DataFrame({"value": [1.0, 1.0, 1.0]})
tm.assert_frame_equal(result, expected)
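    # Illustrative note (not part of the original test): `func` closes over
    # nogil/parallel/nopython, which numba freezes at compile time, so a stale
    # cached compilation would keep returning 2.0 after `parallel` is flipped to
    # False; the second expectation of 1.0 therefore shows the function was
    # re-jitted for the new engine_kwargs.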
@td.skip_if_no("numba")
class TestEWM:
@pytest.mark.parametrize(
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
)
@pytest.mark.parametrize("method", ["mean", "sum"])
def test_invalid_engine(self, grouper, method):
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
with pytest.raises(ValueError, match="engine must be either"):
getattr(grouper(df).ewm(com=1.0), method)(engine="foo")
@pytest.mark.parametrize(
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
)
@pytest.mark.parametrize("method", ["mean", "sum"])
def test_invalid_engine_kwargs(self, grouper, method):
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
with pytest.raises(ValueError, match="cython engine does not"):
getattr(grouper(df).ewm(com=1.0), method)(
engine="cython", engine_kwargs={"nopython": True}
)
@pytest.mark.parametrize("grouper", ["None", "groupby"])
@pytest.mark.parametrize("method", ["mean", "sum"])
def test_cython_vs_numba(
self, grouper, method, nogil, parallel, nopython, ignore_na, adjust
):
df = DataFrame({"B": range(4)})
if grouper == "None":
grouper = lambda x: x
else:
df["A"] = ["a", "b", "a", "b"]
grouper = lambda x: x.groupby("A")
if method == "sum":
adjust = True
ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs)
expected = getattr(ewm, method)(engine="cython")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("grouper", ["None", "groupby"])
def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
# GH 40951
df = DataFrame({"B": [0, 0, 1, 1, 2, 2]})
if grouper == "None":
grouper = lambda x: x
else:
grouper = lambda x: x.groupby("A")
df["A"] = ["a", "b", "a", "b", "b", "a"]
halflife = "23 days"
times = to_datetime(
[
"2020-01-01",
"2020-01-01",
"2020-01-02",
"2020-01-10",
"2020-02-23",
"2020-01-03",
]
)
ewm = grouper(df).ewm(
halflife=halflife, adjust=True, ignore_na=ignore_na, times=times
)
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
expected = ewm.mean(engine="cython")
tm.assert_frame_equal(result, expected)
@td.skip_if_no("numba")
def test_use_global_config():
def f(x):
return np.mean(x) + 2
s = Series(range(10))
with option_context("compute.use_numba", True):
result = s.rolling(2).apply(f, engine=None, raw=True)
expected = s.rolling(2).apply(f, engine="numba", raw=True)
tm.assert_series_equal(expected, result)
@td.skip_if_no("numba")
def test_invalid_kwargs_nopython():
with pytest.raises(NumbaUtilError, match="numba does not support kwargs with"):
Series(range(1)).rolling(1).apply(
lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
)
@td.skip_if_no("numba")
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore")
# Filter warnings when parallel=True and the function can't be parallelized by Numba
class TestTableMethod:
def test_table_series_valueerror(self):
def f(x):
return np.sum(x, axis=0) + 1
with pytest.raises(
ValueError, match="method='table' not applicable for Series objects."
):
Series(range(1)).rolling(1, method="table").apply(
f, engine="numba", raw=True
)
def test_table_method_rolling_methods(
self,
axis,
nogil,
parallel,
nopython,
arithmetic_numba_supported_operators,
step,
):
method, kwargs = arithmetic_numba_supported_operators
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
df = DataFrame(np.eye(3))
roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step)
if method in ("var", "std"):
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
getattr(roll_table, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
else:
roll_single = df.rolling(
2, method="single", axis=axis, min_periods=0, step=step
)
result = getattr(roll_table, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
expected = getattr(roll_single, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
tm.assert_frame_equal(result, expected)
def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step):
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
def f(x):
return np.sum(x, axis=0) + 1
df = DataFrame(np.eye(3))
result = df.rolling(
2, method="table", axis=axis, min_periods=0, step=step
).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
expected = df.rolling(
2, method="single", axis=axis, min_periods=0, step=step
).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
tm.assert_frame_equal(result, expected)
def test_table_method_rolling_weighted_mean(self, step):
def weighted_mean(x):
arr = np.ones((1, x.shape[1]))
arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum()
return arr
df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
result = df.rolling(2, method="table", min_periods=0, step=step).apply(
weighted_mean, raw=True, engine="numba"
)
expected = DataFrame(
[
[1.0, 2.0, 1.0],
[1.8, 2.0, 1.0],
[3.333333, 2.333333, 1.0],
[1.555556, 7, 1.0],
]
)[::step]
tm.assert_frame_equal(result, expected)
def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython):
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
def f(x):
return np.sum(x, axis=0) + 1
df = DataFrame(np.eye(3))
result = df.expanding(method="table", axis=axis).apply(
f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
)
expected = df.expanding(method="single", axis=axis).apply(
f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
)
tm.assert_frame_equal(result, expected)
def test_table_method_expanding_methods(
self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators
):
method, kwargs = arithmetic_numba_supported_operators
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
df = DataFrame(np.eye(3))
expand_table = df.expanding(method="table", axis=axis)
if method in ("var", "std"):
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
getattr(expand_table, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
else:
expand_single = df.expanding(method="single", axis=axis)
result = getattr(expand_table, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
expected = getattr(expand_single, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))])
@pytest.mark.parametrize("method", ["mean", "sum"])
def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython):
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
df = DataFrame(data)
result = getattr(df.ewm(com=1, method="table", axis=axis), method)(
engine_kwargs=engine_kwargs, engine="numba"
)
expected = getattr(df.ewm(com=1, method="single", axis=axis), method)(
engine_kwargs=engine_kwargs, engine="numba"
)
tm.assert_frame_equal(result, expected)
@td.skip_if_no("numba")
def test_npfunc_no_warnings():
df = DataFrame({"col1": [1, 2, 3, 4, 5]})
with tm.assert_produces_warning(False):
df.col1.rolling(2).apply(np.prod, raw=True, engine="numba")
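# Editorial note (hedged): rolling.apply only accepts engine="numba" together with
# raw=True, because the jitted function receives plain ndarray windows, e.g.
#   Series(range(10)).rolling(3).apply(np.mean, raw=True, engine="numba")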

View File

@ -0,0 +1,103 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
pytestmark = pytest.mark.single_cpu
pytest.importorskip("numba")
@pytest.mark.filterwarnings("ignore")
# Filter warnings when parallel=True and the function can't be parallelized by Numba
class TestEWM:
def test_invalid_update(self):
df = DataFrame({"a": range(5), "b": range(5)})
online_ewm = df.head(2).ewm(0.5).online()
with pytest.raises(
ValueError,
match="Must call mean with update=None first before passing update",
):
online_ewm.mean(update=df.head(1))
@pytest.mark.slow
@pytest.mark.parametrize(
"obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")]
)
def test_online_vs_non_online_mean(
self, obj, nogil, parallel, nopython, adjust, ignore_na
):
expected = obj.ewm(0.5, adjust=adjust, ignore_na=ignore_na).mean()
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
online_ewm = (
obj.head(2)
.ewm(0.5, adjust=adjust, ignore_na=ignore_na)
.online(engine_kwargs=engine_kwargs)
)
# Test resetting once
for _ in range(2):
result = online_ewm.mean()
tm.assert_equal(result, expected.head(2))
result = online_ewm.mean(update=obj.tail(3))
tm.assert_equal(result, expected.tail(3))
online_ewm.reset()
@pytest.mark.xfail(raises=NotImplementedError)
@pytest.mark.parametrize(
"obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")]
)
def test_update_times_mean(
self, obj, nogil, parallel, nopython, adjust, ignore_na, halflife_with_times
):
times = Series(
np.array(
["2020-01-01", "2020-01-05", "2020-01-07", "2020-01-17", "2020-01-21"],
dtype="datetime64[ns]",
)
)
expected = obj.ewm(
0.5,
adjust=adjust,
ignore_na=ignore_na,
times=times,
halflife=halflife_with_times,
).mean()
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
online_ewm = (
obj.head(2)
.ewm(
0.5,
adjust=adjust,
ignore_na=ignore_na,
times=times.head(2),
halflife=halflife_with_times,
)
.online(engine_kwargs=engine_kwargs)
)
# Test resetting once
for _ in range(2):
result = online_ewm.mean()
tm.assert_equal(result, expected.head(2))
result = online_ewm.mean(update=obj.tail(3), update_times=times.tail(3))
tm.assert_equal(result, expected.tail(3))
online_ewm.reset()
@pytest.mark.parametrize("method", ["aggregate", "std", "corr", "cov", "var"])
def test_ewm_notimplementederror_raises(self, method):
ser = Series(range(10))
kwargs = {}
if method == "aggregate":
kwargs["func"] = lambda x: x
with pytest.raises(NotImplementedError, match=".* is not implemented."):
getattr(ser.ewm(1).online(), method)(**kwargs)
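# Editorial sketch (hedged, assumes numba is installed): .online() returns an
# incremental EWM object; the first .mean() call consumes the initial data and
# later calls continue from that state via ``update=``, e.g.
#   ewm = Series(range(3)).ewm(com=0.5).online()
#   ewm.mean()                                     # over the first three values
#   ewm.mean(update=Series([3, 4], index=[3, 4]))  # extend with two more rows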

View File

@ -0,0 +1,445 @@
import numpy as np
import pytest
from pandas.compat import IS64
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
date_range,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort
@pytest.fixture(
params=[
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]),
DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]),
DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]),
]
)
def pairwise_frames(request):
"""Pairwise frames test_pairwise"""
return request.param
@pytest.fixture
def pairwise_target_frame():
"""Pairwise target frame for test_pairwise"""
return DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1])
@pytest.fixture
def pairwise_other_frame():
"""Pairwise other frame for test_pairwise"""
return DataFrame(
[[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]],
columns=["Y", "Z", "X"],
)
def test_rolling_cov(series):
A = series
B = A + np.random.default_rng(2).standard_normal(len(A))
result = A.rolling(window=50, min_periods=25).cov(B)
tm.assert_almost_equal(result.iloc[-1], np.cov(A[-50:], B[-50:])[0, 1])
def test_rolling_corr(series):
A = series
B = A + np.random.default_rng(2).standard_normal(len(A))
result = A.rolling(window=50, min_periods=25).corr(B)
tm.assert_almost_equal(result.iloc[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
def test_rolling_corr_bias_correction():
# test for correct bias correction
a = Series(
np.arange(20, dtype=np.float64), index=date_range("2020-01-01", periods=20)
)
b = a.copy()
a[:5] = np.nan
b[:10] = np.nan
result = a.rolling(window=len(a), min_periods=1).corr(b)
tm.assert_almost_equal(result.iloc[-1], a.corr(b))
@pytest.mark.parametrize("func", ["cov", "corr"])
def test_rolling_pairwise_cov_corr(func, frame):
result = getattr(frame.rolling(window=10, min_periods=5), func)()
result = result.loc[(slice(None), 1), 5]
result.index = result.index.droplevel(1)
expected = getattr(frame[1].rolling(window=10, min_periods=5), func)(frame[5])
tm.assert_series_equal(result, expected, check_names=False)
@pytest.mark.parametrize("method", ["corr", "cov"])
def test_flex_binary_frame(method, frame):
series = frame[1]
res = getattr(series.rolling(window=10), method)(frame)
res2 = getattr(frame.rolling(window=10), method)(series)
exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x))
tm.assert_frame_equal(res, exp)
tm.assert_frame_equal(res2, exp)
frame2 = frame.copy()
frame2 = DataFrame(
np.random.default_rng(2).standard_normal(frame2.shape),
index=frame2.index,
columns=frame2.columns,
)
res3 = getattr(frame.rolling(window=10), method)(frame2)
exp = DataFrame(
{k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame}
)
tm.assert_frame_equal(res3, exp)
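# Editorial note (hedged): the "flex" binary rules exercised above pair a Series
# with every column of a DataFrame, while two DataFrames are paired column-by-column
# by column name.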
@pytest.mark.parametrize("window", range(7))
def test_rolling_corr_with_zero_variance(window):
# GH 18430
s = Series(np.zeros(20))
other = Series(np.arange(20))
assert s.rolling(window=window).corr(other=other).isna().all()
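    # Editorial note (hedged): the result is all-NaN because one input has zero
    # variance, so the normalising standard deviation in corr's denominator is 0.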
def test_corr_sanity():
# GH 3155
df = DataFrame(
np.array(
[
[0.87024726, 0.18505595],
[0.64355431, 0.3091617],
[0.92372966, 0.50552513],
[0.00203756, 0.04520709],
[0.84780328, 0.33394331],
[0.78369152, 0.63919667],
]
)
)
res = df[0].rolling(5, center=True).corr(df[1])
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
df = DataFrame(np.random.default_rng(2).random((30, 2)))
res = df[0].rolling(5, center=True).corr(df[1])
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
def test_rolling_cov_diff_length():
# GH 7512
s1 = Series([1, 2, 3], index=[0, 1, 2])
s2 = Series([1, 3], index=[0, 2])
result = s1.rolling(window=3, min_periods=2).cov(s2)
expected = Series([None, None, 2.0])
tm.assert_series_equal(result, expected)
s2a = Series([1, None, 3], index=[0, 1, 2])
result = s1.rolling(window=3, min_periods=2).cov(s2a)
tm.assert_series_equal(result, expected)
def test_rolling_corr_diff_length():
# GH 7512
s1 = Series([1, 2, 3], index=[0, 1, 2])
s2 = Series([1, 3], index=[0, 2])
result = s1.rolling(window=3, min_periods=2).corr(s2)
expected = Series([None, None, 1.0])
tm.assert_series_equal(result, expected)
s2a = Series([1, None, 3], index=[0, 1, 2])
result = s1.rolling(window=3, min_periods=2).corr(s2a)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"f",
[
lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)),
lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)),
],
)
def test_rolling_functions_window_non_shrinkage_binary(f):
    # corr/cov return a MultiIndexed DataFrame
df = DataFrame(
[[1, 5], [3, 2], [3, 9], [-1, 0]],
columns=Index(["A", "B"], name="foo"),
index=Index(range(4), name="bar"),
)
df_expected = DataFrame(
columns=Index(["A", "B"], name="foo"),
index=MultiIndex.from_product([df.index, df.columns], names=["bar", "foo"]),
dtype="float64",
)
df_result = f(df)
tm.assert_frame_equal(df_result, df_expected)
@pytest.mark.parametrize(
"f",
[
lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)),
lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)),
],
)
def test_moment_functions_zero_length_pairwise(f):
df1 = DataFrame()
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
df2["a"] = df2["a"].astype("float64")
df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
df2_expected = DataFrame(
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
columns=Index(["a"], name="foo"),
dtype="float64",
)
df1_result = f(df1)
tm.assert_frame_equal(df1_result, df1_expected)
df2_result = f(df2)
tm.assert_frame_equal(df2_result, df2_expected)
class TestPairwise:
# GH 7738
@pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()])
def test_no_flex(self, pairwise_frames, pairwise_target_frame, f):
# DataFrame methods (which do not call flex_binary_moment())
result = f(pairwise_frames)
tm.assert_index_equal(result.index, pairwise_frames.columns)
tm.assert_index_equal(result.columns, pairwise_frames.columns)
expected = f(pairwise_target_frame)
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
@pytest.mark.parametrize(
"f",
[
lambda x: x.expanding().cov(pairwise=True),
lambda x: x.expanding().corr(pairwise=True),
lambda x: x.rolling(window=3).cov(pairwise=True),
lambda x: x.rolling(window=3).corr(pairwise=True),
lambda x: x.ewm(com=3).cov(pairwise=True),
lambda x: x.ewm(com=3).corr(pairwise=True),
],
)
def test_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
# DataFrame with itself, pairwise=True
# note that we may construct the 1st level of the MI
# in a non-monotonic way, so compare accordingly
result = f(pairwise_frames)
tm.assert_index_equal(
result.index.levels[0], pairwise_frames.index, check_names=False
)
tm.assert_index_equal(
safe_sort(result.index.levels[1]),
safe_sort(pairwise_frames.columns.unique()),
)
tm.assert_index_equal(result.columns, pairwise_frames.columns)
expected = f(pairwise_target_frame)
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
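    # Editorial note (hedged): with pairwise=True the result stacks one cov/corr
    # matrix per original row, so an (n, k) frame yields an (n * k, k) DataFrame
    # indexed by a (index, columns) MultiIndex.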
@pytest.mark.parametrize(
"f",
[
lambda x: x.expanding().cov(pairwise=False),
lambda x: x.expanding().corr(pairwise=False),
lambda x: x.rolling(window=3).cov(pairwise=False),
lambda x: x.rolling(window=3).corr(pairwise=False),
lambda x: x.ewm(com=3).cov(pairwise=False),
lambda x: x.ewm(com=3).corr(pairwise=False),
],
)
def test_no_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
# DataFrame with itself, pairwise=False
result = f(pairwise_frames)
tm.assert_index_equal(result.index, pairwise_frames.index)
tm.assert_index_equal(result.columns, pairwise_frames.columns)
expected = f(pairwise_target_frame)
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
@pytest.mark.parametrize(
"f",
[
lambda x, y: x.expanding().cov(y, pairwise=True),
lambda x, y: x.expanding().corr(y, pairwise=True),
lambda x, y: x.rolling(window=3).cov(y, pairwise=True),
# TODO: We're missing a flag somewhere in meson
pytest.param(
lambda x, y: x.rolling(window=3).corr(y, pairwise=True),
marks=pytest.mark.xfail(
not IS64, reason="Precision issues on 32 bit", strict=False
),
),
lambda x, y: x.ewm(com=3).cov(y, pairwise=True),
lambda x, y: x.ewm(com=3).corr(y, pairwise=True),
],
)
def test_pairwise_with_other(
self, pairwise_frames, pairwise_target_frame, pairwise_other_frame, f
):
# DataFrame with another DataFrame, pairwise=True
result = f(pairwise_frames, pairwise_other_frame)
tm.assert_index_equal(
result.index.levels[0], pairwise_frames.index, check_names=False
)
tm.assert_index_equal(
safe_sort(result.index.levels[1]),
safe_sort(pairwise_other_frame.columns.unique()),
)
expected = f(pairwise_target_frame, pairwise_other_frame)
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
@pytest.mark.filterwarnings("ignore:RuntimeWarning")
@pytest.mark.parametrize(
"f",
[
lambda x, y: x.expanding().cov(y, pairwise=False),
lambda x, y: x.expanding().corr(y, pairwise=False),
lambda x, y: x.rolling(window=3).cov(y, pairwise=False),
lambda x, y: x.rolling(window=3).corr(y, pairwise=False),
lambda x, y: x.ewm(com=3).cov(y, pairwise=False),
lambda x, y: x.ewm(com=3).corr(y, pairwise=False),
],
)
def test_no_pairwise_with_other(self, pairwise_frames, pairwise_other_frame, f):
# DataFrame with another DataFrame, pairwise=False
result = (
f(pairwise_frames, pairwise_other_frame)
if pairwise_frames.columns.is_unique
else None
)
if result is not None:
# we can have int and str columns
expected_index = pairwise_frames.index.union(pairwise_other_frame.index)
expected_columns = pairwise_frames.columns.union(
pairwise_other_frame.columns
)
tm.assert_index_equal(result.index, expected_index)
tm.assert_index_equal(result.columns, expected_columns)
else:
with pytest.raises(ValueError, match="'arg1' columns are not unique"):
f(pairwise_frames, pairwise_other_frame)
with pytest.raises(ValueError, match="'arg2' columns are not unique"):
f(pairwise_other_frame, pairwise_frames)
@pytest.mark.parametrize(
"f",
[
lambda x, y: x.expanding().cov(y),
lambda x, y: x.expanding().corr(y),
lambda x, y: x.rolling(window=3).cov(y),
lambda x, y: x.rolling(window=3).corr(y),
lambda x, y: x.ewm(com=3).cov(y),
lambda x, y: x.ewm(com=3).corr(y),
],
)
def test_pairwise_with_series(self, pairwise_frames, pairwise_target_frame, f):
# DataFrame with a Series
result = f(pairwise_frames, Series([1, 1, 3, 8]))
tm.assert_index_equal(result.index, pairwise_frames.index)
tm.assert_index_equal(result.columns, pairwise_frames.columns)
expected = f(pairwise_target_frame, Series([1, 1, 3, 8]))
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
result = f(Series([1, 1, 3, 8]), pairwise_frames)
tm.assert_index_equal(result.index, pairwise_frames.index)
tm.assert_index_equal(result.columns, pairwise_frames.columns)
expected = f(Series([1, 1, 3, 8]), pairwise_target_frame)
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
def test_corr_freq_memory_error(self):
# GH 31789
s = Series(range(5), index=date_range("2020", periods=5))
result = s.rolling("12h").corr(s)
expected = Series([np.nan] * 5, index=date_range("2020", periods=5))
tm.assert_series_equal(result, expected)
    def test_cov_multiindex(self):
# GH 34440
columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")])
index = range(3)
df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns)
result = df.ewm(alpha=0.1).cov()
index = MultiIndex.from_product([range(3), list("ab"), list("xy"), list("AB")])
columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")])
expected = DataFrame(
np.vstack(
(
np.full((8, 8), np.nan),
np.full((8, 8), 32.000000),
np.full((8, 8), 63.881919),
)
),
index=index,
columns=columns,
)
tm.assert_frame_equal(result, expected)
def test_multindex_columns_pairwise_func(self):
# GH 21157
columns = MultiIndex.from_arrays([["M", "N"], ["P", "Q"]], names=["a", "b"])
df = DataFrame(np.ones((5, 2)), columns=columns)
result = df.rolling(3).corr()
expected = DataFrame(
np.nan,
index=MultiIndex.from_arrays(
[
np.repeat(np.arange(5, dtype=np.int64), 2),
["M", "N"] * 5,
["P", "Q"] * 5,
],
names=[None, "a", "b"],
),
columns=columns,
)
tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large

View File

@ -0,0 +1,532 @@
from datetime import datetime
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
DatetimeIndex,
Series,
concat,
isna,
notna,
)
import pandas._testing as tm
from pandas.tseries import offsets
@pytest.mark.parametrize(
"compare_func, roll_func, kwargs",
[
[np.mean, "mean", {}],
[np.nansum, "sum", {}],
[
lambda x: np.isfinite(x).astype(float).sum(),
"count",
{},
],
[np.median, "median", {}],
[np.min, "min", {}],
[np.max, "max", {}],
[lambda x: np.std(x, ddof=1), "std", {}],
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
[lambda x: np.var(x, ddof=1), "var", {}],
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
],
)
def test_series(series, compare_func, roll_func, kwargs, step):
result = getattr(series.rolling(50, step=step), roll_func)(**kwargs)
assert isinstance(result, Series)
end = range(0, len(series), step or 1)[-1] + 1
tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
@pytest.mark.parametrize(
"compare_func, roll_func, kwargs",
[
[np.mean, "mean", {}],
[np.nansum, "sum", {}],
[
lambda x: np.isfinite(x).astype(float).sum(),
"count",
{},
],
[np.median, "median", {}],
[np.min, "min", {}],
[np.max, "max", {}],
[lambda x: np.std(x, ddof=1), "std", {}],
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
[lambda x: np.var(x, ddof=1), "var", {}],
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
],
)
def test_frame(raw, frame, compare_func, roll_func, kwargs, step):
result = getattr(frame.rolling(50, step=step), roll_func)(**kwargs)
assert isinstance(result, DataFrame)
end = range(0, len(frame), step or 1)[-1] + 1
tm.assert_series_equal(
result.iloc[-1, :],
frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
check_names=False,
)
@pytest.mark.parametrize(
"compare_func, roll_func, kwargs, minp",
[
[np.mean, "mean", {}, 10],
[np.nansum, "sum", {}, 10],
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0],
[np.median, "median", {}, 10],
[np.min, "min", {}, 10],
[np.max, "max", {}, 10],
[lambda x: np.std(x, ddof=1), "std", {}, 10],
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10],
[lambda x: np.var(x, ddof=1), "var", {}, 10],
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10],
],
)
def test_time_rule_series(series, compare_func, roll_func, kwargs, minp):
win = 25
ser = series[::2].resample("B").mean()
series_result = getattr(ser.rolling(window=win, min_periods=minp), roll_func)(
**kwargs
)
last_date = series_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_series = series[::2].truncate(prev_date, last_date)
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
@pytest.mark.parametrize(
"compare_func, roll_func, kwargs, minp",
[
[np.mean, "mean", {}, 10],
[np.nansum, "sum", {}, 10],
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0],
[np.median, "median", {}, 10],
[np.min, "min", {}, 10],
[np.max, "max", {}, 10],
[lambda x: np.std(x, ddof=1), "std", {}, 10],
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10],
[lambda x: np.var(x, ddof=1), "var", {}, 10],
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10],
],
)
def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp):
win = 25
frm = frame[::2].resample("B").mean()
frame_result = getattr(frm.rolling(window=win, min_periods=minp), roll_func)(
**kwargs
)
last_date = frame_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_frame = frame[::2].truncate(prev_date, last_date)
tm.assert_series_equal(
frame_result.xs(last_date),
trunc_frame.apply(compare_func, raw=raw),
check_names=False,
)
@pytest.mark.parametrize(
"compare_func, roll_func, kwargs",
[
[np.mean, "mean", {}],
[np.nansum, "sum", {}],
[np.median, "median", {}],
[np.min, "min", {}],
[np.max, "max", {}],
[lambda x: np.std(x, ddof=1), "std", {}],
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
[lambda x: np.var(x, ddof=1), "var", {}],
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
],
)
def test_nans(compare_func, roll_func, kwargs):
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = getattr(obj.rolling(50, min_periods=30), roll_func)(**kwargs)
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
# min_periods is working correctly
result = getattr(obj.rolling(20, min_periods=15), roll_func)(**kwargs)
assert isna(result.iloc[23])
assert not isna(result.iloc[24])
assert not isna(result.iloc[-6])
assert isna(result.iloc[-5])
obj2 = Series(np.random.default_rng(2).standard_normal(20))
result = getattr(obj2.rolling(10, min_periods=5), roll_func)(**kwargs)
assert isna(result.iloc[3])
assert notna(result.iloc[4])
if roll_func != "sum":
result0 = getattr(obj.rolling(20, min_periods=0), roll_func)(**kwargs)
result1 = getattr(obj.rolling(20, min_periods=1), roll_func)(**kwargs)
tm.assert_almost_equal(result0, result1)
def test_nans_count():
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = obj.rolling(50, min_periods=30).count()
tm.assert_almost_equal(
result.iloc[-1], np.isfinite(obj[10:-10]).astype(float).sum()
)
@pytest.mark.parametrize(
"roll_func, kwargs",
[
["mean", {}],
["sum", {}],
["median", {}],
["min", {}],
["max", {}],
["std", {}],
["std", {"ddof": 0}],
["var", {}],
["var", {"ddof": 0}],
],
)
@pytest.mark.parametrize("minp", [0, 99, 100])
def test_min_periods(series, minp, roll_func, kwargs, step):
result = getattr(
series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
)(**kwargs)
expected = getattr(
series.rolling(len(series), min_periods=minp, step=step), roll_func
)(**kwargs)
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
nan_mask = ~nan_mask
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
def test_min_periods_count(series, step):
result = series.rolling(len(series) + 1, min_periods=0, step=step).count()
expected = series.rolling(len(series), min_periods=0, step=step).count()
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
nan_mask = ~nan_mask
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
@pytest.mark.parametrize(
"roll_func, kwargs, minp",
[
["mean", {}, 15],
["sum", {}, 15],
["count", {}, 0],
["median", {}, 15],
["min", {}, 15],
["max", {}, 15],
["std", {}, 15],
["std", {"ddof": 0}, 15],
["var", {}, 15],
["var", {"ddof": 0}, 15],
],
)
def test_center(roll_func, kwargs, minp):
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(
**kwargs
)
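    # The centered result is emulated below by appending (window - 1) // 2 == 9
    # trailing NaNs, computing a trailing window, and dropping the first 9 rows.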
expected = (
getattr(
concat([obj, Series([np.nan] * 9)]).rolling(20, min_periods=minp), roll_func
)(**kwargs)
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"roll_func, kwargs, minp, fill_value",
[
["mean", {}, 10, None],
["sum", {}, 10, None],
["count", {}, 0, 0],
["median", {}, 10, None],
["min", {}, 10, None],
["max", {}, 10, None],
["std", {}, 10, None],
["std", {"ddof": 0}, 10, None],
["var", {}, 10, None],
["var", {"ddof": 0}, 10, None],
],
)
def test_center_reindex_series(series, roll_func, kwargs, minp, fill_value):
# shifter index
s = [f"x{x:d}" for x in range(12)]
series_xp = (
getattr(
series.reindex(list(series.index) + s).rolling(window=25, min_periods=minp),
roll_func,
)(**kwargs)
.shift(-12)
.reindex(series.index)
)
series_rs = getattr(
series.rolling(window=25, min_periods=minp, center=True), roll_func
)(**kwargs)
if fill_value is not None:
series_xp = series_xp.fillna(fill_value)
tm.assert_series_equal(series_xp, series_rs)
@pytest.mark.parametrize(
"roll_func, kwargs, minp, fill_value",
[
["mean", {}, 10, None],
["sum", {}, 10, None],
["count", {}, 0, 0],
["median", {}, 10, None],
["min", {}, 10, None],
["max", {}, 10, None],
["std", {}, 10, None],
["std", {"ddof": 0}, 10, None],
["var", {}, 10, None],
["var", {"ddof": 0}, 10, None],
],
)
def test_center_reindex_frame(frame, roll_func, kwargs, minp, fill_value):
# shifter index
s = [f"x{x:d}" for x in range(12)]
frame_xp = (
getattr(
frame.reindex(list(frame.index) + s).rolling(window=25, min_periods=minp),
roll_func,
)(**kwargs)
.shift(-12)
.reindex(frame.index)
)
frame_rs = getattr(
frame.rolling(window=25, min_periods=minp, center=True), roll_func
)(**kwargs)
if fill_value is not None:
frame_xp = frame_xp.fillna(fill_value)
tm.assert_frame_equal(frame_xp, frame_rs)
@pytest.mark.parametrize(
"f",
[
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).max(),
lambda x: x.rolling(window=10, min_periods=5).min(),
lambda x: x.rolling(window=10, min_periods=5).sum(),
lambda x: x.rolling(window=10, min_periods=5).mean(),
lambda x: x.rolling(window=10, min_periods=5).std(),
lambda x: x.rolling(window=10, min_periods=5).var(),
lambda x: x.rolling(window=10, min_periods=5).skew(),
lambda x: x.rolling(window=10, min_periods=5).kurt(),
lambda x: x.rolling(window=10, min_periods=5).quantile(q=0.5),
lambda x: x.rolling(window=10, min_periods=5).median(),
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False),
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True),
pytest.param(
lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(),
marks=td.skip_if_no("scipy"),
),
],
)
def test_rolling_functions_window_non_shrinkage(f):
# GH 7764
s = Series(range(4))
s_expected = Series(np.nan, index=s.index)
df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=["A", "B"])
df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
s_result = f(s)
tm.assert_series_equal(s_result, s_expected)
df_result = f(df)
tm.assert_frame_equal(df_result, df_expected)
def test_rolling_max_gh6297(step):
"""Replicate result expected in GH #6297"""
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 2 datapoints on one of the days
indices.append(datetime(1975, 1, 3, 6, 0))
series = Series(range(1, 7), index=indices)
# Use floats instead of ints as values
series = series.map(lambda x: float(x))
# Sort chronologically
series = series.sort_index()
expected = Series(
[1.0, 2.0, 6.0, 4.0, 5.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
)[::step]
x = series.resample("D").max().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
def test_rolling_max_resample(step):
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 3 datapoints on last day (4, 10, and 20)
indices.append(datetime(1975, 1, 5, 1))
indices.append(datetime(1975, 1, 5, 2))
series = Series(list(range(5)) + [10, 20], index=indices)
# Use floats instead of ints as values
series = series.map(lambda x: float(x))
# Sort chronologically
series = series.sort_index()
# Default how should be max
expected = Series(
[0.0, 1.0, 2.0, 3.0, 20.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
)[::step]
x = series.resample("D").max().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
# Now specify median (10.0)
expected = Series(
[0.0, 1.0, 2.0, 3.0, 10.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
)[::step]
x = series.resample("D").median().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
# Now specify mean (4+10+20)/3
v = (4.0 + 10.0 + 20.0) / 3.0
expected = Series(
[0.0, 1.0, 2.0, 3.0, v],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
)[::step]
x = series.resample("D").mean().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
def test_rolling_min_resample(step):
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 3 datapoints on last day (4, 10, and 20)
indices.append(datetime(1975, 1, 5, 1))
indices.append(datetime(1975, 1, 5, 2))
series = Series(list(range(5)) + [10, 20], index=indices)
# Use floats instead of ints as values
series = series.map(lambda x: float(x))
# Sort chronologically
series = series.sort_index()
# Default how should be min
expected = Series(
[0.0, 1.0, 2.0, 3.0, 4.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
)[::step]
r = series.resample("D").min().rolling(window=1, step=step)
tm.assert_series_equal(expected, r.min())
def test_rolling_median_resample():
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 3 datapoints on last day (4, 10, and 20)
indices.append(datetime(1975, 1, 5, 1))
indices.append(datetime(1975, 1, 5, 2))
series = Series(list(range(5)) + [10, 20], index=indices)
# Use floats instead of ints as values
series = series.map(lambda x: float(x))
# Sort chronologically
series = series.sort_index()
# Default how should be median
expected = Series(
[0.0, 1.0, 2.0, 3.0, 10],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
)
x = series.resample("D").median().rolling(window=1).median()
tm.assert_series_equal(expected, x)
def test_rolling_median_memory_error():
# GH11722
n = 20000
Series(np.random.default_rng(2).standard_normal(n)).rolling(
window=2, center=False
).median()
Series(np.random.default_rng(2).standard_normal(n)).rolling(
window=2, center=False
).median()
@pytest.mark.parametrize(
"data_type",
[np.dtype(f"f{width}") for width in [4, 8]]
+ [np.dtype(f"{sign}{width}") for width in [1, 2, 4, 8] for sign in "ui"],
)
def test_rolling_min_max_numeric_types(data_type):
# GH12373
# Just testing that these don't throw exceptions and that
# the return type is float64. Other tests will cover quantitative
# correctness
result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).max()
assert result.dtypes[0] == np.dtype("f8")
result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).min()
assert result.dtypes[0] == np.dtype("f8")
@pytest.mark.parametrize(
"f",
[
lambda x: x.rolling(window=10, min_periods=0).count(),
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).max(),
lambda x: x.rolling(window=10, min_periods=5).min(),
lambda x: x.rolling(window=10, min_periods=5).sum(),
lambda x: x.rolling(window=10, min_periods=5).mean(),
lambda x: x.rolling(window=10, min_periods=5).std(),
lambda x: x.rolling(window=10, min_periods=5).var(),
lambda x: x.rolling(window=10, min_periods=5).skew(),
lambda x: x.rolling(window=10, min_periods=5).kurt(),
lambda x: x.rolling(window=10, min_periods=5).quantile(0.5),
lambda x: x.rolling(window=10, min_periods=5).median(),
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False),
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True),
pytest.param(
lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(),
marks=td.skip_if_no("scipy"),
),
],
)
def test_moment_functions_zero_length(f):
# GH 8056
s = Series(dtype=np.float64)
s_expected = s
df1 = DataFrame()
df1_expected = df1
df2 = DataFrame(columns=["a"])
df2["a"] = df2["a"].astype("float64")
df2_expected = df2
s_result = f(s)
tm.assert_series_equal(s_result, s_expected)
df1_result = f(df1)
tm.assert_frame_equal(df1_result, df1_expected)
df2_result = f(df2)
tm.assert_frame_equal(df2_result, df2_expected)

View File

@ -0,0 +1,182 @@
from functools import partial
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
concat,
isna,
notna,
)
import pandas._testing as tm
from pandas.tseries import offsets
def scoreatpercentile(a, per):
    # Reference quantile by linear interpolation: ``per`` is a fraction in [0, 1]
    # and the result is interpolated between the two bracketing order statistics.
    values = np.sort(a, axis=0)
    idx = int(per / 1.0 * (values.shape[0] - 1))
    if idx == values.shape[0] - 1:
        retval = values[-1]
    else:
        qlow = idx / (values.shape[0] - 1)
        qhig = (idx + 1) / (values.shape[0] - 1)
        vlow = values[idx]
        vhig = values[idx + 1]
        retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
    return retval
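# Editorial sanity check (hedged, not part of the original tests): for linear
# interpolation this agrees with numpy, modulo the fraction vs percent convention:
#   scoreatpercentile(np.arange(5), 0.25) == np.percentile(np.arange(5), 25) == 1.0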
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_series(series, q, step):
compare_func = partial(scoreatpercentile, per=q)
result = series.rolling(50, step=step).quantile(q)
assert isinstance(result, Series)
end = range(0, len(series), step or 1)[-1] + 1
tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_frame(raw, frame, q, step):
compare_func = partial(scoreatpercentile, per=q)
result = frame.rolling(50, step=step).quantile(q)
assert isinstance(result, DataFrame)
end = range(0, len(frame), step or 1)[-1] + 1
tm.assert_series_equal(
result.iloc[-1, :],
frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
check_names=False,
)
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_time_rule_series(series, q):
compare_func = partial(scoreatpercentile, per=q)
win = 25
ser = series[::2].resample("B").mean()
series_result = ser.rolling(window=win, min_periods=10).quantile(q)
last_date = series_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_series = series[::2].truncate(prev_date, last_date)
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_time_rule_frame(raw, frame, q):
compare_func = partial(scoreatpercentile, per=q)
win = 25
frm = frame[::2].resample("B").mean()
frame_result = frm.rolling(window=win, min_periods=10).quantile(q)
last_date = frame_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_frame = frame[::2].truncate(prev_date, last_date)
tm.assert_series_equal(
frame_result.xs(last_date),
trunc_frame.apply(compare_func, raw=raw),
check_names=False,
)
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_nans(q):
compare_func = partial(scoreatpercentile, per=q)
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = obj.rolling(50, min_periods=30).quantile(q)
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
# min_periods is working correctly
result = obj.rolling(20, min_periods=15).quantile(q)
assert isna(result.iloc[23])
assert not isna(result.iloc[24])
assert not isna(result.iloc[-6])
assert isna(result.iloc[-5])
obj2 = Series(np.random.default_rng(2).standard_normal(20))
result = obj2.rolling(10, min_periods=5).quantile(q)
assert isna(result.iloc[3])
assert notna(result.iloc[4])
result0 = obj.rolling(20, min_periods=0).quantile(q)
result1 = obj.rolling(20, min_periods=1).quantile(q)
tm.assert_almost_equal(result0, result1)
@pytest.mark.parametrize("minp", [0, 99, 100])
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_min_periods(series, minp, q, step):
result = series.rolling(len(series) + 1, min_periods=minp, step=step).quantile(q)
expected = series.rolling(len(series), min_periods=minp, step=step).quantile(q)
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
nan_mask = ~nan_mask
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_center(q):
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = obj.rolling(20, center=True).quantile(q)
expected = (
concat([obj, Series([np.nan] * 9)])
.rolling(20)
.quantile(q)
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_center_reindex_series(series, q):
# shifter index
s = [f"x{x:d}" for x in range(12)]
series_xp = (
series.reindex(list(series.index) + s)
.rolling(window=25)
.quantile(q)
.shift(-12)
.reindex(series.index)
)
series_rs = series.rolling(window=25, center=True).quantile(q)
tm.assert_series_equal(series_xp, series_rs)
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_center_reindex_frame(frame, q):
# shifter index
s = [f"x{x:d}" for x in range(12)]
frame_xp = (
frame.reindex(list(frame.index) + s)
.rolling(window=25)
.quantile(q)
.shift(-12)
.reindex(frame.index)
)
frame_rs = frame.rolling(window=25, center=True).quantile(q)
tm.assert_frame_equal(frame_xp, frame_rs)
def test_keyword_quantile_deprecated():
# GH #52550
s = Series([1, 2, 3, 4])
with tm.assert_produces_warning(FutureWarning):
s.rolling(2).quantile(quantile=0.4)

View File

@ -0,0 +1,227 @@
from functools import partial
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
concat,
isna,
notna,
)
import pandas._testing as tm
from pandas.tseries import offsets
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_series(series, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
result = getattr(series.rolling(50), roll_func)()
assert isinstance(result, Series)
tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_frame(raw, frame, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
result = getattr(frame.rolling(50), roll_func)()
assert isinstance(result, DataFrame)
tm.assert_series_equal(
result.iloc[-1, :],
frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
check_names=False,
)
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_time_rule_series(series, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
win = 25
ser = series[::2].resample("B").mean()
series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)()
last_date = series_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_series = series[::2].truncate(prev_date, last_date)
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_time_rule_frame(raw, frame, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
win = 25
frm = frame[::2].resample("B").mean()
frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)()
last_date = frame_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
trunc_frame = frame[::2].truncate(prev_date, last_date)
tm.assert_series_equal(
frame_result.xs(last_date),
trunc_frame.apply(compare_func, raw=raw),
check_names=False,
)
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_nans(sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = getattr(obj.rolling(50, min_periods=30), roll_func)()
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
# min_periods is working correctly
result = getattr(obj.rolling(20, min_periods=15), roll_func)()
assert isna(result.iloc[23])
assert not isna(result.iloc[24])
assert not isna(result.iloc[-6])
assert isna(result.iloc[-5])
obj2 = Series(np.random.default_rng(2).standard_normal(20))
result = getattr(obj2.rolling(10, min_periods=5), roll_func)()
assert isna(result.iloc[3])
assert notna(result.iloc[4])
result0 = getattr(obj.rolling(20, min_periods=0), roll_func)()
result1 = getattr(obj.rolling(20, min_periods=1), roll_func)()
tm.assert_almost_equal(result0, result1)
@pytest.mark.parametrize("minp", [0, 99, 100])
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_min_periods(series, minp, roll_func, step):
result = getattr(
series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
)()
expected = getattr(
series.rolling(len(series), min_periods=minp, step=step), roll_func
)()
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
nan_mask = ~nan_mask
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_center(roll_func):
obj = Series(np.random.default_rng(2).standard_normal(50))
obj[:10] = np.nan
obj[-10:] = np.nan
result = getattr(obj.rolling(20, center=True), roll_func)()
expected = (
getattr(concat([obj, Series([np.nan] * 9)]).rolling(20), roll_func)()
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_center_reindex_series(series, roll_func):
# shifter index
s = [f"x{x:d}" for x in range(12)]
series_xp = (
getattr(
series.reindex(list(series.index) + s).rolling(window=25),
roll_func,
)()
.shift(-12)
.reindex(series.index)
)
series_rs = getattr(series.rolling(window=25, center=True), roll_func)()
tm.assert_series_equal(series_xp, series_rs)
@pytest.mark.slow
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_center_reindex_frame(frame, roll_func):
# shifter index
s = [f"x{x:d}" for x in range(12)]
frame_xp = (
getattr(
frame.reindex(list(frame.index) + s).rolling(window=25),
roll_func,
)()
.shift(-12)
.reindex(frame.index)
)
frame_rs = getattr(frame.rolling(window=25, center=True), roll_func)()
tm.assert_frame_equal(frame_xp, frame_rs)
def test_rolling_skew_edge_cases(step):
expected = Series([np.nan] * 4 + [0.0])[::step]
# yields all NaN (0 variance)
d = Series([1] * 5)
x = d.rolling(window=5, step=step).skew()
    # index 4 should be 0 as it contains 5 identical observations
tm.assert_series_equal(expected, x)
expected = Series([np.nan] * 5)[::step]
# yields all NaN (window too small)
d = Series(np.random.default_rng(2).standard_normal(5))
x = d.rolling(window=2, step=step).skew()
tm.assert_series_equal(expected, x)
# yields [NaN, NaN, NaN, 0.177994, 1.548824]
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
expected = Series([np.nan, np.nan, np.nan, 0.177994, 1.548824])[::step]
x = d.rolling(window=4, step=step).skew()
tm.assert_series_equal(expected, x)
def test_rolling_kurt_edge_cases(step):
expected = Series([np.nan] * 4 + [-3.0])[::step]
# yields all NaN (0 variance)
d = Series([1] * 5)
x = d.rolling(window=5, step=step).kurt()
tm.assert_series_equal(expected, x)
# yields all NaN (window too small)
expected = Series([np.nan] * 5)[::step]
d = Series(np.random.default_rng(2).standard_normal(5))
x = d.rolling(window=3, step=step).kurt()
tm.assert_series_equal(expected, x)
# yields [NaN, NaN, NaN, 1.224307, 2.671499]
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
expected = Series([np.nan, np.nan, np.nan, 1.224307, 2.671499])[::step]
x = d.rolling(window=4, step=step).kurt()
tm.assert_series_equal(expected, x)
def test_rolling_skew_eq_value_fperr(step):
    # GH 18804: rolling skew for windows of all-equal values used to return NaN
    # GH 46717 update: all-equal values should now return 0 instead of NaN
a = Series([1.1] * 15).rolling(window=10, step=step).skew()
assert (a[a.index >= 9] == 0).all()
assert a[a.index < 9].isna().all()
def test_rolling_kurt_eq_value_fperr(step):
    # GH 18804: rolling kurt for windows of all-equal values used to return NaN
    # GH 46717 update: all-equal values should now return -3 instead of NaN
a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
assert (a[a.index >= 9] == -3).all()
assert a[a.index < 9].isna().all()

View File

@ -0,0 +1,715 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.tseries import offsets
@pytest.fixture
def regular():
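    """DataFrame indexed by a regular 1-second DatetimeIndex."""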
return DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
).set_index("A")
@pytest.fixture
def ragged():
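    """DataFrame with an irregular DatetimeIndex at 0, 2, 3, 5 and 6 seconds past 09:00."""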
df = DataFrame({"B": range(5)})
df.index = [
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
return df
class TestRollingTS:
# rolling time-series friendly
# xref GH13327
def test_doc_string(self):
df = DataFrame(
{"B": [0, 1, 2, np.nan, 4]},
index=[
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
],
)
df
df.rolling("2s").sum()
def test_invalid_window_non_int(self, regular):
# not a valid freq
msg = "passed window foobar is not compatible with a datetimelike index"
with pytest.raises(ValueError, match=msg):
regular.rolling(window="foobar")
# not a datetimelike index
msg = "window must be an integer"
with pytest.raises(ValueError, match=msg):
regular.reset_index().rolling(window="foobar")
@pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)])
def test_invalid_window_nonfixed(self, freq, regular):
# non-fixed freqs
msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
with pytest.raises(ValueError, match=msg):
regular.rolling(window=freq)
@pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"])
def test_valid_window(self, freq, regular):
regular.rolling(window=freq)
@pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])])
def test_invalid_minp(self, minp, regular):
# non-integer min_periods
msg = (
r"local variable 'minp' referenced before assignment|"
"min_periods must be an integer"
)
with pytest.raises(ValueError, match=msg):
regular.rolling(window="1D", min_periods=minp)
def test_on(self, regular):
df = regular
# not a valid column
msg = (
r"invalid on specified as foobar, must be a column "
"\\(of DataFrame\\), an Index or None"
)
with pytest.raises(ValueError, match=msg):
df.rolling(window="2s", on="foobar")
# column is valid
df = df.copy()
df["C"] = date_range("20130101", periods=len(df))
df.rolling(window="2d", on="C").sum()
# invalid columns
msg = "window must be an integer"
with pytest.raises(ValueError, match=msg):
df.rolling(window="2d", on="B")
        # ok even though 'on' is not a selected column
df.rolling(window="2d", on="C").B.sum()
def test_monotonic_on(self):
# on/index must be monotonic
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
)
assert df.A.is_monotonic_increasing
df.rolling("2s", on="A").sum()
df = df.set_index("A")
assert df.index.is_monotonic_increasing
df.rolling("2s").sum()
def test_non_monotonic_on(self):
# GH 19248
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
)
df = df.set_index("A")
non_monotonic_index = df.index.to_list()
non_monotonic_index[0] = non_monotonic_index[3]
df.index = non_monotonic_index
assert not df.index.is_monotonic_increasing
msg = "index values must be monotonic"
with pytest.raises(ValueError, match=msg):
df.rolling("2s").sum()
df = df.reset_index()
msg = (
r"invalid on specified as A, must be a column "
"\\(of DataFrame\\), an Index or None"
)
with pytest.raises(ValueError, match=msg):
df.rolling("2s", on="A").sum()
def test_frame_on(self):
df = DataFrame(
{"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
)
df["A"] = [
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
        # simulate setting the index by using 'on'
expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)
result = df.rolling("2s", on="A").B.sum()
tm.assert_series_equal(result, expected)
# test as a frame
        # the 'on' column should be excluded from the aggregation
        # note that expected is built by setting the index, computing, and
        # resetting it, so its columns must be reordered to match the actual
        # result, which keeps the original column order
expected = (
df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
)
result = df.rolling("2s", on="A")[["B"]].sum()
tm.assert_frame_equal(result, expected)
def test_frame_on2(self, unit):
# using multiple aggregation columns
dti = DatetimeIndex(
[
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
).as_unit(unit)
df = DataFrame(
{
"A": [0, 1, 2, 3, 4],
"B": [0, 1, 2, np.nan, 4],
"C": dti,
},
columns=["A", "C", "B"],
)
expected1 = DataFrame(
{"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
columns=["A", "C", "B"],
)
result = df.rolling("2s", on="C").sum()
expected = expected1
tm.assert_frame_equal(result, expected)
expected = Series([0, 1, 3, np.nan, 4], name="B")
result = df.rolling("2s", on="C").B.sum()
tm.assert_series_equal(result, expected)
expected = expected1[["A", "B", "C"]]
result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
tm.assert_frame_equal(result, expected)
def test_basic_regular(self, regular):
df = regular.copy()
df.index = date_range("20130101", periods=5, freq="D")
expected = df.rolling(window=1, min_periods=1).sum()
result = df.rolling(window="1D").sum()
tm.assert_frame_equal(result, expected)
df.index = date_range("20130101", periods=5, freq="2D")
expected = df.rolling(window=1, min_periods=1).sum()
result = df.rolling(window="2D", min_periods=1).sum()
tm.assert_frame_equal(result, expected)
expected = df.rolling(window=1, min_periods=1).sum()
result = df.rolling(window="2D", min_periods=1).sum()
tm.assert_frame_equal(result, expected)
expected = df.rolling(window=1).sum()
result = df.rolling(window="2D").sum()
tm.assert_frame_equal(result, expected)
def test_min_periods(self, regular):
# compare for min_periods
df = regular
        # these are slightly different
expected = df.rolling(2, min_periods=1).sum()
result = df.rolling("2s").sum()
tm.assert_frame_equal(result, expected)
expected = df.rolling(2, min_periods=1).sum()
result = df.rolling("2s", min_periods=1).sum()
tm.assert_frame_equal(result, expected)
def test_closed(self, regular, unit):
# xref GH13965
dti = DatetimeIndex(
[
Timestamp("20130101 09:00:01"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:04"),
Timestamp("20130101 09:00:06"),
]
).as_unit(unit)
df = DataFrame(
{"A": [1] * 5},
index=dti,
)
# closed must be 'right', 'left', 'both', 'neither'
msg = "closed must be 'right', 'left', 'both' or 'neither'"
with pytest.raises(ValueError, match=msg):
regular.rolling(window="2s", closed="blabla")
expected = df.copy()
expected["A"] = [1.0, 2, 2, 2, 1]
result = df.rolling("2s", closed="right").sum()
tm.assert_frame_equal(result, expected)
# default should be 'right'
result = df.rolling("2s").sum()
tm.assert_frame_equal(result, expected)
expected = df.copy()
expected["A"] = [1.0, 2, 3, 3, 2]
result = df.rolling("2s", closed="both").sum()
tm.assert_frame_equal(result, expected)
expected = df.copy()
expected["A"] = [np.nan, 1.0, 2, 2, 1]
result = df.rolling("2s", closed="left").sum()
tm.assert_frame_equal(result, expected)
expected = df.copy()
expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
result = df.rolling("2s", closed="neither").sum()
tm.assert_frame_equal(result, expected)
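        # Editorial note (hedged): for an offset window of "2s" ending at time t,
        # closed picks the endpoints of [t - 2s, t] that are included: "right"
        # (the default) gives (t - 2s, t], "left" gives [t - 2s, t), "both" keeps
        # both endpoints and "neither" drops both.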
def test_ragged_sum(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 3, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=2).sum()
expected = df.copy()
expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 5, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s").sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 5, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="4s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 6, 9]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="4s", min_periods=3).sum()
expected = df.copy()
expected["B"] = [np.nan, np.nan, 3, 6, 9]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 6, 10]
tm.assert_frame_equal(result, expected)
def test_ragged_mean(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).mean()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).mean()
expected = df.copy()
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)
def test_ragged_median(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).median()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).median()
expected = df.copy()
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)
def test_ragged_quantile(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).quantile(0.5)
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).quantile(0.5)
expected = df.copy()
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)
def test_ragged_std(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).std(ddof=0)
expected = df.copy()
expected["B"] = [0.0] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="1s", min_periods=1).std(ddof=1)
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s", min_periods=1).std(ddof=0)
expected = df.copy()
expected["B"] = [0.0] + [0.5] * 4
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).std(ddof=1)
expected = df.copy()
expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
tm.assert_frame_equal(result, expected)
def test_ragged_var(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).var(ddof=0)
expected = df.copy()
expected["B"] = [0.0] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="1s", min_periods=1).var(ddof=1)
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s", min_periods=1).var(ddof=0)
expected = df.copy()
expected["B"] = [0.0] + [0.25] * 4
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).var(ddof=1)
expected = df.copy()
expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
tm.assert_frame_equal(result, expected)
def test_ragged_skew(self, ragged):
df = ragged
result = df.rolling(window="3s", min_periods=1).skew()
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).skew()
expected = df.copy()
expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
tm.assert_frame_equal(result, expected)
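# kurtosis needs at least four observations: only the last "5s" window
# qualifies, containing {1, 2, 3, 4}, whose unbiased sample kurtosis is -1.2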
def test_ragged_kurt(self, ragged):
df = ragged
result = df.rolling(window="3s", min_periods=1).kurt()
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).kurt()
expected = df.copy()
expected["B"] = [np.nan] * 4 + [-1.2]
tm.assert_frame_equal(result, expected)
def test_ragged_count(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).count()
expected = df.copy()
expected["B"] = [1.0, 1, 1, 1, 1]
tm.assert_frame_equal(result, expected)
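# for an offset-based window, min_periods defaults to 1, so omitting it below
# should give the same result as the explicit min_periods=1 call above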
df = ragged
result = df.rolling(window="1s").count()
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).count()
expected = df.copy()
expected["B"] = [1.0, 1, 2, 1, 2]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=2).count()
expected = df.copy()
expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
tm.assert_frame_equal(result, expected)
def test_regular_min(self):
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
).set_index("A")
result = df.rolling("1s").min()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
).set_index("A")
result = df.rolling("2s").min()
expected = df.copy()
expected["B"] = [5.0, 4, 3, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling("5s").min()
expected = df.copy()
expected["B"] = [5.0, 4, 3, 3, 3]
tm.assert_frame_equal(result, expected)
def test_ragged_min(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).min()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).min()
expected = df.copy()
expected["B"] = [0.0, 1, 1, 3, 3]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).min()
expected = df.copy()
expected["B"] = [0.0, 0, 0, 1, 1]
tm.assert_frame_equal(result, expected)
def test_perf_min(self):
N = 10000
dfp = DataFrame(
{"B": np.random.default_rng(2).standard_normal(N)},
index=date_range("20130101", periods=N, freq="s"),
)
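# on this regular 1-second index a "2s" offset window spans the same rows as
# a fixed two-observation window (and "200s" the same rows as 200), so the
# offset-based and count-based results should agree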
expected = dfp.rolling(2, min_periods=1).min()
result = dfp.rolling("2s").min()
assert ((result - expected) < 0.01).all().all()
expected = dfp.rolling(200, min_periods=1).min()
result = dfp.rolling("200s").min()
assert ((result - expected) < 0.01).all().all()
def test_ragged_max(self, ragged):
df = ragged
result = df.rolling(window="1s", min_periods=1).max()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).max()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).max()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"freq, op, result_data",
[
("ms", "min", [0.0] * 10),
("ms", "mean", [0.0] * 9 + [2.0 / 9]),
("ms", "max", [0.0] * 9 + [2.0]),
("s", "min", [0.0] * 10),
("s", "mean", [0.0] * 9 + [2.0 / 9]),
("s", "max", [0.0] * 9 + [2.0]),
("min", "min", [0.0] * 10),
("min", "mean", [0.0] * 9 + [2.0 / 9]),
("min", "max", [0.0] * 9 + [2.0]),
("h", "min", [0.0] * 10),
("h", "mean", [0.0] * 9 + [2.0 / 9]),
("h", "max", [0.0] * 9 + [2.0]),
("D", "min", [0.0] * 10),
("D", "mean", [0.0] * 9 + [2.0 / 9]),
("D", "max", [0.0] * 9 + [2.0]),
],
)
def test_freqs_ops(self, freq, op, result_data):
# GH 21096
index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10)
# Explicit cast to float to avoid implicit cast when setting nan
s = Series(data=0, index=index, dtype="float")
s.iloc[1] = np.nan
s.iloc[-1] = 2
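# the trailing 10-period window at the last row spans all ten rows; the NaN at
# position 1 is skipped, leaving nine observations that sum to 2, hence
# mean 2/9, min 0 and max 2 in result_data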
result = getattr(s.rolling(window=f"10{freq}"), op)()
expected = Series(data=result_data, index=index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"f",
[
"sum",
"mean",
"count",
"median",
"std",
"var",
"kurt",
"skew",
"min",
"max",
],
)
def test_all(self, f, regular):
# simple comparison of integer vs time-based windowing
df = regular * 2
er = df.rolling(window=1)
r = df.rolling(window="1s")
result = getattr(r, f)()
expected = getattr(er, f)()
tm.assert_frame_equal(result, expected)
result = r.quantile(0.5)
expected = er.quantile(0.5)
tm.assert_frame_equal(result, expected)
def test_all2(self, arithmetic_win_operators):
f = arithmetic_win_operators
# more sophisticated comparison of integer vs.
# time-based windowing
df = DataFrame(
{"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="h")
)
# in-range data
dft = df.between_time("09:00", "16:00")
r = dft.rolling(window="5h")
result = getattr(r, f)()
# roll each day separately to compare with the time-based roll;
# groupby-apply returns a MultiIndex, so drop the day level afterwards
def agg_by_day(x):
x = x.between_time("09:00", "16:00")
return getattr(x.rolling(5, min_periods=1), f)()
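# within each day the hourly index is regular, so a fixed 5-row window
# reproduces the "5h" offset window used above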
expected = (
df.groupby(df.index.day).apply(agg_by_day).reset_index(level=0, drop=True)
)
tm.assert_frame_equal(result, expected)
def test_rolling_cov_offset(self):
# GH16058
idx = date_range("2017-01-01", periods=24, freq="1h")
ss = Series(np.arange(len(idx)), index=idx)
result = ss.rolling("2h").cov()
expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
tm.assert_series_equal(result, expected)
expected2 = ss.rolling(2, min_periods=1).cov()
tm.assert_series_equal(result, expected2)
result = ss.rolling("3h").cov()
expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
tm.assert_series_equal(result, expected)
expected2 = ss.rolling(3, min_periods=1).cov()
tm.assert_series_equal(result, expected2)
def test_rolling_on_decreasing_index(self, unit):
# GH-19248, GH-32385
index = DatetimeIndex(
[
Timestamp("20190101 09:00:30"),
Timestamp("20190101 09:00:27"),
Timestamp("20190101 09:00:20"),
Timestamp("20190101 09:00:18"),
Timestamp("20190101 09:00:10"),
]
).as_unit(unit)
df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
result = df.rolling("5s").min()
expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index)
tm.assert_frame_equal(result, expected)
def test_rolling_on_empty(self):
# GH-32385
df = DataFrame({"column": []}, index=[])
result = df.rolling("5s").min()
expected = DataFrame({"column": []}, index=[])
tm.assert_frame_equal(result, expected)
def test_rolling_on_multi_index_level(self):
# GH-15584
df = DataFrame(
{"column": range(6)},
index=MultiIndex.from_product(
[date_range("20190101", periods=3), range(2)], names=["date", "seq"]
),
)
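# all three dates fall inside a single "10d" window, so the rolling sum over
# the "date" level is just the cumulative sum of `column`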
result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
expected = DataFrame(
{"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
def test_nat_axis_error(msg, axis):
idx = [Timestamp("2020"), NaT]
kwargs = {"columns" if axis == 1 else "index": idx}
df = DataFrame(np.eye(2), **kwargs)
warn_msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
if axis == 1:
warn_msg = "Support for axis=1 in DataFrame.rolling is deprecated"
with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
df.rolling("D", axis=axis).mean()
@td.skip_if_no("pyarrow")
def test_arrow_datetime_axis():
# GH 55849
expected = Series(
np.arange(5, dtype=np.float64),
index=Index(
date_range("2020-01-01", periods=5), dtype="timestamp[ns][pyarrow]"
),
)
result = expected.rolling("1D").sum()
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,688 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
Timedelta,
concat,
date_range,
)
import pandas._testing as tm
from pandas.api.indexers import BaseIndexer
@pytest.fixture(
params=[
"triang",
"blackman",
"hamming",
"bartlett",
"bohman",
"blackmanharris",
"nuttall",
"barthann",
]
)
def win_types(request):
return request.param
@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"])
def win_types_special(request):
return request.param
def test_constructor(frame_or_series):
# GH 12669
pytest.importorskip("scipy")
c = frame_or_series(range(5)).rolling
# valid
c(win_type="boxcar", window=2, min_periods=1)
c(win_type="boxcar", window=2, min_periods=1, center=True)
c(win_type="boxcar", window=2, min_periods=1, center=False)
@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
def test_invalid_constructor(frame_or_series, w):
# not valid
pytest.importorskip("scipy")
c = frame_or_series(range(5)).rolling
with pytest.raises(ValueError, match="min_periods must be an integer"):
c(win_type="boxcar", window=2, min_periods=w)
with pytest.raises(ValueError, match="center must be a boolean"):
c(win_type="boxcar", window=2, min_periods=1, center=w)
@pytest.mark.parametrize("wt", ["foobar", 1])
def test_invalid_constructor_wintype(frame_or_series, wt):
pytest.importorskip("scipy")
c = frame_or_series(range(5)).rolling
with pytest.raises(ValueError, match="Invalid win_type"):
c(win_type=wt, window=2)
def test_constructor_with_win_type(frame_or_series, win_types):
# GH 12669
pytest.importorskip("scipy")
c = frame_or_series(range(5)).rolling
c(win_type=win_types, window=2)
@pytest.mark.parametrize("arg", ["median", "kurt", "skew"])
def test_agg_function_support(arg):
pytest.importorskip("scipy")
df = DataFrame({"A": np.arange(5)})
roll = df.rolling(2, win_type="triang")
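# weighted (win_type) windows implement only a small set of aggregations
# (sum/mean/std/var), so median, kurt and skew are expected to raise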
msg = f"'{arg}' is not a valid function for 'Window' object"
with pytest.raises(AttributeError, match=msg):
roll.agg(arg)
with pytest.raises(AttributeError, match=msg):
roll.agg([arg])
with pytest.raises(AttributeError, match=msg):
roll.agg({"A": arg})
def test_invalid_scipy_arg():
# This error is raised by scipy
pytest.importorskip("scipy")
msg = r"boxcar\(\) got an unexpected"
with pytest.raises(TypeError, match=msg):
Series(range(3)).rolling(1, win_type="boxcar").mean(foo="bar")
def test_constructor_with_win_type_invalid(frame_or_series):
# GH 13383
pytest.importorskip("scipy")
c = frame_or_series(range(5)).rolling
msg = "window must be an integer 0 or greater"
with pytest.raises(ValueError, match=msg):
c(-1, win_type="boxcar")
def test_window_with_args(step):
# make sure that we aggregate window functions correctly when extra keyword
# arguments are passed through to the aggregation
pytest.importorskip("scipy")
r = Series(np.random.default_rng(2).standard_normal(100)).rolling(
window=10, min_periods=1, win_type="gaussian", step=step
)
expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
expected.columns = ["<lambda>", "<lambda>"]
result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)])
tm.assert_frame_equal(result, expected)
def a(x):
return x.mean(std=10)
def b(x):
return x.mean(std=0.01)
expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
expected.columns = ["a", "b"]
result = r.aggregate([a, b])
tm.assert_frame_equal(result, expected)
def test_win_type_with_method_invalid():
pytest.importorskip("scipy")
with pytest.raises(
NotImplementedError, match="'single' is the only supported method type."
):
Series(range(1)).rolling(1, win_type="triang", method="table")
@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")])
def test_consistent_win_type_freq(arg):
# GH 15969
pytest.importorskip("scipy")
s = Series(range(1))
with pytest.raises(ValueError, match="Invalid win_type freq"):
s.rolling(arg, win_type="freq")
def test_win_type_freq_return_none():
# GH 48838
freq_roll = Series(range(2), index=date_range("2020", periods=2)).rolling("2s")
assert freq_roll.win_type is None
def test_win_type_not_implemented():
pytest.importorskip("scipy")
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed, step):
return np.array([0, 1]), np.array([1, 2])
df = DataFrame({"values": range(2)})
indexer = CustomIndexer()
with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
df.rolling(indexer, win_type="boxcar")
def test_cmov_mean(step):
# GH 8238
pytest.importorskip("scipy")
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
result = Series(vals).rolling(5, center=True, step=step).mean()
expected_values = [
np.nan,
np.nan,
9.962,
11.27,
11.564,
12.516,
12.818,
12.952,
np.nan,
np.nan,
]
expected = Series(expected_values)[::step]
tm.assert_series_equal(expected, result)
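# "boxcar" weights every observation equally, so the weighted mean below must
# match the plain centered rolling mean computed above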
def test_cmov_window(step):
# GH 8238
pytest.importorskip("scipy")
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
result = Series(vals).rolling(5, win_type="boxcar", center=True, step=step).mean()
expected_values = [
np.nan,
np.nan,
9.962,
11.27,
11.564,
12.516,
12.818,
12.952,
np.nan,
np.nan,
]
expected = Series(expected_values)[::step]
tm.assert_series_equal(expected, result)
def test_cmov_window_corner(step):
# GH 8238
# all nan
pytest.importorskip("scipy")
vals = Series([np.nan] * 10)
result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
assert np.isnan(result).all()
# empty
vals = Series([], dtype=object)
result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
assert len(result) == 0
# shorter than window
vals = Series(np.random.default_rng(2).standard_normal(5))
result = vals.rolling(10, win_type="boxcar", step=step).mean()
assert np.isnan(result).all()
assert len(result) == len(range(0, 5, step or 1))
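# for the unweighted "boxcar" window, "sum" is just 5x "mean" (e.g.
# 46.26 == 5 * 9.252) and "var" is the square of "std" in the expectations below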
@pytest.mark.parametrize(
"f,xp",
[
(
"mean",
[
[np.nan, np.nan],
[np.nan, np.nan],
[9.252, 9.392],
[8.644, 9.906],
[8.87, 10.208],
[6.81, 8.588],
[7.792, 8.644],
[9.05, 7.824],
[np.nan, np.nan],
[np.nan, np.nan],
],
),
(
"std",
[
[np.nan, np.nan],
[np.nan, np.nan],
[3.789706, 4.068313],
[3.429232, 3.237411],
[3.589269, 3.220810],
[3.405195, 2.380655],
[3.281839, 2.369869],
[3.676846, 1.801799],
[np.nan, np.nan],
[np.nan, np.nan],
],
),
(
"var",
[
[np.nan, np.nan],
[np.nan, np.nan],
[14.36187, 16.55117],
[11.75963, 10.48083],
[12.88285, 10.37362],
[11.59535, 5.66752],
[10.77047, 5.61628],
[13.51920, 3.24648],
[np.nan, np.nan],
[np.nan, np.nan],
],
),
(
"sum",
[
[np.nan, np.nan],
[np.nan, np.nan],
[46.26, 46.96],
[43.22, 49.53],
[44.35, 51.04],
[34.05, 42.94],
[38.96, 43.22],
[45.25, 39.12],
[np.nan, np.nan],
[np.nan, np.nan],
],
),
],
)
def test_cmov_window_frame(f, xp, step):
# GH 8238
pytest.importorskip("scipy")
df = DataFrame(
np.array(
[
[12.18, 3.64],
[10.18, 9.16],
[13.24, 14.61],
[4.51, 8.11],
[6.15, 11.44],
[9.14, 6.21],
[11.31, 10.67],
[2.94, 6.51],
[9.42, 8.39],
[12.44, 7.34],
]
)
)
xp = DataFrame(np.array(xp))[::step]
roll = df.rolling(5, win_type="boxcar", center=True, step=step)
rs = getattr(roll, f)()
tm.assert_frame_equal(xp, rs)
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4, 5])
def test_cmov_window_na_min_periods(step, min_periods):
pytest.importorskip("scipy")
vals = Series(np.random.default_rng(2).standard_normal(10))
vals[4] = np.nan
vals[8] = np.nan
xp = vals.rolling(5, min_periods=min_periods, center=True, step=step).mean()
rs = vals.rolling(
5, win_type="boxcar", min_periods=min_periods, center=True, step=step
).mean()
tm.assert_series_equal(xp, rs)
def test_cmov_window_regular(win_types, step):
# GH 8238
pytest.importorskip("scipy")
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
xps = {
"hamming": [
np.nan,
np.nan,
8.71384,
9.56348,
12.38009,
14.03687,
13.8567,
11.81473,
np.nan,
np.nan,
],
"triang": [
np.nan,
np.nan,
9.28667,
10.34667,
12.00556,
13.33889,
13.38,
12.33667,
np.nan,
np.nan,
],
"barthann": [
np.nan,
np.nan,
8.4425,
9.1925,
12.5575,
14.3675,
14.0825,
11.5675,
np.nan,
np.nan,
],
"bohman": [
np.nan,
np.nan,
7.61599,
9.1764,
12.83559,
14.17267,
14.65923,
11.10401,
np.nan,
np.nan,
],
"blackmanharris": [
np.nan,
np.nan,
6.97691,
9.16438,
13.05052,
14.02156,
15.10512,
10.74574,
np.nan,
np.nan,
],
"nuttall": [
np.nan,
np.nan,
7.04618,
9.16786,
13.02671,
14.03559,
15.05657,
10.78514,
np.nan,
np.nan,
],
"blackman": [
np.nan,
np.nan,
7.73345,
9.17869,
12.79607,
14.20036,
14.57726,
11.16988,
np.nan,
np.nan,
],
"bartlett": [
np.nan,
np.nan,
8.4425,
9.1925,
12.5575,
14.3675,
14.0825,
11.5675,
np.nan,
np.nan,
],
}
xp = Series(xps[win_types])[::step]
rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
tm.assert_series_equal(xp, rs)
def test_cmov_window_regular_linear_range(win_types, step):
# GH 8238
pytest.importorskip("scipy")
vals = np.array(range(10), dtype=float)
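# a symmetric, normalised weight window applied to a linear ramp returns the
# centre value itself, so the expected output is the input with NaN in the
# first and last two positions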
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
xp = Series(xp)[::step]
rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
tm.assert_series_equal(xp, rs)
def test_cmov_window_regular_missing_data(win_types, step):
# GH 8238
pytest.importorskip("scipy")
vals = np.array(
[6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48]
)
xps = {
"bartlett": [
np.nan,
np.nan,
9.70333,
10.5225,
8.4425,
9.1925,
12.5575,
14.3675,
15.61667,
13.655,
],
"blackman": [
np.nan,
np.nan,
9.04582,
11.41536,
7.73345,
9.17869,
12.79607,
14.20036,
15.8706,
13.655,
],
"barthann": [
np.nan,
np.nan,
9.70333,
10.5225,
8.4425,
9.1925,
12.5575,
14.3675,
15.61667,
13.655,
],
"bohman": [
np.nan,
np.nan,
8.9444,
11.56327,
7.61599,
9.1764,
12.83559,
14.17267,
15.90976,
13.655,
],
"hamming": [
np.nan,
np.nan,
9.59321,
10.29694,
8.71384,
9.56348,
12.38009,
14.20565,
15.24694,
13.69758,
],
"nuttall": [
np.nan,
np.nan,
8.47693,
12.2821,
7.04618,
9.16786,
13.02671,
14.03673,
16.08759,
13.65553,
],
"triang": [
np.nan,
np.nan,
9.33167,
9.76125,
9.28667,
10.34667,
12.00556,
13.82125,
14.49429,
13.765,
],
"blackmanharris": [
np.nan,
np.nan,
8.42526,
12.36824,
6.97691,
9.16438,
13.05052,
14.02175,
16.1098,
13.65509,
],
}
xp = Series(xps[win_types])[::step]
rs = Series(vals).rolling(5, win_type=win_types, min_periods=3, step=step).mean()
tm.assert_series_equal(xp, rs)
def test_cmov_window_special(win_types_special, step):
# GH 8238
pytest.importorskip("scipy")
kwds = {
"kaiser": {"beta": 1.0},
"gaussian": {"std": 1.0},
"general_gaussian": {"p": 2.0, "sig": 2.0},
"exponential": {"tau": 10},
}
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
xps = {
"gaussian": [
np.nan,
np.nan,
8.97297,
9.76077,
12.24763,
13.89053,
13.65671,
12.01002,
np.nan,
np.nan,
],
"general_gaussian": [
np.nan,
np.nan,
9.85011,
10.71589,
11.73161,
13.08516,
12.95111,
12.74577,
np.nan,
np.nan,
],
"kaiser": [
np.nan,
np.nan,
9.86851,
11.02969,
11.65161,
12.75129,
12.90702,
12.83757,
np.nan,
np.nan,
],
"exponential": [
np.nan,
np.nan,
9.83364,
11.10472,
11.64551,
12.66138,
12.92379,
12.83770,
np.nan,
np.nan,
],
}
xp = Series(xps[win_types_special])[::step]
rs = (
Series(vals)
.rolling(5, win_type=win_types_special, center=True, step=step)
.mean(**kwds[win_types_special])
)
tm.assert_series_equal(xp, rs)
def test_cmov_window_special_linear_range(win_types_special, step):
# GH 8238
pytest.importorskip("scipy")
kwds = {
"kaiser": {"beta": 1.0},
"gaussian": {"std": 1.0},
"general_gaussian": {"p": 2.0, "sig": 2.0},
"slepian": {"width": 0.5},
"exponential": {"tau": 10},
}
vals = np.array(range(10), dtype=float)
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
xp = Series(xp)[::step]
rs = (
Series(vals)
.rolling(5, win_type=win_types_special, center=True, step=step)
.mean(**kwds[win_types_special])
)
tm.assert_series_equal(xp, rs)
def test_weighted_var_big_window_no_segfault(win_types, center):
# GH 46772
pytest.importorskip("scipy")
x = Series(0)
result = x.rolling(window=16, center=center, win_type=win_types).var()
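# the one-element series can never fill a 16-observation window, so the result
# is all-NaN; the point of the test is only that it does not segfault (GH 46772)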
expected = Series(np.nan)
tm.assert_series_equal(result, expected)
def test_rolling_center_axis_1():
pytest.importorskip("scipy")
df = DataFrame(
{"a": [1, 1, 0, 0, 0, 1], "b": [1, 0, 0, 1, 0, 0], "c": [1, 0, 0, 1, 0, 1]}
)
msg = "Support for axis=1 in DataFrame.rolling is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.rolling(window=3, axis=1, win_type="boxcar", center=True).sum()
expected = DataFrame(
{"a": [np.nan] * 6, "b": [3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "c": [np.nan] * 6}
)
tm.assert_frame_equal(result, expected, check_dtype=True)