forked from Alsan/Post_finder
venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,143 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
|
||||
# The various methods we support
|
||||
downsample_methods = [
|
||||
"min",
|
||||
"max",
|
||||
"first",
|
||||
"last",
|
||||
"sum",
|
||||
"mean",
|
||||
"sem",
|
||||
"median",
|
||||
"prod",
|
||||
"var",
|
||||
"std",
|
||||
"ohlc",
|
||||
"quantile",
|
||||
]
|
||||
upsample_methods = ["count", "size"]
|
||||
series_methods = ["nunique"]
|
||||
resample_methods = downsample_methods + upsample_methods + series_methods
|
||||
|
||||
|
||||
@pytest.fixture(params=downsample_methods)
|
||||
def downsample_method(request):
|
||||
"""Fixture for parametrization of Grouper downsample methods."""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=resample_methods)
|
||||
def resample_method(request):
|
||||
"""Fixture for parametrization of Grouper resample methods."""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _index_start():
|
||||
"""Fixture for parametrization of index, series and frame."""
|
||||
return datetime(2005, 1, 1)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _index_end():
|
||||
"""Fixture for parametrization of index, series and frame."""
|
||||
return datetime(2005, 1, 10)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _index_freq():
|
||||
"""Fixture for parametrization of index, series and frame."""
|
||||
return "D"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _index_name():
|
||||
"""Fixture for parametrization of index, series and frame."""
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
|
||||
"""
|
||||
Fixture for parametrization of date_range, period_range and
|
||||
timedelta_range indexes
|
||||
"""
|
||||
return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _static_values(index):
|
||||
"""
|
||||
Fixture for parametrization of values used in parametrization of
|
||||
Series and DataFrames with date_range, period_range and
|
||||
timedelta_range indexes
|
||||
"""
|
||||
return np.arange(len(index))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _series_name():
|
||||
"""
|
||||
Fixture for parametrization of Series name for Series used with
|
||||
date_range, period_range and timedelta_range indexes
|
||||
"""
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series(index, _series_name, _static_values):
|
||||
"""
|
||||
Fixture for parametrization of Series with date_range, period_range and
|
||||
timedelta_range indexes
|
||||
"""
|
||||
return Series(_static_values, index=index, name=_series_name)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def empty_series_dti(series):
|
||||
"""
|
||||
Fixture for parametrization of empty Series with date_range,
|
||||
period_range and timedelta_range indexes
|
||||
"""
|
||||
return series[:0]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame(index, _series_name, _static_values):
|
||||
"""
|
||||
Fixture for parametrization of DataFrame with date_range, period_range
|
||||
and timedelta_range indexes
|
||||
"""
|
||||
# _series_name is intentionally unused
|
||||
return DataFrame({"value": _static_values}, index=index)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def empty_frame_dti(series):
|
||||
"""
|
||||
Fixture for parametrization of empty DataFrame with date_range,
|
||||
period_range and timedelta_range indexes
|
||||
"""
|
||||
index = series.index[:0]
|
||||
return DataFrame(index=index)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def series_and_frame(frame_or_series, series, frame):
|
||||
"""
|
||||
Fixture for parametrization of Series and DataFrame with date_range,
|
||||
period_range and timedelta_range indexes
|
||||
"""
|
||||
if frame_or_series == Series:
|
||||
return series
|
||||
if frame_or_series == DataFrame:
|
||||
return frame
|
@ -0,0 +1,460 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.groupby import DataError
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
from pandas.core.resample import _asfreq_compat
|
||||
|
||||
# a fixture value can be overridden by the test parameter value. Note that the
|
||||
# value of the fixture can be overridden this way even if the test doesn't use
|
||||
# it directly (doesn't mention it in the function prototype).
|
||||
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501
|
||||
# in this module we override the fixture values defined in conftest.py
|
||||
# tuples of '_index_factory,_series_name,_index_start,_index_end'
|
||||
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
|
||||
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
|
||||
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")
|
||||
|
||||
all_ts = pytest.mark.parametrize(
|
||||
"_index_factory,_series_name,_index_start,_index_end",
|
||||
[DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def create_index(_index_factory):
|
||||
def _create_index(*args, **kwargs):
|
||||
"""return the _index_factory created using the args, kwargs"""
|
||||
return _index_factory(*args, **kwargs)
|
||||
|
||||
return _create_index
|
||||
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2D", "1h"])
|
||||
@pytest.mark.parametrize(
|
||||
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
|
||||
)
|
||||
def test_asfreq(series_and_frame, freq, create_index):
|
||||
obj = series_and_frame
|
||||
|
||||
result = obj.resample(freq).asfreq()
|
||||
new_index = create_index(obj.index[0], obj.index[-1], freq=freq)
|
||||
expected = obj.reindex(new_index)
|
||||
tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
|
||||
)
|
||||
def test_asfreq_fill_value(series, create_index):
|
||||
# test for fill value during resampling, issue 3715
|
||||
|
||||
ser = series
|
||||
|
||||
result = ser.resample("1h").asfreq()
|
||||
new_index = create_index(ser.index[0], ser.index[-1], freq="1h")
|
||||
expected = ser.reindex(new_index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Explicit cast to float to avoid implicit cast when setting None
|
||||
frame = ser.astype("float").to_frame("value")
|
||||
frame.iloc[1] = None
|
||||
result = frame.resample("1h").asfreq(fill_value=4.0)
|
||||
new_index = create_index(frame.index[0], frame.index[-1], freq="1h")
|
||||
expected = frame.reindex(new_index, fill_value=4.0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
|
||||
def test_resample_interpolate(frame):
|
||||
# GH#12925
|
||||
df = frame
|
||||
warn = None
|
||||
if isinstance(df.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = df.resample("1min").asfreq().interpolate()
|
||||
expected = df.resample("1min").interpolate()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_raises_on_non_datetimelike_index():
|
||||
# this is a non datetimelike index
|
||||
xp = DataFrame()
|
||||
msg = (
|
||||
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
|
||||
"but got an instance of 'RangeIndex'"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
xp.resample("YE")
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
|
||||
def test_resample_empty_series(freq, empty_series_dti, resample_method):
|
||||
# GH12771 & GH12868
|
||||
|
||||
ser = empty_series_dti
|
||||
if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
|
||||
msg = (
|
||||
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
|
||||
"e.g. '24h' or '3D', not <MonthEnd>"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.resample(freq)
|
||||
return
|
||||
elif freq == "ME" and isinstance(ser.index, PeriodIndex):
|
||||
# index is PeriodIndex, so convert to corresponding Period freq
|
||||
freq = "M"
|
||||
|
||||
warn = None
|
||||
if isinstance(ser.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = ser.resample(freq)
|
||||
result = getattr(rs, resample_method)()
|
||||
|
||||
if resample_method == "ohlc":
|
||||
expected = DataFrame(
|
||||
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
|
||||
)
|
||||
expected.index = _asfreq_compat(ser.index, freq)
|
||||
tm.assert_frame_equal(result, expected, check_dtype=False)
|
||||
else:
|
||||
expected = ser.copy()
|
||||
expected.index = _asfreq_compat(ser.index, freq)
|
||||
tm.assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
tm.assert_index_equal(result.index, expected.index)
|
||||
assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize(
|
||||
"freq",
|
||||
[
|
||||
pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")),
|
||||
"D",
|
||||
"h",
|
||||
],
|
||||
)
|
||||
def test_resample_nat_index_series(freq, series, resample_method):
|
||||
# GH39227
|
||||
|
||||
ser = series.copy()
|
||||
ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
|
||||
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.resample(freq)
|
||||
result = getattr(rs, resample_method)()
|
||||
|
||||
if resample_method == "ohlc":
|
||||
expected = DataFrame(
|
||||
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected, check_dtype=False)
|
||||
else:
|
||||
expected = ser[:0].copy()
|
||||
tm.assert_series_equal(result, expected, check_dtype=False)
|
||||
tm.assert_index_equal(result.index, expected.index)
|
||||
assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
|
||||
@pytest.mark.parametrize("resample_method", ["count", "size"])
|
||||
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
|
||||
# GH28427
|
||||
ser = empty_series_dti
|
||||
if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
|
||||
msg = (
|
||||
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
|
||||
"e.g. '24h' or '3D', not <MonthEnd>"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.resample(freq)
|
||||
return
|
||||
elif freq == "ME" and isinstance(ser.index, PeriodIndex):
|
||||
# index is PeriodIndex, so convert to corresponding Period freq
|
||||
freq = "M"
|
||||
|
||||
warn = None
|
||||
if isinstance(ser.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = ser.resample(freq)
|
||||
|
||||
result = getattr(rs, resample_method)()
|
||||
|
||||
index = _asfreq_compat(ser.index, freq)
|
||||
|
||||
expected = Series([], dtype="int64", index=index, name=ser.name)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
|
||||
def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
|
||||
# GH13212
|
||||
df = empty_frame_dti
|
||||
# count retains dimensions too
|
||||
if freq == "ME" and isinstance(df.index, TimedeltaIndex):
|
||||
msg = (
|
||||
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
|
||||
"e.g. '24h' or '3D', not <MonthEnd>"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.resample(freq, group_keys=False)
|
||||
return
|
||||
elif freq == "ME" and isinstance(df.index, PeriodIndex):
|
||||
# index is PeriodIndex, so convert to corresponding Period freq
|
||||
freq = "M"
|
||||
|
||||
warn = None
|
||||
if isinstance(df.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = df.resample(freq, group_keys=False)
|
||||
result = getattr(rs, resample_method)()
|
||||
if resample_method == "ohlc":
|
||||
# TODO: no tests with len(df.columns) > 0
|
||||
mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]])
|
||||
expected = DataFrame(
|
||||
[], index=df.index[:0].copy(), columns=mi, dtype=np.float64
|
||||
)
|
||||
expected.index = _asfreq_compat(df.index, freq)
|
||||
|
||||
elif resample_method != "size":
|
||||
expected = df.copy()
|
||||
else:
|
||||
# GH14962
|
||||
expected = Series([], dtype=np.int64)
|
||||
|
||||
expected.index = _asfreq_compat(df.index, freq)
|
||||
|
||||
tm.assert_index_equal(result.index, expected.index)
|
||||
assert result.index.freq == expected.index.freq
|
||||
tm.assert_almost_equal(result, expected)
|
||||
|
||||
# test size for GH13212 (currently stays as df)
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
|
||||
def test_resample_count_empty_dataframe(freq, empty_frame_dti):
|
||||
# GH28427
|
||||
|
||||
empty_frame_dti["a"] = []
|
||||
|
||||
if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex):
|
||||
msg = (
|
||||
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
|
||||
"e.g. '24h' or '3D', not <MonthEnd>"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
empty_frame_dti.resample(freq)
|
||||
return
|
||||
elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex):
|
||||
# index is PeriodIndex, so convert to corresponding Period freq
|
||||
freq = "M"
|
||||
|
||||
warn = None
|
||||
if isinstance(empty_frame_dti.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = empty_frame_dti.resample(freq)
|
||||
result = rs.count()
|
||||
|
||||
index = _asfreq_compat(empty_frame_dti.index, freq)
|
||||
|
||||
expected = DataFrame(dtype="int64", index=index, columns=Index(["a"], dtype=object))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
|
||||
def test_resample_size_empty_dataframe(freq, empty_frame_dti):
|
||||
# GH28427
|
||||
|
||||
empty_frame_dti["a"] = []
|
||||
|
||||
if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex):
|
||||
msg = (
|
||||
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
|
||||
"e.g. '24h' or '3D', not <MonthEnd>"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
empty_frame_dti.resample(freq)
|
||||
return
|
||||
elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex):
|
||||
# index is PeriodIndex, so convert to corresponding Period freq
|
||||
freq = "M"
|
||||
|
||||
msg = "Resampling with a PeriodIndex"
|
||||
warn = None
|
||||
if isinstance(empty_frame_dti.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = empty_frame_dti.resample(freq)
|
||||
result = rs.size()
|
||||
|
||||
index = _asfreq_compat(empty_frame_dti.index, freq)
|
||||
|
||||
expected = Series([], dtype="int64", index=index)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
PeriodIndex([], freq="M", name="a"),
|
||||
DatetimeIndex([], name="a"),
|
||||
TimedeltaIndex([], name="a"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
|
||||
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||||
def test_resample_empty_dtypes(index, dtype, resample_method):
|
||||
# Empty series were sometimes causing a segfault (for the functions
|
||||
# with Cython bounds-checking disabled) or an IndexError. We just run
|
||||
# them to ensure they no longer do. (GH #10228)
|
||||
warn = None
|
||||
if isinstance(index, PeriodIndex):
|
||||
# GH#53511
|
||||
index = PeriodIndex([], freq="B", name=index.name)
|
||||
warn = FutureWarning
|
||||
msg = "Resampling with a PeriodIndex is deprecated"
|
||||
|
||||
empty_series_dti = Series([], index, dtype)
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = empty_series_dti.resample("d", group_keys=False)
|
||||
try:
|
||||
getattr(rs, resample_method)()
|
||||
except DataError:
|
||||
# Ignore these since some combinations are invalid
|
||||
# (ex: doing mean with dtype of np.object_)
|
||||
pass
|
||||
|
||||
|
||||
@all_ts
|
||||
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
|
||||
def test_apply_to_empty_series(empty_series_dti, freq):
|
||||
# GH 14313
|
||||
ser = empty_series_dti
|
||||
|
||||
if freq == "ME" and isinstance(empty_series_dti.index, TimedeltaIndex):
|
||||
msg = (
|
||||
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
|
||||
"e.g. '24h' or '3D', not <MonthEnd>"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
empty_series_dti.resample(freq)
|
||||
return
|
||||
elif freq == "ME" and isinstance(empty_series_dti.index, PeriodIndex):
|
||||
# index is PeriodIndex, so convert to corresponding Period freq
|
||||
freq = "M"
|
||||
|
||||
msg = "Resampling with a PeriodIndex"
|
||||
warn = None
|
||||
if isinstance(empty_series_dti.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
rs = ser.resample(freq, group_keys=False)
|
||||
|
||||
result = rs.apply(lambda x: 1)
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
expected = ser.resample(freq).apply("sum")
|
||||
|
||||
tm.assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
@all_ts
|
||||
def test_resampler_is_iterable(series):
|
||||
# GH 15314
|
||||
freq = "h"
|
||||
tg = Grouper(freq=freq, convention="start")
|
||||
msg = "Resampling with a PeriodIndex"
|
||||
warn = None
|
||||
if isinstance(series.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
grouped = series.groupby(tg)
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
resampled = series.resample(freq)
|
||||
for (rk, rv), (gk, gv) in zip(resampled, grouped):
|
||||
assert rk == gk
|
||||
tm.assert_series_equal(rv, gv)
|
||||
|
||||
|
||||
@all_ts
|
||||
def test_resample_quantile(series):
|
||||
# GH 15023
|
||||
ser = series
|
||||
q = 0.75
|
||||
freq = "h"
|
||||
|
||||
msg = "Resampling with a PeriodIndex"
|
||||
warn = None
|
||||
if isinstance(series.index, PeriodIndex):
|
||||
warn = FutureWarning
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = ser.resample(freq).quantile(q)
|
||||
expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("how", ["first", "last"])
|
||||
def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
|
||||
# GH#57019
|
||||
if is_extension_array_dtype(any_real_nullable_dtype):
|
||||
na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
|
||||
else:
|
||||
na_value = np.nan
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": [2, 1, 1, 2],
|
||||
"b": [na_value, 3.0, na_value, 4.0],
|
||||
"c": [na_value, 3.0, na_value, 4.0],
|
||||
},
|
||||
index=date_range("2020-01-01", periods=4, freq="D"),
|
||||
dtype=any_real_nullable_dtype,
|
||||
)
|
||||
rs = df.resample("ME")
|
||||
method = getattr(rs, how)
|
||||
result = method(skipna=skipna)
|
||||
|
||||
gb = df.groupby(df.shape[0] * [pd.to_datetime("2020-01-31")])
|
||||
expected = getattr(gb, how)(skipna=skipna)
|
||||
expected.index.freq = "ME"
|
||||
tm.assert_frame_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,715 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_frame():
|
||||
return DataFrame(
|
||||
{"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
|
||||
index=date_range("1/1/2000", freq="s", periods=40),
|
||||
)
|
||||
|
||||
|
||||
def test_tab_complete_ipython6_warning(ip):
|
||||
from IPython.core.completer import provisionalcompleter
|
||||
|
||||
code = dedent(
|
||||
"""\
|
||||
import numpy as np
|
||||
from pandas import Series, date_range
|
||||
data = np.arange(10, dtype=np.float64)
|
||||
index = date_range("2020-01-01", periods=len(data))
|
||||
s = Series(data, index=index)
|
||||
rs = s.resample("D")
|
||||
"""
|
||||
)
|
||||
ip.run_cell(code)
|
||||
|
||||
# GH 31324 newer jedi version raises Deprecation warning;
|
||||
# appears resolved 2021-02-02
|
||||
with tm.assert_produces_warning(None, raise_on_extra_warnings=False):
|
||||
with provisionalcompleter("ignore"):
|
||||
list(ip.Completer.completions("rs.", 1))
|
||||
|
||||
|
||||
def test_deferred_with_groupby():
|
||||
# GH 12486
|
||||
# support deferred resample ops with groupby
|
||||
data = [
|
||||
["2010-01-01", "A", 2],
|
||||
["2010-01-02", "A", 3],
|
||||
["2010-01-05", "A", 8],
|
||||
["2010-01-10", "A", 7],
|
||||
["2010-01-13", "A", 3],
|
||||
["2010-01-01", "B", 5],
|
||||
["2010-01-03", "B", 2],
|
||||
["2010-01-04", "B", 1],
|
||||
["2010-01-11", "B", 7],
|
||||
["2010-01-14", "B", 3],
|
||||
]
|
||||
|
||||
df = DataFrame(data, columns=["date", "id", "score"])
|
||||
df.date = pd.to_datetime(df.date)
|
||||
|
||||
def f_0(x):
|
||||
return x.set_index("date").resample("D").asfreq()
|
||||
|
||||
msg = "DataFrameGroupBy.apply operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
expected = df.groupby("id").apply(f_0)
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.set_index("date").groupby("id").resample("D").asfreq()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(
|
||||
{
|
||||
"date": date_range(start="2016-01-01", periods=4, freq="W"),
|
||||
"group": [1, 1, 2, 2],
|
||||
"val": [5, 6, 7, 8],
|
||||
}
|
||||
).set_index("date")
|
||||
|
||||
def f_1(x):
|
||||
return x.resample("1D").ffill()
|
||||
|
||||
msg = "DataFrameGroupBy.apply operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
expected = df.groupby("group").apply(f_1)
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby("group").resample("1D").ffill()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem(test_frame):
|
||||
g = test_frame.groupby("A")
|
||||
|
||||
expected = g.B.apply(lambda x: x.resample("2s").mean())
|
||||
|
||||
result = g.resample("2s").B.mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = g.B.resample("2s").mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = g.resample("2s").mean().B
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_multiple():
|
||||
# GH 13174
|
||||
# multiple calls after selection causing an issue with aliasing
|
||||
data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
|
||||
df = DataFrame(data, index=date_range("2016-01-01", periods=2))
|
||||
r = df.groupby("id").resample("1D")
|
||||
result = r["buyer"].count()
|
||||
|
||||
exp_mi = pd.MultiIndex.from_arrays([[1, 2], df.index], names=("id", None))
|
||||
expected = Series(
|
||||
[1, 1],
|
||||
index=exp_mi,
|
||||
name="buyer",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = r["buyer"].count()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api_with_getitem():
|
||||
# GH 17813
|
||||
df = DataFrame(
|
||||
{"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
|
||||
)
|
||||
exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
|
||||
result = df.groupby("id").resample("2D", on="date")["data"].sum()
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
def test_groupby_with_origin():
|
||||
# GH 31809
|
||||
|
||||
freq = "1399min" # prime number that is smaller than 24h
|
||||
start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
|
||||
middle = "1/15/2000 00:00:00"
|
||||
|
||||
rng = date_range(start, end, freq="1231min") # prime number
|
||||
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
|
||||
ts2 = ts[middle:end]
|
||||
|
||||
# proves that grouper without a fixed origin does not work
|
||||
# when dealing with unusual frequencies
|
||||
simple_grouper = pd.Grouper(freq=freq)
|
||||
count_ts = ts.groupby(simple_grouper).agg("count")
|
||||
count_ts = count_ts[middle:end]
|
||||
count_ts2 = ts2.groupby(simple_grouper).agg("count")
|
||||
with pytest.raises(AssertionError, match="Index are different"):
|
||||
tm.assert_index_equal(count_ts.index, count_ts2.index)
|
||||
|
||||
# test origin on 1970-01-01 00:00:00
|
||||
origin = Timestamp(0)
|
||||
adjusted_grouper = pd.Grouper(freq=freq, origin=origin)
|
||||
adjusted_count_ts = ts.groupby(adjusted_grouper).agg("count")
|
||||
adjusted_count_ts = adjusted_count_ts[middle:end]
|
||||
adjusted_count_ts2 = ts2.groupby(adjusted_grouper).agg("count")
|
||||
tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)
|
||||
|
||||
# test origin on 2049-10-18 20:00:00
|
||||
origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
|
||||
adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future)
|
||||
adjusted2_count_ts = ts.groupby(adjusted_grouper2).agg("count")
|
||||
adjusted2_count_ts = adjusted2_count_ts[middle:end]
|
||||
adjusted2_count_ts2 = ts2.groupby(adjusted_grouper2).agg("count")
|
||||
tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)
|
||||
|
||||
# both grouper use an adjusted timestamp that is a multiple of 1399 min
|
||||
# they should be equals even if the adjusted_timestamp is in the future
|
||||
tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
|
||||
|
||||
|
||||
def test_nearest():
|
||||
# GH 17496
|
||||
# Resample nearest
|
||||
index = date_range("1/1/2000", periods=3, freq="min")
|
||||
result = Series(range(3), index=index).resample("20s").nearest()
|
||||
|
||||
expected = Series(
|
||||
[0, 0, 1, 1, 1, 2, 2],
|
||||
index=pd.DatetimeIndex(
|
||||
[
|
||||
"2000-01-01 00:00:00",
|
||||
"2000-01-01 00:00:20",
|
||||
"2000-01-01 00:00:40",
|
||||
"2000-01-01 00:01:00",
|
||||
"2000-01-01 00:01:20",
|
||||
"2000-01-01 00:01:40",
|
||||
"2000-01-01 00:02:00",
|
||||
],
|
||||
dtype="datetime64[ns]",
|
||||
freq="20s",
|
||||
),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
"first",
|
||||
"last",
|
||||
"median",
|
||||
"sem",
|
||||
"sum",
|
||||
"mean",
|
||||
"min",
|
||||
"max",
|
||||
"size",
|
||||
"count",
|
||||
"nearest",
|
||||
"bfill",
|
||||
"ffill",
|
||||
"asfreq",
|
||||
"ohlc",
|
||||
],
|
||||
)
|
||||
def test_methods(f, test_frame):
|
||||
g = test_frame.groupby("A")
|
||||
r = g.resample("2s")
|
||||
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = getattr(r, f)()
|
||||
msg = "DataFrameGroupBy.apply operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_methods_nunique(test_frame):
|
||||
# series only
|
||||
g = test_frame.groupby("A")
|
||||
r = g.resample("2s")
|
||||
result = r.B.nunique()
|
||||
expected = g.B.apply(lambda x: x.resample("2s").nunique())
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["std", "var"])
|
||||
def test_methods_std_var(f, test_frame):
|
||||
g = test_frame.groupby("A")
|
||||
r = g.resample("2s")
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = getattr(r, f)(ddof=1)
|
||||
msg = "DataFrameGroupBy.apply operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply(test_frame):
|
||||
g = test_frame.groupby("A")
|
||||
r = g.resample("2s")
|
||||
|
||||
# reduction
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
expected = g.resample("2s").sum()
|
||||
|
||||
def f_0(x):
|
||||
return x.resample("2s").sum()
|
||||
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = r.apply(f_0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def f_1(x):
|
||||
return x.resample("2s").apply(lambda y: y.sum())
|
||||
|
||||
msg = "DataFrameGroupBy.apply operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = g.apply(f_1)
|
||||
# y.sum() results in int64 instead of int32 on 32-bit architectures
|
||||
expected = expected.astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_with_mutated_index():
|
||||
# GH 15169
|
||||
index = date_range("1-1-2015", "12-31-15", freq="D")
|
||||
df = DataFrame(
|
||||
data={"col1": np.random.default_rng(2).random(len(index))}, index=index
|
||||
)
|
||||
|
||||
def f(x):
|
||||
s = Series([1, 2], index=["a", "b"])
|
||||
return s
|
||||
|
||||
expected = df.groupby(pd.Grouper(freq="ME")).apply(f)
|
||||
|
||||
result = df.resample("ME").apply(f)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A case for series
|
||||
expected = df["col1"].groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f)
|
||||
result = df["col1"].resample("ME").apply(f)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_columns_multilevel():
|
||||
# GH 16231
|
||||
cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")])
|
||||
ind = date_range(start="2017-01-01", freq="15Min", periods=8)
|
||||
df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)
|
||||
agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
|
||||
result = df.resample("h").apply(lambda x: agg_dict[x.name](x))
|
||||
expected = DataFrame(
|
||||
2 * [[0, 0.0]],
|
||||
index=date_range(start="2017-01-01", freq="1h", periods=2),
|
||||
columns=pd.MultiIndex.from_tuples(
|
||||
[("A", "a", "", "one"), ("B", "b", "i", "two")]
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_non_naive_index():
|
||||
def weighted_quantile(series, weights, q):
|
||||
series = series.sort_values()
|
||||
cumsum = weights.reindex(series.index).fillna(0).cumsum()
|
||||
cutoff = cumsum.iloc[-1] * q
|
||||
return series[cumsum >= cutoff].iloc[0]
|
||||
|
||||
times = date_range("2017-6-23 18:00", periods=8, freq="15min", tz="UTC")
|
||||
data = Series([1.0, 1, 1, 1, 1, 2, 2, 0], index=times)
|
||||
weights = Series([160.0, 91, 65, 43, 24, 10, 1, 0], index=times)
|
||||
|
||||
result = data.resample("D").apply(weighted_quantile, weights=weights, q=0.5)
|
||||
ind = date_range(
|
||||
"2017-06-23 00:00:00+00:00", "2017-06-23 00:00:00+00:00", freq="D", tz="UTC"
|
||||
)
|
||||
expected = Series([1.0], index=ind)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_with_label(unit):
|
||||
# GH 13235
|
||||
index = date_range("2000-01-01", freq="2D", periods=5, unit=unit)
|
||||
df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby("col0").resample("1W", label="left").sum()
|
||||
|
||||
mi = [
|
||||
np.array([0, 0, 1, 2], dtype=np.int64),
|
||||
np.array(
|
||||
["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"],
|
||||
dtype=f"M8[{unit}]",
|
||||
),
|
||||
]
|
||||
mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
|
||||
expected = DataFrame(
|
||||
data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_consistency_with_window(test_frame):
|
||||
# consistent return values with window
|
||||
df = test_frame
|
||||
expected = Index([1, 2, 3], name="A")
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby("A").resample("2s").mean()
|
||||
assert result.index.nlevels == 2
|
||||
tm.assert_index_equal(result.index.levels[0], expected)
|
||||
|
||||
result = df.groupby("A").rolling(20).mean()
|
||||
assert result.index.nlevels == 2
|
||||
tm.assert_index_equal(result.index.levels[0], expected)
|
||||
|
||||
|
||||
def test_median_duplicate_columns():
|
||||
# GH 14233
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((20, 3)),
|
||||
columns=list("aaa"),
|
||||
index=date_range("2012-01-01", periods=20, freq="s"),
|
||||
)
|
||||
df2 = df.copy()
|
||||
df2.columns = ["a", "b", "c"]
|
||||
expected = df2.resample("5s").median()
|
||||
result = df.resample("5s").median()
|
||||
expected.columns = result.columns
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_one_column_of_df():
|
||||
# GH: 36951
|
||||
df = DataFrame(
|
||||
{"col": range(10), "col1": range(10, 20)},
|
||||
index=date_range("2012-01-01", periods=10, freq="20min"),
|
||||
)
|
||||
|
||||
# access "col" via getattr -> make sure we handle AttributeError
|
||||
result = df.resample("h").apply(lambda group: group.col.sum())
|
||||
expected = Series(
|
||||
[3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="h")
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# access "col" via _getitem__ -> make sure we handle KeyErrpr
|
||||
result = df.resample("h").apply(lambda group: group["col"].sum())
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg():
|
||||
# GH: 33548
|
||||
df = DataFrame(
|
||||
{
|
||||
"cat": [
|
||||
"cat_1",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
],
|
||||
"num": [5, 20, 22, 3, 4, 30, 10, 50],
|
||||
"date": [
|
||||
"2019-2-1",
|
||||
"2018-02-03",
|
||||
"2020-3-11",
|
||||
"2019-2-2",
|
||||
"2019-2-2",
|
||||
"2018-12-4",
|
||||
"2020-3-11",
|
||||
"2020-12-12",
|
||||
],
|
||||
}
|
||||
)
|
||||
df["date"] = pd.to_datetime(df["date"])
|
||||
|
||||
resampled = df.groupby("cat").resample("YE", on="date")
|
||||
expected = resampled[["num"]].sum()
|
||||
result = resampled.agg({"num": "sum"})
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg_listlike():
|
||||
# GH 42905
|
||||
ts = Timestamp("2021-02-28 00:00:00")
|
||||
df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
|
||||
resampled = df.groupby("class").resample("ME")["value"]
|
||||
result = resampled.agg(["sum", "size"])
|
||||
expected = DataFrame(
|
||||
[[69, 1]],
|
||||
index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
|
||||
columns=["sum", "size"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
|
||||
def test_empty(keys):
|
||||
# GH 26411
|
||||
df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
|
||||
expected = (
|
||||
DataFrame(columns=["a", "b"])
|
||||
.set_index(keys, drop=False)
|
||||
.set_index(TimedeltaIndex([]), append=True)
|
||||
)
|
||||
if len(keys) == 1:
|
||||
expected.index.name = keys[0]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("consolidate", [True, False])
|
||||
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
|
||||
# https://github.com/pandas-dev/pandas/issues/39329
|
||||
|
||||
dates = date_range("2020-01-01", periods=15, freq="D")
|
||||
df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
|
||||
df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
|
||||
df = pd.concat([df1, df2], ignore_index=True)
|
||||
if consolidate:
|
||||
df = df._consolidate()
|
||||
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby(["key"]).resample("W", on="date").min()
|
||||
idx = pd.MultiIndex.from_arrays(
|
||||
[
|
||||
["A"] * 3 + ["B"] * 3,
|
||||
pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2).as_unit(
|
||||
"ns"
|
||||
),
|
||||
],
|
||||
names=["key", "date"],
|
||||
)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"key": ["A"] * 3 + ["B"] * 3,
|
||||
"col1": [0, 5, 12] * 2,
|
||||
"col_object": ["val"] * 3 + [np.nan] * 3,
|
||||
},
|
||||
index=idx,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_with_list_of_keys():
|
||||
# GH 47362
|
||||
df = DataFrame(
|
||||
data={
|
||||
"date": date_range(start="2016-01-01", periods=8),
|
||||
"group": [0, 0, 0, 0, 1, 1, 1, 1],
|
||||
"val": [1, 7, 5, 2, 3, 10, 5, 1],
|
||||
}
|
||||
)
|
||||
result = df.groupby("group").resample("2D", on="date")[["val"]].mean()
|
||||
|
||||
mi_exp = pd.MultiIndex.from_arrays(
|
||||
[[0, 0, 1, 1], df["date"]._values[::2]], names=["group", "date"]
|
||||
)
|
||||
expected = DataFrame(
|
||||
data={
|
||||
"val": [4.0, 3.5, 6.5, 3.0],
|
||||
},
|
||||
index=mi_exp,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
|
||||
def test_resample_no_index(keys):
|
||||
# GH 47705
|
||||
df = DataFrame([], columns=["a", "b", "date"])
|
||||
df["date"] = pd.to_datetime(df["date"])
|
||||
df = df.set_index("date")
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
|
||||
expected = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
|
||||
expected["date"] = pd.to_datetime(expected["date"])
|
||||
expected = expected.set_index("date", append=True, drop=True)
|
||||
if len(keys) == 1:
|
||||
expected.index.name = keys[0]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_no_columns():
|
||||
# GH#52484
|
||||
df = DataFrame(
|
||||
index=Index(
|
||||
pd.to_datetime(
|
||||
["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
|
||||
),
|
||||
name="date",
|
||||
)
|
||||
)
|
||||
result = df.groupby([0, 0, 1]).resample(rule=pd.to_timedelta("06:00:00")).mean()
|
||||
index = pd.to_datetime(
|
||||
[
|
||||
"2018-01-01 00:00:00",
|
||||
"2018-01-01 06:00:00",
|
||||
"2018-01-01 12:00:00",
|
||||
"2018-01-02 00:00:00",
|
||||
]
|
||||
)
|
||||
expected = DataFrame(
|
||||
index=pd.MultiIndex(
|
||||
levels=[np.array([0, 1], dtype=np.intp), index],
|
||||
codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
|
||||
names=[None, "date"],
|
||||
)
|
||||
)
|
||||
|
||||
# GH#52710 - Index comes out as 32-bit on 64-bit Windows
|
||||
tm.assert_frame_equal(result, expected, check_index_type=not is_platform_windows())
|
||||
|
||||
|
||||
def test_groupby_resample_size_all_index_same():
|
||||
# GH 46826
|
||||
df = DataFrame(
|
||||
{"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
|
||||
index=date_range("31/12/2000 18:00", freq="h", periods=12),
|
||||
)
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = df.groupby("A").resample("D").size()
|
||||
|
||||
mi_exp = pd.MultiIndex.from_arrays(
|
||||
[
|
||||
[1, 1, 2, 2],
|
||||
pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]"),
|
||||
],
|
||||
names=["A", None],
|
||||
)
|
||||
expected = Series(
|
||||
3,
|
||||
index=mi_exp,
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys():
|
||||
# GH 50840
|
||||
df = DataFrame(
|
||||
data={
|
||||
"group": [0, 0, 0, 0, 1, 1, 1, 1],
|
||||
"val": [3, 1, 4, 1, 5, 9, 2, 6],
|
||||
},
|
||||
index=date_range(start="2016-01-01", periods=8, name="date"),
|
||||
)
|
||||
result = df.groupby("group").resample("2D")[["val"]].mean()
|
||||
|
||||
mi_exp = pd.MultiIndex.from_arrays(
|
||||
[[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
|
||||
)
|
||||
expected = DataFrame(
|
||||
data={
|
||||
"val": [2.0, 2.5, 7.0, 4.0],
|
||||
},
|
||||
index=mi_exp,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
|
||||
# GH 50876
|
||||
df = DataFrame(
|
||||
data={
|
||||
"group": [0, 0, 0, 0, 1, 1, 1, 1],
|
||||
"first_val": [3, 1, 4, 1, 5, 9, 2, 6],
|
||||
"second_val": [2, 7, 1, 8, 2, 8, 1, 8],
|
||||
"third_val": [1, 4, 1, 4, 2, 1, 3, 5],
|
||||
},
|
||||
index=date_range(start="2016-01-01", periods=8, name="date"),
|
||||
)
|
||||
result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean()
|
||||
|
||||
mi_exp = pd.MultiIndex.from_arrays(
|
||||
[[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
|
||||
)
|
||||
expected = DataFrame(
|
||||
data={
|
||||
"first_val": [2.0, 2.5, 7.0, 4.0],
|
||||
"second_val": [4.5, 4.5, 5.0, 4.5],
|
||||
},
|
||||
index=mi_exp,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
|
||||
# GH 50876
|
||||
df = DataFrame(
|
||||
data={
|
||||
"group": [0, 0, 0, 0, 1, 1, 1, 1],
|
||||
"val": [3, 1, 4, 1, 5, 9, 2, 6],
|
||||
},
|
||||
index=Series(
|
||||
date_range(start="2016-01-01", periods=8),
|
||||
name="date",
|
||||
),
|
||||
)
|
||||
gb = df.groupby("group")
|
||||
rs = gb.resample("2D")
|
||||
with pytest.raises(KeyError, match="Columns not found"):
|
||||
rs[["val_not_in_dataframe"]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kind", ["datetime", "period"])
|
||||
def test_groupby_resample_kind(kind):
|
||||
# GH 24103
|
||||
df = DataFrame(
|
||||
{
|
||||
"datetime": pd.to_datetime(
|
||||
["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"]
|
||||
),
|
||||
"group": ["A", "B", "A", "B"],
|
||||
"value": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
df = df.set_index("datetime")
|
||||
result = df.groupby("group")["value"].resample("D", kind=kind).last()
|
||||
|
||||
dt_level = pd.DatetimeIndex(["2018-11-01", "2018-11-02"])
|
||||
if kind == "period":
|
||||
dt_level = dt_level.to_period(freq="D")
|
||||
expected_index = pd.MultiIndex.from_product(
|
||||
[["A", "B"], dt_level],
|
||||
names=["group", "datetime"],
|
||||
)
|
||||
expected = Series([1, 3, 2, 4], index=expected_index, name="value")
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,390 @@
|
||||
from datetime import datetime
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_series():
|
||||
return Series(
|
||||
np.random.default_rng(2).standard_normal(1000),
|
||||
index=date_range("1/1/2000", periods=1000),
|
||||
)
|
||||
|
||||
|
||||
def test_apply(test_series):
|
||||
grouper = Grouper(freq="YE", label="right", closed="right")
|
||||
|
||||
grouped = test_series.groupby(grouper)
|
||||
|
||||
def f(x):
|
||||
return x.sort_values()[-3:]
|
||||
|
||||
applied = grouped.apply(f)
|
||||
expected = test_series.groupby(lambda x: x.year).apply(f)
|
||||
|
||||
applied.index = applied.index.droplevel(0)
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_series_equal(applied, expected)
|
||||
|
||||
|
||||
def test_count(test_series):
|
||||
test_series[::3] = np.nan
|
||||
|
||||
expected = test_series.groupby(lambda x: x.year).count()
|
||||
|
||||
grouper = Grouper(freq="YE", label="right", closed="right")
|
||||
result = test_series.groupby(grouper).count()
|
||||
expected.index = result.index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = test_series.resample("YE").count()
|
||||
expected.index = result.index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_reduction(test_series):
|
||||
result = test_series.resample("YE", closed="right").prod()
|
||||
|
||||
msg = "using SeriesGroupBy.prod"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
expected = test_series.groupby(lambda x: x.year).agg(np.prod)
|
||||
expected.index = result.index
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_iteration():
|
||||
# #2300
|
||||
N = 1000
|
||||
ind = date_range(start="2000-01-01", freq="D", periods=N)
|
||||
df = DataFrame({"open": 1, "close": 2}, index=ind)
|
||||
tg = Grouper(freq="ME")
|
||||
|
||||
grouper, _ = tg._get_grouper(df)
|
||||
|
||||
# Errors
|
||||
grouped = df.groupby(grouper, group_keys=False)
|
||||
|
||||
def f(df):
|
||||
return df["close"] / df["open"]
|
||||
|
||||
# it works!
|
||||
result = grouped.apply(f)
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
Index([1, 2]),
|
||||
Index(["a", "b"]),
|
||||
Index([1.1, 2.2]),
|
||||
pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
|
||||
],
|
||||
)
|
||||
def test_fails_on_no_datetime_index(index):
|
||||
name = type(index).__name__
|
||||
df = DataFrame({"a": range(len(index))}, index=index)
|
||||
|
||||
msg = (
|
||||
"Only valid with DatetimeIndex, TimedeltaIndex "
|
||||
f"or PeriodIndex, but got an instance of '{name}'"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.groupby(Grouper(freq="D"))
|
||||
|
||||
|
||||
def test_aaa_group_order():
|
||||
# GH 12840
|
||||
# check TimeGrouper perform stable sorts
|
||||
n = 20
|
||||
data = np.random.default_rng(2).standard_normal((n, 4))
|
||||
df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
df["key"] = [
|
||||
datetime(2013, 1, 1),
|
||||
datetime(2013, 1, 2),
|
||||
datetime(2013, 1, 3),
|
||||
datetime(2013, 1, 4),
|
||||
datetime(2013, 1, 5),
|
||||
] * 4
|
||||
grouped = df.groupby(Grouper(key="key", freq="D"))
|
||||
|
||||
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5])
|
||||
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5])
|
||||
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5])
|
||||
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5])
|
||||
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5])
|
||||
|
||||
|
||||
def test_aggregate_normal(resample_method):
|
||||
"""Check TimeGrouper's aggregation is identical as normal groupby."""
|
||||
|
||||
data = np.random.default_rng(2).standard_normal((20, 4))
|
||||
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
normal_df["key"] = [1, 2, 3, 4, 5] * 4
|
||||
|
||||
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
dt_df["key"] = Index(
|
||||
[
|
||||
datetime(2013, 1, 1),
|
||||
datetime(2013, 1, 2),
|
||||
datetime(2013, 1, 3),
|
||||
datetime(2013, 1, 4),
|
||||
datetime(2013, 1, 5),
|
||||
]
|
||||
* 4,
|
||||
dtype="M8[ns]",
|
||||
)
|
||||
|
||||
normal_grouped = normal_df.groupby("key")
|
||||
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
|
||||
|
||||
expected = getattr(normal_grouped, resample_method)()
|
||||
dt_result = getattr(dt_grouped, resample_method)()
|
||||
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
|
||||
tm.assert_equal(expected, dt_result)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="if TimeGrouper is used included, 'nth' doesn't work yet")
|
||||
def test_aggregate_nth():
|
||||
"""Check TimeGrouper's aggregation is identical as normal groupby."""
|
||||
|
||||
data = np.random.default_rng(2).standard_normal((20, 4))
|
||||
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
normal_df["key"] = [1, 2, 3, 4, 5] * 4
|
||||
|
||||
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
dt_df["key"] = [
|
||||
datetime(2013, 1, 1),
|
||||
datetime(2013, 1, 2),
|
||||
datetime(2013, 1, 3),
|
||||
datetime(2013, 1, 4),
|
||||
datetime(2013, 1, 5),
|
||||
] * 4
|
||||
|
||||
normal_grouped = normal_df.groupby("key")
|
||||
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
|
||||
|
||||
expected = normal_grouped.nth(3)
|
||||
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
|
||||
dt_result = dt_grouped.nth(3)
|
||||
tm.assert_frame_equal(expected, dt_result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, method_args, unit",
|
||||
[
|
||||
("sum", {}, 0),
|
||||
("sum", {"min_count": 0}, 0),
|
||||
("sum", {"min_count": 1}, np.nan),
|
||||
("prod", {}, 1),
|
||||
("prod", {"min_count": 0}, 1),
|
||||
("prod", {"min_count": 1}, np.nan),
|
||||
],
|
||||
)
|
||||
def test_resample_entirely_nat_window(method, method_args, unit):
|
||||
ser = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4))
|
||||
result = methodcaller(method, **method_args)(ser.resample("2d"))
|
||||
|
||||
exp_dti = pd.DatetimeIndex(["2017-01-01", "2017-01-03"], dtype="M8[ns]", freq="2D")
|
||||
expected = Series([0.0, unit], index=exp_dti)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func, fill_value",
|
||||
[("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
|
||||
)
|
||||
def test_aggregate_with_nat(func, fill_value):
|
||||
# check TimeGrouper's aggregation is identical as normal groupby
|
||||
# if NaT is included, 'var', 'std', 'mean', 'first','last'
|
||||
# and 'nth' doesn't work yet
|
||||
|
||||
n = 20
|
||||
data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")
|
||||
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
|
||||
|
||||
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
dt_df["key"] = Index(
|
||||
[
|
||||
datetime(2013, 1, 1),
|
||||
datetime(2013, 1, 2),
|
||||
pd.NaT,
|
||||
datetime(2013, 1, 4),
|
||||
datetime(2013, 1, 5),
|
||||
]
|
||||
* 4,
|
||||
dtype="M8[ns]",
|
||||
)
|
||||
|
||||
normal_grouped = normal_df.groupby("key")
|
||||
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
|
||||
|
||||
normal_result = getattr(normal_grouped, func)()
|
||||
dt_result = getattr(dt_grouped, func)()
|
||||
|
||||
pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
|
||||
expected = pd.concat([normal_result, pad])
|
||||
expected = expected.sort_index()
|
||||
dti = date_range(
|
||||
start="2013-01-01",
|
||||
freq="D",
|
||||
periods=5,
|
||||
name="key",
|
||||
unit=dt_df["key"]._values.unit,
|
||||
)
|
||||
expected.index = dti._with_freq(None) # TODO: is this desired?
|
||||
tm.assert_frame_equal(expected, dt_result)
|
||||
assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_aggregate_with_nat_size():
|
||||
# GH 9925
|
||||
n = 20
|
||||
data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")
|
||||
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
|
||||
|
||||
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
dt_df["key"] = Index(
|
||||
[
|
||||
datetime(2013, 1, 1),
|
||||
datetime(2013, 1, 2),
|
||||
pd.NaT,
|
||||
datetime(2013, 1, 4),
|
||||
datetime(2013, 1, 5),
|
||||
]
|
||||
* 4,
|
||||
dtype="M8[ns]",
|
||||
)
|
||||
|
||||
normal_grouped = normal_df.groupby("key")
|
||||
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
|
||||
|
||||
normal_result = normal_grouped.size()
|
||||
dt_result = dt_grouped.size()
|
||||
|
||||
pad = Series([0], index=[3])
|
||||
expected = pd.concat([normal_result, pad])
|
||||
expected = expected.sort_index()
|
||||
expected.index = date_range(
|
||||
start="2013-01-01",
|
||||
freq="D",
|
||||
periods=5,
|
||||
name="key",
|
||||
unit=dt_df["key"]._values.unit,
|
||||
)._with_freq(None)
|
||||
tm.assert_series_equal(expected, dt_result)
|
||||
assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_repr():
|
||||
# GH18203
|
||||
result = repr(Grouper(key="A", freq="h"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, dropna=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', origin='start_day')"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
result = repr(Grouper(key="A", freq="h", origin="2000-01-01"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, dropna=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, method_args, expected_values",
|
||||
[
|
||||
("sum", {}, [1, 0, 1]),
|
||||
("sum", {"min_count": 0}, [1, 0, 1]),
|
||||
("sum", {"min_count": 1}, [1, np.nan, 1]),
|
||||
("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
|
||||
("prod", {}, [1, 1, 1]),
|
||||
("prod", {"min_count": 0}, [1, 1, 1]),
|
||||
("prod", {"min_count": 1}, [1, np.nan, 1]),
|
||||
("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
|
||||
],
|
||||
)
|
||||
def test_upsample_sum(method, method_args, expected_values):
|
||||
ser = Series(1, index=date_range("2017", periods=2, freq="h"))
|
||||
resampled = ser.resample("30min")
|
||||
index = pd.DatetimeIndex(
|
||||
["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
|
||||
dtype="M8[ns]",
|
||||
freq="30min",
|
||||
)
|
||||
result = methodcaller(method, **method_args)(resampled)
|
||||
expected = Series(expected_values, index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_interpolate():
|
||||
# GH 35325
|
||||
d = {"price": [10, 11, 9], "volume": [50, 60, 50]}
|
||||
|
||||
df = DataFrame(d)
|
||||
|
||||
df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")
|
||||
|
||||
msg = "DataFrameGroupBy.resample operated on the grouping columns"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
result = (
|
||||
df.set_index("week_starting")
|
||||
.groupby("volume")
|
||||
.resample("1D")
|
||||
.interpolate(method="linear")
|
||||
)
|
||||
|
||||
volume = [50] * 15 + [60]
|
||||
week_starting = list(date_range("2018-01-07", "2018-01-21")) + [
|
||||
Timestamp("2018-01-14")
|
||||
]
|
||||
expected_ind = pd.MultiIndex.from_arrays(
|
||||
[volume, week_starting],
|
||||
names=["volume", "week_starting"],
|
||||
)
|
||||
|
||||
expected = DataFrame(
|
||||
data={
|
||||
"price": [
|
||||
10.0,
|
||||
9.928571428571429,
|
||||
9.857142857142858,
|
||||
9.785714285714286,
|
||||
9.714285714285714,
|
||||
9.642857142857142,
|
||||
9.571428571428571,
|
||||
9.5,
|
||||
9.428571428571429,
|
||||
9.357142857142858,
|
||||
9.285714285714286,
|
||||
9.214285714285714,
|
||||
9.142857142857142,
|
||||
9.071428571428571,
|
||||
9.0,
|
||||
11.0,
|
||||
],
|
||||
"volume": [50.0] * 15 + [60],
|
||||
},
|
||||
index=expected_ind,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,220 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
|
||||
|
||||
def test_asfreq_bug():
|
||||
df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
|
||||
result = df.resample("1min").asfreq()
|
||||
expected = DataFrame(
|
||||
data=[1, np.nan, np.nan, 3],
|
||||
index=timedelta_range("0 day", periods=4, freq="1min"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_nat():
|
||||
# GH 13223
|
||||
index = pd.to_timedelta(["0s", pd.NaT, "2s"])
|
||||
result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean()
|
||||
expected = DataFrame(
|
||||
{"value": [2.5, np.nan, 5.0]},
|
||||
index=timedelta_range("0 day", periods=3, freq="1s"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_as_freq_with_subperiod():
|
||||
# GH 13022
|
||||
index = timedelta_range("00:00:00", "00:10:00", freq="5min")
|
||||
df = DataFrame(data={"value": [1, 5, 10]}, index=index)
|
||||
result = df.resample("2min").asfreq()
|
||||
expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
|
||||
expected = DataFrame(
|
||||
data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2min")
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_timedeltas():
|
||||
expected = DataFrame({"A": np.arange(1480)})
|
||||
expected = expected.groupby(expected.index // 30).sum()
|
||||
expected.index = timedelta_range("0 days", freq="30min", periods=50)
|
||||
|
||||
df = DataFrame(
|
||||
{"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="min")
|
||||
)
|
||||
result = df.resample("30min").sum()
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
s = df["A"]
|
||||
result = s.resample("30min").sum()
|
||||
tm.assert_series_equal(result, expected["A"])
|
||||
|
||||
|
||||
def test_resample_single_period_timedelta():
|
||||
s = Series(list(range(5)), index=timedelta_range("1 day", freq="s", periods=5))
|
||||
result = s.resample("2s").sum()
|
||||
expected = Series([1, 5, 4], index=timedelta_range("1 day", freq="2s", periods=3))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_idempotency():
|
||||
# GH 12072
|
||||
index = timedelta_range("0", periods=9, freq="10ms")
|
||||
series = Series(range(9), index=index)
|
||||
result = series.resample("10ms").mean()
|
||||
expected = series.astype(float)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_offset_with_timedeltaindex():
|
||||
# GH 10530 & 31809
|
||||
rng = timedelta_range(start="0s", periods=25, freq="s")
|
||||
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
|
||||
|
||||
with_base = ts.resample("2s", offset="5s").mean()
|
||||
without_base = ts.resample("2s").mean()
|
||||
|
||||
exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
|
||||
exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
|
||||
|
||||
tm.assert_index_equal(without_base.index, exp_without_base)
|
||||
tm.assert_index_equal(with_base.index, exp_with_base)
|
||||
|
||||
|
||||
def test_resample_categorical_data_with_timedeltaindex():
|
||||
# GH #12169
|
||||
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
|
||||
df["Group"] = df["Group_obj"].astype("category")
|
||||
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
|
||||
exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit(
|
||||
"ns"
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
|
||||
index=exp_tdi,
|
||||
)
|
||||
expected = expected.reindex(["Group_obj", "Group"], axis=1)
|
||||
expected["Group"] = expected["Group_obj"].astype("category")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_values():
|
||||
# GH 13119
|
||||
# check that timedelta dtype is preserved when NaT values are
|
||||
# introduced by the resampling
|
||||
|
||||
times = timedelta_range("1 day", "6 day", freq="4D")
|
||||
df = DataFrame({"time": times}, index=times)
|
||||
|
||||
times2 = timedelta_range("1 day", "6 day", freq="2D")
|
||||
exp = Series(times2, index=times2, name="time")
|
||||
exp.iloc[1] = pd.NaT
|
||||
|
||||
res = df.resample("2D").first()["time"]
|
||||
tm.assert_series_equal(res, exp)
|
||||
res = df["time"].resample("2D").first()
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        ("8h", "21h59min50s", "10s", "3h"),  # GH 30353 example
        ("3h", "22h", "1h", "5h"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # cases that already passed before GH 33498:
        ("8h", "21h59min50s", "10s", "2h"),
        ("0h", "21h59min50s", "10s", "3h"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    # GH 33498: resampling must not emit a spurious extra bin past the
    # end of the data.
    source_index = timedelta_range(start=start, end=end, freq=freq)
    ser = Series(np.arange(len(source_index)), index=source_index)

    resampled = ser.resample(resample_freq).min()

    expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
    tm.assert_index_equal(resampled.index, expected_index)
    assert resampled.index.freq == expected_index.freq
    # a trailing empty bin would surface as NaN in the last position
    assert not np.isnan(resampled.iloc[-1])
|
||||
|
||||
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    # GH 10603: resampling a timedelta-indexed frame must not yield empty
    # groups, both with unique and with duplicated column labels.
    frame = DataFrame(
        np.random.default_rng(2).normal(size=(10000, 4)),
        index=timedelta_range(start="0s", periods=10000, freq="3906250ns"),
    )
    if duplicates:
        # exercise the non-unique-columns code path
        frame.columns = ["A", "B", "A", "C"]

    result = frame.loc["1s":, :].resample("3s").apply(lambda grp: len(grp))

    # Twelve full 3s buckets of 768 rows each, then one partial bucket.
    counts = [[768] * 4] * 12 + [[528] * 4]
    expected = DataFrame(
        counts,
        index=timedelta_range(start="1s", periods=13, freq="3s"),
    )
    expected.columns = frame.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_resample_quantile_timedelta(unit):
    # GH: 29485 - quantile over resampled timedelta data keeps the
    # original timedelta resolution in the result.
    td_dtype = np.dtype(f"m8[{unit}]")
    values = pd.to_timedelta(np.arange(4), unit="s").astype(td_dtype)
    frame = DataFrame(
        {"value": values},
        index=pd.date_range("20200101", periods=4, tz="UTC"),
    )

    result = frame.resample("2D").quantile(0.99)

    expected_values = [
        pd.Timedelta("0 days 00:00:00.990000"),
        pd.Timedelta("0 days 00:00:02.990000"),
    ]
    expected = DataFrame(
        {"value": expected_values},
        index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"),
    ).astype(td_dtype)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_closed_right():
    # GH#45414: with closed="right"/label="right" on a TimedeltaIndex,
    # each point lands in the bin whose right edge it falls on.
    points = [pd.Timedelta(seconds=120 + offset * 30) for offset in range(10)]
    ser = Series(range(10), index=pd.Index(points))

    result = ser.resample("min", closed="right", label="right").sum()

    expected_index = pd.TimedeltaIndex(
        [pd.Timedelta(seconds=120 + step * 60) for step in range(6)], freq="min"
    )
    expected = Series([0, 3, 7, 11, 15, 9], index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@td.skip_if_no("pyarrow")
def test_arrow_duration_resample():
    # GH 56371: resampling over a pyarrow-backed duration index works and
    # round-trips the data.
    duration_idx = pd.Index(
        timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]"
    )
    expected = Series(np.arange(5, dtype=np.float64), index=duration_idx)

    # Daily resample of already-daily data is the identity transform.
    result = expected.resample("1D").mean()
    tm.assert_series_equal(result, expected)
Reference in New Issue
Block a user