venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,258 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.categorical import CategoricalAccessor
|
||||
from pandas.core.indexes.accessors import Properties
|
||||
|
||||
|
||||
class TestCatAccessor:
    """Tests for ``Series.cat`` and for ``.dt`` on categorical Series."""

    @pytest.mark.parametrize(
        "method",
        [
            lambda x: x.cat.set_categories([1, 2, 3]),
            lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
            lambda x: x.cat.rename_categories([1, 2, 3]),
            lambda x: x.cat.remove_unused_categories(),
            lambda x: x.cat.remove_categories([2]),
            lambda x: x.cat.add_categories([4]),
            lambda x: x.cat.as_ordered(),
            lambda x: x.cat.as_unordered(),
        ],
    )
    def test_getname_categorical_accessor(self, method):
        """Each ``.cat`` method must return a Series that keeps the name."""
        # GH#17509
        ser = Series([1, 2, 3], name="A").astype("category")
        expected = "A"
        result = method(ser).name
        assert result == expected

    def test_cat_accessor(self):
        """Check categories, ordered flag, set_categories and unused removal."""
        ser = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
        # The original passed a stray ``False`` as the assert *message*.
        assert not ser.cat.ordered

        exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])

        res = ser.cat.set_categories(["b", "a"])
        tm.assert_categorical_equal(res.values, exp)

        ser[:] = "a"
        ser = ser.cat.remove_unused_categories()
        tm.assert_index_equal(ser.cat.categories, Index(["a"]))

    def test_cat_accessor_api(self):
        """``Series.cat`` is the accessor class; non-categoricals reject it."""
        # GH#9322

        assert Series.cat is CategoricalAccessor
        ser = Series(list("aabbcde")).astype("category")
        assert isinstance(ser.cat, CategoricalAccessor)

        invalid = Series([1])
        # ``match`` is a regex, so the dot is escaped to match literally.
        with pytest.raises(AttributeError, match=r"only use \.cat accessor"):
            invalid.cat
        assert not hasattr(invalid, "cat")

    def test_cat_accessor_no_new_attributes(self):
        """Setting an unknown attribute on the accessor must raise."""
        # https://github.com/pandas-dev/pandas/issues/10673
        cat = Series(list("aabbcde")).astype("category")
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            cat.cat.xlabel = "a"

    def test_categorical_delegations(self):
        """``.cat`` rejects non-category data and delegates to the Categorical."""
        # invalid accessor
        msg = r"Can only use \.cat accessor with a 'category' dtype"
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat()
        with pytest.raises(AttributeError, match=msg):
            Series(["a", "b", "c"]).cat
        with pytest.raises(AttributeError, match=msg):
            Series(np.arange(5.0)).cat
        with pytest.raises(AttributeError, match=msg):
            Series([Timestamp("20130101")]).cat

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'remove_unused_categories()' to
        # the categorical
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        ser = ser.cat.rename_categories([1, 2, 3])
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(ser.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype="int8")
        tm.assert_series_equal(ser.cat.codes, exp_codes)

        assert ser.cat.ordered
        ser = ser.cat.as_unordered()
        assert not ser.cat.ordered

        ser = ser.cat.as_ordered()
        assert ser.cat.ordered

        # reorder
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        ser = ser.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(ser.__array__(), exp_values)

        # remove unused categories
        ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        ser = ser.cat.remove_unused_categories()
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(ser.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        msg = "'Series' object has no attribute 'set_categories'"
        with pytest.raises(AttributeError, match=msg):
            ser.set_categories([4, 3, 2, 1])

        # right: ser.cat.set_categories([4,3,2,1])

        # GH#18862 (let Series.cat.rename_categories take callables)
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = ser.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "idx",
        [
            date_range("1/1/2015", periods=5),
            date_range("1/1/2015", periods=5, tz="MET"),
            period_range("1/1/2015", freq="D", periods=5),
            timedelta_range("1 days", "10 days"),
        ],
    )
    def test_dt_accessor_api_for_categorical(self, idx):
        """``.dt`` on a categorical Series must mirror ``.dt`` on the plain one.

        Every public method and every ``_datetimelike_ops`` attribute is
        evaluated on both Series and the results are compared.
        """
        # https://github.com/pandas-dev/pandas/issues/10661

        ser = Series(idx)
        cat = ser.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        attr_names = type(ser._values)._datetimelike_ops

        assert isinstance(cat.dt, Properties)

        # (name, positional args, keyword args) for methods that need arguments
        special_func_defs = [
            ("strftime", ("%Y-%m-%d",), {}),
            ("round", ("D",), {}),
            ("floor", ("D",), {}),
            ("ceil", ("D",), {}),
            ("asfreq", ("D",), {}),
            # one-element tuple; a bare ("s") is just the string "s"
            ("as_unit", ("s",), {}),
        ]
        if idx.dtype == "M8[ns]":
            # exclude dt64tz since that is already localized and would raise
            tup = ("tz_localize", ("UTC",), {})
            special_func_defs.append(tup)
        elif idx.dtype.kind == "M":
            # exclude dt64 since that is not localized so would raise
            tup = ("tz_convert", ("EST",), {})
            special_func_defs.append(tup)

        _special_func_names = [f[0] for f in special_func_defs]

        _ignore_names = ["components", "tz_localize", "tz_convert"]

        # Computed once instead of once per comprehension pass.
        dt_names = dir(ser.dt)

        func_names = [
            fname
            for fname in dt_names
            if not (
                fname.startswith("_")
                or fname in attr_names
                or fname in _special_func_names
                or fname in _ignore_names
            )
        ]

        func_defs = [(fname, (), {}) for fname in func_names]
        func_defs.extend(f_def for f_def in special_func_defs if f_def[0] in dt_names)

        for func, args, kwargs in func_defs:
            warn_cls = []
            if func == "to_period" and getattr(idx, "tz", None) is not None:
                # dropping TZ
                warn_cls.append(UserWarning)
            if func == "to_pydatetime":
                # deprecated to return Index[object]
                warn_cls.append(FutureWarning)
            if warn_cls:
                warn_cls = tuple(warn_cls)
            else:
                warn_cls = None
            with tm.assert_produces_warning(warn_cls):
                res = getattr(cat.dt, func)(*args, **kwargs)
                exp = getattr(ser.dt, func)(*args, **kwargs)

            tm.assert_equal(res, exp)

        for attr in attr_names:
            res = getattr(cat.dt, attr)
            exp = getattr(ser.dt, attr)

            tm.assert_equal(res, exp)

    def test_dt_accessor_api_for_categorical_invalid(self):
        """A categorical of integers must not expose ``.dt``."""
        invalid = Series([1, 2, 3]).astype("category")
        # ``match`` is a regex, so the dot is escaped to match literally.
        msg = r"Can only use \.dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        # The accessor under test is ``dt``; the original only checked "str".
        assert not hasattr(invalid, "dt")
        assert not hasattr(invalid, "str")

    def test_set_categories_setitem(self):
        """Re-assigning renamed categorical columns must not produce NaN."""
        # GH#43334

        df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")

        df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"])
        df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"])

        # values should not be coerced to NaN
        assert list(df["Sex"]) == ["female", "male", "male"]
        assert list(df["Survived"]) == ["Yes", "No", "Yes"]

        df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
        df["Survived"] = Categorical(
            df["Survived"], categories=["No", "Yes"], ordered=False
        )

        # values should not be coerced to NaN
        assert list(df["Sex"]) == ["female", "male", "male"]
        assert list(df["Survived"]) == ["Yes", "No", "Yes"]

    def test_categorical_of_booleans_is_boolean(self):
        """Categories built from booleans must have the NumPy bool dtype."""
        # https://github.com/pandas-dev/pandas/issues/46313
        df = DataFrame(
            {"int_cat": [1, 2, 3], "bool_cat": [True, False, False]}, dtype="category"
        )
        value = df["bool_cat"].cat.categories.dtype
        expected = np.dtype(np.bool_)
        # Compare by equality; identity of dtype objects is incidental.
        assert value == expected
|
@ -0,0 +1,843 @@
|
||||
import calendar
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
import locale
|
||||
import unicodedata
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.timezones import maybe_get_tz
|
||||
from pandas.errors import SettingWithCopyError
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_integer_dtype,
|
||||
is_list_like,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
DatetimeArray,
|
||||
PeriodArray,
|
||||
TimedeltaArray,
|
||||
)
|
||||
|
||||
# Names expected on ``Series.dt`` for each datetimelike backing array:
# the array's own ``_datetimelike_ops`` plus the accessor methods that the
# tests in this module look up by name.

# Period-backed Series
ok_for_period = PeriodArray._datetimelike_ops
ok_for_period_methods = [
    "strftime",
    "to_timestamp",
    "asfreq",
]

# datetime64-backed Series
ok_for_dt = DatetimeArray._datetimelike_ops
ok_for_dt_methods = [
    "to_period", "to_pydatetime",
    "tz_localize", "tz_convert",
    "normalize", "strftime",
    "round", "floor", "ceil",
    "day_name", "month_name",
    "isocalendar", "as_unit",
]  # fmt: skip

# timedelta64-backed Series
ok_for_td = TimedeltaArray._datetimelike_ops
ok_for_td_methods = [
    "components", "to_pytimedelta", "total_seconds",
    "round", "floor", "ceil",
    "as_unit",
]  # fmt: skip
|
||||
|
||||
|
||||
def get_dir(ser):
    """Return the sorted, de-duplicated public names listed by ``ser.dt``."""
    # check limited display api
    public_names = {
        name for name in ser.dt.__dir__() if not name.startswith("_")
    }
    return sorted(public_names)
|
||||
|
||||
|
||||
class TestSeriesDatetimeValues:
|
||||
def _compare(self, ser, name):
|
||||
# GH 7207, 11128
|
||||
# test .dt namespace accessor
|
||||
|
||||
def get_expected(ser, prop):
|
||||
result = getattr(Index(ser._values), prop)
|
||||
if isinstance(result, np.ndarray):
|
||||
if is_integer_dtype(result):
|
||||
result = result.astype("int64")
|
||||
elif not is_list_like(result) or isinstance(result, DataFrame):
|
||||
return result
|
||||
return Series(result, index=ser.index, name=ser.name)
|
||||
|
||||
left = getattr(ser.dt, name)
|
||||
right = get_expected(ser, name)
|
||||
if not (is_list_like(left) and is_list_like(right)):
|
||||
assert left == right
|
||||
elif isinstance(left, DataFrame):
|
||||
tm.assert_frame_equal(left, right)
|
||||
else:
|
||||
tm.assert_series_equal(left, right)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "s", "ms"])
def test_dt_namespace_accessor_datetime64(self, freq):
    """Exercise the ``.dt`` namespace on a tz-naive datetime64 Series."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # datetimeindex
    ser = Series(date_range("20130101", periods=5, freq=freq), name="xxx")

    # "freq" is checked on its own further down
    for field in ok_for_dt:
        if field == "freq":
            continue
        self._compare(ser, field)

    # every listed method name must resolve on the accessor
    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        as_py = ser.dt.to_pydatetime()
    assert isinstance(as_py, np.ndarray)
    assert as_py.dtype == object

    localized = ser.dt.tz_localize("US/Eastern")
    eastern_index = DatetimeIndex(ser.values).tz_localize("US/Eastern")
    tm.assert_series_equal(
        localized, Series(eastern_index, index=ser.index, name="xxx")
    )

    assert str(localized.dt.tz) == "US/Eastern"
    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq

    # let's localize, then convert
    converted = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
    utc_index = DatetimeIndex(ser.values).tz_localize("UTC")
    expected = Series(
        utc_index.tz_convert("US/Eastern"), index=ser.index, name="xxx"
    )
    tm.assert_series_equal(converted, expected)
|
||||
|
||||
def test_dt_namespace_accessor_datetime64tz(self):
    """Exercise the ``.dt`` namespace on a tz-aware datetime64 Series."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # datetimeindex with tz
    aware = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")

    # "freq" is checked on its own further down
    for field in ok_for_dt:
        if field == "freq":
            continue
        self._compare(aware, field)

    # every listed method name must resolve on the accessor
    for method_name in ok_for_dt_methods:
        getattr(aware.dt, method_name)

    msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        as_py = aware.dt.to_pydatetime()
    assert isinstance(as_py, np.ndarray)
    assert as_py.dtype == object

    converted = aware.dt.tz_convert("CET")
    cet_values = aware._values.tz_convert("CET")
    tm.assert_series_equal(
        converted, Series(cet_values, index=aware.index, name="xxx")
    )

    assert str(converted.dt.tz) == "CET"
    assert aware.dt.freq == DatetimeIndex(aware.values, freq="infer").freq
|
||||
|
||||
def test_dt_namespace_accessor_timedelta(self):
    """Exercise the ``.dt`` namespace on several timedelta64 Series."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # timedelta index
    daily = Series(
        timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
    )
    by_second = Series(
        timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"
    )
    by_milli = Series(
        timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
        name="xxx",
    )

    for ser in (daily, by_second, by_milli):
        # "freq" is checked on its own at the end of each pass
        for field in ok_for_td:
            if field == "freq":
                continue
            self._compare(ser, field)

        # every listed method name must resolve on the accessor
        for method_name in ok_for_td_methods:
            getattr(ser.dt, method_name)

        components = ser.dt.components
        assert isinstance(components, DataFrame)
        tm.assert_index_equal(components.index, ser.index)

        as_py = ser.dt.to_pytimedelta()
        assert isinstance(as_py, np.ndarray)
        assert as_py.dtype == object

        seconds = ser.dt.total_seconds()
        assert isinstance(seconds, Series)
        assert seconds.dtype == "float64"

        assert ser.dt.freq == TimedeltaIndex(ser.values, freq="infer").freq
|
||||
|
||||
def test_dt_namespace_accessor_period(self):
    """Exercise the ``.dt`` namespace on a Period-backed Series."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # periodindex
    ser = Series(period_range("20130101", periods=5, freq="D"), name="xxx")

    # "freq" is checked on its own at the end
    for field in ok_for_period:
        if field == "freq":
            continue
        self._compare(ser, field)

    # every listed method name must resolve on the accessor
    for method_name in ok_for_period_methods:
        getattr(ser.dt, method_name)

    assert ser.dt.freq == PeriodIndex(ser.values).freq
|
||||
|
||||
def test_dt_namespace_accessor_index_and_values(self):
|
||||
# both
|
||||
index = date_range("20130101", periods=3, freq="D")
|
||||
dti = date_range("20140204", periods=3, freq="s")
|
||||
ser = Series(dti, index=index, name="xxx")
|
||||
exp = Series(
|
||||
np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx"
|
||||
)
|
||||
tm.assert_series_equal(ser.dt.year, exp)
|
||||
|
||||
exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx")
|
||||
tm.assert_series_equal(ser.dt.month, exp)
|
||||
|
||||
exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx")
|
||||
tm.assert_series_equal(ser.dt.second, exp)
|
||||
|
||||
exp = Series([ser.iloc[0]] * 3, index=index, name="xxx")
|
||||
tm.assert_series_equal(ser.dt.normalize(), exp)
|
||||
|
||||
def test_dt_accessor_limited_display_api(self):
    """``dir(ser.dt)`` must list exactly the expected public names."""
    expected_dt = sorted(set(ok_for_dt + ok_for_dt_methods))
    expected_period = sorted(set(ok_for_period + ok_for_period_methods))

    # tznaive
    naive = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    tm.assert_almost_equal(get_dir(naive), expected_dt)

    # tzaware
    aware = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
    aware = aware.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
    tm.assert_almost_equal(get_dir(aware), expected_dt)

    # Period
    object_idx = period_range(
        "20130101", periods=5, freq="D", name="xxx"
    ).astype(object)
    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        periods = Series(object_idx)
    tm.assert_almost_equal(get_dir(periods), expected_period)
|
||||
|
||||
def test_dt_accessor_ambiguous_freq_conversions(self):
|
||||
# GH#11295
|
||||
# ambiguous time error on the conversions
|
||||
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
|
||||
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
|
||||
|
||||
exp_values = date_range(
|
||||
"2015-01-01", "2016-01-01", freq="min", tz="UTC"
|
||||
).tz_convert("America/Chicago")
|
||||
# freq not preserved by tz_localize above
|
||||
exp_values = exp_values._with_freq(None)
|
||||
expected = Series(exp_values, name="xxx")
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write):
    """Assigning to a ``.dt`` property, or into its result, must be rejected."""
    # no setting allowed
    ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    with pytest.raises(ValueError, match="modifications"):
        ser.dt.hour = 5

    # trying to set a copy
    msg = "modifications to a property of a datetimelike.+not supported"
    with pd.option_context("chained_assignment", "raise"):
        # pick the expectation that matches the active copy-on-write mode
        if using_copy_on_write:
            expectation = tm.raises_chained_assignment_error()
        elif warn_copy_on_write:
            expectation = tm.assert_produces_warning(
                FutureWarning, match="ChainedAssignmentError"
            )
        else:
            expectation = pytest.raises(SettingWithCopyError, match=msg)

        with expectation:
            ser.dt.hour[0] = 5
|
||||
|
||||
@pytest.mark.parametrize(
    "method, dates",
    [
        ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
        ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
        ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
    ],
)
def test_dt_round(self, method, dates):
    """``round``/``floor``/``ceil`` to day resolution give the listed dates."""
    # round
    stamps = pd.to_datetime(
        ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
    )
    ser = Series(stamps, name="xxx")

    rounding = getattr(ser.dt, method)
    outcome = rounding("D")

    tm.assert_series_equal(outcome, Series(pd.to_datetime(dates), name="xxx"))
|
||||
|
||||
def test_dt_round_tz(self):
|
||||
ser = Series(
|
||||
pd.to_datetime(
|
||||
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
|
||||
),
|
||||
name="xxx",
|
||||
)
|
||||
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")
|
||||
|
||||
exp_values = pd.to_datetime(
|
||||
["2012-01-01", "2012-01-01", "2012-01-01"]
|
||||
).tz_localize("US/Eastern")
|
||||
expected = Series(exp_values, name="xxx")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("method", ["ceil", "round", "floor"])
def test_dt_round_tz_ambiguous(self, method):
    """Check each ``ambiguous=`` option when rounding in Europe/Madrid."""
    # GH 18946 round near "fall back" DST
    utc_stamps = [
        pd.to_datetime(text, utc=True)
        for text in (
            "2017-10-29 02:00:00+02:00",
            "2017-10-29 02:00:00+01:00",
            "2017-10-29 03:00:00+01:00",
        )
    ]
    df1 = DataFrame(utc_stamps, columns=["date"])
    df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")

    def apply_hourly(**options):
        # call the rounding method under test at hour resolution
        return getattr(df1.date.dt, method)("h", **options)

    # infer
    unchanged = df1["date"]
    tm.assert_series_equal(apply_hourly(ambiguous="infer"), unchanged)

    # bool-array
    tm.assert_series_equal(
        apply_hourly(ambiguous=[True, False, False]), unchanged
    )

    # NaT
    result = apply_hourly(ambiguous="NaT")
    with_nat = df1["date"].copy()
    with_nat.iloc[0:2] = pd.NaT
    tm.assert_series_equal(result, with_nat)

    # raise
    with tm.external_error_raised(pytz.AmbiguousTimeError):
        apply_hourly(ambiguous="raise")
|
||||
|
||||
@pytest.mark.parametrize(
    "method, ts_str, freq",
    [
        ["ceil", "2018-03-11 01:59:00-0600", "5min"],
        ["round", "2018-03-11 01:59:00-0600", "5min"],
        ["floor", "2018-03-11 03:01:00-0500", "2h"],
    ],
)
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
    """Check each ``nonexistent=`` option when rounding in America/Chicago."""
    # GH 23324 round near "spring forward" DST
    ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")])
    rounding = getattr(ser.dt, method)

    shifted = rounding(freq, nonexistent="shift_forward")
    wanted = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
    tm.assert_series_equal(shifted, wanted)

    as_nat = rounding(freq, nonexistent="NaT")
    wanted = Series([pd.NaT]).dt.tz_localize(as_nat.dt.tz)
    tm.assert_series_equal(as_nat, wanted)

    with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
        rounding(freq, nonexistent="raise")
|
||||
|
||||
@pytest.mark.parametrize("freq", ["ns", "us", "1000us"])
def test_dt_round_nonnano_higher_resolution_no_op(self, freq):
    """Rounding ms data to the given freqs returns equal values in new memory."""
    # GH 52761
    raw = ["2020-05-31 08:00:00", "2000-12-31 04:00:05", "1800-03-14 07:30:20"]
    ser = Series(raw, dtype="datetime64[ms]")
    snapshot = ser.copy()

    rounded = ser.dt.round(freq)
    tm.assert_series_equal(rounded, snapshot)

    # the result must not alias the input buffer
    assert not np.shares_memory(ser.array._ndarray, rounded.array._ndarray)
|
||||
|
||||
def test_dt_namespace_accessor_categorical(self):
|
||||
# GH 19468
|
||||
dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
|
||||
ser = Series(pd.Categorical(dti), name="foo")
|
||||
result = ser.dt.year
|
||||
expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
|
||||
# GH 27952
|
||||
tz = tz_aware_fixture
|
||||
datetimes = Series(
|
||||
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = categorical.dt.tz_localize(tz)
|
||||
expected = datetimes.dt.tz_localize(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
|
||||
# GH 27952
|
||||
tz = tz_aware_fixture
|
||||
datetimes = Series(
|
||||
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = categorical.dt.tz_convert(tz)
|
||||
expected = datetimes.dt.tz_convert(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
def test_dt_other_accessors_categorical(self, accessor):
    """Field accessors on a categorical match the plain datetime Series."""
    # GH 27952
    plain = Series(
        ["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
    )
    as_category = plain.astype("category")

    tm.assert_series_equal(
        getattr(as_category.dt, accessor),
        getattr(plain.dt, accessor),
    )
|
||||
|
||||
def test_dt_accessor_no_new_attributes(self):
    """Setting an unknown attribute on the ``.dt`` accessor must raise."""
    # https://github.com/pandas-dev/pandas/issues/10673
    daily = date_range("20130101", periods=5, freq="D")
    ser = Series(daily)

    expectation = pytest.raises(
        AttributeError, match="You cannot add any new attribute"
    )
    with expectation:
        ser.dt.xlabel = "a"
|
||||
|
||||
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
    "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
)
def test_dt_accessor_datetime_name_accessors(self, time_locale):
    """Check ``day_name``/``month_name`` with and without an explicit locale."""
    # Test Monday -> Sunday and January -> December, in that sequence
    english_days = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    english_months = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ]

    if time_locale is not None:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]
    else:
        # If the time_locale is None, day-name and month_name should
        # return the english attributes
        expected_days = english_days
        expected_months = english_months

    ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
    day_triples = zip(range(4, 11), expected_days, english_days)
    for position, local_name, english_name in day_triples:
        local_name = local_name.capitalize()
        assert ser.dt.day_name(locale=time_locale)[position] == local_name
        assert ser.dt.day_name(locale=None)[position] == english_name
    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1])

    ser = Series(date_range(freq="ME", start="2012", end="2013"))
    month_result = ser.dt.month_name(locale=time_locale)
    month_expected = Series([month.capitalize() for month in expected_months])

    # work around https://github.com/pandas-dev/pandas/issues/22342
    month_result = month_result.str.normalize("NFD")
    month_expected = month_expected.str.normalize("NFD")

    tm.assert_series_equal(month_result, month_expected)

    # same comparison, one timestamp at a time
    for stamp, month in zip(ser, expected_months):
        got = unicodedata.normalize("NFD", stamp.month_name(locale=time_locale))
        want = unicodedata.normalize("NFD", month.capitalize())

        assert got == want

    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1])
|
||||
|
||||
def test_strftime(self):
|
||||
# GH 10086
|
||||
ser = Series(date_range("20130101", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2015/02/03 11-22-33",
|
||||
"2015/02/04 11-22-33",
|
||||
"2015/02/05 11-22-33",
|
||||
"2015/02/06 11-22-33",
|
||||
"2015/02/07 11-22-33",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(period_range("20130101", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2015/02/03 11-22-33",
|
||||
"2015/02/03 11-22-34",
|
||||
"2015/02/03 11-22-35",
|
||||
"2015/02/03 11-22-36",
|
||||
"2015/02/03 11-22-37",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_dt64_days(self):
|
||||
ser = Series(date_range("20130101", periods=5))
|
||||
ser.iloc[0] = pd.NaT
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
[np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
datetime_index = date_range("20150301", periods=5)
|
||||
result = datetime_index.strftime("%Y/%m/%d")
|
||||
|
||||
expected = Index(
|
||||
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
|
||||
dtype=np.object_,
|
||||
)
|
||||
# dtype may be S10 or U10 depending on python version
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_strftime_period_days(self, using_infer_string):
|
||||
period_index = period_range("20150301", periods=5)
|
||||
result = period_index.strftime("%Y/%m/%d")
|
||||
expected = Index(
|
||||
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
|
||||
dtype="=U10",
|
||||
)
|
||||
if using_infer_string:
|
||||
expected = expected.astype("string[pyarrow_numpy]")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_strftime_dt64_microsecond_resolution(self):
|
||||
ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
|
||||
result = ser.dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_period_hours(self):
|
||||
ser = Series(period_range("20130101", periods=4, freq="h"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2013/01/01 00:00:00",
|
||||
"2013/01/01 01:00:00",
|
||||
"2013/01/01 02:00:00",
|
||||
"2013/01/01 03:00:00",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_period_minutes(self):
|
||||
ser = Series(period_range("20130101", periods=4, freq="ms"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
|
||||
expected = Series(
|
||||
[
|
||||
"2013/01/01 00:00:00.000",
|
||||
"2013/01/01 00:00:00.001",
|
||||
"2013/01/01 00:00:00.002",
|
||||
"2013/01/01 00:00:00.003",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        DatetimeIndex(["2019-01-01", pd.NaT]),
        PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
    ],
)
def test_strftime_nat(self, data):
    """Check that a NaT entry formats to NaN next to a valid value (GH 29578)."""
    formatted = Series(data).dt.strftime("%Y-%m-%d")
    wanted = Series(["2019-01-01", np.nan])
    tm.assert_series_equal(formatted, wanted)
|
||||
|
||||
@pytest.mark.parametrize(
    "data", [DatetimeIndex([pd.NaT]), PeriodIndex([pd.NaT], dtype="period[D]")]
)
def test_strftime_all_nat(self, data):
    """Check all-NaT input formats to object-dtype NaN with no warning.

    See https://github.com/pandas-dev/pandas/issues/45858
    """
    all_nat = Series(data)
    with tm.assert_produces_warning(None):
        formatted = all_nat.dt.strftime("%Y-%m-%d")
    wanted = Series([np.nan], dtype=object)
    tm.assert_series_equal(formatted, wanted)
|
||||
|
||||
def test_valid_dt_with_missing_values(self):
|
||||
# GH 8689
|
||||
ser = Series(date_range("20130101", periods=5, freq="D"))
|
||||
ser.iloc[2] = pd.NaT
|
||||
|
||||
for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
|
||||
expected = getattr(ser.dt, attr).copy()
|
||||
expected.iloc[2] = np.nan
|
||||
result = getattr(ser.dt, attr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.dt.date
|
||||
expected = Series(
|
||||
[
|
||||
date(2013, 1, 1),
|
||||
date(2013, 1, 2),
|
||||
pd.NaT,
|
||||
date(2013, 1, 4),
|
||||
date(2013, 1, 5),
|
||||
],
|
||||
dtype="object",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.dt.time
|
||||
expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_api(self):
|
||||
# GH 9322
|
||||
from pandas.core.indexes.accessors import (
|
||||
CombinedDatetimelikeProperties,
|
||||
DatetimeProperties,
|
||||
)
|
||||
|
||||
assert Series.dt is CombinedDatetimelikeProperties
|
||||
|
||||
ser = Series(date_range("2000-01-01", periods=3))
|
||||
assert isinstance(ser.dt, DatetimeProperties)
|
||||
|
||||
@pytest.mark.parametrize(
    "ser",
    [
        Series(np.arange(5)),
        Series(list("abcde")),
        Series(np.random.default_rng(2).standard_normal(5)),
    ],
)
def test_dt_accessor_invalid(self, ser):
    """Check Series with non-datetimelike dtypes do not expose ``.dt`` (GH#9322)."""
    with pytest.raises(AttributeError, match="only use .dt accessor"):
        ser.dt
    # hasattr swallows the AttributeError, so it must report the attribute absent.
    has_dt = hasattr(ser, "dt")
    assert not has_dt
|
||||
|
||||
def test_dt_accessor_updates_on_inplace(self):
|
||||
ser = Series(date_range("2018-01-01", periods=10))
|
||||
ser[2] = None
|
||||
return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True)
|
||||
assert return_value is None
|
||||
result = ser.dt.date
|
||||
assert result[0] == result[2]
|
||||
|
||||
def test_date_tz(self):
|
||||
# GH11757
|
||||
rng = DatetimeIndex(
|
||||
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
|
||||
tz="US/Eastern",
|
||||
)
|
||||
ser = Series(rng)
|
||||
expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
|
||||
tm.assert_series_equal(ser.dt.date, expected)
|
||||
tm.assert_series_equal(ser.apply(lambda x: x.date()), expected)
|
||||
|
||||
def test_dt_timetz_accessor(self, tz_naive_fixture):
|
||||
# GH21358
|
||||
tz = maybe_get_tz(tz_naive_fixture)
|
||||
|
||||
dtindex = DatetimeIndex(
|
||||
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
|
||||
)
|
||||
ser = Series(dtindex)
|
||||
expected = Series(
|
||||
[time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
|
||||
)
|
||||
result = ser.dt.timetz
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "input_series, expected_output",
    [
        [["2020-01-01"], [[2020, 1, 3]]],
        [[pd.NaT], [[np.nan, np.nan, np.nan]]],
        [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
        [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]],
        # see GH#36032
        [["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]],
        [["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]],
    ],
)
def test_isocalendar(self, input_series, expected_output):
    """Check ``.dt.isocalendar()`` returns a UInt32 year/week/day frame."""
    stamps = pd.to_datetime(Series(input_series))
    wanted = DataFrame(
        expected_output, columns=["year", "week", "day"], dtype="UInt32"
    )
    tm.assert_frame_equal(stamps.dt.isocalendar(), wanted)
|
||||
|
||||
def test_hour_index(self):
|
||||
dt_series = Series(
|
||||
date_range(start="2021-01-01", periods=5, freq="h"),
|
||||
index=[2, 6, 7, 8, 11],
|
||||
dtype="category",
|
||||
)
|
||||
result = dt_series.dt.hour
|
||||
expected = Series(
|
||||
[0, 1, 2, 3, 4],
|
||||
dtype="int32",
|
||||
index=[2, 6, 7, 8, 11],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestSeriesPeriodValuesDtAccessor:
    """Tests for the ``.dt`` accessor on Series holding period values."""

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="h"),
                Period("2016-01-01 01:00:00", freq="h"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="M"),
                Period("2016-01-01 00:01:00", freq="M"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="s"),
                Period("2016-01-01 00:00:01", freq="s"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        """Check ``.dt.end_time`` equals per-element ``Period.end_time`` (GH#17157)."""
        periods = Series(PeriodArray._from_sequence(np.asarray(input_vals)))

        wanted = periods.apply(lambda period: period.end_time)
        tm.assert_series_equal(periods.dt.end_time, wanted)

    @pytest.mark.parametrize("input_vals", [("2001"), ("NaT")])
    def test_to_period(self, input_vals):
        """Check ``.dt.to_period("D")`` matches direct Period[D] construction (GH#21205)."""
        wanted = Series([input_vals], dtype="Period[D]")
        converted = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
        tm.assert_series_equal(converted, wanted)
|
||||
|
||||
|
||||
def test_normalize_pre_epoch_dates():
    """Check ``.dt.normalize()`` zeroes the time part for a pre-1970 date too (GH 36294)."""
    stamps = pd.to_datetime(Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"]))
    wanted = pd.to_datetime(Series(["1969-01-01", "2016-01-01"]))
    tm.assert_series_equal(stamps.dt.normalize(), wanted)
|
||||
|
||||
|
||||
def test_day_attribute_non_nano_beyond_int32():
    """Check ``.dt.days`` on second-resolution timedeltas (GH 52386).

    One expected day count (2839645203) is larger than the int32 maximum.
    """
    seconds = [
        136457654736252,
        134736784364431,
        245345345545332,
        223432411,
        2343241,
        3634548734,
        23234,
    ]
    deltas = Series(np.array(seconds, dtype="timedelta64[s]"))
    wanted = Series([1579371003, 1559453522, 2839645203, 2586, 27, 42066, 0])
    tm.assert_series_equal(deltas.dt.days, wanted)
|
@ -0,0 +1,129 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
ArrowDtype,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
from pandas.compat import pa_version_under11p0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem(list_dtype):
    """Check integer indexing through ``.list`` picks one element per row."""
    rows = [[1, 2, 3], [4, None, 5], None]
    nested = Series(rows, dtype=ArrowDtype(list_dtype))

    picked = nested.list[1]

    wanted = Series([2, None, None], dtype="int64[pyarrow]")
    tm.assert_series_equal(picked, wanted)
|
||||
|
||||
|
||||
def test_list_getitem_slice():
    """Check slicing through ``.list``, or the error raised under old pyarrow."""
    list_type = ArrowDtype(pa.list_(pa.int64()))
    nested = Series([[1, 2, 3], [4, None, 5], None], dtype=list_type)

    if not pa_version_under11p0:
        sliced = nested.list[1:None:None]
        wanted = Series(
            [[2, 3], [None, 5], None], dtype=ArrowDtype(pa.list_(pa.int64()))
        )
        tm.assert_series_equal(sliced, wanted)
        return

    with pytest.raises(
        NotImplementedError, match="List slice not supported by pyarrow "
    ):
        nested.list[1:None:None]
|
||||
|
||||
|
||||
def test_list_len():
    """Check ``.list.len()`` returns per-row lengths as arrow int32."""
    nested = Series(
        [[1, 2, 3], [4, None], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    lengths = nested.list.len()
    wanted = Series([3, 2, None], dtype=ArrowDtype(pa.int32()))
    tm.assert_series_equal(lengths, wanted)
|
||||
|
||||
|
||||
def test_list_flatten():
    """Check ``.list.flatten()`` concatenates the non-null rows' elements."""
    nested = Series(
        [[1, 2, 3], [4, None], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    flat = nested.list.flatten()
    wanted = Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
    tm.assert_series_equal(flat, wanted)
|
||||
|
||||
|
||||
def test_list_getitem_slice_invalid():
    """Check a zero-step slice through ``.list`` raises (error depends on pyarrow)."""
    nested = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )

    if not pa_version_under11p0:
        with pytest.raises(pa.lib.ArrowInvalid, match=re.escape("`step` must be >= 1")):
            nested.list[1:None:0]
        return

    with pytest.raises(
        NotImplementedError, match="List slice not supported by pyarrow "
    ):
        nested.list[1:None:0]
|
||||
|
||||
|
||||
def test_list_accessor_non_list_dtype():
    """Check ``.list`` on a non-list arrow dtype raises ``AttributeError``."""
    ints = Series([1, 2, 4], dtype=ArrowDtype(pa.int64()))
    message = re.escape(
        "Can only use the '.list' accessor with 'list[pyarrow]' dtype, "
        "not int64[pyarrow]."
    )
    with pytest.raises(AttributeError, match=message):
        ints.list[1:None:0]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem_invalid_index(list_dtype):
    """Check out-of-bounds and non-int/slice keys through ``.list`` raise."""
    nested = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(list_dtype),
    )

    # Negative and too-large positions are rejected by pyarrow.
    with pytest.raises(pa.lib.ArrowInvalid, match="Index -1 is out of bounds"):
        nested.list[-1]
    with pytest.raises(pa.lib.ArrowInvalid, match="Index 5 is out of bounds"):
        nested.list[5]

    # A string key raises ValueError with the key-type message.
    with pytest.raises(ValueError, match="key must be an int or slice, got str"):
        nested.list["abc"]
|
||||
|
||||
|
||||
def test_list_accessor_not_iterable():
    """Check ``iter()`` on the ``.list`` accessor raises ``TypeError``."""
    nested = Series(
        [[1, 2, 3], [4, None], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    accessor = nested.list
    with pytest.raises(TypeError, match="'ListAccessor' object is not iterable"):
        iter(accessor)
|
@ -0,0 +1,9 @@
|
||||
from pandas import Series
|
||||
|
||||
|
||||
class TestSparseAccessor:
    """Tests for the ``.sparse`` accessor on Series."""

    def test_sparse_accessor_updates_on_inplace(self):
        """Check ``.sparse`` reflects the Series after an in-place ``drop``."""
        sparse = Series([1, 1, 2, 3], dtype="Sparse[int]")

        outcome = sparse.drop([0, 1], inplace=True)

        assert outcome is None
        assert sparse.sparse.density == 1.0
|
@ -0,0 +1,25 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestStrAccessor:
|
||||
def test_str_attribute(self):
|
||||
# GH#9068
|
||||
methods = ["strip", "rstrip", "lstrip"]
|
||||
ser = Series([" jack", "jill ", " jesse ", "frank"])
|
||||
for method in methods:
|
||||
expected = Series([getattr(str, method)(x) for x in ser.values])
|
||||
tm.assert_series_equal(getattr(Series.str, method)(ser.str), expected)
|
||||
|
||||
# str accessor only valid with string values
|
||||
ser = Series(range(5))
|
||||
with pytest.raises(AttributeError, match="only use .str accessor"):
|
||||
ser.str.repeat(2)
|
||||
|
||||
def test_str_accessor_updates_on_inplace(self):
|
||||
ser = Series(list("abc"))
|
||||
return_value = ser.drop([0], inplace=True)
|
||||
assert return_value is None
|
||||
assert len(ser.str.lower()) == 2
|
@ -0,0 +1,196 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat.pyarrow import (
|
||||
pa_version_under11p0,
|
||||
pa_version_under13p0,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
ArrowDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
pc = pytest.importorskip("pyarrow.compute")
|
||||
|
||||
|
||||
def test_struct_accessor_dtypes():
    """Check ``.struct.dtypes`` lists one ArrowDtype per struct field."""
    inner_struct = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
        ]
    )
    outer_struct = pa.struct(
        [
            ("int_col", pa.int64()),
            ("string_col", pa.string()),
            ("struct_col", inner_struct),
        ]
    )
    empty = Series([], dtype=ArrowDtype(outer_struct))

    field_dtypes = empty.struct.dtypes

    wanted = Series(
        [
            ArrowDtype(pa.int64()),
            ArrowDtype(pa.string()),
            ArrowDtype(
                pa.struct(
                    [
                        ("int_col", pa.int64()),
                        ("float_col", pa.float64()),
                    ]
                )
            ),
        ],
        index=Index(["int_col", "string_col", "struct_col"]),
    )
    tm.assert_series_equal(field_dtypes, wanted)
|
||||
|
||||
|
||||
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field():
    """Check ``.struct.field`` by name and by position keeps index and names the result."""
    labels = Index([-100, 42, 123])
    crop_type = pa.struct(
        [
            ("rice", pa.float64()),
            ("maize", pa.int64()),
            ("wheat", pa.string()),
        ]
    )
    crops = Series(
        [
            {"rice": 1.0, "maize": -1, "wheat": "a"},
            {"rice": 2.0, "maize": 0, "wheat": "b"},
            {"rice": 3.0, "maize": 1, "wheat": "c"},
        ],
        dtype=ArrowDtype(crop_type),
        index=labels,
    )

    # Lookup by field name.
    maize = crops.struct.field("maize")
    wanted_maize = Series(
        [-1, 0, 1],
        dtype=ArrowDtype(pa.int64()),
        index=labels,
        name="maize",
    )
    tm.assert_series_equal(maize, wanted_maize)

    # Lookup by field position; position 2 is "wheat".
    wheat = crops.struct.field(2)
    wanted_wheat = Series(
        ["a", "b", "c"],
        dtype=ArrowDtype(pa.string()),
        index=labels,
        name="wheat",
    )
    tm.assert_series_equal(wheat, wanted_wheat)
|
||||
|
||||
|
||||
def test_struct_accessor_field_with_invalid_name_or_index():
    """Check ``.struct.field`` rejects a float argument with ``ValueError``."""
    struct_type = ArrowDtype(pa.struct([("field", pa.int64())]))
    empty = Series([], dtype=struct_type)

    with pytest.raises(ValueError, match="name_or_index must be an int, str,"):
        empty.struct.field(1.1)
|
||||
|
||||
|
||||
@pytest.mark.skipif(pa_version_under11p0, reason="pyarrow>=11.0.0 required")
def test_struct_accessor_explode():
    """Check ``.struct.explode()`` yields one DataFrame column per top-level field."""
    labels = Index([-100, 42, 123])
    turtle_type = pa.struct(
        [
            ("painted", pa.int64()),
            ("snapping", pa.struct([("sea", pa.string())])),
        ]
    )
    turtles = Series(
        [
            {"painted": 1, "snapping": {"sea": "green"}},
            {"painted": 2, "snapping": {"sea": "leatherback"}},
            {"painted": 3, "snapping": {"sea": "hawksbill"}},
        ],
        dtype=ArrowDtype(turtle_type),
        index=labels,
    )

    exploded = turtles.struct.explode()

    # The nested struct column stays a struct; only the top level is split.
    wanted_painted = Series([1, 2, 3], index=labels, dtype=ArrowDtype(pa.int64()))
    wanted_snapping = Series(
        [{"sea": "green"}, {"sea": "leatherback"}, {"sea": "hawksbill"}],
        index=labels,
        dtype=ArrowDtype(pa.struct([("sea", pa.string())])),
    )
    wanted = DataFrame({"painted": wanted_painted, "snapping": wanted_snapping})
    tm.assert_frame_equal(exploded, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid",
    [
        pytest.param(Series([1, 2, 3], dtype="int64"), id="int64"),
        pytest.param(
            Series(["a", "b", "c"], dtype="string[pyarrow]"), id="string-pyarrow"
        ),
    ],
)
def test_struct_accessor_api_for_invalid(invalid):
    """Check ``.struct`` on a non-struct dtype raises ``AttributeError``."""
    message = re.escape(
        "Can only use the '.struct' accessor with 'struct[pyarrow]' dtype, "
        f"not {invalid.dtype}."
    )
    with pytest.raises(AttributeError, match=message):
        invalid.struct
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["indices", "name"],
    [
        (0, "int_col"),
        ([1, 2], "str_col"),
        (pc.field("int_col"), "int_col"),
        ("int_col", "int_col"),
        (b"string_col", b"string_col"),
        ([b"string_col"], "string_col"),
    ],
)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field_expanded(indices, name):
    """Check ``.struct.field`` agrees with ``pc.struct_field`` for many selector kinds."""
    nested_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
            ("str_col", pa.string()),
        ]
    )
    arrow_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("struct_col", nested_type),
            (b"string_col", pa.string()),
        ]
    )

    raw = pa.array([], type=arrow_type)
    wrapped = Series(raw, dtype=ArrowDtype(arrow_type))

    # pyarrow's own field extraction is the reference result.
    wanted = pc.struct_field(raw, indices)
    extracted = wrapped.struct.field(indices)

    tm.assert_equal(extracted.array._pa_array.combine_chunks(), wanted)
    assert extracted.name == name
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,499 @@
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import re
|
||||
|
||||
from dateutil.tz import (
|
||||
gettz,
|
||||
tzutc,
|
||||
)
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_fancy_getitem():
    """Check several key types for ``__getitem__`` on a datetime-indexed Series."""
    first_fridays = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    counter = Series(np.arange(len(first_fridays)), index=first_fridays)

    # Integer key: treated positionally, with a deprecation warning.
    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        assert counter[48] == 48

    # String, datetime and Timestamp keys all resolve to the same row.
    assert counter["1/2/2009"] == 48
    assert counter["2009-1-2"] == 48
    assert counter[datetime(2009, 1, 2)] == 48
    assert counter[Timestamp(datetime(2009, 1, 2))] == 48

    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        counter["2009-1-3"]

    by_string = counter["3/6/2009":"2009-06-05"]
    by_datetime = counter[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    tm.assert_series_equal(by_string, by_datetime)
|
||||
|
||||
|
||||
def test_fancy_setitem():
|
||||
dti = date_range(
|
||||
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
|
||||
)
|
||||
|
||||
s = Series(np.arange(len(dti)), index=dti)
|
||||
|
||||
msg = "Series.__setitem__ treating keys as positions is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s[48] = -1
|
||||
assert s.iloc[48] == -1
|
||||
s["1/2/2009"] = -2
|
||||
assert s.iloc[48] == -2
|
||||
s["1/2/2009":"2009-06-05"] = -3
|
||||
assert (s[48:54] == -3).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    """Check setting by equivalent instants in other zones on a tz-aware index.

    Each section overwrites one row with 0 and then restores it through the
    same key, so the Series must end up equal to the original.
    """
    if tz_source == "pytz":
        tzget = pytz.timezone
    else:

        def tzget(zone):
            # handle special case for utc in dateutil
            if zone == "UTC":
                return tzutc()
            return gettz(zone)

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz=tzget("US/Eastern"))
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    utc_string = "1990-01-01 09:00:00+00:00"
    roundtrip = ts.copy()
    roundtrip[utc_string] = 0
    roundtrip[utc_string] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    offset_string = "1990-01-01 03:00:00-06:00"
    roundtrip = ts.copy()
    roundtrip[offset_string] = 0
    roundtrip[offset_string] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    # repeat with datetimes
    roundtrip = ts.copy()
    roundtrip[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
    roundtrip[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    roundtrip = ts.copy()
    central = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    central = central.to_pydatetime()
    roundtrip[central] = 0
    roundtrip[central] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetimeindex():
    """Check get/set on a tz-aware hourly DatetimeIndex with many key kinds.

    Covers string keys and slices, boolean masks built from string and
    datetime bounds, tz-naive keys (which must not match), index-derived
    keys, and partial-date strings.
    """
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz="US/Eastern")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    # --- scalar string key ---
    got = ts["1990-01-01 04:00:00"]
    want = ts.iloc[4]
    assert got == want

    got = ts.copy()
    got["1990-01-01 04:00:00"] = 0
    got["1990-01-01 04:00:00"] = ts.iloc[4]
    tm.assert_series_equal(got, ts)

    # --- string slice (both endpoints included) ---
    got = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    want = ts[4:8]
    tm.assert_series_equal(got, want)

    got = ts.copy()
    got["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    got["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(got, ts)

    # --- boolean mask from string bounds ---
    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    got = ts[(ts.index >= lb) & (ts.index <= rb)]
    want = ts[4:8]
    tm.assert_series_equal(got, want)

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    got = ts[(ts.index >= lb) & (ts.index <= rb)]
    want = ts[4:8]
    tm.assert_series_equal(got, want)

    # --- tz-naive keys ---
    # But we do not give datetimes a pass on tzawareness compat
    naive = datetime(1990, 1, 1, 4)
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with pytest.raises(KeyError, match=re.escape(repr(key))):
            # GH#36148 as of 2.0 we require tzawareness-compat
            ts[key]

    got = ts.copy()
    # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
    # so setting it as a new key casts to object rather than matching
    # rng[4]
    got[naive] = ts.iloc[4]
    assert got.index.dtype == object
    tm.assert_index_equal(got.index[:-1], rng.astype(object))
    assert got.index[-1] == naive

    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        ts[naive : datetime(1990, 1, 1, 7)]

    got = ts.copy()
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        got[naive : datetime(1990, 1, 1, 7)] = 0
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        got[naive : datetime(1990, 1, 1, 7)] = 99
    # the __setitems__ here failed, so result should still match ts
    tm.assert_series_equal(got, ts)

    # --- boolean mask from datetime bounds ---
    lb = naive
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    lb = Timestamp(naive).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    got = ts[(ts.index >= lb) & (ts.index <= rb)]
    want = ts[4:8]
    tm.assert_series_equal(got, want)

    # --- keys taken from the index itself ---
    got = ts[ts.index[4]]
    want = ts.iloc[4]
    assert got == want

    got = ts[ts.index[4:8]]
    want = ts[4:8]
    tm.assert_series_equal(got, want)

    got = ts.copy()
    got[ts.index[4:8]] = 0
    got.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(got, ts)

    # also test partial date slicing
    got = ts["1990-01-02"]
    want = ts[24:48]
    tm.assert_series_equal(got, want)

    got = ts.copy()
    got["1990-01-02"] = 0
    got["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(got, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_periodindex():
    """Check get/set on an hourly PeriodIndex with string, mask and index keys."""
    N = 50
    rng = period_range("1/1/1990", periods=N, freq="h")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    # --- scalar string key ---
    hour_key = "1990-01-01 04"
    assert ts[hour_key] == ts.iloc[4]

    roundtrip = ts.copy()
    roundtrip[hour_key] = 0
    roundtrip[hour_key] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    # --- string slice (both endpoints included) ---
    picked = ts["1990-01-01 04":"1990-01-01 07"]
    tm.assert_series_equal(picked, ts[4:8])

    roundtrip = ts.copy()
    roundtrip["1990-01-01 04":"1990-01-01 07"] = 0
    roundtrip["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    tm.assert_series_equal(roundtrip, ts)

    # --- boolean mask from string bounds ---
    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    in_window = (ts.index >= lb) & (ts.index <= rb)
    tm.assert_series_equal(ts[in_window], ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts.iloc[4]

    picked = ts[ts.index[4:8]]
    tm.assert_series_equal(picked, ts[4:8])

    roundtrip = ts.copy()
    roundtrip[ts.index[4:8]] = 0
    roundtrip.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(roundtrip, ts)
|
||||
|
||||
|
||||
def test_datetime_indexing():
    """Check a missing Timestamp key raises, then can be set and read back.

    Run once on a sorted index with repeats and once on its reverse.
    """
    index = date_range("1/1/2000", "1/7/2000")
    index = index.repeat(3)
    stamp = Timestamp("1/8/2000")

    def check_missing_then_set(series):
        # The key is absent at first; assignment adds it.
        with pytest.raises(KeyError, match=re.escape(repr(stamp))):
            series[stamp]
        series[stamp] = 0
        assert series[stamp] == 0

    check_missing_then_set(Series(len(index), index=index))

    # not monotonic
    reversed_series = Series(len(index), index=index)[::-1]
    check_missing_then_set(reversed_series)
|
||||
|
||||
|
||||
# test duplicates in time series
|
||||
|
||||
|
||||
def test_indexing_with_duplicate_datetimeindex(
    rand_series_with_duplicate_datetimeindex,
):
    """Check get/set by date on a Series whose DatetimeIndex has duplicates."""
    ts = rand_series_with_duplicate_datetimeindex

    for date in ts.index.unique():
        picked = ts[date]

        mask = ts.index == date
        matches = (ts.index == date).sum()
        by_mask = ts[mask]
        # A duplicated date yields a Series, a unique date yields a scalar.
        if matches > 1:
            tm.assert_series_equal(picked, by_mask)
        else:
            tm.assert_almost_equal(picked, by_mask.iloc[0])

        # Setting by date writes every matching row.
        zeroed = ts.copy()
        zeroed[date] = 0
        wanted = Series(np.where(mask, 0, ts), index=ts.index)
        tm.assert_series_equal(zeroed, wanted)

    key = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=re.escape(repr(key))):
        ts[key]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
|
||||
|
||||
|
||||
def test_loc_getitem_over_size_cutoff(monkeypatch):
    """Check ``.loc`` lookups with ``libindex._SIZE_CUTOFF`` patched to 1000 (#1821)."""
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    # create large list of non periodic datetime
    sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    d = datetime(2011, 12, 5, 20, 30)
    n = 1100
    dates = []
    for _ in range(n):
        dates.extend(
            [d, d + sec, d + sec + half_sec, d + sec + sec + half_sec]
        )
        d += 3 * sec

    # duplicate some values in the list
    duplicate_positions = np.random.default_rng(2).integers(0, len(dates) - 1, 20)
    for p in duplicate_positions:
        dates[p + 1] = dates[p]

    df = DataFrame(
        np.random.default_rng(2).standard_normal((len(dates), 4)),
        index=dates,
        columns=list("ABCD"),
    )

    timestamp = df.index[n * 3]
    assert timestamp in df.index

    # it works!
    df.loc[timestamp]
    assert len(df.loc[[timestamp]]) > 0
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """Check lookups on a PeriodIndex longer than the patched size cutoff (GH 27136)."""
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    n = 1100
    idx = period_range("1/1/2000", freq="min", periods=n)
    # Precondition: the engine reports itself as over the threshold.
    assert idx._engine.over_size_threshold

    values = np.random.default_rng(2).standard_normal(len(idx))
    s = Series(values, index=idx)

    last_period = idx[n - 1]
    assert last_period in s.index

    # it works!
    s[last_period]
    assert len(s.loc[[last_period]]) > 0
|
||||
|
||||
|
||||
def test_indexing_unordered():
    """Check scalar, slice and partial-string lookups on a non-monotonic index (GH 2437)."""
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.default_rng(2).random(len(rng)), index=rng)
    # Same data with the last four rows moved ahead of the middle block.
    shuffled = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    for stamp in ts.index:
        assert ts[stamp] == shuffled[stamp]

    # GH 3448 (ranges)
    def compare(slobj):
        result = shuffled[slobj].copy().sort_index()
        expected = ts[slobj]
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    range_keys = [
        slice("2011-01-01", "2011-01-15"),
        slice("2010-12-30", "2011-01-15"),
        slice("2011-01-01", "2011-01-16"),
        # partial ranges
        slice("2011-01-01", "2011-01-6"),
        slice("2011-01-06", "2011-01-8"),
        slice("2011-01-06", "2011-01-12"),
    ]
    for key in range_keys:
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            compare(key)

    # single values
    by_year = shuffled["2011"].sort_index()
    wanted = ts["2011"]
    wanted.index = wanted.index._with_freq(None)
    tm.assert_series_equal(by_year, wanted)
|
||||
|
||||
|
||||
def test_indexing_unordered2():
|
||||
# diff freq
|
||||
rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME")
|
||||
ts = Series(np.arange(len(rng)), index=rng)
|
||||
ts = ts.take(np.random.default_rng(2).permutation(20))
|
||||
|
||||
result = ts["2005"]
|
||||
for t in result.index:
|
||||
assert t.year == 2005
|
||||
|
||||
|
||||
def test_indexing():
    """Get and set with a year string on a Series and a DataFrame."""
    stamps = date_range("2001-1-1", periods=20, freq="ME")
    ser = Series(np.random.default_rng(2).random(len(stamps)), index=stamps)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    tm.assert_series_equal(ser["2001"], ser.iloc[:12])

    frame = DataFrame({"A": ser.copy()})

    # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
    #  like any other key, so raises
    with pytest.raises(KeyError, match="2001"):
        frame["2001"]

    # setting
    ser = Series(np.random.default_rng(2).random(len(stamps)), index=stamps)
    ser_expected = ser.copy()
    ser_expected.iloc[:12] = 1
    ser["2001"] = 1
    tm.assert_series_equal(ser, ser_expected)

    frame_expected = frame.copy()
    frame_expected.iloc[:12, 0] = 1
    frame.loc["2001", "A"] = 1
    tm.assert_frame_equal(frame, frame_expected)
|
||||
|
||||
|
||||
def test_getitem_str_month_with_datetimeindex():
    """A month string returns every row of a series confined to one day of it."""
    # GH3546 (not including times on the last day)
    cases = (("2013-05-31 23:00", "h"), ("2013-05-31 23:59", "s"))
    for last_stamp, step in cases:
        stamps = date_range(start="2013-05-31 00:00", end=last_stamp, freq=step)
        ser = Series(range(len(stamps)), index=stamps)
        selected = ser["2013-05"]
        tm.assert_series_equal(selected, ser)
|
||||
|
||||
|
||||
def test_getitem_str_year_with_datetimeindex():
    """A year string returns both rows, including the last microsecond of a day."""
    first = Timestamp("2013-05-31 00:00")
    last = Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))
    labels = [first, last]
    ser = Series(range(len(labels)), index=labels)
    selected = ser["2013"]
    tm.assert_series_equal(selected, ser)
|
||||
|
||||
|
||||
def test_getitem_str_second_with_datetimeindex():
    """Row labels passed to ``DataFrame.__getitem__`` raise KeyError."""
    # GH14826, indexing with a seconds resolution string / datetime object
    seconds = date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s")
    frame = DataFrame(
        np.random.default_rng(2).random((5, 5)),
        columns=["open", "high", "low", "close", "volume"],
        index=seconds,
    )

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        frame["2012-01-02 18:01:02"]

    # the Timestamp taken from the row index raises as well
    pattern = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
    with pytest.raises(KeyError, match=pattern):
        frame[frame.index[2]]
|
||||
|
||||
|
||||
def test_compare_datetime_with_all_none():
    """``>`` between a datetime64 series and an all-None series is all False."""
    # GH#54870
    dates = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    nones = Series([None, None])
    compared = dates > nones
    tm.assert_series_equal(compared, Series([False, False]))
|
@ -0,0 +1,70 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDelItem:
    """Tests for ``del series[key]``."""

    def test_delitem(self):
        """Deleting by label removes the entry in place."""
        # GH#5542
        # should delete the item inplace
        ser = Series(range(5))
        for dropped in (0, 1):
            del ser[dropped]
            remaining = range(dropped + 1, 5)
            tm.assert_series_equal(ser, Series(remaining, index=remaining))

        def empty_int_series():
            # a fresh expected object for every comparison
            return Series(dtype="int64", index=Index([], dtype="int64"))

        # only 1 left, del, add, del
        ser = Series(1)
        del ser[0]
        tm.assert_series_equal(ser, empty_int_series())
        ser[0] = 1
        tm.assert_series_equal(ser, Series(1))
        del ser[0]
        tm.assert_series_equal(ser, empty_int_series())

    def test_delitem_object_index(self, using_infer_string):
        """Delete / re-add / delete again with a string label."""
        # Index(dtype=object)
        dtype = "string[pyarrow_numpy]" if using_infer_string else object

        def emptied():
            return Series(dtype="int64", index=Index([], dtype=dtype))

        ser = Series(1, index=Index(["a"], dtype=dtype))
        del ser["a"]
        tm.assert_series_equal(ser, emptied())
        ser["a"] = 1
        tm.assert_series_equal(ser, Series(1, index=Index(["a"], dtype=dtype)))
        del ser["a"]
        tm.assert_series_equal(ser, emptied())

    def test_delitem_missing_key(self):
        """Deleting from an empty series raises KeyError for the key."""
        # empty
        nothing = Series(dtype=object)

        with pytest.raises(KeyError, match=r"^0$"):
            del nothing[0]

    def test_delitem_extension_dtype(self):
        """Deletion keeps the dtype for tz-aware and period values."""
        # GH#40386
        # DatetimeTZDtype
        aware = date_range("2016-01-01", periods=3, tz="US/Pacific")
        ser = Series(aware)

        # take the expected rows before mutating ``ser``
        kept = ser[[0, 2]]
        del ser[1]
        assert ser.dtype == aware.dtype
        tm.assert_series_equal(ser, kept)

        # PeriodDtype
        periods = aware.tz_localize(None).to_period("D")
        ser = Series(periods)

        kept = ser[:2]
        del ser[2]
        assert ser.dtype == periods.dtype
        tm.assert_series_equal(ser, kept)
|
@ -0,0 +1,238 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_get():
    """``Series.get`` returns the value for a present label, else the default."""
    # GH 6383
    raw = [
        43, 48, 60, 48, 50, 51, 50, 45, 57, 48,
        56, 45, 51, 39, 55, 43, 54, 52, 51, 54,
    ]  # fmt: skip

    # 20 values on the default index: label 25 is absent, so the default wins
    on_default_index = Series(np.array(raw))
    assert on_default_index.get(25, 0) == 0

    # float labels 25.0 ... 196.0 and 1225.0 ... 1936.0 (perfect squares)
    squares = [float(k * k) for k in (*range(5, 15), *range(35, 45))]
    on_float_index = Series(np.array(raw), index=Index(squares, dtype=np.float64))
    assert on_float_index.get(25, 0) == 43

    # GH 7407
    # with a boolean accessor
    frame = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    int_counts = frame.i.value_counts()
    assert int_counts.get(99, default="Missing") == "Missing"

    bool_counts = frame.b.value_counts()
    assert bool_counts.get(False, default="Missing") == 3
    assert bool_counts.get(True, default="Missing") == "Missing"
|
||||
|
||||
|
||||
def test_get_nan(float_numpy_dtype):
|
||||
# GH 8569
|
||||
s = Index(range(10), dtype=float_numpy_dtype).to_series()
|
||||
assert s.get(np.nan) is None
|
||||
assert s.get(np.nan, default="Missing") == "Missing"
|
||||
|
||||
|
||||
def test_get_nan_multiple(float_numpy_dtype):
|
||||
# GH 8569
|
||||
# ensure that fixing "test_get_nan" above hasn't broken get
|
||||
# with multiple elements
|
||||
s = Index(range(10), dtype=float_numpy_dtype).to_series()
|
||||
|
||||
idx = [2, 30]
|
||||
assert s.get(idx) is None
|
||||
|
||||
idx = [2, np.nan]
|
||||
assert s.get(idx) is None
|
||||
|
||||
# GH 17295 - all missing keys
|
||||
idx = [20, 30]
|
||||
assert s.get(idx) is None
|
||||
|
||||
idx = [np.nan, np.nan]
|
||||
assert s.get(idx) is None
|
||||
|
||||
|
||||
def test_get_with_default():
|
||||
# GH#7725
|
||||
d0 = ["a", "b", "c", "d"]
|
||||
d1 = np.arange(4, dtype="int64")
|
||||
|
||||
for data, index in ((d0, d1), (d1, d0)):
|
||||
s = Series(data, index=index)
|
||||
for i, d in zip(index, data):
|
||||
assert s.get(i) == d
|
||||
assert s.get(i, d) == d
|
||||
assert s.get(i, "z") == d
|
||||
|
||||
assert s.get("e", "z") == "z"
|
||||
assert s.get("e", "e") == "e"
|
||||
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
warn = None
|
||||
if index is d0:
|
||||
warn = FutureWarning
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
assert s.get(10, "z") == "z"
|
||||
assert s.get(10, 10) == 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr",
    [
        np.random.default_rng(2).standard_normal(10),
        DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize(
            tz="US/Eastern"
        ),
    ],
)
def test_get_with_ea(arr):
    """``Series.get`` with scalar, list and slice keys on int and str indexes."""
    # GH#21260
    even_labels = list(range(0, 2 * len(arr), 2))
    ser = Series(arr, index=even_labels)
    assert ser.get(4) == ser.iloc[2]

    tm.assert_series_equal(ser.get([4, 6]), ser.iloc[[2, 3]])
    tm.assert_series_equal(ser.get(slice(2)), ser.iloc[[0, 1]])

    # labels that are not in the integer index
    assert ser.get(-1) is None
    assert ser.get(ser.index.max() + 1) is None

    ser = Series(arr[:6], index=list("abcdef"))
    assert ser.get("c") == ser.iloc[2]

    tm.assert_series_equal(ser.get(slice("b", "d")), ser.iloc[[1, 2, 3]])

    absent = ser.get("Z")
    assert absent is None

    # integer keys against the string index
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(4) == ser.iloc[4]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(-1) == ser.iloc[-1]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(len(ser)) is None

    # GH#21257
    every_other = Series(arr)[::2]
    assert every_other.get(1) is None
|
||||
|
||||
|
||||
def test_getitem_get(string_series, object_series):
    """``[]``, ``get`` and ``iloc`` agree for a label taken from the index."""
    msg = "Series.__getitem__ treating keys as positions is deprecated"

    for ser in (string_series, object_series):
        label = ser.index[5]
        via_getitem = ser[label]

        assert via_getitem == ser.get(label)
        assert ser[label] == ser.iloc[5]

    last_label = string_series.index[-1]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert string_series.get(-1) == string_series.get(last_label)
    assert string_series.iloc[5] == string_series.get(string_series.index[5])
|
||||
|
||||
|
||||
def test_get_none():
    """``get(None)`` returns None on an empty and on a labelled object series."""
    # GH#5652
    candidates = (
        Series(dtype=object),
        Series(dtype=object, index=list("abc")),
    )
    for ser in candidates:
        assert ser.get(None) is None
|
@ -0,0 +1,735 @@
|
||||
"""
|
||||
Series.__getitem__ test classes are organized by the type of key passed.
|
||||
"""
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import (
|
||||
conversion,
|
||||
timezones,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
class TestSeriesGetitemScalars:
    """``Series.__getitem__`` with scalar keys."""

    def test_getitem_object_index_float_string(self):
        """An index mixing strings and a float resolves both kinds of key."""
        # GH#17286
        mixed = Index(["a", "b", "c", 1.0])
        ser = Series([1] * 4, index=mixed)
        assert ser["a"] == 1
        assert ser[1.0] == 1

    def test_getitem_float_keys_tuple_values(self):
        """Float keys return tuple values, as a Series when the key repeats."""
        # see GH#13509
        triples = [(1, 1), (2, 2), (3, 3)]

        # unique Index
        unique = Series(triples, index=[0.0, 0.1, 0.2], name="foo")
        assert unique[0.0] == (1, 1)

        # non-unique Index
        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        repeated = Series(triples, index=[0.0, 0.0, 0.2], name="foo")

        tm.assert_series_equal(repeated[0.0], expected)

    def test_getitem_unrecognized_scalar(self):
        """A ``np.dtype`` object works as a scalar key."""
        # GH#32684 a scalar key that is not recognized by lib.is_scalar

        # a series that might be produced via `frame.dtypes`
        ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])

        second_label = ser.index[1]

        assert ser[second_label] == 2

    def test_getitem_negative_out_of_bounds(self):
        """An int key below ``-len`` raises IndexError (with the deprecation)."""
        ser = Series(["a"] * 10, index=["a"] * 10)

        msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg), tm.assert_produces_warning(
            FutureWarning, match=warn_msg
        ):
            ser[-11]

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        """An int key equal to ``len`` raises IndexError (with the deprecation)."""
        # don't segfault, GH#495
        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg), tm.assert_produces_warning(
            FutureWarning, match=warn_msg
        ):
            datetime_series[len(datetime_series)]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        """An int key on an empty default-indexed series raises KeyError."""
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        empty = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            empty[-1]

    def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype):
        """Absent int and str keys raise KeyError on duplicated int indexes."""
        dtype = any_int_numpy_dtype
        monotonic = Series(
            np.random.default_rng(2).standard_normal(6),
            index=Index([0, 0, 1, 1, 2, 2], dtype=dtype),
        )

        with pytest.raises(KeyError, match=r"^5$"):
            monotonic[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            monotonic["c"]

        # not monotonic
        scrambled = Series(
            np.random.default_rng(2).standard_normal(6), index=[2, 2, 0, 0, 1, 1]
        )

        with pytest.raises(KeyError, match=r"^5$"):
            scrambled[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            scrambled["c"]

    def test_getitem_int64(self, datetime_series):
        """A ``np.int64`` key matches ``iloc`` (and emits the deprecation)."""
        position = np.int64(5)
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            value = datetime_series[position]
        assert value == datetime_series.iloc[5]

    def test_getitem_full_range(self):
        """Selecting every label as a list returns an equal series."""
        # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
        labels = list(range(5))
        ser = Series(range(5), index=labels)
        tm.assert_series_equal(ser[list(range(5))], ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A localized ``datetime`` key and a tz-aware Timestamp key agree."""
        tz = timezones.maybe_get_tz(tzstr)

        hours = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="h", tz=tzstr
        )
        ser = Series(index=hours, data=hours.hour)
        as_timestamp = Timestamp("2012-12-24 17:00", tz=tzstr)

        naive = datetime(2012, 12, 24, 17, 0)
        as_pydatetime = conversion.localize_pydatetime(naive, tz)
        assert ser[as_timestamp] == ser[as_pydatetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        """A date string selects the matching row of a tz-aware index."""
        days = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.default_rng(2).standard_normal(len(days)), index=days)

        third_day = ser["1/3/2000"]
        tm.assert_almost_equal(third_day, ser.iloc[2])

    def test_getitem_time_object(self):
        """A ``datetime.time`` key selects every row at that time of day."""
        stamps = date_range("1/1/2000", "1/5/2000", freq="5min")
        ser = Series(
            np.random.default_rng(2).standard_normal(len(stamps)), index=stamps
        )

        at_half_past_nine = (stamps.hour == 9) & (stamps.minute == 30)
        result = ser[time(9, 30)]
        expected = ser[at_half_past_nine]
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        """A category value used as key returns the matching element."""
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        ser = Series([1, 2], index=cats)

        first = ser.iloc[0]
        assert ser[cats[0]] == first

    def test_getitem_numeric_categorical_listlike_matches_scalar(self):
        """Int keys are labels, scalar or list, on a numeric CategoricalIndex."""
        # GH#15470
        ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0]))

        # 0 is treated as a label
        assert ser[0] == "c"

        # the listlike analogue should also be treated as labels
        tm.assert_series_equal(ser[[0]], ser.iloc[-1:])

        tm.assert_series_equal(ser[[0, 1, 2]], ser.iloc[::-1])

    def test_getitem_integer_categorical_not_positional(self):
        """``get`` and ``[]`` treat an int as a label on a categorical index."""
        # GH#14865
        cat_index = Index([1, 2, 3], dtype="category")
        ser = Series(["a", "b", "c"], index=cat_index)
        assert ser.get(3) == "c"
        assert ser[3] == "c"

    def test_getitem_str_with_timedeltaindex(self):
        """A timedelta string is a label; an absent one raises KeyError."""
        deltas = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(deltas)), index=deltas)

        key = "6 days, 23:11:12"
        position = deltas.get_loc(key)
        assert position == 133

        assert ser[key] == ser.iloc[133]

        msg = r"^Timedelta\('50 days 00:00:00'\)$"
        with pytest.raises(KeyError, match=msg):
            deltas.get_loc("50 days")
        with pytest.raises(KeyError, match=msg):
            ser["50 days"]

    def test_getitem_bool_index_positional(self):
        """An int key on a boolean index is positional (and warns)."""
        # GH#48653
        ser = Series({True: 1, False: 0})
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            first = ser[0]
        assert first == 1
|
||||
|
||||
|
||||
class TestSeriesGetitemSlices:
    """``Series.__getitem__`` with slice keys and partial-string keys."""

    def test_getitem_partial_str_slice_with_datetimeindex(self):
        """A year string selects all days of that year."""
        # GH#34860
        days = date_range("1/1/2008", "1/1/2009")
        ser = days.to_series()
        selected = ser["2008"]

        year_2008 = date_range(start="2008-01-01", end="2008-12-31")
        expected = Series(year_2008, index=year_2008)

        tm.assert_series_equal(selected, expected)

    def test_getitem_slice_strings_with_datetimeindex(self):
        """String slice bounds on an index with a duplicated date."""
        stamps = DatetimeIndex(
            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
        )

        ser = Series(
            np.random.default_rng(2).standard_normal(len(stamps)), index=stamps
        )

        tm.assert_series_equal(ser["1/2/2000":], ser[1:])
        tm.assert_series_equal(ser["1/2/2000":"1/3/2000"], ser[1:4])

    def test_getitem_partial_str_slice_with_timedeltaindex(self):
        """Day-resolution string slices map to the expected positions."""
        deltas = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(deltas)), index=deltas)

        # (label slice, equivalent positional slice)
        pairs = (
            (slice("5 day", "6 day"), slice(86, 134)),
            (slice("5 day", None), slice(86, None)),
            (slice(None, "6 day"), slice(None, 134)),
        )
        for by_label, by_position in pairs:
            tm.assert_series_equal(ser[by_label], ser.iloc[by_position])

    def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self):
        """Sub-second string keys on a microsecond-frequency index."""
        # higher reso
        deltas = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
        ser = Series(np.arange(len(deltas)), index=deltas)

        tm.assert_series_equal(ser["1 day 10:11:12":], ser.iloc[0:])
        tm.assert_series_equal(ser["1 day 10:11:12.001":], ser.iloc[1000:])

        exact = ser["1 days, 10:11:12.001001"]
        assert exact == ser.iloc[1001]

    def test_getitem_slice_2d(self, datetime_series):
        """``ser[:, np.newaxis]`` raises ValueError."""
        # GH#30588 multi-dimensional indexing deprecated
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            datetime_series[:, np.newaxis]

    def test_getitem_median_slice_bug(self):
        """A one-slice list raises; a one-slice tuple acts like the slice."""
        stamps = date_range("20090415", "20090519", freq="2B")
        ser = Series(np.random.default_rng(2).standard_normal(13), index=stamps)

        only_slice = slice(6, 7, None)
        indexer = [only_slice]
        msg = "Indexing with a single-item list"
        with pytest.raises(ValueError, match=msg):
            # GH#31299
            ser[indexer]
        # but we're OK with a single-element tuple
        from_tuple = ser[(indexer[0],)]
        from_slice = ser[indexer[0]]
        tm.assert_series_equal(from_tuple, from_slice)

    @pytest.mark.parametrize(
        "slc, positions",
        [
            [slice(date(2018, 1, 1), None), [0, 1, 2]],
            [slice(date(2019, 1, 2), None), [2]],
            [slice(date(2020, 1, 1), None), []],
            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
            [slice(None, date(2019, 1, 1)), [0]],
        ],
    )
    def test_getitem_slice_date(self, slc, positions):
        """``datetime.date`` slice bounds select the listed positions."""
        # https://github.com/pandas-dev/pandas/issues/31501
        stamps = DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"])
        ser = Series([0, 1, 2], stamps)
        tm.assert_series_equal(ser[slc], ser.take(positions))

    def test_getitem_slice_float_raises(self, datetime_series):
        """Float slice bounds raise TypeError on a DatetimeIndex."""
        msg = (
            "cannot do slice indexing on DatetimeIndex with these indexers "
            r"\[{key}\] of type float"
        )
        with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
            datetime_series[4.0:10.0]

        with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
            datetime_series[4.5:10.0]

    def test_getitem_slice_bug(self):
        """Negative slice bounds beyond the length are clipped."""
        labels = list(range(10))
        ser = Series(range(10), index=labels)

        tm.assert_series_equal(ser[-12:], ser)
        tm.assert_series_equal(ser[-7:], ser[3:])
        tm.assert_series_equal(ser[:-12], ser[:0])

    def test_getitem_slice_integers(self):
        """An int slice on an int index is positional."""
        ser = Series(
            np.random.default_rng(2).standard_normal(8),
            index=[2, 4, 6, 8, 10, 12, 14, 16],
        )

        head = ser[:4]
        expected = Series(ser.values[:4], index=[2, 4, 6, 8])
        tm.assert_series_equal(head, expected)
|
||||
|
||||
|
||||
class TestSeriesGetitemListLike:
    """``Series.__getitem__`` with list-like keys."""

    @pytest.mark.parametrize("box", [list, np.array, Index, Series])
    def test_getitem_no_matches(self, box):
        """A list-like key with no matching label raises KeyError."""
        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
        ser = Series(["A", "B"])

        key = box(Series(["C"], dtype=object))

        msg = (
            r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            ser[key]

    def test_getitem_intlist_intindex_periodvalues(self):
        """An int list on an int index keeps the period dtype of the values."""
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))

        picked = ser[[2, 4]]
        third = pd.Period("2000-01-03", freq="D")
        fifth = pd.Period("2000-01-05", freq="D")
        expected = Series([third, fifth], index=[2, 4], dtype="Period[D]")
        tm.assert_series_equal(picked, expected)
        assert picked.dtype == "Period[D]"

    @pytest.mark.parametrize("box", [list, np.array, Index])
    def test_getitem_intlist_intervalindex_non_int(self, box):
        """An int list on a datetime IntervalIndex is positional (and warns)."""
        # GH#33404 fall back to positional since ints are unambiguous
        stamps = date_range("2000-01-03", periods=3)._with_freq(None)
        intervals = pd.IntervalIndex.from_breaks(stamps)
        ser = Series(range(len(intervals)), index=intervals)

        expected = ser.iloc[:1]
        key = box([0])
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            picked = ser[key]
        tm.assert_series_equal(picked, expected)

    @pytest.mark.parametrize("box", [list, np.array, Index])
    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
        """An absent int in a list key raises KeyError on a numeric MultiIndex."""
        # GH#33404 do _not_ fall back to positional since ints are ambiguous
        numbers = Index(range(4)).astype(dtype)
        stamps = date_range("2000-01-03", periods=3)
        levels = pd.MultiIndex.from_product([numbers, stamps])
        ser = Series(range(len(levels))[::-1], index=levels)

        key = box([5])
        with pytest.raises(KeyError, match="5"):
            ser[key]

    def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype):
        """An unsigned-int array key holding an absent label raises KeyError."""
        # GH #37218
        ser = Series([1, 2, 3])
        key = np.array([4], dtype=any_unsigned_int_numpy_dtype)

        with pytest.raises(KeyError, match="4"):
            ser[key]
        with pytest.raises(KeyError, match="4"):
            ser.loc[key]
|
||||
|
||||
|
||||
class TestGetitemBooleanMask:
    """``__getitem__`` with boolean masks."""

    def test_getitem_boolean(self, string_series):
        """A plain list of booleans behaves like the boolean Series mask."""
        ser = string_series
        above_median = ser > ser.median()

        # passing list is OK
        from_list = ser[list(above_median)]
        from_series = ser[above_median]
        tm.assert_series_equal(from_list, from_series)
        tm.assert_index_equal(from_list.index, ser.index[above_median])

    def test_getitem_boolean_empty(self):
        """Empty and mis-sized boolean indexers."""
        empty = Series([], dtype=np.int64)
        empty.index.name = "index_name"
        empty = empty[empty.isna()]
        assert empty.index.name == "index_name"
        assert empty.dtype == np.int64

        # GH#5877
        # indexing with empty series
        ser = Series(["A", "B"], dtype=object)
        expected = Series(dtype=object, index=Index([], dtype="int64"))
        tm.assert_series_equal(ser[Series([], dtype=object)], expected)

        # invalid because of the boolean indexer
        # that's empty or not-aligned
        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        for flags in ([], [True]):
            with pytest.raises(IndexingError, match=msg):
                ser[Series(flags, dtype=bool)]

    def test_getitem_boolean_object(self, string_series):
        """An object-dtype boolean mask works until it contains NaN."""
        # using column from DataFrame

        ser = string_series
        mask = ser > ser.median()
        omask = mask.astype(object)

        # getitem
        tm.assert_series_equal(ser[omask], ser[mask])

        # setitem
        via_bool = ser.copy()
        via_object = ser.copy()
        via_object[omask] = 5
        via_bool[mask] = 5
        tm.assert_series_equal(via_object, via_bool)

        # nans raise exception
        omask[5:10] = np.nan
        msg = "Cannot mask with non-boolean array containing NA / NaN values"
        with pytest.raises(ValueError, match=msg):
            ser[omask]
        with pytest.raises(ValueError, match=msg):
            ser[omask] = 5

    def test_getitem_boolean_dt64_copies(self):
        """Boolean selection yields arrays whose ``base`` is None."""
        # GH#36210
        aware = date_range("2016-01-01", periods=4, tz="US/Pacific")
        key = np.array([True, True, False, False])

        dt_ser = Series(aware._data)

        dt_picked = dt_ser[key]
        assert dt_picked._values._ndarray.base is None

        # compare with numeric case for reference
        int_ser = Series(range(4))
        int_picked = int_ser[key]
        assert int_picked._values.base is None

    def test_getitem_boolean_corner(self, datetime_series):
        """A boolean Series with a shifted index is rejected by ``[]`` and ``loc``."""
        ts = datetime_series
        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ts[mask_shifted]

        with pytest.raises(IndexingError, match=msg):
            ts.loc[mask_shifted]

    def test_getitem_boolean_different_order(self, string_series):
        """A mask in a different row order gives the same selection."""
        by_value = string_series.sort_values()

        from_reordered = string_series[by_value > 0]
        from_aligned = string_series[string_series > 0]
        tm.assert_series_equal(from_reordered, from_aligned)

    def test_getitem_boolean_contiguous_preserve_freq(self):
        """A contiguous mask keeps the index freq; a gap drops it."""
        bdays = date_range("1/1/2000", "3/1/2000", freq="B")

        mask = np.zeros(len(bdays), dtype=bool)
        mask[10:20] = True

        masked = bdays[mask]
        expected = bdays[10:20]
        assert expected.freq == bdays.freq
        tm.assert_index_equal(masked, expected)

        # one extra, non-adjacent position
        mask[22] = True
        masked = bdays[mask]
        assert masked.freq is None
|
||||
|
||||
|
||||
class TestGetitemCallable:
    """``Series.__getitem__`` with a callable key."""

    def test_getitem_callable(self):
        """The callable's return value is used as the key."""
        # GH#12533
        ser = Series(4, index=list("ABCD"))

        # callable returning a scalar label
        scalar = ser[lambda x: "A"]
        assert scalar == ser.loc["A"]

        # callable returning a list of labels
        by_labels = ser[lambda x: ["A", "B"]]
        tm.assert_series_equal(by_labels, ser.loc[["A", "B"]])

        # callable returning a boolean list
        by_mask = ser[lambda x: [True, False, True, True]]
        tm.assert_series_equal(by_mask, ser.iloc[[0, 2, 3]])
|
||||
|
||||
|
||||
def test_getitem_generator(string_series):
    """Generator and iterator keys act like the equivalent boolean mask."""
    positive = string_series > 0
    from_generator = string_series[(value > 0 for value in string_series)]
    from_iterator = string_series[iter(positive)]
    expected = string_series[string_series > 0]
    tm.assert_series_equal(from_generator, expected)
    tm.assert_series_equal(from_iterator, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    """``series[:, None]`` raises ValueError for int, naive and tz-aware values."""
    expected_error = pytest.raises(ValueError, match="Multi-dimensional indexing")
    with expected_error:
        series[:, None]
|
||||
|
||||
|
||||
def test_getitem_multilevel_scalar_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data,
):
    """A ``(scalar, slice)`` key on the ``"A"`` column raises TypeError."""
    # not implementing this for now
    column = multiindex_year_month_day_dataframe_random_data["A"]

    pattern = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=pattern):
        column[2000, 3:4]
|
||||
|
||||
|
||||
def test_getitem_dataframe_raises():
    """Indexing a Series with a boolean DataFrame raises TypeError."""
    labels = list(range(10))
    ser = Series(10, index=labels)
    frame = DataFrame(labels, index=labels)
    msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    frame_mask = frame > 5
    with pytest.raises(TypeError, match=msg):
        ser[frame_mask]
|
||||
|
||||
|
||||
def test_getitem_assignment_series_alignment():
    """Assigning a Series through an int-array key places values in order."""
    # https://github.com/pandas-dev/pandas/issues/37427
    # with getitem, when assigning with a Series, it is not first aligned
    target = Series(range(10))
    positions = np.array([2, 4, 9])
    replacement = Series([10, 11, 12])
    target[positions] = replacement
    tm.assert_series_equal(target, Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]))
|
||||
|
||||
|
||||
def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    """``None`` raises KeyError via the series, its index and the index engine."""
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])

    # each lookup is deferred so it runs inside its own ``raises`` context
    lookups = (
        lambda: ser[None],
        lambda: ser.index.get_loc(None),
        lambda: ser.index._engine.get_loc(None),
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match="None"):
            lookup()
|
||||
|
||||
|
||||
def test_getitem_1tuple_slice_without_multiindex():
    """Check that a 1-tuple holding a slice selects the same rows as the bare slice."""
    ser = Series(range(5))
    wrapped_key = (slice(3),)

    via_tuple = ser[wrapped_key]
    via_slice = ser[wrapped_key[0]]

    tm.assert_series_equal(via_tuple, via_slice)
|
||||
|
||||
|
||||
def test_getitem_preserve_name(datetime_series):
    """Check that boolean, integer-list and slice selections keep the Series name."""
    by_mask = datetime_series[datetime_series > 0]
    assert by_mask.name == datetime_series.name

    # Integer-list keys are expected to emit the positional-keys FutureWarning.
    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        by_int_list = datetime_series[[0, 2, 4]]
    assert by_int_list.name == datetime_series.name

    by_slice = datetime_series[5:10]
    assert by_slice.name == datetime_series.name
|
||||
|
||||
|
||||
def test_getitem_with_integer_labels():
    """Check that list/array keys with labels missing from an int index raise KeyError."""
    # integer indexes, be careful
    values = np.random.default_rng(2).standard_normal(10)
    ser = Series(values, index=list(range(0, 20, 2)))

    list_key = [0, 2, 5, 7, 8]
    array_key = np.array([0, 2, 5, 7, 8])

    for key in (list_key, array_key):
        with pytest.raises(KeyError, match="not in index"):
            ser[key]
|
||||
|
||||
|
||||
def test_getitem_missing(datetime_series):
    """Check that a label one business day before the first index entry raises KeyError."""
    # missing
    first_label = datetime_series.index[0]
    absent_label = first_label - BDay()

    expected_pattern = r"Timestamp\('1999-12-31 00:00:00'\)"
    with pytest.raises(KeyError, match=expected_pattern):
        datetime_series[absent_label]
|
||||
|
||||
|
||||
def test_getitem_fancy(string_series, object_series):
    """Check that ``[[1, 2, 3]]`` selections line up with the source Series."""
    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        str_subset = string_series[[1, 2, 3]]
    obj_subset = object_series[[1, 2, 3]]

    pairs = ((string_series, str_subset), (object_series, obj_subset))

    # Element 1 of each subset must correspond to element 2 of its source.
    for source, subset in pairs:
        assert source.index[2] == subset.index[1]
    for source, subset in pairs:
        assert source.iloc[2] == subset.iloc[1]
|
||||
|
||||
|
||||
def test_getitem_box_float64(datetime_series):
    """Check that a scalar fetched with an integer key is an ``np.float64``."""
    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        fetched = datetime_series[5]
    assert isinstance(fetched, np.float64)
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
    """Check that a label occurring once in a duplicated index returns a scalar."""
    labels = ["c", "a", "a", "b", "b"]
    ser = Series(range(5), index=labels)

    assert is_scalar(ser["c"])
    assert ser["c"] == 0
|
||||
|
||||
|
||||
def test_getitem_dups():
    """Check that a duplicated label returns a Series of all matching rows."""
    labels = ["A", "A", "B", "C", "C"]
    ser = Series(range(5), index=labels, dtype=np.int64)
    expected = Series([3, 4], index=["C", "C"], dtype=np.int64)

    tm.assert_series_equal(ser["C"], expected)
|
||||
|
||||
|
||||
def test_getitem_categorical_str():
    """Check that a string key on a Categorical-indexed Series returns every match."""
    # GH#31765
    cat_labels = Categorical(["a", "b", "c", "a", "b"])
    ser = Series(range(5), index=cat_labels)

    selected = ser["a"]

    # "a" sits at positions 0 and 3.
    tm.assert_series_equal(selected, ser.iloc[[0, 3]])
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
    """Smoke test: a reversing slice on a Series with duplicate labels does not raise."""
    ser = Series(1, index=["a", "a", "b", "b", "c"])
    reversing = slice(None, None, -1)
    ser[reversing]  # it works!
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
def test_duplicated_index_getitem_positional_indexer(index_vals):
    """Check that ``s[3]`` returns 3 (with a FutureWarning) on a duplicated str index."""
    # GH 11747
    ser = Series(range(5), index=list(index_vals))

    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        fetched = ser[3]

    assert fetched == 3
|
||||
|
||||
|
||||
class TestGetitemDeprecatedIndexers:
    """Set and dict keys must be rejected by ``Series[...]`` reads and writes."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """Reading with a set or dict key raises TypeError."""
        # GH#42825 enforced in 2.0
        target = Series([1, 2, 3])
        expected_msg = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_msg):
            target[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        """Writing with a set or dict key raises TypeError."""
        # GH#42825 enforced in 2.0
        target = Series([1, 2, 3])
        expected_msg = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_msg):
            target[key] = 1
|
@ -0,0 +1,518 @@
|
||||
""" test get/set & misc """
|
||||
from datetime import timedelta
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import IndexingError
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
Index,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
isna,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Check errors for an out-of-range integer key and an unknown label.

    The integer-key checks run twice: on the original duplicate-label Series
    and again after ``sort_index``.
    """
    values = np.random.default_rng(2).standard_normal(5)
    ser = Series(values, index=["a", "b", "a", "a", "b"])

    positional_warning = (
        "Series.__[sg]etitem__ treating keys as positions is deprecated"
    )
    out_of_bounds = "index 5 is out of bounds for axis 0 with size 5"

    with pytest.raises(IndexError, match=out_of_bounds):
        with tm.assert_produces_warning(FutureWarning, match=positional_warning):
            ser[5]
    with pytest.raises(IndexError, match=out_of_bounds):
        with tm.assert_produces_warning(FutureWarning, match=positional_warning):
            ser[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        ser["c"]

    ser = ser.sort_index()

    with pytest.raises(IndexError, match=out_of_bounds):
        with tm.assert_produces_warning(FutureWarning, match=positional_warning):
            ser[5]

    # After sorting, the setitem path accepts a wider set of messages.
    out_of_bounds_sorted = (
        r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    )
    with pytest.raises(IndexError, match=out_of_bounds_sorted):
        with tm.assert_produces_warning(FutureWarning, match=positional_warning):
            ser[5] = 0
|
||||
|
||||
|
||||
def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
    """Check that ``ser[1]`` on a numeric index selects by label, not by position."""
    # GH51053
    numeric_dtype = any_numeric_dtype
    labels = Index([1, 0, 1], dtype=numeric_dtype)
    ser = Series(range(3), index=labels)

    selected = ser[1]

    # Label 1 occurs at positions 0 and 2.
    expected = Series([0, 2], index=Index([1, 1], dtype=numeric_dtype))
    tm.assert_series_equal(selected, expected, check_exact=True)
|
||||
|
||||
|
||||
def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
    """Check that ``ser[1] = value`` on a numeric index writes by label."""
    # GH51053
    numeric_dtype = any_numeric_dtype
    labels = Index([1, 0, 1], dtype=numeric_dtype)
    ser = Series(range(3), index=labels)

    ser[1] = 10

    # Both rows labelled 1 are overwritten; the row labelled 0 is untouched.
    expected = Series([10, 1, 10], index=labels)
    tm.assert_series_equal(ser, expected, check_exact=True)
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(datetime_series):
    """Check label-array and label-slice ``[]`` access against reindex/``.loc``."""
    labels = datetime_series.index[[5, 10, 15]]

    by_labels = datetime_series[labels]
    tm.assert_series_equal(by_labels, datetime_series.reindex(labels))

    by_label_slice = datetime_series[labels[0] : labels[2]]
    via_loc = datetime_series.loc[labels[0] : labels[2]]
    tm.assert_series_equal(by_label_slice, via_loc)
|
||||
|
||||
|
||||
def test_basic_getitem_dt64tz_values():
    """Check that ``.loc``, ``.iloc`` and ``[]`` return the same tz-aware Timestamp."""
    # GH12089
    # with tz for values
    tz_values = date_range("2011-01-01", periods=3, tz="US/Eastern")
    ser = Series(tz_values, index=["a", "b", "c"])
    expected = Timestamp("2011-01-01", tz="US/Eastern")

    assert ser.loc["a"] == expected
    assert ser.iloc[0] == expected
    assert ser["a"] == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis(using_copy_on_write, warn_copy_on_write):
    """Check reading and writing a Series through an ``...`` key."""
    ser = Series(np.random.default_rng(2).standard_normal(10))

    everything = ser[...]
    tm.assert_series_equal(everything, ser)

    with tm.assert_cow_warning(warn_copy_on_write):
        ser[...] = 5

    if using_copy_on_write:
        return
    # Without copy-on-write the earlier selection must reflect the write.
    assert (everything == 5).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            Series({1: 12, 2: [1, 2, 2, 3]}),
            Series({1: 313}),
            Series({1: 12}, dtype=object),
        ],
        [
            Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            Series({1: [1, 2, 3]}),
            Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    """Check label lookups after appending an entry that duplicates label 1."""
    # GH 17610
    combined = result_1._append(duplicate_item)
    expected_for_label_1 = expected_1._append(duplicate_item)

    tm.assert_series_equal(combined[1], expected_for_label_1)
    # Label 2 is still unique, so it must match the value before the append.
    assert combined[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
    """Check that a write via ``.iloc[0]`` is visible through the label "a"."""
    # caused bug without test
    ser = Series([1, 2, 3], ["a", "b", "c"])

    first_by_position = ser.iloc[0]
    assert first_by_position == ser["a"]

    ser.iloc[0] = 5
    tm.assert_almost_equal(ser["a"], 5)
|
||||
|
||||
|
||||
def test_series_box_timestamp():
|
||||
rng = date_range("20090415", "20090519", freq="B")
|
||||
ser = Series(rng)
|
||||
assert isinstance(ser[0], Timestamp)
|
||||
assert isinstance(ser.at[1], Timestamp)
|
||||
assert isinstance(ser.iat[2], Timestamp)
|
||||
assert isinstance(ser.loc[3], Timestamp)
|
||||
assert isinstance(ser.iloc[4], Timestamp)
|
||||
|
||||
ser = Series(rng, index=rng)
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
assert isinstance(ser[0], Timestamp)
|
||||
assert isinstance(ser.at[rng[1]], Timestamp)
|
||||
assert isinstance(ser.iat[2], Timestamp)
|
||||
assert isinstance(ser.loc[rng[3]], Timestamp)
|
||||
assert isinstance(ser.iloc[4], Timestamp)
|
||||
|
||||
|
||||
def test_series_box_timedelta():
    """Check that every scalar accessor on a timedelta Series returns a Timedelta."""
    hourly_deltas = timedelta_range("1 day 1 s", periods=5, freq="h")
    ser = Series(hourly_deltas)

    assert isinstance(ser[0], Timedelta)
    assert isinstance(ser.at[1], Timedelta)
    assert isinstance(ser.iat[2], Timedelta)
    assert isinstance(ser.loc[3], Timedelta)
    assert isinstance(ser.iloc[4], Timedelta)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror(indexer_sl):
    """Check that key 1, absent from an even-integer index, raises KeyError."""
    even_labels = list(range(0, 20, 2))
    ser = Series(range(10), index=even_labels)

    with pytest.raises(KeyError, match=r"^1$"):
        indexer_sl(ser)[1]
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing(indexer_sl):
    """Check that a list key with one missing label raises KeyError naming it."""
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])

    expected_pattern = re.escape("['bam'] not in index")
    with pytest.raises(KeyError, match=expected_pattern):
        indexer_sl(ser)[["foo", "bar", "bah", "bam"]]
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror(indexer_sl):
    """Check that assigning to an absent integer label enlarges the Series."""
    base = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    enlarged = base.copy()
    indexer_sl(enlarged)[1] = 5

    expected = concat([base, Series([5], index=[1])])
    tm.assert_series_equal(enlarged, expected)
|
||||
|
||||
|
||||
def test_setitem(datetime_series):
    """Check NaN writes by label, position list and position, then a mask overwrite."""
    sixth_label = datetime_series.index[5]
    datetime_series[sixth_label] = np.nan
    datetime_series.iloc[[1, 2, 17]] = np.nan
    datetime_series.iloc[6] = np.nan

    assert np.isnan(datetime_series.iloc[6])
    assert np.isnan(datetime_series.iloc[2])

    # Replace every NaN through a boolean mask.
    nan_mask = np.isnan(datetime_series)
    datetime_series[nan_mask] = 5
    assert not np.isnan(datetime_series.iloc[2])
|
||||
|
||||
|
||||
def test_setslice(datetime_series):
    """Check that a positional slice has matching length and a unique index."""
    window = datetime_series[5:20]
    window_index = window.index

    assert len(window) == len(window_index)
    assert window_index.is_unique is True
|
||||
|
||||
|
||||
def test_basic_getitem_setitem_corner(datetime_series):
    """Check errors for tuple keys, single-slice lists and nested-list keys."""
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    tuple_msg = "key of type tuple not found and not a MultiIndex"
    with pytest.raises(KeyError, match=tuple_msg):
        datetime_series[:, 2]
    with pytest.raises(KeyError, match=tuple_msg):
        datetime_series[:, 2] = 2

    # weird lists. [slice(0, 5)] raises but not two slices
    single_item_msg = "Indexing with a single-item list"
    with pytest.raises(ValueError, match=single_item_msg):
        # GH#31299
        datetime_series[[slice(None, 5)]]

    # but we're OK with a single-element tuple
    via_tuple = datetime_series[(slice(None, 5),)]
    via_slice = datetime_series[:5]
    tm.assert_series_equal(via_tuple, via_slice)

    # OK
    unhashable_msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=unhashable_msg):
        datetime_series[[5, [None, None]]]
    with pytest.raises(TypeError, match=unhashable_msg):
        datetime_series[[5, [None, None]]] = 2
|
||||
|
||||
|
||||
def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_write):
    """Check positional slicing, then whether writing to a slice reaches the parent."""
    snapshot = string_series.copy()
    middle = string_series[10:20]
    tail = string_series[-10:]
    obj_middle = object_series[10:20]

    # Position 9 lies just outside the 10:20 window.
    assert string_series.index[9] not in middle.index
    assert object_series.index[9] not in obj_middle.index

    assert len(middle) == len(middle.index)
    first_label = middle.index[0]
    assert string_series[first_label] == middle[middle.index[0]]

    assert middle.index[1] == string_series.index[11]
    tm.assert_numpy_array_equal(np.array(tail), np.array(string_series)[-10:])

    # Test return view.
    view = string_series[10:20]
    with tm.assert_cow_warning(warn_copy_on_write):
        view[:] = 0

    if not using_copy_on_write:
        assert (string_series[10:20] == 0).all()
    else:
        # Doesn't modify parent (CoW)
        tm.assert_series_equal(string_series, snapshot)
|
||||
|
||||
|
||||
def test_timedelta_assignment():
    """Check ``.loc`` assignment of ``datetime.timedelta`` values into a Series."""
    # GH 8209
    ser = Series([], dtype=object)
    ser.loc["B"] = timedelta(1)
    tm.assert_series_equal(ser, Series(Timedelta("1 days"), index=["B"]))

    # Add an empty slot "A" in front of "B".
    widened_index = ser.index.insert(0, "A")
    ser = ser.reindex(widened_index)
    with_gap = Series([np.nan, Timedelta("1 days")], index=["A", "B"])
    tm.assert_series_equal(ser, with_gap)

    ser.loc["A"] = timedelta(1)
    filled = Series(Timedelta("1 days"), index=["A", "B"])
    tm.assert_series_equal(ser, filled)
|
||||
|
||||
|
||||
def test_underlying_data_conversion(using_copy_on_write):
    """Check the effect of ``df["val"].update(s)`` on the parent frame.

    With copy-on-write the chained update must raise the chained-assignment
    error/warning and leave ``df`` unchanged; otherwise it must emit a
    FutureWarning matching "inplace method" and write through to ``df``.
    """
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    return_value = df.set_index(["a", "b", "c"], inplace=True)
    assert return_value is None
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    df_original = df.copy()
    # (a bare ``df`` expression statement used to sit here; it had no effect)

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["val"].update(s)
        expected = df_original
    else:
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["val"].update(s)
        expected = DataFrame(
            {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
        )
        return_value = expected.set_index(["a", "b", "c"], inplace=True)
        assert return_value is None
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(datetime_series):
    """Check that writing to an ``.iloc[list]`` selection leaves the source intact."""
    picked = datetime_series.iloc[[5, 10, 15]]
    # Element 1 of the selection came from position 10 of the source.
    picked.iloc[1] = np.nan
    assert not np.isnan(datetime_series.iloc[10])
|
||||
|
||||
|
||||
def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
    """Check that selecting "foo" keeps the Series name."""
    multi = lexsorted_two_level_string_multiindex
    values = np.random.default_rng(2).standard_normal(len(multi))
    ser = Series(values, index=multi, name="sth")

    selected = indexer_sl(ser)["foo"]

    assert selected.name == ser.name
|
||||
|
||||
|
||||
# miscellaneous methods
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="h"),
    ],
)
def test_slice_with_negative_step(index):
    """Check that negative-step label slices match their positional equivalents.

    Each bound is tried both as the index element and as its ``str`` form.
    """
    lower_keys = [str(index[9]), index[9]]
    upper_keys = [str(index[13]), index[13]]

    ser = Series(np.arange(20), index)
    SLC = IndexSlice

    for lower in lower_keys:
        tm.assert_indexing_slices_equivalent(ser, SLC[lower::-1], SLC[9::-1])
        tm.assert_indexing_slices_equivalent(ser, SLC[:lower:-1], SLC[:8:-1])

        for upper in upper_keys:
            tm.assert_indexing_slices_equivalent(
                ser, SLC[upper:lower:-1], SLC[13:8:-1]
            )
            tm.assert_indexing_slices_equivalent(
                ser, SLC[lower:upper:-1], SLC[0:0:-1]
            )
|
||||
|
||||
|
||||
def test_tuple_index():
    """Check get and set with 1-tuple labels."""
    # GH 35534 - Selecting values when a Series has an Index of tuples
    label_a, label_b = ("a",), ("b",)
    ser = Series([1, 2], index=[label_a, label_b])

    assert ser[label_a] == 1
    assert ser[label_b] == 2

    ser[label_b] = 3
    assert ser[label_b] == 3
|
||||
|
||||
|
||||
def test_frozenset_index():
    """Check get and set with frozenset labels."""
    # GH35747 - Selecting values when a Series has an Index of frozenset
    label_a = frozenset("a")
    label_b = frozenset("b")
    ser = Series([1, 2], index=[label_a, label_b])

    assert ser[label_a] == 1
    assert ser[label_b] == 2

    ser[label_b] = 3
    assert ser[label_b] == 3
|
||||
|
||||
|
||||
def test_loc_setitem_all_false_indexer():
    """Check that ``.loc`` assignment under an all-False mask changes nothing."""
    # GH#45778
    ser = Series([1, 2], index=["a", "b"])
    untouched = ser.copy()
    replacement = Series([6, 7], index=["a", "b"])

    # No value exceeds 100, so the mask selects no rows.
    ser.loc[ser > 100] = replacement

    tm.assert_series_equal(ser, untouched)
|
||||
|
||||
|
||||
def test_loc_boolean_indexer_non_matching_index():
    """Check that a longer nullable-boolean mask of NA/False selects nothing."""
    # GH#46551
    ser = Series([1])
    mask = Series([NA, False], dtype="boolean")

    selected = ser.loc[mask]

    tm.assert_series_equal(selected, Series([], dtype="int64"))
|
||||
|
||||
|
||||
def test_loc_boolean_indexer_miss_matching_index():
    """Check that a boolean mask with non-overlapping labels raises IndexingError."""
    # GH#46551
    ser = Series([1])
    mask = Series([NA, False], dtype="boolean", index=[1, 2])

    with pytest.raises(IndexingError, match="Unalignable"):
        ser.loc[mask]
|
||||
|
||||
|
||||
def test_loc_setitem_nested_data_enlargement():
    """Check that ``.loc`` can add a new label whose value is a DataFrame."""
    # GH#48614
    frame = DataFrame({"a": [1]})
    ser = Series({"label": frame})

    ser.loc["new_label"] = frame

    tm.assert_series_equal(ser, Series({"label": frame, "new_label": frame}))
|
||||
|
||||
|
||||
def test_loc_ea_numeric_index_oob_slice_end():
    """Check a ``.loc`` slice on an Int64 index whose end is past the last label."""
    # GH#50161
    ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))

    selected = ser.loc[2:3]

    # Only label 2 exists within the requested range.
    tm.assert_series_equal(selected, Series(1, index=Index([2], dtype="Int64")))
|
||||
|
||||
|
||||
def test_getitem_bool_int_key():
    """Check that ``.loc[0]`` raises KeyError when the labels are True/False."""
    # GH#48653
    bool_labelled = Series({True: 1, False: 0})

    with pytest.raises(KeyError, match="0"):
        bool_labelled.loc[0]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [{}, {"b": "x"}])
@pytest.mark.parametrize("indexer", [[], [False, False], slice(0, -1), np.array([])])
def test_setitem_empty_indexer(indexer, val):
    """Check that ``.loc`` assignment with a non-selecting indexer changes nothing."""
    # GH#45981
    frame = DataFrame({"a": [1, 2], **val})
    untouched = frame.copy()

    frame.loc[indexer] = 1.5

    tm.assert_frame_equal(frame, untouched)
|
||||
|
||||
|
||||
class TestDeprecatedIndexers:
    """Set and dict keys must be rejected by ``Series.loc`` reads and writes."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """``.loc`` read with a set/dict key raises TypeError."""
        # GH#42825 enforced in 2.0
        target = Series([1, 2])
        expected_msg = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_msg):
            target.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        """``.loc`` read on a MultiIndex with a set/dict (or tuple of one) raises."""
        # GH#42825 enforced in 2.0
        two_level = MultiIndex.from_tuples([(1, 2), (3, 4)])
        target = Series([1, 2], index=two_level)
        expected_msg = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_msg):
            target.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        """``.loc`` write with a set/dict key raises TypeError."""
        # GH#42825 enforced in 2.0
        target = Series([1, 2])
        expected_msg = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_msg):
            target.loc[key] = 1

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_setitem_dict_and_set_disallowed_multiindex(self, key):
        """``.loc`` write on a MultiIndex with a set/dict (or tuple of one) raises."""
        # GH#42825 enforced in 2.0
        two_level = MultiIndex.from_tuples([(1, 2), (3, 4)])
        target = Series([1, 2], index=two_level)
        expected_msg = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_msg):
            target.loc[key] = 1
|
||||
|
||||
|
||||
class TestSetitemValidation:
    """Setting dtype-incompatible scalars into a Series must warn (or not) as expected."""

    # This is adapted from pandas/tests/arrays/masked/test_indexing.py
    # but checks for warnings instead of errors.
    def _check_setitem_invalid(self, ser, invalid, indexer, warn):
        """Assign ``invalid`` via ``[]``, ``.iloc``, ``.loc`` and ``[:]``.

        Each assignment runs under ``tm.assert_produces_warning(warn, ...)``;
        ``ser`` is reset from a pristine copy between assignments.
        """
        msg = re.escape("Setting an item of incompatible dtype is deprecated")

        pristine = ser.copy()

        with tm.assert_produces_warning(warn, match=msg):
            ser[indexer] = invalid
            ser = pristine.copy()

        with tm.assert_produces_warning(warn, match=msg):
            ser.iloc[indexer] = invalid
            ser = pristine.copy()

        with tm.assert_produces_warning(warn, match=msg):
            ser.loc[indexer] = invalid
            ser = pristine.copy()

        with tm.assert_produces_warning(warn, match=msg):
            ser[:] = invalid

    # Scalars used as "invalid" values by every test below.
    _invalid_scalars = [
        1 + 2j,
        "True",
        "1",
        "1.0",
        NaT,
        np.datetime64("NaT"),
        np.timedelta64("NaT"),
    ]
    # Indexers that each target at least the first element.
    _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]

    @pytest.mark.parametrize(
        "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
    )
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_bool(self, invalid, indexer):
        """Non-bool scalars set into a bool Series always expect a FutureWarning."""
        bool_ser = Series([True, False, False], dtype="bool")
        self._check_setitem_invalid(bool_ser, invalid, indexer, FutureWarning)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
        """Invalid scalars set into an integer Series expect a FutureWarning.

        No warning is expected when ``invalid`` is NA-like but is neither
        ``NaT`` nor a numpy NaT value.
        """
        int_ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
        no_warning_expected = (
            isna(invalid) and invalid is not NaT and not np.isnat(invalid)
        )
        warn = None if no_warning_expected else FutureWarning
        self._check_setitem_invalid(int_ser, invalid, indexer, warn)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True])
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
        """Invalid scalars set into a float Series always expect a FutureWarning."""
        float_ser = Series([1, 2, None], dtype=float_numpy_dtype)
        self._check_setitem_invalid(float_ser, invalid, indexer, FutureWarning)
|
@ -0,0 +1,69 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_mask():
    """Check that ``mask(cond, ...)`` equals ``where(~cond, ...)``; bad conds raise."""
    # compare with tested results in test_where
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    tm.assert_series_equal(ser.where(~positive, np.nan), ser.mask(positive))

    via_where = ser.where(~positive)
    via_mask = ser.mask(positive)
    tm.assert_series_equal(via_where, via_mask)

    via_where = ser.where(~positive, -ser)
    via_mask = ser.mask(positive, -ser)
    tm.assert_series_equal(via_where, via_mask)

    # Conditions covering only the first three labels.
    flags = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())
    via_where = negated.where(~flags[:3])
    via_mask = negated.mask(flags[:3])
    tm.assert_series_equal(via_where, via_mask)

    via_where = negated.where(~flags[:3], -negated)
    via_mask = negated.mask(flags[:3], -negated)
    tm.assert_series_equal(via_where, via_mask)

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.mask(1)
    with pytest.raises(ValueError, match=shape_msg):
        ser.mask(flags[:3].values, -ser)
|
||||
|
||||
|
||||
def test_mask_casts():
    """Check that masking integer values with NaN gives the expected NaN-bearing Series."""
    # dtype changes
    ints = Series([1, 2, 3, 4])

    masked = ints.mask(ints > 2, np.nan)

    tm.assert_series_equal(masked, Series([1, 2, np.nan, np.nan]))
|
||||
|
||||
|
||||
def test_mask_casts2():
    """Check that a plain list of booleans works as a mask condition."""
    # see gh-21891
    ints = Series([1, 2])

    masked = ints.mask([True, False])

    tm.assert_series_equal(masked, Series([np.nan, 2]))
|
||||
|
||||
|
||||
def test_mask_inplace():
    """Check that ``mask(..., inplace=True)`` matches the non-inplace result."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    # Default replacement value.
    working = ser.copy()
    working.mask(positive, inplace=True)
    tm.assert_series_equal(working.dropna(), ser[~positive])
    tm.assert_series_equal(working, ser.mask(positive))

    # Explicit replacement value.
    working = ser.copy()
    working.mask(positive, -ser, inplace=True)
    tm.assert_series_equal(working, ser.mask(positive, -ser))
|
@ -0,0 +1,45 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_series_set_value():
    """Check building a Series one datetime label at a time via ``_set_value``."""
    # GH#1561
    first_day = datetime(2001, 1, 1)
    second_day = datetime(2001, 1, 2)
    expected_index = DatetimeIndex([first_day, second_day])

    ser = Series(dtype=object)
    ser._set_value(first_day, 1.0)
    ser._set_value(second_day, np.nan)

    tm.assert_series_equal(ser, Series([1.0, np.nan], index=expected_index))
|
||||
|
||||
|
||||
def test_set_value_dt64(datetime_series):
    """Check that ``_set_value`` on an existing label returns None and stores the value."""
    label = datetime_series.index[10]

    outcome = datetime_series._set_value(label, 0)

    assert outcome is None
    assert datetime_series[label] == 0
|
||||
|
||||
|
||||
def test_set_value_str_index(string_series):
    """Check that ``_set_value`` and ``.loc`` both append a new string label."""
    # equiv
    via_set_value = string_series.copy()
    outcome = via_set_value._set_value("foobar", 0)
    assert outcome is None
    assert via_set_value.index[-1] == "foobar"
    assert via_set_value["foobar"] == 0

    via_loc = string_series.copy()
    via_loc.loc["foobar"] = 0
    assert via_loc.index[-1] == "foobar"
    assert via_loc["foobar"] == 0
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,50 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_take_validate_axis():
    """Check that ``Series.take`` rejects an unknown axis name."""
    # GH#51022
    ser = Series([-1, 5, 6, 2, 4])

    expected_msg = "No axis named foo for object type Series"
    with pytest.raises(ValueError, match=expected_msg):
        ser.take([1, 2], axis="foo")
|
||||
|
||||
|
||||
def test_take():
    """Check ``take`` with valid, negative and out-of-bounds positions."""
    ser = Series([-1, 5, 6, 2, 4])

    taken = ser.take([1, 3, 4])
    tm.assert_series_equal(taken, Series([5, 2, 4], index=[1, 3, 4]))

    # -1 counts from the end, i.e. position 4.
    taken = ser.take([-1, 3, 4])
    tm.assert_series_equal(taken, Series([4, 2, 4], index=[4, 3, 4]))

    oob_msg = "indices are out-of-bounds"
    for bad_positions in ([1, 10], [2, 5]):
        with pytest.raises(IndexError, match=oob_msg):
            ser.take(bad_positions)
|
||||
|
||||
|
||||
def test_take_categorical():
    """Check ``take`` with negative positions on a categorical Series."""
    # https://github.com/pandas-dev/pandas/issues/20664
    ser = Series(pd.Categorical(["a", "b", "c"]))

    taken = ser.take([-2, -2, 0])

    # The expected values keep the full category set.
    expected_values = pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"])
    tm.assert_series_equal(taken, Series(expected_values, index=[1, 1, 0]))
|
||||
|
||||
|
||||
def test_take_slice_raises():
    """Check that ``Series.take`` rejects a slice argument with TypeError."""
    ser = Series([-1, 5, 6, 2, 4])

    expected_msg = "Series.take requires a sequence of integers, not slice"
    with pytest.raises(TypeError, match=expected_msg):
        ser.take(slice(0, 3, 1))
|
@ -0,0 +1,481 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_where_unsafe_int(any_signed_int_numpy_dtype):
    """Check that a boolean-mask write of a range keeps the signed-int dtype."""
    int_dtype = any_signed_int_numpy_dtype
    ser = Series(np.arange(10), dtype=int_dtype)
    below_five = ser < 5

    ser[below_five] = range(2, 7)

    expected_values = list(range(2, 7)) + list(range(5, 10))
    tm.assert_series_equal(ser, Series(expected_values, dtype=int_dtype))
|
||||
|
||||
|
||||
def test_where_unsafe_float(float_numpy_dtype):
    """Check that a boolean-mask write of a range keeps the float dtype."""
    ser = Series(np.arange(10), dtype=float_numpy_dtype)
    below_five = ser < 5

    ser[below_five] = range(2, 7)

    expected_values = list(range(2, 7)) + list(range(5, 10))
    tm.assert_series_equal(ser, Series(expected_values, dtype=float_numpy_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    """Check the resulting dtype and warning when fractional values are mask-set."""
    # see gh-9743
    ser = Series(np.arange(10), dtype=dtype)
    fractional = [2.5, 3.5, 4.5, 5.5, 6.5]
    below_five = ser < 5
    expected = Series(fractional + list(range(5, 10)), dtype=expected_dtype)

    # No warning is expected only when both dtypes are floating-point.
    if np.dtype(dtype).kind == np.dtype(expected_dtype).kind == "f":
        warn = None
    else:
        warn = FutureWarning

    with tm.assert_produces_warning(warn, match="incompatible dtype"):
        ser[below_five] = fractional
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_where_unsafe():
    """Check boolean-mask assignment: upcasts, same-dtype writes, length errors, None."""
    # see gh-9731
    ser = Series(np.arange(10), dtype="int64")
    fractional = [2.5, 3.5, 4.5, 5.5]

    above_five = ser > 5
    expected = Series(list(range(6)) + fractional, dtype="float64")

    with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
        ser[above_five] = fractional
    tm.assert_series_equal(ser, expected)

    # see gh-3235
    ser = Series(np.arange(10), dtype="int64")
    below_five = ser < 5
    ser[below_five] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    tm.assert_series_equal(ser, expected)
    assert ser.dtype == expected.dtype

    ser = Series(np.arange(10), dtype="int64")
    above_five = ser > 5
    ser[above_five] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    tm.assert_series_equal(ser, expected)

    # The mask selects four rows; five values must be rejected.
    ser = Series(np.arange(10))
    above_five = ser > 5

    length_msg = (
        "cannot set using a list-like indexer with a different length than the value"
    )
    with pytest.raises(ValueError, match=length_msg):
        ser[above_five] = [5, 4, 3, 2, 1]

    with pytest.raises(ValueError, match=length_msg):
        ser[above_five] = [0] * 5

    # dtype changes
    ser = Series([1, 2, 3, 4])
    replaced = ser.where(ser > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    tm.assert_series_equal(replaced, expected)

    # GH 4667
    # setting with None changes dtype
    ser = Series(range(10)).astype(float)
    ser[8] = None
    fetched = ser[8]
    assert isna(fetched)

    ser = Series(range(10)).astype(float)
    ser[ser > 8] = None
    missing = ser[isna(ser)]
    expected = Series(np.nan, index=[9])
    tm.assert_series_equal(missing, expected)
|
||||
|
||||
|
||||
def test_where():
    """Check ``Series.where`` with full and partial (first three labels) conditions."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    kept = ser.where(positive).dropna()
    selected = ser[positive]
    tm.assert_series_equal(kept, selected)

    flipped = ser.where(positive, -ser)
    tm.assert_series_equal(flipped, ser.abs())

    filtered = ser.where(positive)
    assert ser.shape == filtered.shape
    assert filtered is not ser

    # test alignment
    flags = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())

    expected = negated[flags].reindex(negated.index[:3]).reindex(negated.index)
    filtered = negated.where(flags[:3])
    tm.assert_series_equal(filtered, expected)

    expected = negated.abs()
    expected.iloc[0] = negated[0]
    filtered = negated.where(flags[:3], -negated)
    tm.assert_series_equal(filtered, expected)
|
||||
|
||||
|
||||
def test_where_error():
    """Bad ``where`` conditions and wrong-length boolean-mask assignment raise."""
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        s.where(1)
    with pytest.raises(ValueError, match=shape_msg):
        s.where(cond[:3].values, -s)

    # GH 2745
    s = Series([1, 2])
    s[[True, False]] = [0, 1]
    tm.assert_series_equal(s, Series([0, 2]))

    # failures
    length_msg = (
        "cannot set using a list-like indexer with a different length than the value"
    )
    for bad_value in ([0, 2, 3], []):
        with pytest.raises(ValueError, match=length_msg):
            s[[True, False]] = bad_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """``Series.where`` takes its boolean condition in any array-like container."""
    # see gh-15414
    ser = Series([1, 2, 3])
    boolean_cond = klass([False, True, True])

    result = ser.where(boolean_cond)

    tm.assert_series_equal(result, Series([np.nan, 2, 3]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    """Non-boolean conditions and wrong-length conditions are both rejected."""
    # see gh-15414: only boolean arrays accepted
    ser = Series([1, 2, 3])

    dtype_msg = "Boolean array expected for the condition"
    with pytest.raises(ValueError, match=dtype_msg):
        ser.where(cond)

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where([True])
|
||||
|
||||
|
||||
def test_where_ndframe_align():
    """Raw conditions must match in shape, while Series conditions get aligned."""
    shape_msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # (raw condition of the wrong length, result once it is wrapped in a Series)
    cases = [
        ([True], Series([1, np.nan, np.nan])),
        (np.array([False, True, False, True]), Series([np.nan, 2, np.nan])),
    ]
    for raw_cond, expected in cases:
        with pytest.raises(ValueError, match=shape_msg):
            ser.where(raw_cond)

        tm.assert_series_equal(ser.where(Series(raw_cond)), expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
    """Mismatched-length slice/list assignment raises; a scalar key stores a list."""
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    def length_error(indexer_kind):
        # Expected error text for the given indexer kind.
        return (
            f"cannot set using a {indexer_kind} indexer with a "
            "different length than the value"
        )

    # slice
    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_error("slice")):
        ser[0:3] = list(range(27))

    ser[0:3] = list(range(3))
    tm.assert_series_equal(ser.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    ser = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_error("slice")):
        ser[0:4:2] = list(range(27))

    ser = Series(list("abcdef"))
    ser[0:4:2] = list(range(2))
    tm.assert_series_equal(ser, Series([0, "b", 1, "d", "e", "f"]))

    # neg slices
    ser = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_error("slice")):
        ser[:-1] = list(range(27))

    ser[-3:-1] = list(range(2))
    tm.assert_series_equal(ser, Series(["a", "b", "c", 0, 1, "f"]))

    # list
    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_error("list-like")):
        ser[[0, 1, 2]] = list(range(27))

    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_error("list-like")):
        ser[[0, 1, 2]] = list(range(2))

    # scalar
    ser = Series(list("abc"))
    ser[0] = list(range(10))
    tm.assert_series_equal(ser, Series([list(range(10)), "b", "c"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    """A single item is broadcast over the selection by setitem, where and mask."""
    # GH#8801, GH#4195
    selection = np.resize(mask, size)
    data = np.arange(size, dtype=float)

    # Build the expected values up front, before anything is assigned:
    # selected positions take ``item``, the rest keep the source value.
    expected = Series(
        [item if chosen else original for original, chosen in zip(data, selection)]
    )

    assigned = Series(data)
    assigned[selection] = item
    tm.assert_series_equal(assigned, expected)

    tm.assert_series_equal(Series(data).where(~selection, box(item)), expected)

    tm.assert_series_equal(Series(data).mask(selection, box(item)), expected)
|
||||
|
||||
|
||||
def test_where_inplace():
    """``where(..., inplace=True)`` agrees with the out-of-place call."""
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    # no replacement value: unselected entries become NaN
    nan_filled = s.copy()
    nan_filled.where(cond, inplace=True)
    tm.assert_series_equal(nan_filled.dropna(), s[cond])
    tm.assert_series_equal(nan_filled, s.where(cond))

    # explicit replacement values
    neg_filled = s.copy()
    neg_filled.where(cond, -s, inplace=True)
    tm.assert_series_equal(neg_filled, s.where(cond, -s))
|
||||
|
||||
|
||||
def test_where_dups():
    """``where`` and masked assignment work on a Series with duplicate labels."""
    # GH 4550
    # where crashes with dups in index
    dup_labels = [0, 1, 2, 0, 1, 2]
    comb = pd.concat([Series(list(range(3))), Series(list(range(3)))])

    result = comb.where(comb < 2)
    tm.assert_series_equal(
        result, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_labels)
    )

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    tm.assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_labels))

    comb[comb < 2] += 10
    tm.assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_labels))
|
||||
|
||||
|
||||
def test_where_numeric_with_string():
    """Filling an integer Series with strings keeps the ints and gives object dtype."""
    # GH 9280
    s = Series([1, 2, 3])

    # the same expectations hold for a scalar, a list and an ndarray of strings
    replacements = ("X", ["X", "Y", "Z"], np.array(["X", "Y", "Z"]))
    for other in replacements:
        w = s.where(s > 1, other)

        assert not is_integer(w[0])
        assert is_integer(w[1])
        assert is_integer(w[2])
        assert isinstance(w[0], str)
        assert w.dtype == "object"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
def test_where_datetimelike_coerce(dtype):
    """An all-False mask swaps datetimelike values for the numeric replacements."""
    ser = Series([1, 2], dtype=dtype)
    mask = np.array([False, False])
    expected = Series([10, 10])

    msg = "Downcasting behavior in Series and DataFrame methods 'where'"
    # list/scalar and int/float replacements all warn and give the same result
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser.where(mask, other)
        tm.assert_series_equal(result, expected)

    result = ser.where(mask, [10.0, np.nan])
    tm.assert_series_equal(result, Series([10, np.nan], dtype="object"))
|
||||
|
||||
|
||||
def test_where_datetimetz():
    """Masked-out entries of a tz-aware Series become ``NaT`` and keep the dtype."""
    # GH 15701
    stamps = [
        Timestamp(text)
        for text in ("2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00")
    ]
    ser = Series(stamps, dtype="datetime64[ns, UTC]")

    result = ser.where(Series([False, True]))

    expected = Series([pd.NaT, ser[1]], dtype="datetime64[ns, UTC]")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_sparse():
    """``where`` on a sparse-backed Series returns a sparse-backed result."""
    # GH#17198 make sure we don't get an AttributeError for sp_index
    sparse_ser = Series(pd.arrays.SparseArray([1, 2]))

    result = sparse_ser.where(sparse_ser >= 2, 0)

    tm.assert_series_equal(result, Series(pd.arrays.SparseArray([0, 2])))
|
||||
|
||||
|
||||
def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    """An empty list condition on an empty Series returns an equal Series."""
    # https://github.com/pandas-dev/pandas/issues/34592
    empty = Series([], dtype=float)

    tm.assert_series_equal(empty.where([]), empty)
|
||||
|
||||
|
||||
def test_where_categorical(frame_or_series):
    """Masking out a category leaves NaN but keeps that category in the dtype."""
    # https://github.com/pandas-dev/pandas/issues/18888
    expected_values = pd.Categorical(
        ["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]
    )
    expected = frame_or_series(expected_values, dtype="category")

    obj = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
    result = obj.where(obj != "C")

    tm.assert_equal(expected, result)
|
||||
|
||||
|
||||
def test_where_datetimelike_categorical(tz_naive_fixture):
    """``where`` fills datetimelike data from a Categorical of datetimes."""
    # GH#37682
    full = date_range("2001-01-01", periods=3, tz=tz_naive_fixture)._with_freq(None)

    # each side is missing a different entry; together they cover ``full``
    left = pd.DatetimeIndex([full[0], full[1], pd.NaT])
    right = pd.Categorical([full[0], pd.NaT, full[2]])
    keep_left = np.array([True, True, False])

    # DatetimeIndex.where
    tm.assert_index_equal(left.where(keep_left, right), full)

    # DatetimeArray.where
    tm.assert_datetime_array_equal(left._data._where(keep_left, right), full._data)

    # Series.where
    tm.assert_series_equal(Series(left).where(keep_left, right), Series(full))

    # DataFrame.where
    frame_result = pd.DataFrame(left).where(keep_left[:, None], pd.DataFrame(right))
    tm.assert_frame_equal(frame_result, pd.DataFrame(full))
|
@ -0,0 +1,82 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_xs_datetimelike_wrapping():
    """``xs`` on an object Series of ``np.datetime64`` returns ``np.datetime64``."""
    # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp
    raw = date_range("2016-01-01", periods=3)._data._ndarray

    ser = Series(raw, dtype=object)
    # write the raw datetime64 scalars back position by position
    for pos, value in enumerate(raw):
        ser.iloc[pos] = value
    assert ser.dtype == object
    assert isinstance(ser[0], np.datetime64)

    assert isinstance(ser.xs(0), np.datetime64)
|
||||
|
||||
|
||||
class TestXSWithMultiIndex:
    """``xs`` on objects indexed by a MultiIndex."""

    def test_xs_level_series(self, multiindex_dataframe_random_data):
        """Frame ``xs`` then column selection equals slicing the column directly."""
        frame = multiindex_dataframe_random_data
        column = frame["A"]

        result = frame.xs("two", level=1)["A"]

        tm.assert_series_equal(result, column[:, "two"])

    def test_series_getitem_multiindex_xs_by_label(self):
        """``xs`` accepts a level name and keeps the remaining level's name."""
        # GH#5684
        index = MultiIndex.from_tuples(
            [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
        )
        ser = Series([1, 2, 3, 4], index=index)
        outcome = ser.index.set_names(["L1", "L2"], inplace=True)
        assert outcome is None

        expected = Series([1, 3], index=["a", "b"])
        outcome = expected.index.set_names(["L1"], inplace=True)
        assert outcome is None

        tm.assert_series_equal(ser.xs("one", level="L2"), expected)

    def test_series_getitem_multiindex_xs(self):
        """A date string key selects the matching entries of a datetime level."""
        # GH#6258
        dates = list(date_range("20130903", periods=3))
        index = MultiIndex.from_product([list("AB"), dates])
        ser = Series([1, 3, 4, 1, 3, 4], index=index)

        result = ser.xs("20130903", level=1)

        tm.assert_series_equal(result, Series([1, 1], index=list("AB")))

    def test_series_xs_droplevel_false(self):
        """``drop_level=False`` keeps the full MultiIndex on the result."""
        # GH: 19056
        index = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
        )
        ser = Series([1, 1, 1], index=index)

        result = ser.xs("a", axis=0, drop_level=False)

        kept_index = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y")], names=["level1", "level2"]
        )
        tm.assert_series_equal(result, Series([1, 1], index=kept_index))

    def test_xs_key_as_list(self):
        """List keys are rejected with a ``TypeError``."""
        # GH#41760
        index = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
        ser = Series([1], index=index)

        for list_key in (["a", "x"], ["a"]):
            with pytest.raises(TypeError, match="list keys are not supported"):
                ser.xs(list_key, axis=0, drop_level=False)
|
@ -0,0 +1,7 @@
|
||||
"""
|
||||
Test files dedicated to individual (stand-alone) Series methods
|
||||
|
||||
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
|
||||
|
||||
These may also present opportunities for sharing/de-duplicating test code.
|
||||
"""
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user