This commit is contained in:
2024-12-04 13:35:57 +05:00
parent d346bf4b2a
commit 73ce681a55
7059 changed files with 1196501 additions and 0 deletions

View File

@@ -0,0 +1,258 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Index,
Series,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays.categorical import CategoricalAccessor
from pandas.core.indexes.accessors import Properties
class TestCatAccessor:
@pytest.mark.parametrize(
    "method",
    [
        lambda obj: obj.cat.set_categories([1, 2, 3]),
        lambda obj: obj.cat.reorder_categories([2, 3, 1], ordered=True),
        lambda obj: obj.cat.rename_categories([1, 2, 3]),
        lambda obj: obj.cat.remove_unused_categories(),
        lambda obj: obj.cat.remove_categories([2]),
        lambda obj: obj.cat.add_categories([4]),
        lambda obj: obj.cat.as_ordered(),
        lambda obj: obj.cat.as_unordered(),
    ],
)
def test_getname_categorical_accessor(self, method):
    """Check that each parametrized ``.cat`` call keeps the Series name."""
    # GH#17509
    named = Series([1, 2, 3], name="A").astype("category")
    assert method(named).name == "A"
def test_cat_accessor(self):
ser = Series(Categorical(["a", "b", np.nan, "a"]))
tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
assert not ser.cat.ordered, False
exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
res = ser.cat.set_categories(["b", "a"])
tm.assert_categorical_equal(res.values, exp)
ser[:] = "a"
ser = ser.cat.remove_unused_categories()
tm.assert_index_equal(ser.cat.categories, Index(["a"]))
def test_cat_accessor_api(self):
    """``Series.cat`` is the accessor class; a non-categorical Series has no ``.cat``."""
    # GH#9322
    assert Series.cat is CategoricalAccessor

    categorical = Series(list("aabbcde")).astype("category")
    assert isinstance(categorical.cat, CategoricalAccessor)

    non_categorical = Series([1])
    with pytest.raises(AttributeError, match="only use .cat accessor"):
        non_categorical.cat
    assert not hasattr(non_categorical, "cat")
def test_cat_accessor_no_new_attributes(self):
    """Setting an unknown attribute on the ``.cat`` accessor raises AttributeError."""
    # https://github.com/pandas-dev/pandas/issues/10673
    categorical = Series(list("aabbcde")).astype("category")
    expected_msg = "You cannot add any new attribute"
    with pytest.raises(AttributeError, match=expected_msg):
        categorical.cat.xlabel = "a"
def test_categorical_delegations(self):
    """Check which attributes and methods ``Series.cat`` exposes, and that the
    accessor is refused for non-categorical data."""
    # invalid accessor: plain attribute access on non-categorical Series
    msg = r"Can only use \.cat accessor with a 'category' dtype"
    non_categorical = [
        Series([1, 2, 3]),
        Series(["a", "b", "c"]),
        Series(np.arange(5.0)),
        Series([Timestamp("20130101")]),
    ]
    for bad in non_categorical:
        with pytest.raises(AttributeError, match=msg):
            bad.cat
    # ... and the same error when the accessor is called
    with pytest.raises(AttributeError, match=msg):
        Series([1, 2, 3]).cat()

    # '.categories', '.codes', '.ordered' and the renaming / ordering methods
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    tm.assert_index_equal(ser.cat.categories, Index(["a", "b", "c"]))

    ser = ser.cat.rename_categories([1, 2, 3])
    tm.assert_index_equal(ser.cat.categories, Index([1, 2, 3]))
    tm.assert_series_equal(ser.cat.codes, Series([0, 1, 2, 0], dtype="int8"))
    assert ser.cat.ordered

    ser = ser.cat.as_unordered()
    assert not ser.cat.ordered
    ser = ser.cat.as_ordered()
    assert ser.cat.ordered

    # reorder: the categories change order, the values do not
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    ser = ser.cat.set_categories(["c", "b", "a"])
    unchanged = np.array(["a", "b", "c", "a"], dtype=np.object_)
    tm.assert_index_equal(ser.cat.categories, Index(["c", "b", "a"]))
    tm.assert_numpy_array_equal(ser.values.__array__(), unchanged)
    tm.assert_numpy_array_equal(ser.__array__(), unchanged)

    # remove unused categories
    ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
    ser = ser.cat.remove_unused_categories()
    unchanged = np.array(["a", "b", "b", "a"], dtype=np.object_)
    tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
    tm.assert_numpy_array_equal(ser.values.__array__(), unchanged)
    tm.assert_numpy_array_equal(ser.__array__(), unchanged)

    # This method is likely to be confused, so test that it raises an error
    # on wrong inputs (right: ser.cat.set_categories([4, 3, 2, 1])):
    msg = "'Series' object has no attribute 'set_categories'"
    with pytest.raises(AttributeError, match=msg):
        ser.set_categories([4, 3, 2, 1])

    # GH#18862 (let Series.cat.rename_categories take callables)
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    upper = ser.cat.rename_categories(lambda label: label.upper())
    expected = Series(
        Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
    )
    tm.assert_series_equal(upper, expected)
@pytest.mark.parametrize(
    "idx",
    [
        date_range("1/1/2015", periods=5),
        date_range("1/1/2015", periods=5, tz="MET"),
        period_range("1/1/2015", freq="D", periods=5),
        timedelta_range("1 days", "10 days"),
    ],
)
def test_dt_accessor_api_for_categorical(self, idx):
    """Compare ``.dt`` on a categorical Series with ``.dt`` on the plain Series.

    Every public method and every ``_datetimelike_ops`` attribute is evaluated
    on both and the results are compared with ``tm.assert_equal``.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    ser = Series(idx)
    cat = ser.astype("category")

    # only testing field (like .day)
    # and bool (is_month_start)
    attr_names = type(ser._values)._datetimelike_ops

    assert isinstance(cat.dt, Properties)

    # (method name, positional args, keyword args)
    special_func_defs = [
        ("strftime", ("%Y-%m-%d",), {}),
        ("round", ("D",), {}),
        ("floor", ("D",), {}),
        ("ceil", ("D",), {}),
        ("asfreq", ("D",), {}),
        # A real one-element tuple: the former ``("s")`` was a bare string
        # that only unpacked correctly because it is a single character.
        ("as_unit", ("s",), {}),
    ]
    if idx.dtype == "M8[ns]":
        # exclude dt64tz since that is already localized and would raise
        special_func_defs.append(("tz_localize", ("UTC",), {}))
    elif idx.dtype.kind == "M":
        # exclude dt64 since that is not localized so would raise
        special_func_defs.append(("tz_convert", ("EST",), {}))

    _special_func_names = [f[0] for f in special_func_defs]
    _ignore_names = ["components", "tz_localize", "tz_convert"]

    available = dir(ser.dt)
    func_names = [
        fname
        for fname in available
        if not (
            fname.startswith("_")
            or fname in attr_names
            or fname in _special_func_names
            or fname in _ignore_names
        )
    ]

    func_defs = [(fname, (), {}) for fname in func_names]
    func_defs.extend(f_def for f_def in special_func_defs if f_def[0] in available)

    for func, args, kwargs in func_defs:
        warn_cls = []
        if func == "to_period" and getattr(idx, "tz", None) is not None:
            # dropping TZ
            warn_cls.append(UserWarning)
        if func == "to_pydatetime":
            # deprecated to return Index[object]
            warn_cls.append(FutureWarning)

        # An empty tuple is falsy, so "no warning expected" becomes None.
        with tm.assert_produces_warning(tuple(warn_cls) or None):
            res = getattr(cat.dt, func)(*args, **kwargs)
            exp = getattr(ser.dt, func)(*args, **kwargs)

        tm.assert_equal(res, exp)

    for attr in attr_names:
        res = getattr(cat.dt, attr)
        exp = getattr(ser.dt, attr)

        tm.assert_equal(res, exp)
def test_dt_accessor_api_for_categorical_invalid(self):
    """A categorical Series of integers must not expose the ``.dt`` accessor."""
    invalid = Series([1, 2, 3]).astype("category")
    msg = "Can only use .dt accessor with datetimelike"

    with pytest.raises(AttributeError, match=msg):
        invalid.dt
    # Probe the accessor this test is about; the check used to look up
    # "str", leaving hasattr() for ".dt" unverified.
    assert not hasattr(invalid, "dt")
def test_set_categories_setitem(self):
# GH#43334
df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"])
df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"])
# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
df["Survived"] = Categorical(
df["Survived"], categories=["No", "Yes"], ordered=False
)
# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
def test_categorical_of_booleans_is_boolean(self):
# https://github.com/pandas-dev/pandas/issues/46313
df = DataFrame(
{"int_cat": [1, 2, 3], "bool_cat": [True, False, False]}, dtype="category"
)
value = df["bool_cat"].cat.categories.dtype
expected = np.dtype(np.bool_)
assert value is expected

View File

@@ -0,0 +1,843 @@
import calendar
from datetime import (
date,
datetime,
time,
)
import locale
import unicodedata
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.timezones import maybe_get_tz
from pandas.errors import SettingWithCopyError
from pandas.core.dtypes.common import (
is_integer_dtype,
is_list_like,
)
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Period,
PeriodIndex,
Series,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
# Names the ``.dt`` accessor is expected to expose for each kind of data.
# ``ok_for_<kind>`` holds the array class's ``_datetimelike_ops``; the matching
# ``*_methods`` list holds the method names that the tests below look up.
ok_for_period = PeriodArray._datetimelike_ops
ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]

ok_for_dt = DatetimeArray._datetimelike_ops
ok_for_dt_methods = [
    "to_period",
    "to_pydatetime",
    "tz_localize",
    "tz_convert",
    "normalize",
    "strftime",
    "round",
    "floor",
    "ceil",
    "day_name",
    "month_name",
    "isocalendar",
    "as_unit",
]

ok_for_td = TimedeltaArray._datetimelike_ops
ok_for_td_methods = [
    "components",
    "to_pytimedelta",
    "total_seconds",
    "round",
    "floor",
    "ceil",
    "as_unit",
]
def get_dir(ser):
    """Return the sorted, de-duplicated public names listed by ``ser.dt.__dir__()``."""
    # check limited display api
    public = {entry for entry in ser.dt.__dir__() if not entry.startswith("_")}
    return sorted(public)
class TestSeriesDatetimeValues:
def _compare(self, ser, name):
# GH 7207, 11128
# test .dt namespace accessor
def get_expected(ser, prop):
result = getattr(Index(ser._values), prop)
if isinstance(result, np.ndarray):
if is_integer_dtype(result):
result = result.astype("int64")
elif not is_list_like(result) or isinstance(result, DataFrame):
return result
return Series(result, index=ser.index, name=ser.name)
left = getattr(ser.dt, name)
right = get_expected(ser, name)
if not (is_list_like(left) and is_list_like(right)):
assert left == right
elif isinstance(left, DataFrame):
tm.assert_frame_equal(left, right)
else:
tm.assert_series_equal(left, right)
@pytest.mark.parametrize("freq", ["D", "s", "ms"])
def test_dt_namespace_accessor_datetime64(self, freq):
    """Run the ``.dt`` namespace checks on a tz-naive datetime64 Series."""
    # GH#7207, GH#11128
    ser = Series(date_range("20130101", periods=5, freq=freq), name="xxx")

    # "freq" is compared separately further down
    for prop in ok_for_dt:
        if prop == "freq":
            continue
        self._compare(ser, prop)

    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        pydatetimes = ser.dt.to_pydatetime()
    assert isinstance(pydatetimes, np.ndarray)
    assert pydatetimes.dtype == object

    localized = ser.dt.tz_localize("US/Eastern")
    expected = Series(
        DatetimeIndex(ser.values).tz_localize("US/Eastern"),
        index=ser.index,
        name="xxx",
    )
    tm.assert_series_equal(localized, expected)
    assert str(localized.dt.tz) == "US/Eastern"

    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq

    # let's localize, then convert
    converted = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
    expected = Series(
        DatetimeIndex(ser.values).tz_localize("UTC").tz_convert("US/Eastern"),
        index=ser.index,
        name="xxx",
    )
    tm.assert_series_equal(converted, expected)
def test_dt_namespace_accessor_datetime64tz(self):
    """Run the ``.dt`` namespace checks on a tz-aware (US/Eastern) Series."""
    # GH#7207, GH#11128
    ser = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")

    # "freq" is compared separately further down
    for prop in ok_for_dt:
        if prop == "freq":
            continue
        self._compare(ser, prop)

    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        pydatetimes = ser.dt.to_pydatetime()
    assert isinstance(pydatetimes, np.ndarray)
    assert pydatetimes.dtype == object

    converted = ser.dt.tz_convert("CET")
    expected = Series(ser._values.tz_convert("CET"), index=ser.index, name="xxx")
    tm.assert_series_equal(converted, expected)
    assert str(converted.dt.tz) == "CET"

    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq
def test_dt_namespace_accessor_timedelta(self):
    """Run the ``.dt`` namespace checks on several timedelta64 Series."""
    # GH#7207, GH#11128
    daily = Series(
        timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
    )
    by_second = Series(
        timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"
    )
    by_millisecond = Series(
        timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
        name="xxx",
    )

    for ser in (daily, by_second, by_millisecond):
        # "freq" is compared separately at the end of the iteration
        for prop in ok_for_td:
            if prop == "freq":
                continue
            self._compare(ser, prop)

        for method_name in ok_for_td_methods:
            getattr(ser.dt, method_name)

        components = ser.dt.components
        assert isinstance(components, DataFrame)
        tm.assert_index_equal(components.index, ser.index)

        pytimedeltas = ser.dt.to_pytimedelta()
        assert isinstance(pytimedeltas, np.ndarray)
        assert pytimedeltas.dtype == object

        seconds = ser.dt.total_seconds()
        assert isinstance(seconds, Series)
        assert seconds.dtype == "float64"

        assert ser.dt.freq == TimedeltaIndex(ser.values, freq="infer").freq
def test_dt_namespace_accessor_period(self):
    """Run the ``.dt`` namespace checks on a daily Period Series."""
    # GH#7207, GH#11128
    ser = Series(period_range("20130101", periods=5, freq="D"), name="xxx")

    # "freq" is compared separately further down
    for prop in ok_for_period:
        if prop == "freq":
            continue
        self._compare(ser, prop)

    for method_name in ok_for_period_methods:
        getattr(ser.dt, method_name)

    assert ser.dt.freq == PeriodIndex(ser.values).freq
def test_dt_namespace_accessor_index_and_values(self):
# both
index = date_range("20130101", periods=3, freq="D")
dti = date_range("20140204", periods=3, freq="s")
ser = Series(dti, index=index, name="xxx")
exp = Series(
np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx"
)
tm.assert_series_equal(ser.dt.year, exp)
exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx")
tm.assert_series_equal(ser.dt.month, exp)
exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx")
tm.assert_series_equal(ser.dt.second, exp)
exp = Series([ser.iloc[0]] * 3, index=index, name="xxx")
tm.assert_series_equal(ser.dt.normalize(), exp)
def test_dt_accessor_limited_display_api(self):
    """``dir(ser.dt)`` lists exactly the expected names for each kind of data."""
    expected_dt = sorted(set(ok_for_dt + ok_for_dt_methods))

    # tznaive
    naive = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    tm.assert_almost_equal(get_dir(naive), expected_dt)

    # tzaware
    aware = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
    aware = aware.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
    tm.assert_almost_equal(get_dir(aware), expected_dt)

    # Period
    idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        periods = Series(idx)
    tm.assert_almost_equal(
        get_dir(periods), sorted(set(ok_for_period + ok_for_period_methods))
    )
def test_dt_accessor_ambiguous_freq_conversions(self):
# GH#11295
# ambiguous time error on the conversions
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
exp_values = date_range(
"2015-01-01", "2016-01-01", freq="min", tz="UTC"
).tz_convert("America/Chicago")
# freq not preserved by tz_localize above
exp_values = exp_values._with_freq(None)
expected = Series(exp_values, name="xxx")
tm.assert_series_equal(ser, expected)
def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write):
    """Properties reached through ``.dt`` cannot be assigned to."""
    # no setting allowed
    ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    with pytest.raises(ValueError, match="modifications"):
        ser.dt.hour = 5

    # trying to set a copy
    msg = "modifications to a property of a datetimelike.+not supported"
    with pd.option_context("chained_assignment", "raise"):
        # Which failure mode is expected depends on the copy-on-write fixtures.
        if using_copy_on_write:
            expectation = tm.raises_chained_assignment_error()
        elif warn_copy_on_write:
            expectation = tm.assert_produces_warning(
                FutureWarning, match="ChainedAssignmentError"
            )
        else:
            expectation = pytest.raises(SettingWithCopyError, match=msg)

        with expectation:
            ser.dt.hour[0] = 5
@pytest.mark.parametrize(
    "method, dates",
    [
        ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
        ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
        ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
    ],
)
def test_dt_round(self, method, dates):
    """``round`` / ``floor`` / ``ceil`` to day frequency give the listed dates."""
    stamps = pd.to_datetime(
        ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
    )
    ser = Series(stamps, name="xxx")

    rounder = getattr(ser.dt, method)
    tm.assert_series_equal(rounder("D"), Series(pd.to_datetime(dates), name="xxx"))
def test_dt_round_tz(self):
ser = Series(
pd.to_datetime(
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
),
name="xxx",
)
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")
exp_values = pd.to_datetime(
["2012-01-01", "2012-01-01", "2012-01-01"]
).tz_localize("US/Eastern")
expected = Series(exp_values, name="xxx")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("method", ["ceil", "round", "floor"])
def test_dt_round_tz_ambiguous(self, method):
    """Each ``ambiguous=`` option is honoured when rounding around the DST fall-back."""
    # GH 18946 round near "fall back" DST
    utc_stamps = [
        pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
        pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
        pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
    ]
    df1 = DataFrame(utc_stamps, columns=["date"])
    df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")

    def apply(**kwargs):
        return getattr(df1.date.dt, method)("h", **kwargs)

    # infer
    tm.assert_series_equal(apply(ambiguous="infer"), df1["date"])

    # bool-array
    tm.assert_series_equal(apply(ambiguous=[True, False, False]), df1["date"])

    # NaT
    with_nat = df1["date"].copy()
    with_nat.iloc[0:2] = pd.NaT
    tm.assert_series_equal(apply(ambiguous="NaT"), with_nat)

    # raise
    with tm.external_error_raised(pytz.AmbiguousTimeError):
        apply(ambiguous="raise")
@pytest.mark.parametrize(
    "method, ts_str, freq",
    [
        ["ceil", "2018-03-11 01:59:00-0600", "5min"],
        ["round", "2018-03-11 01:59:00-0600", "5min"],
        ["floor", "2018-03-11 03:01:00-0500", "2h"],
    ],
)
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
    """Each ``nonexistent=`` option is honoured when rounding into the DST gap."""
    # GH 23324 round near "spring forward" DST
    ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")])

    shifted = getattr(ser.dt, method)(freq, nonexistent="shift_forward")
    after_gap = pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")
    tm.assert_series_equal(shifted, Series([after_gap]))

    as_nat = getattr(ser.dt, method)(freq, nonexistent="NaT")
    tm.assert_series_equal(as_nat, Series([pd.NaT]).dt.tz_localize(as_nat.dt.tz))

    with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
        getattr(ser.dt, method)(freq, nonexistent="raise")
@pytest.mark.parametrize("freq", ["ns", "us", "1000us"])
def test_dt_round_nonnano_higher_resolution_no_op(self, freq):
    """Rounding millisecond data with these freqs changes nothing but still copies."""
    # GH 52761
    ser = Series(
        ["2020-05-31 08:00:00", "2000-12-31 04:00:05", "1800-03-14 07:30:20"],
        dtype="datetime64[ms]",
    )
    untouched = ser.copy()

    rounded = ser.dt.round(freq)

    tm.assert_series_equal(rounded, untouched)
    assert not np.shares_memory(ser.array._ndarray, rounded.array._ndarray)
def test_dt_namespace_accessor_categorical(self):
# GH 19468
dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
ser = Series(pd.Categorical(dti), name="foo")
result = ser.dt.year
expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo")
tm.assert_series_equal(result, expected)
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
# GH 27952
tz = tz_aware_fixture
datetimes = Series(
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
)
categorical = datetimes.astype("category")
result = categorical.dt.tz_localize(tz)
expected = datetimes.dt.tz_localize(tz)
tm.assert_series_equal(result, expected)
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
# GH 27952
tz = tz_aware_fixture
datetimes = Series(
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
)
categorical = datetimes.astype("category")
result = categorical.dt.tz_convert(tz)
expected = datetimes.dt.tz_convert(tz)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
def test_dt_other_accessors_categorical(self, accessor):
    """Field accessors agree between categorical and plain datetimes."""
    # GH 27952
    plain = Series(
        ["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
    )
    as_category = plain.astype("category")

    tm.assert_series_equal(
        getattr(as_category.dt, accessor), getattr(plain.dt, accessor)
    )
def test_dt_accessor_no_new_attributes(self):
    """Setting an unknown attribute on the ``.dt`` accessor raises AttributeError."""
    # https://github.com/pandas-dev/pandas/issues/10673
    daily = Series(date_range("20130101", periods=5, freq="D"))
    expected_msg = "You cannot add any new attribute"
    with pytest.raises(AttributeError, match=expected_msg):
        daily.dt.xlabel = "a"
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
    "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
)
def test_dt_accessor_datetime_name_accessors(self, time_locale):
    """``day_name`` / ``month_name`` return locale-specific names, English for
    ``locale=None``, and a missing value for NaT."""
    # Test Monday -> Sunday and January -> December, in that sequence
    english_days = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    english_months = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ]
    if time_locale is None:
        # If the time_locale is None, day-name and month_name should
        # return the english attributes
        expected_days = english_days
        expected_months = english_months
    else:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]

    ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
    # positions 4..10 are walked in step with the Monday..Sunday names
    for position, (local_name, eng_name) in enumerate(
        zip(expected_days, english_days), start=4
    ):
        assert ser.dt.day_name(locale=time_locale)[position] == local_name.capitalize()
        assert ser.dt.day_name(locale=None)[position] == eng_name
    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1])

    ser = Series(date_range(freq="ME", start="2012", end="2013"))
    month_names = ser.dt.month_name(locale=time_locale)
    capitalized = Series([month.capitalize() for month in expected_months])

    # work around https://github.com/pandas-dev/pandas/issues/22342
    tm.assert_series_equal(
        month_names.str.normalize("NFD"), capitalized.str.normalize("NFD")
    )

    for stamp, local_month in zip(ser, expected_months):
        got = unicodedata.normalize("NFD", stamp.month_name(locale=time_locale))
        want = unicodedata.normalize("NFD", local_month.capitalize())
        assert got == want

    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1])
def test_strftime(self):
# GH 10086
ser = Series(date_range("20130101", periods=5))
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
expected = Series(
[
"2015/02/03 11-22-33",
"2015/02/04 11-22-33",
"2015/02/05 11-22-33",
"2015/02/06 11-22-33",
"2015/02/07 11-22-33",
]
)
tm.assert_series_equal(result, expected)
ser = Series(period_range("20130101", periods=5))
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
expected = Series(
[
"2015/02/03 11-22-33",
"2015/02/03 11-22-34",
"2015/02/03 11-22-35",
"2015/02/03 11-22-36",
"2015/02/03 11-22-37",
]
)
tm.assert_series_equal(result, expected)
def test_strftime_dt64_days(self):
ser = Series(date_range("20130101", periods=5))
ser.iloc[0] = pd.NaT
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
[np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
datetime_index = date_range("20150301", periods=5)
result = datetime_index.strftime("%Y/%m/%d")
expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype=np.object_,
)
# dtype may be S10 or U10 depending on python version
tm.assert_index_equal(result, expected)
def test_strftime_period_days(self, using_infer_string):
period_index = period_range("20150301", periods=5)
result = period_index.strftime("%Y/%m/%d")
expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype="=U10",
)
if using_infer_string:
expected = expected.astype("string[pyarrow_numpy]")
tm.assert_index_equal(result, expected)
def test_strftime_dt64_microsecond_resolution(self):
ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
result = ser.dt.strftime("%Y-%m-%d %H:%M:%S")
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
tm.assert_series_equal(result, expected)
def test_strftime_period_hours(self):
ser = Series(period_range("20130101", periods=4, freq="h"))
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S")
expected = Series(
[
"2013/01/01 00:00:00",
"2013/01/01 01:00:00",
"2013/01/01 02:00:00",
"2013/01/01 03:00:00",
]
)
tm.assert_series_equal(result, expected)
def test_strftime_period_minutes(self):
ser = Series(period_range("20130101", periods=4, freq="ms"))
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
expected = Series(
[
"2013/01/01 00:00:00.000",
"2013/01/01 00:00:00.001",
"2013/01/01 00:00:00.002",
"2013/01/01 00:00:00.003",
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "data",
    [
        DatetimeIndex(["2019-01-01", pd.NaT]),
        PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
    ],
)
def test_strftime_nat(self, data):
    """A NaT entry formats to NaN next to a normally formatted value."""
    # GH 29578
    formatted = Series(data).dt.strftime("%Y-%m-%d")
    tm.assert_series_equal(formatted, Series(["2019-01-01", np.nan]))
@pytest.mark.parametrize(
    "data", [DatetimeIndex([pd.NaT]), PeriodIndex([pd.NaT], dtype="period[D]")]
)
def test_strftime_all_nat(self, data):
    """Formatting all-NaT data emits no warning and yields an object NaN Series."""
    # https://github.com/pandas-dev/pandas/issues/45858
    all_nat = Series(data)
    with tm.assert_produces_warning(None):
        formatted = all_nat.dt.strftime("%Y-%m-%d")
    tm.assert_series_equal(formatted, Series([np.nan], dtype=object))
def test_valid_dt_with_missing_values(self):
# GH 8689
ser = Series(date_range("20130101", periods=5, freq="D"))
ser.iloc[2] = pd.NaT
for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
expected = getattr(ser.dt, attr).copy()
expected.iloc[2] = np.nan
result = getattr(ser.dt, attr)
tm.assert_series_equal(result, expected)
result = ser.dt.date
expected = Series(
[
date(2013, 1, 1),
date(2013, 1, 2),
pd.NaT,
date(2013, 1, 4),
date(2013, 1, 5),
],
dtype="object",
)
tm.assert_series_equal(result, expected)
result = ser.dt.time
expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object")
tm.assert_series_equal(result, expected)
def test_dt_accessor_api(self):
# GH 9322
from pandas.core.indexes.accessors import (
CombinedDatetimelikeProperties,
DatetimeProperties,
)
assert Series.dt is CombinedDatetimelikeProperties
ser = Series(date_range("2000-01-01", periods=3))
assert isinstance(ser.dt, DatetimeProperties)
@pytest.mark.parametrize(
    "ser",
    [
        Series(np.arange(5)),
        Series(list("abcde")),
        Series(np.random.default_rng(2).standard_normal(5)),
    ],
)
def test_dt_accessor_invalid(self, ser):
    """Series of integers, strings or floats do not expose ``.dt``."""
    # GH#9322 check that series with incorrect dtypes don't have attr
    expected_msg = "only use .dt accessor"
    with pytest.raises(AttributeError, match=expected_msg):
        ser.dt
    assert not hasattr(ser, "dt")
def test_dt_accessor_updates_on_inplace(self):
ser = Series(date_range("2018-01-01", periods=10))
ser[2] = None
return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True)
assert return_value is None
result = ser.dt.date
assert result[0] == result[2]
def test_date_tz(self):
# GH11757
rng = DatetimeIndex(
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
tz="US/Eastern",
)
ser = Series(rng)
expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
tm.assert_series_equal(ser.dt.date, expected)
tm.assert_series_equal(ser.apply(lambda x: x.date()), expected)
def test_dt_timetz_accessor(self, tz_naive_fixture):
# GH21358
tz = maybe_get_tz(tz_naive_fixture)
dtindex = DatetimeIndex(
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
)
ser = Series(dtindex)
expected = Series(
[time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
)
result = ser.dt.timetz
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "input_series, expected_output",
    [
        [["2020-01-01"], [[2020, 1, 3]]],
        [[pd.NaT], [[np.nan, np.nan, np.nan]]],
        [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
        [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]],
        # see GH#36032
        [["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]],
        [["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]],
    ],
)
def test_isocalendar(self, input_series, expected_output):
    """``.dt.isocalendar()`` returns a UInt32 year/week/day frame."""
    stamps = pd.to_datetime(Series(input_series))
    iso = stamps.dt.isocalendar()

    expected_frame = DataFrame(
        expected_output, columns=["year", "week", "day"], dtype="UInt32"
    )
    tm.assert_frame_equal(iso, expected_frame)
def test_hour_index(self):
dt_series = Series(
date_range(start="2021-01-01", periods=5, freq="h"),
index=[2, 6, 7, 8, 11],
dtype="category",
)
result = dt_series.dt.hour
expected = Series(
[0, 1, 2, 3, 4],
dtype="int32",
index=[2, 6, 7, 8, 11],
)
tm.assert_series_equal(result, expected)
class TestSeriesPeriodValuesDtAccessor:
@pytest.mark.parametrize(
    "input_vals",
    [
        [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
        [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
        [
            Period("2016-01-01 00:00:00", freq="h"),
            Period("2016-01-01 01:00:00", freq="h"),
        ],
        # Minute frequency: with freq="M" (month) both timestamps, which are
        # one minute apart, collapsed into the same monthly period and merely
        # repeated the first case.
        [
            Period("2016-01-01 00:00:00", freq="min"),
            Period("2016-01-01 00:01:00", freq="min"),
        ],
        [
            Period("2016-01-01 00:00:00", freq="s"),
            Period("2016-01-01 00:00:01", freq="s"),
        ],
    ],
)
def test_end_time_timevalues(self, input_vals):
    """``Series.dt.end_time`` equals ``Period.end_time`` applied element-wise."""
    # GH#17157
    # Check that the time part of the Period is adjusted by end_time
    # when using the dt accessor on a Series
    input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

    ser = Series(input_vals)
    result = ser.dt.end_time
    expected = ser.apply(lambda x: x.end_time)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("input_vals", ["2001", "NaT"])
def test_to_period(self, input_vals):
    """``dt.to_period("D")`` matches a Series created directly with ``Period[D]`` dtype."""
    # GH#21205
    converted = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
    direct = Series([input_vals], dtype="Period[D]")
    tm.assert_series_equal(converted, direct)
def test_normalize_pre_epoch_dates():
    """``dt.normalize`` drops the time of day for a 1969 date as well as a 2016 one."""
    # GH: 36294
    stamps = pd.to_datetime(Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"]))
    midnights = pd.to_datetime(Series(["1969-01-01", "2016-01-01"]))
    tm.assert_series_equal(stamps.dt.normalize(), midnights)
def test_day_attribute_non_nano_beyond_int32():
    """``dt.days`` on second-resolution timedeltas, including day counts above 2**31."""
    # GH 52386
    seconds = [
        136457654736252,
        134736784364431,
        245345345545332,
        223432411,
        2343241,
        3634548734,
        23234,
    ]
    ser = Series(np.array(seconds, dtype="timedelta64[s]"))

    whole_days = Series([1579371003, 1559453522, 2839645203, 2586, 27, 42066, 0])
    tm.assert_series_equal(ser.dt.days, whole_days)

View File

@@ -0,0 +1,129 @@
import re
import pytest
from pandas import (
ArrowDtype,
Series,
)
import pandas._testing as tm
pa = pytest.importorskip("pyarrow")
from pandas.compat import pa_version_under11p0
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem(list_dtype):
    """``ser.list[1]`` takes the second element of each list; a null row stays null."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(list_dtype))

    second_items = Series([2, None, None], dtype="int64[pyarrow]")
    tm.assert_series_equal(ser.list[1], second_items)
def test_list_getitem_slice():
    """Slicing through ``.list``; the outcome depends on ``pa_version_under11p0``."""
    list_type = ArrowDtype(pa.list_(pa.int64()))
    ser = Series([[1, 2, 3], [4, None, 5], None], dtype=list_type)

    if not pa_version_under11p0:
        sliced = ser.list[1:None:None]
        wanted = Series([[2, 3], [None, 5], None], dtype=list_type)
        tm.assert_series_equal(sliced, wanted)
        return

    # Older pyarrow: the accessor is expected to refuse the slice.
    with pytest.raises(
        NotImplementedError, match="List slice not supported by pyarrow "
    ):
        ser.list[1:None:None]
def test_list_len():
    """``.list.len()`` gives per-row lengths as pyarrow int32, null for a null row."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    lengths = ser.list.len()
    tm.assert_series_equal(lengths, Series([3, 2, None], dtype=ArrowDtype(pa.int32())))
def test_list_flatten():
    """``.list.flatten()`` concatenates the rows; the null row adds nothing."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    flat = ser.list.flatten()
    wanted = Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
    tm.assert_series_equal(flat, wanted)
def test_list_getitem_slice_invalid():
    """A zero slice step raises; which error depends on ``pa_version_under11p0``."""
    ser = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    # Pick the expected exception and message for the installed pyarrow.
    if pa_version_under11p0:
        error = NotImplementedError
        pattern = "List slice not supported by pyarrow "
    else:
        error = pa.lib.ArrowInvalid
        pattern = re.escape("`step` must be >= 1")

    with pytest.raises(error, match=pattern):
        ser.list[1:None:0]
def test_list_accessor_non_list_dtype():
    """Using ``.list`` on a pyarrow int64 Series raises ``AttributeError``."""
    ints = Series([1, 2, 4], dtype=ArrowDtype(pa.int64()))
    expected_message = re.escape(
        "Can only use the '.list' accessor with 'list[pyarrow]' dtype, "
        "not int64[pyarrow]."
    )
    with pytest.raises(AttributeError, match=expected_message):
        ints.list[1:None:0]
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem_invalid_index(list_dtype):
    """Negative, too-large and non-integer keys are all rejected by ``.list[...]``."""
    ser = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(list_dtype),
    )
    # (key, expected exception, expected message), checked in this order.
    failures = (
        (-1, pa.lib.ArrowInvalid, "Index -1 is out of bounds"),
        (5, pa.lib.ArrowInvalid, "Index 5 is out of bounds"),
        ("abc", ValueError, "key must be an int or slice, got str"),
    )
    for key, error, pattern in failures:
        with pytest.raises(error, match=pattern):
            ser.list[key]
def test_list_accessor_not_iterable():
    """Calling ``iter`` on the ``.list`` accessor raises ``TypeError``."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    with pytest.raises(TypeError, match="'ListAccessor' object is not iterable"):
        iter(ser.list)

View File

@ -0,0 +1,9 @@
from pandas import Series
class TestSparseAccessor:
    """Checks on the ``Series.sparse`` accessor."""

    def test_sparse_accessor_updates_on_inplace(self):
        """``.sparse`` still works after an in-place ``drop``."""
        sparse_ser = Series([1, 1, 2, 3], dtype="Sparse[int]")
        dropped = sparse_ser.drop([0, 1], inplace=True)
        # The in-place form returns nothing.
        assert dropped is None
        assert sparse_ser.sparse.density == 1.0

View File

@ -0,0 +1,25 @@
import pytest
from pandas import Series
import pandas._testing as tm
class TestStrAccessor:
    """Checks on the ``Series.str`` accessor."""

    def test_str_attribute(self):
        """Strip-family methods match the built-in ``str`` versions (GH#9068)."""
        names = Series([" jack", "jill ", " jesse ", "frank"])
        for method_name in ("strip", "rstrip", "lstrip"):
            builtin = getattr(str, method_name)
            wanted = Series([builtin(value) for value in names.values])
            via_accessor = getattr(Series.str, method_name)(names.str)
            tm.assert_series_equal(via_accessor, wanted)

        # str accessor only valid with string values
        numbers = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            numbers.str.repeat(2)

    def test_str_accessor_updates_on_inplace(self):
        """``.str`` works on the two rows left after an in-place ``drop``."""
        letters = Series(list("abc"))
        dropped = letters.drop([0], inplace=True)
        assert dropped is None
        assert len(letters.str.lower()) == 2

View File

@ -0,0 +1,196 @@
import re
import pytest
from pandas.compat.pyarrow import (
pa_version_under11p0,
pa_version_under13p0,
)
from pandas import (
ArrowDtype,
DataFrame,
Index,
Series,
)
import pandas._testing as tm
pa = pytest.importorskip("pyarrow")
pc = pytest.importorskip("pyarrow.compute")
def test_struct_accessor_dtypes():
    """``.struct.dtypes`` lists one ArrowDtype per field, labelled by field name."""
    nested_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
        ]
    )
    outer_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("string_col", pa.string()),
            ("struct_col", nested_type),
        ]
    )
    ser = Series([], dtype=ArrowDtype(outer_type))

    field_dtypes = ser.struct.dtypes

    wanted = Series(
        [
            ArrowDtype(pa.int64()),
            ArrowDtype(pa.string()),
            ArrowDtype(nested_type),
        ],
        index=Index(["int_col", "string_col", "struct_col"]),
    )
    tm.assert_series_equal(field_dtypes, wanted)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field():
    """A struct field can be pulled out by name or by integer position."""
    labels = Index([-100, 42, 123])
    crop_type = pa.struct(
        [
            ("rice", pa.float64()),
            ("maize", pa.int64()),
            ("wheat", pa.string()),
        ]
    )
    records = [
        {"rice": 1.0, "maize": -1, "wheat": "a"},
        {"rice": 2.0, "maize": 0, "wheat": "b"},
        {"rice": 3.0, "maize": 1, "wheat": "c"},
    ]
    ser = Series(records, dtype=ArrowDtype(crop_type), index=labels)

    # Lookup by field name: result keeps the index and is named after the field.
    maize = ser.struct.field("maize")
    maize_wanted = Series(
        [-1, 0, 1],
        dtype=ArrowDtype(pa.int64()),
        index=labels,
        name="maize",
    )
    tm.assert_series_equal(maize, maize_wanted)

    # Lookup by position: index 2 is the "wheat" field.
    third = ser.struct.field(2)
    third_wanted = Series(
        ["a", "b", "c"],
        dtype=ArrowDtype(pa.string()),
        index=labels,
        name="wheat",
    )
    tm.assert_series_equal(third, third_wanted)
def test_struct_accessor_field_with_invalid_name_or_index():
    """A float passed to ``.struct.field`` is rejected with ``ValueError``."""
    single_field = ArrowDtype(pa.struct([("field", pa.int64())]))
    ser = Series([], dtype=single_field)
    with pytest.raises(ValueError, match="name_or_index must be an int, str,"):
        ser.struct.field(1.1)
@pytest.mark.skipif(pa_version_under11p0, reason="pyarrow>=11.0.0 required")
def test_struct_accessor_explode():
    """``.struct.explode()`` yields one DataFrame column per top-level field."""
    labels = Index([-100, 42, 123])
    inner_type = pa.struct([("sea", pa.string())])
    turtle_type = pa.struct(
        [
            ("painted", pa.int64()),
            ("snapping", inner_type),
        ]
    )
    ser = Series(
        [
            {"painted": 1, "snapping": {"sea": "green"}},
            {"painted": 2, "snapping": {"sea": "leatherback"}},
            {"painted": 3, "snapping": {"sea": "hawksbill"}},
        ],
        dtype=ArrowDtype(turtle_type),
        index=labels,
    )

    exploded = ser.struct.explode()

    # The nested struct field stays a struct-typed column.
    painted = Series([1, 2, 3], index=labels, dtype=ArrowDtype(pa.int64()))
    snapping = Series(
        [{"sea": "green"}, {"sea": "leatherback"}, {"sea": "hawksbill"}],
        index=labels,
        dtype=ArrowDtype(inner_type),
    )
    wanted = DataFrame({"painted": painted, "snapping": snapping})
    tm.assert_frame_equal(exploded, wanted)
@pytest.mark.parametrize(
    "invalid",
    [
        pytest.param(Series([1, 2, 3], dtype="int64"), id="int64"),
        pytest.param(
            Series(["a", "b", "c"], dtype="string[pyarrow]"), id="string-pyarrow"
        ),
    ],
)
def test_struct_accessor_api_for_invalid(invalid):
    """Touching ``.struct`` on a non-struct Series raises ``AttributeError``."""
    # The message is expected to name the offending dtype.
    expected_message = re.escape(
        "Can only use the '.struct' accessor with 'struct[pyarrow]' dtype, "
        f"not {invalid.dtype}."
    )
    with pytest.raises(AttributeError, match=expected_message):
        invalid.struct
@pytest.mark.parametrize(
    ["indices", "name"],
    [
        (0, "int_col"),
        ([1, 2], "str_col"),
        (pc.field("int_col"), "int_col"),
        ("int_col", "int_col"),
        (b"string_col", b"string_col"),
        ([b"string_col"], "string_col"),
    ],
)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field_expanded(indices, name):
    """``.struct.field`` agrees with ``pc.struct_field`` for each selector form."""
    nested_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
            ("str_col", pa.string()),
        ]
    )
    arrow_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("struct_col", nested_type),
            (b"string_col", pa.string()),
        ]
    )

    empty = pa.array([], type=arrow_type)
    ser = Series(empty, dtype=ArrowDtype(arrow_type))
    reference = pc.struct_field(empty, indices)
    selected = ser.struct.field(indices)

    # Compare the underlying pyarrow data, then the name of the result.
    tm.assert_equal(selected.array._pa_array.combine_chunks(), reference)
    assert selected.name == name

View File

@ -0,0 +1,499 @@
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
from datetime import (
datetime,
timedelta,
)
import re
from dateutil.tz import (
gettz,
tzutc,
)
import numpy as np
import pytest
import pytz
from pandas._libs import index as libindex
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
def test_fancy_getitem():
    """Scalar and slice lookups on a Series indexed by first-Friday-of-month dates."""
    fridays = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    ser = Series(np.arange(len(fridays)), index=fridays)

    # An integer key is expected to warn and act positionally.
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser[48] == 48

    # The same entry reached through date-like keys.
    date_keys = (
        "1/2/2009",
        "2009-1-2",
        datetime(2009, 1, 2),
        Timestamp(datetime(2009, 1, 2)),
    )
    for key in date_keys:
        assert ser[key] == 48

    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        ser["2009-1-3"]

    by_strings = ser["3/6/2009":"2009-06-05"]
    by_datetimes = ser[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    tm.assert_series_equal(by_strings, by_datetimes)
def test_fancy_setitem():
dti = date_range(
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
)
s = Series(np.arange(len(dti)), index=dti)
msg = "Series.__setitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
s[48] = -1
assert s.iloc[48] == -1
s["1/2/2009"] = -2
assert s.iloc[48] == -2
s["1/2/2009":"2009-06-05"] = -3
assert (s[48:54] == -3).all()
@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    """Setting via equivalent tz-aware keys hits the same row of a tz-aware index."""

    def tzget(name):
        if tz_source == "pytz":
            return pytz.timezone(name)
        # handle special case for utc in dateutil
        return tzutc() if name == "UTC" else gettz(name)

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz=tzget("US/Eastern"))
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    def overwrite_and_restore(key):
        # Clobber the entry, write the original value back, expect no net change.
        altered = ts.copy()
        altered[key] = 0
        altered[key] = ts.iloc[4]
        tm.assert_series_equal(altered, ts)

    # also test Timestamp tz handling, GH #2789
    overwrite_and_restore("1990-01-01 09:00:00+00:00")
    overwrite_and_restore("1990-01-01 03:00:00-06:00")

    # repeat with datetimes
    overwrite_and_restore(datetime(1990, 1, 1, 9, tzinfo=tzget("UTC")))

    central = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    overwrite_and_restore(central.to_pydatetime())
def test_getitem_setitem_datetimeindex():
    """Get/set on a Series with an hourly, US/Eastern-localized DatetimeIndex.

    Covers string keys, string slices, boolean masks built from string and
    Timestamp bounds, Index-valued keys and partial-date strings, plus the
    errors expected when tz-naive datetime keys meet the tz-aware index.
    """
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz="US/Eastern")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    # Scalar string key.
    result = ts["1990-01-01 04:00:00"]
    expected = ts.iloc[4]
    assert result == expected

    # Overwrite then restore through the same string key.
    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    # String slice: compared against positions 4..7.
    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # Same bounds, now carrying an explicit UTC offset.
    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # But we do not give datetimes a pass on tzawareness compat
    # NOTE(review): this assignment is not read before `msg` is rebound below.
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    naive = datetime(1990, 1, 1, 4)
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with pytest.raises(KeyError, match=re.escape(repr(key))):
            # GH#36148 as of 2.0 we require tzawareness-compat
            ts[key]

    result = ts.copy()
    # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
    # so setting it as a new key casts to object rather than matching
    # rng[4]
    result[naive] = ts.iloc[4]
    assert result.index.dtype == object
    tm.assert_index_equal(result.index[:-1], rng.astype(object))
    assert result.index[-1] == naive

    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        ts[naive : datetime(1990, 1, 1, 7)]

    result = ts.copy()
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 0
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 99
    # the __setitems__ here failed, so result should still match ts
    tm.assert_series_equal(result, ts)

    lb = naive
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    # Bounds localized to the index's own tzinfo compare fine.
    lb = Timestamp(naive).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # Keys taken from the index itself: a single element, then a sub-index.
    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(result, ts)
def test_getitem_setitem_periodindex():
    """Get/set on an hourly PeriodIndex by string, string slice, mask and Index."""
    N = 50
    hours = period_range("1/1/1990", periods=N, freq="h")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=hours)

    first = "1990-01-01 04"
    last = "1990-01-01 07"

    # Scalar string key.
    assert ts[first] == ts.iloc[4]

    altered = ts.copy()
    altered[first] = 0
    altered[first] = ts.iloc[4]
    tm.assert_series_equal(altered, ts)

    # String slice, compared against positions 4..7.
    tm.assert_series_equal(ts[first:last], ts[4:8])

    altered = ts.copy()
    altered[first:last] = 0
    altered[first:last] = ts[4:8]
    tm.assert_series_equal(altered, ts)

    # Boolean mask built from string bounds.
    in_window = (ts.index >= first) & (ts.index <= last)
    tm.assert_series_equal(ts[in_window], ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts.iloc[4]

    tm.assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    altered = ts.copy()
    altered[ts.index[4:8]] = 0
    altered.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(altered, ts)
def test_datetime_indexing():
    """A missing Timestamp key raises on read but can be added by assignment.

    Checked on an index with repeated dates, first in original order and then
    reversed.
    """
    index = date_range("1/1/2000", "1/7/2000").repeat(3)
    stamp = Timestamp("1/8/2000")

    for reverse in (False, True):
        ser = Series(len(index), index=index)
        if reverse:
            # not monotonic
            ser = ser[::-1]

        with pytest.raises(KeyError, match=re.escape(repr(stamp))):
            ser[stamp]
        ser[stamp] = 0
        assert ser[stamp] == 0
# test duplicates in time series
def test_indexing_with_duplicate_datetimeindex(
    rand_series_with_duplicate_datetimeindex,
):
    """Per-date get/set on a Series whose DatetimeIndex contains duplicates."""
    ts = rand_series_with_duplicate_datetimeindex

    for date in ts.index.unique():
        fetched = ts[date]

        mask = ts.index == date
        matching = ts[mask]
        # A repeated date gives back a Series, a unique one a scalar.
        if mask.sum() > 1:
            tm.assert_series_equal(fetched, matching)
        else:
            tm.assert_almost_equal(fetched, matching.iloc[0])

        zeroed = ts.copy()
        zeroed[date] = 0
        wanted = Series(np.where(mask, 0, ts), index=ts.index)
        tm.assert_series_equal(zeroed, wanted)

    key = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=re.escape(repr(key))):
        ts[key]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
def test_loc_getitem_over_size_cutoff(monkeypatch):
    """``loc`` on a 4400-row frame with ``libindex._SIZE_CUTOFF`` patched to 1000.

    See #1821.
    """
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    # create large list of non periodic datetime
    one_sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    n = 1100
    cursor = datetime(2011, 12, 5, 20, 30)
    dates = []
    for _ in range(n):
        dates.extend(
            (
                cursor,
                cursor + one_sec,
                cursor + one_sec + half_sec,
                cursor + one_sec + one_sec + half_sec,
            )
        )
        cursor += 3 * one_sec

    # duplicate some values in the list
    for pos in np.random.default_rng(2).integers(0, len(dates) - 1, 20):
        dates[pos + 1] = dates[pos]

    df = DataFrame(
        np.random.default_rng(2).standard_normal((len(dates), 4)),
        index=dates,
        columns=list("ABCD"),
    )

    stamp = df.index[n * 3]
    assert stamp in df.index

    # it works!
    df.loc[stamp]
    assert len(df.loc[[stamp]]) > 0
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """Lookups on a 1100-entry PeriodIndex with ``_SIZE_CUTOFF`` patched to 1000.

    See GH 27136.
    """
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    n = 1100
    minutes = period_range("1/1/2000", freq="min", periods=n)
    assert minutes._engine.over_size_threshold

    ser = Series(np.random.default_rng(2).standard_normal(len(minutes)), index=minutes)

    last_period = minutes[n - 1]
    assert last_period in ser.index

    # it works!
    ser[last_period]
    assert len(ser.loc[[last_period]]) > 0
def test_indexing_unordered():
    """Label lookups on a reordered daily index; label slices there must raise."""
    # GH 2437
    days = date_range(start="2011-01-01", end="2011-01-15")
    ordered = Series(np.random.default_rng(2).random(len(days)), index=days)
    shuffled = pd.concat([ordered[0:4], ordered[-4:], ordered[4:-4]])

    # Scalar lookups agree regardless of row order.
    for stamp in ordered.index:
        assert ordered[stamp] == shuffled[stamp]

    # GH 3448 (ranges)
    def compare(slobj):
        got = shuffled[slobj].copy().sort_index()
        want = ordered[slobj]
        want.index = want.index._with_freq(None)
        tm.assert_series_equal(got, want)

    range_keys = [
        slice("2011-01-01", "2011-01-15"),
        slice("2010-12-30", "2011-01-15"),
        slice("2011-01-01", "2011-01-16"),
        # partial ranges
        slice("2011-01-01", "2011-01-6"),
        slice("2011-01-06", "2011-01-8"),
        slice("2011-01-06", "2011-01-12"),
    ]
    for key in range_keys:
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            compare(key)

    # single values
    got = shuffled["2011"].sort_index()
    want = ordered["2011"]
    want.index = want.index._with_freq(None)
    tm.assert_series_equal(got, want)
def test_indexing_unordered2():
# diff freq
rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME")
ts = Series(np.arange(len(rng)), index=rng)
ts = ts.take(np.random.default_rng(2).permutation(20))
result = ts["2005"]
for t in result.index:
assert t.year == 2005
def test_indexing():
    """Year-string get/set on a month-end Series and on a one-column frame."""
    month_ends = date_range("2001-1-1", periods=20, freq="ME")
    ts = Series(np.random.default_rng(2).random(len(month_ends)), index=month_ends)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    tm.assert_series_equal(ts["2001"], ts.iloc[:12])

    df = DataFrame({"A": ts.copy()})

    # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
    # like any other key, so raises
    with pytest.raises(KeyError, match="2001"):
        df["2001"]

    # setting
    ts = Series(np.random.default_rng(2).random(len(month_ends)), index=month_ends)
    wanted_series = ts.copy()
    wanted_series.iloc[:12] = 1
    ts["2001"] = 1
    tm.assert_series_equal(ts, wanted_series)

    wanted_frame = df.copy()
    wanted_frame.iloc[:12, 0] = 1
    df.loc["2001", "A"] = 1
    tm.assert_frame_equal(df, wanted_frame)
def test_getitem_str_month_with_datetimeindex():
    """A month string selects every entry on that month's last day (GH3546)."""
    # GH3546 (not including times on the last day)
    # Same check at hourly and at per-second resolution.
    for end, freq in (("2013-05-31 23:00", "h"), ("2013-05-31 23:59", "s")):
        stamps = date_range(start="2013-05-31 00:00", end=end, freq=freq)
        ts = Series(range(len(stamps)), index=stamps)
        whole_month = ts["2013-05"]
        tm.assert_series_equal(whole_month, ts)
def test_getitem_str_year_with_datetimeindex():
    """A year string selects both entries, including one at 23:59:59.999999."""
    stamps = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(stamps)), index=stamps)
    whole_year = ts["2013"]
    tm.assert_series_equal(whole_year, ts)
def test_getitem_str_second_with_datetimeindex():
    """``df[key]`` with a row's timestamp (string or Timestamp) raises ``KeyError``."""
    # GH14826, indexing with a seconds resolution string / datetime object
    seconds = date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s")
    df = DataFrame(
        np.random.default_rng(2).random((5, 5)),
        columns=["open", "high", "low", "close", "volume"],
        index=seconds,
    )

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]

    timestamp_pattern = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
    with pytest.raises(KeyError, match=timestamp_pattern):
        df[df.index[2]]
def test_compare_datetime_with_all_none():
    """``datetime Series > all-None Series`` gives all ``False`` (GH#54870)."""
    dates = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    nones = Series([None, None])
    comparison = dates > nones
    tm.assert_series_equal(comparison, Series([False, False]))

View File

@ -0,0 +1,70 @@
import pytest
from pandas import (
Index,
Series,
date_range,
)
import pandas._testing as tm
class TestSeriesDelItem:
    """Tests for ``del ser[key]``."""

    def test_delitem(self):
        """Deleting by label removes the entry in place (GH#5542)."""
        # should delete the item inplace
        ser = Series(range(5))
        for removed in (0, 1):
            del ser[removed]
            remaining = range(removed + 1, 5)
            tm.assert_series_equal(ser, Series(remaining, index=remaining))

        def emptied():
            return Series(dtype="int64", index=Index([], dtype="int64"))

        # only 1 left, del, add, del
        ser = Series(1)
        del ser[0]
        tm.assert_series_equal(ser, emptied())
        ser[0] = 1
        tm.assert_series_equal(ser, Series(1))
        del ser[0]
        tm.assert_series_equal(ser, emptied())

    def test_delitem_object_index(self, using_infer_string):
        """Delete, re-add and delete again the only entry of a string-labelled Series."""
        # Index(dtype=object)
        dtype = "string[pyarrow_numpy]" if using_infer_string else object

        def emptied():
            return Series(dtype="int64", index=Index([], dtype=dtype))

        ser = Series(1, index=Index(["a"], dtype=dtype))
        del ser["a"]
        tm.assert_series_equal(ser, emptied())
        ser["a"] = 1
        tm.assert_series_equal(ser, Series(1, index=Index(["a"], dtype=dtype)))
        del ser["a"]
        tm.assert_series_equal(ser, emptied())

    def test_delitem_missing_key(self):
        """Deleting from an empty Series raises ``KeyError`` for the key."""
        # empty
        empty = Series(dtype=object)
        with pytest.raises(KeyError, match=r"^0$"):
            del empty[0]

    def test_delitem_extension_dtype(self):
        """Deleting an entry keeps tz-aware and period dtypes intact (GH#40386)."""
        # DatetimeTZDtype
        dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
        ser = Series(dti)
        survivors = ser[[0, 2]]
        del ser[1]
        assert ser.dtype == dti.dtype
        tm.assert_series_equal(ser, survivors)

        # PeriodDtype
        pi = dti.tz_localize(None).to_period("D")
        ser = Series(pi)
        survivors = ser[:2]
        del ser[2]
        assert ser.dtype == pi.dtype
        tm.assert_series_equal(ser, survivors)

View File

@ -0,0 +1,238 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
Series,
date_range,
)
import pandas._testing as tm
def test_get():
    """``Series.get`` returns the stored value for a present label, else the default."""
    values = [
        43,
        48,
        60,
        48,
        50,
        51,
        50,
        45,
        57,
        48,
        56,
        45,
        51,
        39,
        55,
        43,
        54,
        52,
        51,
        54,
    ]

    # GH 6383
    # Default index over 20 values: 25 is not a label, so the default comes back.
    default_indexed = Series(np.array(values))
    assert default_indexed.get(25, 0) == 0

    # Float index whose first label is 25.0: the integer key 25 finds it.
    float_labels = [
        25.0,
        36.0,
        49.0,
        64.0,
        81.0,
        100.0,
        121.0,
        144.0,
        169.0,
        196.0,
        1225.0,
        1296.0,
        1369.0,
        1444.0,
        1521.0,
        1600.0,
        1681.0,
        1764.0,
        1849.0,
        1936.0,
    ]
    float_indexed = Series(
        np.array(values),
        index=Index(float_labels, dtype=np.float64),
    )
    assert float_indexed.get(25, 0) == 43

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    int_counts = df.i.value_counts()
    assert int_counts.get(99, default="Missing") == "Missing"

    bool_counts = df.b.value_counts()
    assert bool_counts.get(False, default="Missing") == 3
    assert bool_counts.get(True, default="Missing") == "Missing"
def test_get_nan(float_numpy_dtype):
# GH 8569
s = Index(range(10), dtype=float_numpy_dtype).to_series()
assert s.get(np.nan) is None
assert s.get(np.nan, default="Missing") == "Missing"
def test_get_nan_multiple(float_numpy_dtype):
# GH 8569
# ensure that fixing "test_get_nan" above hasn't broken get
# with multiple elements
s = Index(range(10), dtype=float_numpy_dtype).to_series()
idx = [2, 30]
assert s.get(idx) is None
idx = [2, np.nan]
assert s.get(idx) is None
# GH 17295 - all missing keys
idx = [20, 30]
assert s.get(idx) is None
idx = [np.nan, np.nan]
assert s.get(idx) is None
def test_get_with_default():
# GH#7725
d0 = ["a", "b", "c", "d"]
d1 = np.arange(4, dtype="int64")
for data, index in ((d0, d1), (d1, d0)):
s = Series(data, index=index)
for i, d in zip(index, data):
assert s.get(i) == d
assert s.get(i, d) == d
assert s.get(i, "z") == d
assert s.get("e", "z") == "z"
assert s.get("e", "e") == "e"
msg = "Series.__getitem__ treating keys as positions is deprecated"
warn = None
if index is d0:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg):
assert s.get(10, "z") == "z"
assert s.get(10, 10) == 10
@pytest.mark.parametrize(
    "arr",
    [
        np.random.default_rng(2).standard_normal(10),
        DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize(
            tz="US/Eastern"
        ),
    ],
)
def test_get_with_ea(arr):
    """``Series.get`` with scalar, list and slice keys on int and string labels.

    See GH#21260 and GH#21257.
    """
    # Even integer labels 0, 2, 4, ...
    ser = Series(arr, index=[2 * i for i in range(len(arr))])
    assert ser.get(4) == ser.iloc[2]

    tm.assert_series_equal(ser.get([4, 6]), ser.iloc[[2, 3]])
    tm.assert_series_equal(ser.get(slice(2)), ser.iloc[[0, 1]])

    assert ser.get(-1) is None
    assert ser.get(ser.index.max() + 1) is None

    # String labels.
    ser = Series(arr[:6], index=list("abcdef"))
    assert ser.get("c") == ser.iloc[2]

    tm.assert_series_equal(ser.get(slice("b", "d")), ser.iloc[[1, 2, 3]])

    missing = ser.get("Z")
    assert missing is None

    # Integer keys on string labels are expected to warn each time.
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(4) == ser.iloc[4]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(-1) == ser.iloc[-1]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(len(ser)) is None

    # GH#21257
    every_other = Series(arr)[::2]
    assert every_other.get(1) is None
def test_getitem_get(string_series, object_series):
msg = "Series.__getitem__ treating keys as positions is deprecated"
for obj in [string_series, object_series]:
idx = obj.index[5]
assert obj[idx] == obj.get(idx)
assert obj[idx] == obj.iloc[5]
with tm.assert_produces_warning(FutureWarning, match=msg):
assert string_series.get(-1) == string_series.get(string_series.index[-1])
assert string_series.iloc[5] == string_series.get(string_series.index[5])
def test_get_none():
    """``Series.get(None)`` returns ``None`` for an empty and a labelled Series."""
    # GH#5652
    unlabelled = Series(dtype=object)
    labelled = Series(dtype=object, index=list("abc"))
    for ser in (unlabelled, labelled):
        assert ser.get(None) is None

View File

@ -0,0 +1,735 @@
"""
Series.__getitem__ test classes are organized by the type of key passed.
"""
from datetime import (
date,
datetime,
time,
)
import numpy as np
import pytest
from pandas._libs.tslibs import (
conversion,
timezones,
)
from pandas.core.dtypes.common import is_scalar
import pandas as pd
from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
Index,
Series,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError
from pandas.tseries.offsets import BDay
class TestSeriesGetitemScalars:
    """Tests for ``Series.__getitem__`` with a single scalar key."""

    def test_getitem_object_index_float_string(self):
        """String and float labels both resolve on a mixed-label index."""
        # GH#17286
        ser = Series([1] * 4, index=Index(["a", "b", "c", 1.0]))
        assert ser["a"] == 1
        assert ser[1.0] == 1

    def test_getitem_float_keys_tuple_values(self):
        """Float keys on tuple-valued Series, with unique and repeated labels."""
        # see GH#13509

        # unique Index
        ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")
        result = ser[0.0]
        assert result == (1, 1)

        # non-unique Index
        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo")

        result = ser[0.0]
        tm.assert_series_equal(result, expected)

    def test_getitem_unrecognized_scalar(self):
        """A ``np.dtype`` object used as a label works as a key."""
        # GH#32684 a scalar key that is not recognized by lib.is_scalar

        # a series that might be produced via `frame.dtypes`
        ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])

        key = ser.index[1]

        result = ser[key]
        assert result == 2

    def test_getitem_negative_out_of_bounds(self):
        """``ser[-11]`` on ten string-labelled rows warns and raises ``IndexError``."""
        ser = Series(["a"] * 10, index=["a"] * 10)

        msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                ser[-11]

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        """An integer key equal to the length warns and raises ``IndexError``."""
        # don't segfault, GH#495
        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                datetime_series[len(datetime_series)]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        """``ser[-1]`` on an empty Series raises ``KeyError``."""
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        ser = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            ser[-1]

    def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype):
        """Absent int and str keys raise ``KeyError`` on duplicated integer labels."""
        dtype = any_int_numpy_dtype
        ser = Series(
            np.random.default_rng(2).standard_normal(6),
            index=Index([0, 0, 1, 1, 2, 2], dtype=dtype),
        )

        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

        # not monotonic
        ser = Series(
            np.random.default_rng(2).standard_normal(6), index=[2, 2, 0, 0, 1, 1]
        )

        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

    def test_getitem_int64(self, datetime_series):
        """A ``np.int64`` key warns and returns the same value as ``iloc[5]``."""
        idx = np.int64(5)
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = datetime_series[idx]
        assert res == datetime_series.iloc[5]

    def test_getitem_full_range(self):
        """Indexing with the full list of labels returns an equal Series."""
        # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
        ser = Series(range(5), index=list(range(5)))
        result = ser[list(range(5))]
        tm.assert_series_equal(result, ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A tz-aware Timestamp and a localized ``datetime`` find the same value."""
        tz = timezones.maybe_get_tz(tzstr)

        index = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="h", tz=tzstr
        )
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)

        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = conversion.localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        """A date string without offset selects the matching row of a tz-aware index."""
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

        result = ser["1/3/2000"]
        tm.assert_almost_equal(result, ser.iloc[2])

    def test_getitem_time_object(self):
        """A ``datetime.time`` key selects the rows at that time of day."""
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

        mask = (rng.hour == 9) & (rng.minute == 30)
        result = ts[time(9, 30)]
        expected = ts[mask]
        # Drop the freq on the result's index before comparing.
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        """A category value used as key returns the matching element."""
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        ser = Series([1, 2], index=cats)

        expected = ser.iloc[0]
        result = ser[cats[0]]
        assert result == expected

    def test_getitem_numeric_categorical_listlike_matches_scalar(self):
        """Int scalars and int lists are both treated as labels on a CategoricalIndex."""
        # GH#15470
        ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0]))

        # 0 is treated as a label
        assert ser[0] == "c"

        # the listlike analogue should also be treated as labels
        res = ser[[0]]
        expected = ser.iloc[-1:]
        tm.assert_series_equal(res, expected)

        res2 = ser[[0, 1, 2]]
        tm.assert_series_equal(res2, ser.iloc[::-1])

    def test_getitem_integer_categorical_not_positional(self):
        """On an integer category index the key 3 is looked up as a label."""
        # GH#14865
        ser = Series(["a", "b", "c"], index=Index([1, 2, 3], dtype="category"))
        assert ser.get(3) == "c"
        assert ser[3] == "c"

    def test_getitem_str_with_timedeltaindex(self):
        """Timedelta strings work as keys; an absent one raises ``KeyError``."""
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)

        key = "6 days, 23:11:12"
        indexer = rng.get_loc(key)
        assert indexer == 133

        result = ser[key]
        assert result == ser.iloc[133]

        msg = r"^Timedelta\('50 days 00:00:00'\)$"
        with pytest.raises(KeyError, match=msg):
            rng.get_loc("50 days")
        with pytest.raises(KeyError, match=msg):
            ser["50 days"]

    def test_getitem_bool_index_positional(self):
        """``ser[0]`` on bool labels warns and returns the first value."""
        # GH#48653
        ser = Series({True: 1, False: 0})
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser[0]
        assert result == 1
class TestSeriesGetitemSlices:
def test_getitem_partial_str_slice_with_datetimeindex(self):
# GH#34860
arr = date_range("1/1/2008", "1/1/2009")
ser = arr.to_series()
result = ser["2008"]
rng = date_range(start="2008-01-01", end="2008-12-31")
expected = Series(rng, index=rng)
tm.assert_series_equal(result, expected)
def test_getitem_slice_strings_with_datetimeindex(self):
idx = DatetimeIndex(
["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
)
ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)
result = ts["1/2/2000":]
expected = ts[1:]
tm.assert_series_equal(result, expected)
result = ts["1/2/2000":"1/3/2000"]
expected = ts[1:4]
tm.assert_series_equal(result, expected)
def test_getitem_partial_str_slice_with_timedeltaindex(self):
rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
ser = Series(np.arange(len(rng)), index=rng)
result = ser["5 day":"6 day"]
expected = ser.iloc[86:134]
tm.assert_series_equal(result, expected)
result = ser["5 day":]
expected = ser.iloc[86:]
tm.assert_series_equal(result, expected)
result = ser[:"6 day"]
expected = ser.iloc[:134]
tm.assert_series_equal(result, expected)
def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self):
# higher reso
rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
ser = Series(np.arange(len(rng)), index=rng)
result = ser["1 day 10:11:12":]
expected = ser.iloc[0:]
tm.assert_series_equal(result, expected)
result = ser["1 day 10:11:12.001":]
expected = ser.iloc[1000:]
tm.assert_series_equal(result, expected)
result = ser["1 days, 10:11:12.001001"]
assert result == ser.iloc[1001]
def test_getitem_slice_2d(self, datetime_series):
# GH#30588 multi-dimensional indexing deprecated
with pytest.raises(ValueError, match="Multi-dimensional indexing"):
datetime_series[:, np.newaxis]
def test_getitem_median_slice_bug(self):
index = date_range("20090415", "20090519", freq="2B")
ser = Series(np.random.default_rng(2).standard_normal(13), index=index)
indexer = [slice(6, 7, None)]
msg = "Indexing with a single-item list"
with pytest.raises(ValueError, match=msg):
# GH#31299
ser[indexer]
# but we're OK with a single-element tuple
result = ser[(indexer[0],)]
expected = ser[indexer[0]]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"slc, positions",
[
[slice(date(2018, 1, 1), None), [0, 1, 2]],
[slice(date(2019, 1, 2), None), [2]],
[slice(date(2020, 1, 1), None), []],
[slice(None, date(2020, 1, 1)), [0, 1, 2]],
[slice(None, date(2019, 1, 1)), [0]],
],
)
def test_getitem_slice_date(self, slc, positions):
# https://github.com/pandas-dev/pandas/issues/31501
ser = Series(
[0, 1, 2],
DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
)
result = ser[slc]
expected = ser.take(positions)
tm.assert_series_equal(result, expected)
def test_getitem_slice_float_raises(self, datetime_series):
msg = (
"cannot do slice indexing on DatetimeIndex with these indexers "
r"\[{key}\] of type float"
)
with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
datetime_series[4.0:10.0]
with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
datetime_series[4.5:10.0]
def test_getitem_slice_bug(self):
ser = Series(range(10), index=list(range(10)))
result = ser[-12:]
tm.assert_series_equal(result, ser)
result = ser[-7:]
tm.assert_series_equal(result, ser[3:])
result = ser[:-12]
tm.assert_series_equal(result, ser[:0])
def test_getitem_slice_integers(self):
ser = Series(
np.random.default_rng(2).standard_normal(8),
index=[2, 4, 6, 8, 10, 12, 14, 16],
)
result = ser[:4]
expected = Series(ser.values[:4], index=[2, 4, 6, 8])
tm.assert_series_equal(result, expected)
class TestSeriesGetitemListLike:
@pytest.mark.parametrize("box", [list, np.array, Index, Series])
def test_getitem_no_matches(self, box):
# GH#33462 we expect the same behavior for list/ndarray/Index/Series
ser = Series(["A", "B"])
key = Series(["C"], dtype=object)
key = box(key)
msg = (
r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
)
with pytest.raises(KeyError, match=msg):
ser[key]
def test_getitem_intlist_intindex_periodvalues(self):
ser = Series(period_range("2000-01-01", periods=10, freq="D"))
result = ser[[2, 4]]
exp = Series(
[pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
index=[2, 4],
dtype="Period[D]",
)
tm.assert_series_equal(result, exp)
assert result.dtype == "Period[D]"
@pytest.mark.parametrize("box", [list, np.array, Index])
def test_getitem_intlist_intervalindex_non_int(self, box):
# GH#33404 fall back to positional since ints are unambiguous
dti = date_range("2000-01-03", periods=3)._with_freq(None)
ii = pd.IntervalIndex.from_breaks(dti)
ser = Series(range(len(ii)), index=ii)
expected = ser.iloc[:1]
key = box([0])
msg = "Series.__getitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser[key]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("box", [list, np.array, Index])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
# GH#33404 do _not_ fall back to positional since ints are ambiguous
idx = Index(range(4)).astype(dtype)
dti = date_range("2000-01-03", periods=3)
mi = pd.MultiIndex.from_product([idx, dti])
ser = Series(range(len(mi))[::-1], index=mi)
key = box([5])
with pytest.raises(KeyError, match="5"):
ser[key]
def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype):
# GH #37218
ser = Series([1, 2, 3])
key = np.array([4], dtype=any_unsigned_int_numpy_dtype)
with pytest.raises(KeyError, match="4"):
ser[key]
with pytest.raises(KeyError, match="4"):
ser.loc[key]
class TestGetitemBooleanMask:
def test_getitem_boolean(self, string_series):
ser = string_series
mask = ser > ser.median()
# passing list is OK
result = ser[list(mask)]
expected = ser[mask]
tm.assert_series_equal(result, expected)
tm.assert_index_equal(result.index, ser.index[mask])
def test_getitem_boolean_empty(self):
ser = Series([], dtype=np.int64)
ser.index.name = "index_name"
ser = ser[ser.isna()]
assert ser.index.name == "index_name"
assert ser.dtype == np.int64
# GH#5877
# indexing with empty series
ser = Series(["A", "B"], dtype=object)
expected = Series(dtype=object, index=Index([], dtype="int64"))
result = ser[Series([], dtype=object)]
tm.assert_series_equal(result, expected)
# invalid because of the boolean indexer
# that's empty or not-aligned
msg = (
r"Unalignable boolean Series provided as indexer \(index of "
r"the boolean Series and of the indexed object do not match"
)
with pytest.raises(IndexingError, match=msg):
ser[Series([], dtype=bool)]
with pytest.raises(IndexingError, match=msg):
ser[Series([True], dtype=bool)]
def test_getitem_boolean_object(self, string_series):
# using column from DataFrame
ser = string_series
mask = ser > ser.median()
omask = mask.astype(object)
# getitem
result = ser[omask]
expected = ser[mask]
tm.assert_series_equal(result, expected)
# setitem
s2 = ser.copy()
cop = ser.copy()
cop[omask] = 5
s2[mask] = 5
tm.assert_series_equal(cop, s2)
# nans raise exception
omask[5:10] = np.nan
msg = "Cannot mask with non-boolean array containing NA / NaN values"
with pytest.raises(ValueError, match=msg):
ser[omask]
with pytest.raises(ValueError, match=msg):
ser[omask] = 5
def test_getitem_boolean_dt64_copies(self):
# GH#36210
dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
key = np.array([True, True, False, False])
ser = Series(dti._data)
res = ser[key]
assert res._values._ndarray.base is None
# compare with numeric case for reference
ser2 = Series(range(4))
res2 = ser2[key]
assert res2._values.base is None
def test_getitem_boolean_corner(self, datetime_series):
ts = datetime_series
mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
msg = (
r"Unalignable boolean Series provided as indexer \(index of "
r"the boolean Series and of the indexed object do not match"
)
with pytest.raises(IndexingError, match=msg):
ts[mask_shifted]
with pytest.raises(IndexingError, match=msg):
ts.loc[mask_shifted]
def test_getitem_boolean_different_order(self, string_series):
ordered = string_series.sort_values()
sel = string_series[ordered > 0]
exp = string_series[string_series > 0]
tm.assert_series_equal(sel, exp)
def test_getitem_boolean_contiguous_preserve_freq(self):
rng = date_range("1/1/2000", "3/1/2000", freq="B")
mask = np.zeros(len(rng), dtype=bool)
mask[10:20] = True
masked = rng[mask]
expected = rng[10:20]
assert expected.freq == rng.freq
tm.assert_index_equal(masked, expected)
mask[22] = True
masked = rng[mask]
assert masked.freq is None
class TestGetitemCallable:
    """``Series.__getitem__`` with a callable key."""

    def test_getitem_callable(self):
        """The callable's return value is used as the actual key."""
        # GH#12533
        ser = Series(4, index=list("ABCD"))

        # callable returning a scalar label
        assert ser[lambda x: "A"] == ser.loc["A"]

        # callable returning a list of labels
        by_labels = ser[lambda x: ["A", "B"]]
        tm.assert_series_equal(by_labels, ser.loc[["A", "B"]])

        # callable returning a boolean mask
        by_mask = ser[lambda x: [True, False, True, True]]
        tm.assert_series_equal(by_mask, ser.iloc[[0, 2, 3]])
def test_getitem_generator(string_series):
gen = (x > 0 for x in string_series)
result = string_series[gen]
result2 = string_series[iter(string_series > 0)]
expected = string_series[string_series > 0]
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result2, expected)
@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    """``ser[:, None]`` raises ValueError for int, naive and tz-aware values."""
    expected_message = "Multi-dimensional indexing"
    with pytest.raises(ValueError, match=expected_message):
        series[:, None]
def test_getitem_multilevel_scalar_slice_not_implemented(
multiindex_year_month_day_dataframe_random_data,
):
# not implementing this for now
df = multiindex_year_month_day_dataframe_random_data
ser = df["A"]
msg = r"\(2000, slice\(3, 4, None\)\)"
with pytest.raises(TypeError, match=msg):
ser[2000, 3:4]
def test_getitem_dataframe_raises():
    """Using a DataFrame as a Series key raises TypeError with a hint."""
    labels = list(range(10))
    ser = Series(10, index=labels)
    frame = DataFrame(labels, index=labels)

    msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=msg):
        ser[frame > 5]
def test_getitem_assignment_series_alignment():
    """Assigning a Series through an array key sets values in order."""
    # https://github.com/pandas-dev/pandas/issues/37427
    # with getitem, when assigning with a Series, it is not first aligned
    ser = Series(range(10))
    targets = np.array([2, 4, 9])
    replacement = Series([10, 11, 12])

    ser[targets] = replacement

    # 10, 11, 12 land on labels 2, 4, 9 in that order.
    expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12])
    tm.assert_series_equal(ser, expected)
def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    """``None`` on a duplicated float index raises KeyError at every layer."""
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
    missing = None

    # Series -> Index -> index engine
    with pytest.raises(KeyError, match="None"):
        ser[missing]
    with pytest.raises(KeyError, match="None"):
        ser.index.get_loc(missing)
    with pytest.raises(KeyError, match="None"):
        ser.index._engine.get_loc(missing)
def test_getitem_1tuple_slice_without_multiindex():
    """A 1-tuple wrapping a slice acts like the bare slice on a flat index."""
    ser = Series(range(5))
    first_three = slice(3)

    wrapped = ser[(first_three,)]
    bare = ser[first_three]
    tm.assert_series_equal(wrapped, bare)
def test_getitem_preserve_name(datetime_series):
result = datetime_series[datetime_series > 0]
assert result.name == datetime_series.name
msg = "Series.__getitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = datetime_series[[0, 2, 4]]
assert result.name == datetime_series.name
result = datetime_series[5:10]
assert result.name == datetime_series.name
def test_getitem_with_integer_labels():
    """Integer list/array keys on an integer index must all be labels."""
    # integer indexes, be careful
    even_labels = list(range(0, 20, 2))
    ser = Series(np.random.default_rng(2).standard_normal(10), index=even_labels)

    # 5 and 7 are not labels, so both key forms must fail.
    as_list = [0, 2, 5, 7, 8]
    as_array = np.array([0, 2, 5, 7, 8])
    for key in (as_list, as_array):
        with pytest.raises(KeyError, match="not in index"):
            ser[key]
def test_getitem_missing(datetime_series):
# missing
d = datetime_series.index[0] - BDay()
msg = r"Timestamp\('1999-12-31 00:00:00'\)"
with pytest.raises(KeyError, match=msg):
datetime_series[d]
def test_getitem_fancy(string_series, object_series):
msg = "Series.__getitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
slice1 = string_series[[1, 2, 3]]
slice2 = object_series[[1, 2, 3]]
assert string_series.index[2] == slice1.index[1]
assert object_series.index[2] == slice2.index[1]
assert string_series.iloc[2] == slice1.iloc[1]
assert object_series.iloc[2] == slice2.iloc[1]
def test_getitem_box_float64(datetime_series):
msg = "Series.__getitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
value = datetime_series[5]
assert isinstance(value, np.float64)
def test_getitem_unordered_dup():
    """A unique label in an unsorted index with duplicates yields a scalar."""
    ser = Series(range(5), index=["c", "a", "a", "b", "b"])

    value = ser["c"]
    assert is_scalar(value)
    # The original looks the label up a second time; keep that.
    assert ser["c"] == 0
def test_getitem_dups():
    """A duplicated label returns a Series holding every matching row."""
    labels = ["A", "A", "B", "C", "C"]
    ser = Series(range(5), index=labels, dtype=np.int64)

    both_c = Series([3, 4], index=["C", "C"], dtype=np.int64)
    tm.assert_series_equal(ser["C"], both_c)
def test_getitem_categorical_str():
    """A string label on a categorical index returns every matching row."""
    # GH#31765
    categories = Categorical(["a", "b", "c", "a", "b"])
    ser = Series(range(5), index=categories)

    # "a" occupies positions 0 and 3.
    tm.assert_series_equal(ser["a"], ser.iloc[[0, 3]])
def test_slice_can_reorder_not_uniquely_indexed():
    """Reversing a Series with duplicate labels must not raise."""
    labels = ["a", "a", "b", "b", "c"]
    ser = Series(1, index=labels)
    ser[::-1]  # it works!
@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
def test_duplicated_index_getitem_positional_indexer(index_vals):
# GH 11747
s = Series(range(5), index=list(index_vals))
msg = "Series.__getitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s[3]
assert result == 3
class TestGetitemDeprecatedIndexers:
    """Sets and dicts are rejected as ``Series[...]`` keys."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """Reading with a set or dict key raises TypeError."""
        # GH#42825 enforced in 2.0
        ser = Series([1, 2, 3])
        expected_message = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_message):
            ser[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        """Writing with a set or dict key raises TypeError."""
        # GH#42825 enforced in 2.0
        ser = Series([1, 2, 3])
        expected_message = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_message):
            ser[key] = 1

View File

@ -0,0 +1,518 @@
""" test get/set & misc """
from datetime import timedelta
import re
import numpy as np
import pytest
from pandas.errors import IndexingError
from pandas import (
NA,
DataFrame,
Index,
IndexSlice,
MultiIndex,
NaT,
Series,
Timedelta,
Timestamp,
concat,
date_range,
isna,
period_range,
timedelta_range,
)
import pandas._testing as tm
def test_basic_indexing():
s = Series(
np.random.default_rng(2).standard_normal(5), index=["a", "b", "a", "a", "b"]
)
warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated"
msg = "index 5 is out of bounds for axis 0 with size 5"
with pytest.raises(IndexError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
s[5]
with pytest.raises(IndexError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
s[5] = 0
with pytest.raises(KeyError, match=r"^'c'$"):
s["c"]
s = s.sort_index()
with pytest.raises(IndexError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
s[5]
msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
with pytest.raises(IndexError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
s[5] = 0
def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
# GH51053
dtype = any_numeric_dtype
idx = Index([1, 0, 1], dtype=dtype)
ser = Series(range(3), index=idx)
result = ser[1]
expected = Series([0, 2], index=Index([1, 1], dtype=dtype))
tm.assert_series_equal(result, expected, check_exact=True)
def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
# GH51053
dtype = any_numeric_dtype
idx = Index([1, 0, 1], dtype=dtype)
ser = Series(range(3), index=idx)
ser[1] = 10
expected = Series([10, 1, 10], index=idx)
tm.assert_series_equal(ser, expected, check_exact=True)
def test_basic_getitem_with_labels(datetime_series):
indices = datetime_series.index[[5, 10, 15]]
result = datetime_series[indices]
expected = datetime_series.reindex(indices)
tm.assert_series_equal(result, expected)
result = datetime_series[indices[0] : indices[2]]
expected = datetime_series.loc[indices[0] : indices[2]]
tm.assert_series_equal(result, expected)
def test_basic_getitem_dt64tz_values():
    """tz-aware values come back as tz-aware Timestamps from every accessor."""
    # GH12089
    # with tz for values
    values = date_range("2011-01-01", periods=3, tz="US/Eastern")
    ser = Series(values, index=["a", "b", "c"])
    first = Timestamp("2011-01-01", tz="US/Eastern")

    assert ser.loc["a"] == first
    assert ser.iloc[0] == first
    assert ser["a"] == first
def test_getitem_setitem_ellipsis(using_copy_on_write, warn_copy_on_write):
s = Series(np.random.default_rng(2).standard_normal(10))
result = s[...]
tm.assert_series_equal(result, s)
with tm.assert_cow_warning(warn_copy_on_write):
s[...] = 5
if not using_copy_on_write:
assert (result == 5).all()
@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            Series({1: 12, 2: [1, 2, 2, 3]}),
            Series({1: 313}),
            Series({1: 12}, dtype=object),
        ],
        [
            Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            Series({1: [1, 2, 3]}),
            Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    """A duplicated label returns all its rows; a unique label returns its value."""
    # GH 17610
    combined = result_1._append(duplicate_item)
    expected_rows = expected_1._append(duplicate_item)

    # Label 1 now appears twice; label 2 remains unique.
    tm.assert_series_equal(combined[1], expected_rows)
    assert combined[2] == result_1[2]
def test_getitem_setitem_integers():
    """``iloc`` and label access address the same element."""
    # caused bug without test
    ser = Series([1, 2, 3], ["a", "b", "c"])
    assert ser.iloc[0] == ser["a"]

    # A positional write must be visible through the label.
    ser.iloc[0] = 5
    tm.assert_almost_equal(ser["a"], 5)
def test_series_box_timestamp():
rng = date_range("20090415", "20090519", freq="B")
ser = Series(rng)
assert isinstance(ser[0], Timestamp)
assert isinstance(ser.at[1], Timestamp)
assert isinstance(ser.iat[2], Timestamp)
assert isinstance(ser.loc[3], Timestamp)
assert isinstance(ser.iloc[4], Timestamp)
ser = Series(rng, index=rng)
msg = "Series.__getitem__ treating keys as positions is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert isinstance(ser[0], Timestamp)
assert isinstance(ser.at[rng[1]], Timestamp)
assert isinstance(ser.iat[2], Timestamp)
assert isinstance(ser.loc[rng[3]], Timestamp)
assert isinstance(ser.iloc[4], Timestamp)
def test_series_box_timedelta():
    """Every scalar accessor boxes timedelta64 values as ``Timedelta``."""
    deltas = timedelta_range("1 day 1 s", periods=5, freq="h")
    ser = Series(deltas)

    assert isinstance(ser[0], Timedelta)
    assert isinstance(ser.at[1], Timedelta)
    assert isinstance(ser.iat[2], Timedelta)
    assert isinstance(ser.loc[3], Timedelta)
    assert isinstance(ser.iloc[4], Timedelta)
def test_getitem_ambiguous_keyerror(indexer_sl):
ser = Series(range(10), index=list(range(0, 20, 2)))
with pytest.raises(KeyError, match=r"^1$"):
indexer_sl(ser)[1]
def test_getitem_dups_with_missing(indexer_sl):
# breaks reindex, so need to use .loc internally
# GH 4246
ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
with pytest.raises(KeyError, match=re.escape("['bam'] not in index")):
indexer_sl(ser)[["foo", "bar", "bah", "bam"]]
def test_setitem_ambiguous_keyerror(indexer_sl):
s = Series(range(10), index=list(range(0, 20, 2)))
# equivalent of an append
s2 = s.copy()
indexer_sl(s2)[1] = 5
expected = concat([s, Series([5], index=[1])])
tm.assert_series_equal(s2, expected)
def test_setitem(datetime_series):
datetime_series[datetime_series.index[5]] = np.nan
datetime_series.iloc[[1, 2, 17]] = np.nan
datetime_series.iloc[6] = np.nan
assert np.isnan(datetime_series.iloc[6])
assert np.isnan(datetime_series.iloc[2])
datetime_series[np.isnan(datetime_series)] = 5
assert not np.isnan(datetime_series.iloc[2])
def test_setslice(datetime_series):
sl = datetime_series[5:20]
assert len(sl) == len(sl.index)
assert sl.index.is_unique is True
def test_basic_getitem_setitem_corner(datetime_series):
# invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
msg = "key of type tuple not found and not a MultiIndex"
with pytest.raises(KeyError, match=msg):
datetime_series[:, 2]
with pytest.raises(KeyError, match=msg):
datetime_series[:, 2] = 2
# weird lists. [slice(0, 5)] raises but not two slices
msg = "Indexing with a single-item list"
with pytest.raises(ValueError, match=msg):
# GH#31299
datetime_series[[slice(None, 5)]]
# but we're OK with a single-element tuple
result = datetime_series[(slice(None, 5),)]
expected = datetime_series[:5]
tm.assert_series_equal(result, expected)
# OK
msg = r"unhashable type(: 'slice')?"
with pytest.raises(TypeError, match=msg):
datetime_series[[5, [None, None]]]
with pytest.raises(TypeError, match=msg):
datetime_series[[5, [None, None]]] = 2
def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_write):
original = string_series.copy()
numSlice = string_series[10:20]
numSliceEnd = string_series[-10:]
objSlice = object_series[10:20]
assert string_series.index[9] not in numSlice.index
assert object_series.index[9] not in objSlice.index
assert len(numSlice) == len(numSlice.index)
assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]]
assert numSlice.index[1] == string_series.index[11]
tm.assert_numpy_array_equal(np.array(numSliceEnd), np.array(string_series)[-10:])
# Test return view.
sl = string_series[10:20]
with tm.assert_cow_warning(warn_copy_on_write):
sl[:] = 0
if using_copy_on_write:
# Doesn't modify parent (CoW)
tm.assert_series_equal(string_series, original)
else:
assert (string_series[10:20] == 0).all()
def test_timedelta_assignment():
    """``datetime.timedelta`` values assigned via ``.loc`` become ``Timedelta``."""
    # GH 8209
    one_day = Timedelta("1 days")

    # Enlarging an empty object Series with a timedelta.
    ser = Series([], dtype=object)
    ser.loc["B"] = timedelta(1)
    tm.assert_series_equal(ser, Series(one_day, index=["B"]))

    # Prepending a label via reindex leaves a missing value at "A".
    ser = ser.reindex(ser.index.insert(0, "A"))
    tm.assert_series_equal(ser, Series([np.nan, one_day], index=["A", "B"]))

    # Filling the gap gives a fully populated timedelta Series.
    ser.loc["A"] = timedelta(1)
    tm.assert_series_equal(ser, Series(one_day, index=["A", "B"]))
def test_underlying_data_conversion(using_copy_on_write):
# GH 4080
df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
return_value = df.set_index(["a", "b", "c"], inplace=True)
assert return_value is None
s = Series([1], index=[(2, 2, 2)])
df["val"] = 0
df_original = df.copy()
df
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["val"].update(s)
expected = df_original
else:
with tm.assert_produces_warning(FutureWarning, match="inplace method"):
df["val"].update(s)
expected = DataFrame(
{"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
)
return_value = expected.set_index(["a", "b", "c"], inplace=True)
assert return_value is None
tm.assert_frame_equal(df, expected)
def test_preserve_refs(datetime_series):
seq = datetime_series.iloc[[5, 10, 15]]
seq.iloc[1] = np.nan
assert not np.isnan(datetime_series.iloc[10])
def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
index = lexsorted_two_level_string_multiindex
ser = Series(
np.random.default_rng(2).standard_normal(len(index)), index=index, name="sth"
)
result = indexer_sl(ser)["foo"]
assert result.name == ser.name
# miscellaneous methods
@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="h"),
    ],
)
def test_slice_with_negative_step(index):
    """Label slices with step -1 match their positional equivalents."""
    ser = Series(np.arange(20), index)
    SLC = IndexSlice
    check = tm.assert_indexing_slices_equivalent

    # Each bound is tried both as its string form and as the index element.
    lower_keys = [str(index[9]), index[9]]
    upper_keys = [str(index[13]), index[13]]

    for low in lower_keys:
        check(ser, SLC[low::-1], SLC[9::-1])
        check(ser, SLC[:low:-1], SLC[:8:-1])

        for high in upper_keys:
            check(ser, SLC[high:low:-1], SLC[13:8:-1])
            # Start before stop with a negative step selects nothing.
            check(ser, SLC[low:high:-1], SLC[0:0:-1])
def test_tuple_index():
    """Tuple labels can be read and written on an Index of tuples."""
    # GH 35534 - Selecting values when a Series has an Index of tuples
    key_a, key_b = ("a",), ("b",)
    ser = Series([1, 2], index=[key_a, key_b])

    assert ser[key_a] == 1
    assert ser[key_b] == 2

    ser[key_b] = 3
    assert ser[key_b] == 3
def test_frozenset_index():
    """Frozenset labels can be read and written."""
    # GH35747 - Selecting values when a Series has an Index of frozenset
    key_a = frozenset("a")
    key_b = frozenset("b")
    ser = Series([1, 2], index=[key_a, key_b])

    assert ser[key_a] == 1
    assert ser[key_b] == 2

    ser[key_b] = 3
    assert ser[key_b] == 3
def test_loc_setitem_all_false_indexer():
    """Assigning a Series through an all-False mask changes nothing."""
    # GH#45778
    ser = Series([1, 2], index=["a", "b"])
    unchanged = ser.copy()
    replacement = Series([6, 7], index=["a", "b"])

    nothing_selected = ser > 100
    ser.loc[nothing_selected] = replacement
    tm.assert_series_equal(ser, unchanged)
def test_loc_boolean_indexer_non_matching_index():
    """A longer nullable-boolean mask with NA/False yields an empty result."""
    # GH#46551
    ser = Series([1])
    mask = Series([NA, False], dtype="boolean")

    selected = ser.loc[mask]
    tm.assert_series_equal(selected, Series([], dtype="int64"))
def test_loc_boolean_indexer_miss_matching_index():
    """A boolean mask whose labels are all absent from the Series raises."""
    # GH#46551
    ser = Series([1])
    # Mask labels 1 and 2 do not overlap with the Series' only label, 0.
    mask = Series([NA, False], dtype="boolean", index=[1, 2])

    with pytest.raises(IndexingError, match="Unalignable"):
        ser.loc[mask]
def test_loc_setitem_nested_data_enlargement():
    """A DataFrame can be stored under a new label via ``.loc`` enlargement."""
    # GH#48614
    frame = DataFrame({"a": [1]})
    ser = Series({"label": frame})

    ser.loc["new_label"] = frame

    expected = Series({"label": frame, "new_label": frame})
    tm.assert_series_equal(ser, expected)
def test_loc_ea_numeric_index_oob_slice_end():
    """A ``.loc`` slice may end past the last label of an Int64 index."""
    # GH#50161
    ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))

    # Label 3 is absent; only label 2 falls inside the slice.
    sliced = ser.loc[2:3]
    expected = Series(1, index=Index([2], dtype="Int64"))
    tm.assert_series_equal(sliced, expected)
def test_getitem_bool_int_key():
    """``.loc[0]`` on a bool-labelled Series raises KeyError."""
    # GH#48653
    bool_labelled = Series({True: 1, False: 0})
    with pytest.raises(KeyError, match="0"):
        bool_labelled.loc[0]
@pytest.mark.parametrize("val", [{}, {"b": "x"}])
@pytest.mark.parametrize("indexer", [[], [False, False], slice(0, -1), np.array([])])
def test_setitem_empty_indexer(indexer, val):
    """Assigning through an indexer that selects no rows leaves the frame as is."""
    # GH#45981
    columns = {"a": [1, 2], **val}
    df = DataFrame(columns)
    before = df.copy()

    df.loc[indexer] = 1.5
    tm.assert_frame_equal(df, before)
class TestDeprecatedIndexers:
    """Sets and dicts are rejected as ``.loc`` keys, flat or MultiIndex."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """Reading ``.loc`` with a set or dict raises TypeError."""
        # GH#42825 enforced in 2.0
        ser = Series([1, 2])
        expected_message = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_message):
            ser.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        """The same holds when the set or dict sits inside a MultiIndex key."""
        # GH#42825 enforced in 2.0
        mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
        ser = Series([1, 2], index=mi)
        expected_message = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_message):
            ser.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        """Writing ``.loc`` with a set or dict raises TypeError."""
        # GH#42825 enforced in 2.0
        ser = Series([1, 2])
        expected_message = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_message):
            ser.loc[key] = 1

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_setitem_dict_and_set_disallowed_multiindex(self, key):
        """Writing through a MultiIndex key holding a set or dict raises too."""
        # GH#42825 enforced in 2.0
        mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
        ser = Series([1, 2], index=mi)
        expected_message = "as an indexer is not supported"
        with pytest.raises(TypeError, match=expected_message):
            ser.loc[key] = 1
class TestSetitemValidation:
# This is adapted from pandas/tests/arrays/masked/test_indexing.py
# but checks for warnings instead of errors.
def _check_setitem_invalid(self, ser, invalid, indexer, warn):
msg = "Setting an item of incompatible dtype is deprecated"
msg = re.escape(msg)
orig_ser = ser.copy()
with tm.assert_produces_warning(warn, match=msg):
ser[indexer] = invalid
ser = orig_ser.copy()
with tm.assert_produces_warning(warn, match=msg):
ser.iloc[indexer] = invalid
ser = orig_ser.copy()
with tm.assert_produces_warning(warn, match=msg):
ser.loc[indexer] = invalid
ser = orig_ser.copy()
with tm.assert_produces_warning(warn, match=msg):
ser[:] = invalid
_invalid_scalars = [
1 + 2j,
"True",
"1",
"1.0",
NaT,
np.datetime64("NaT"),
np.timedelta64("NaT"),
]
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
@pytest.mark.parametrize(
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
)
@pytest.mark.parametrize("indexer", _indexers)
def test_setitem_validation_scalar_bool(self, invalid, indexer):
ser = Series([True, False, False], dtype="bool")
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)
@pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
@pytest.mark.parametrize("indexer", _indexers)
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
warn = None
else:
warn = FutureWarning
self._check_setitem_invalid(ser, invalid, indexer, warn)
@pytest.mark.parametrize("invalid", _invalid_scalars + [True])
@pytest.mark.parametrize("indexer", _indexers)
def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
ser = Series([1, 2, None], dtype=float_numpy_dtype)
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)

View File

@ -0,0 +1,69 @@
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
def test_mask():
    """``mask(cond)`` mirrors ``where(~cond)``; malformed conditions raise."""
    # compare with tested results in test_where
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    tm.assert_series_equal(s.where(~cond, np.nan), s.mask(cond))
    tm.assert_series_equal(s.where(~cond), s.mask(cond))
    tm.assert_series_equal(s.where(~cond, -s), s.mask(cond, -s))

    # Same comparisons with a condition covering only the first three rows.
    cond = Series([True, False, False, True, False], index=s.index)
    negative = -(s.abs())
    tm.assert_series_equal(negative.where(~cond[:3]), negative.mask(cond[:3]))
    tm.assert_series_equal(
        negative.where(~cond[:3], -negative), negative.mask(cond[:3], -negative)
    )

    # A scalar or a short raw array is rejected as the condition.
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.mask(1)
    with pytest.raises(ValueError, match=msg):
        s.mask(cond[:3].values, -s)
def test_mask_casts():
    """Masking ints with NaN gives the same result as a NaN-containing Series."""
    # dtype changes
    ints = Series([1, 2, 3, 4])
    masked = ints.mask(ints > 2, np.nan)

    tm.assert_series_equal(masked, Series([1, 2, np.nan, np.nan]))
def test_mask_casts2():
    """A plain list of booleans is accepted as the mask condition."""
    # see gh-21891
    ints = Series([1, 2])
    masked = ints.mask([True, False])

    tm.assert_series_equal(masked, Series([np.nan, 2]))
def test_mask_inplace():
    """``inplace=True`` gives the same result as the returning form."""
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    # Default replacement
    target = s.copy()
    target.mask(cond, inplace=True)
    tm.assert_series_equal(target.dropna(), s[~cond])
    tm.assert_series_equal(target, s.mask(cond))

    # Explicit replacement values
    target = s.copy()
    target.mask(cond, -s, inplace=True)
    tm.assert_series_equal(target, s.mask(cond, -s))

View File

@ -0,0 +1,45 @@
from datetime import datetime
import numpy as np
from pandas import (
DatetimeIndex,
Series,
)
import pandas._testing as tm
def test_series_set_value():
    """``_set_value`` with datetime keys grows an empty Series."""
    # GH#1561
    first, second = datetime(2001, 1, 1), datetime(2001, 1, 2)

    ser = Series(dtype=object)
    ser._set_value(first, 1.0)
    ser._set_value(second, np.nan)

    expected = Series([1.0, np.nan], index=DatetimeIndex([first, second]))
    tm.assert_series_equal(ser, expected)
def test_set_value_dt64(datetime_series):
idx = datetime_series.index[10]
res = datetime_series._set_value(idx, 0)
assert res is None
assert datetime_series[idx] == 0
def test_set_value_str_index(string_series):
# equiv
ser = string_series.copy()
res = ser._set_value("foobar", 0)
assert res is None
assert ser.index[-1] == "foobar"
assert ser["foobar"] == 0
ser2 = string_series.copy()
ser2.loc["foobar"] = 0
assert ser2.index[-1] == "foobar"
assert ser2["foobar"] == 0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
def test_take_validate_axis():
# GH#51022
ser = Series([-1, 5, 6, 2, 4])
msg = "No axis named foo for object type Series"
with pytest.raises(ValueError, match=msg):
ser.take([1, 2], axis="foo")
def test_take():
ser = Series([-1, 5, 6, 2, 4])
actual = ser.take([1, 3, 4])
expected = Series([5, 2, 4], index=[1, 3, 4])
tm.assert_series_equal(actual, expected)
actual = ser.take([-1, 3, 4])
expected = Series([4, 2, 4], index=[4, 3, 4])
tm.assert_series_equal(actual, expected)
msg = "indices are out-of-bounds"
with pytest.raises(IndexError, match=msg):
ser.take([1, 10])
with pytest.raises(IndexError, match=msg):
ser.take([2, 5])
def test_take_categorical():
    """``take`` with negative positions on categorical data keeps all categories."""
    # https://github.com/pandas-dev/pandas/issues/20664
    ser = Series(pd.Categorical(["a", "b", "c"]))

    expected_values = pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"])
    expected = Series(expected_values, index=[1, 1, 0])

    tm.assert_series_equal(ser.take([-2, -2, 0]), expected)
def test_take_slice_raises():
    """Passing a slice to ``Series.take`` raises TypeError."""
    expected_msg = "Series.take requires a sequence of integers, not slice"
    ser = Series([-1, 5, 6, 2, 4])
    with pytest.raises(TypeError, match=expected_msg):
        ser.take(slice(0, 3, 1))

View File

@ -0,0 +1,481 @@
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import (
Series,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
def test_where_unsafe_int(any_signed_int_numpy_dtype):
s = Series(np.arange(10), dtype=any_signed_int_numpy_dtype)
mask = s < 5
s[mask] = range(2, 7)
expected = Series(
list(range(2, 7)) + list(range(5, 10)),
dtype=any_signed_int_numpy_dtype,
)
tm.assert_series_equal(s, expected)
def test_where_unsafe_float(float_numpy_dtype):
s = Series(np.arange(10), dtype=float_numpy_dtype)
mask = s < 5
s[mask] = range(2, 7)
data = list(range(2, 7)) + list(range(5, 10))
expected = Series(data, dtype=float_numpy_dtype)
tm.assert_series_equal(s, expected)
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [(int_dtype, np.float64) for int_dtype in (np.int8, np.int16, np.int32, np.int64)]
    + [(np.float32, np.float32), (np.float64, np.float64)],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    """Assign fractional values through a boolean mask and check the result dtype."""
    # see gh-9743
    ser = Series(np.arange(10), dtype=dtype)
    new_values = [2.5, 3.5, 4.5, 5.5, 6.5]
    below_five = ser < 5
    expected = Series(new_values + list(range(5, 10)), dtype=expected_dtype)

    # A FutureWarning is expected unless both the source and the expected
    # dtype are floating point.
    both_float = np.dtype(dtype).kind == "f" and np.dtype(expected_dtype).kind == "f"
    warn = None if both_float else FutureWarning

    with tm.assert_produces_warning(warn, match="incompatible dtype"):
        ser[below_five] = new_values
    tm.assert_series_equal(ser, expected)
def test_where_unsafe():
    """Exercise mask-based setitem and ``where`` cases, including dtype changes."""
    # see gh-9731
    ser = Series(np.arange(10), dtype="int64")
    floats = [2.5, 3.5, 4.5, 5.5]
    above_five = ser > 5
    expected = Series([*range(6), *floats], dtype="float64")
    with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
        ser[above_five] = floats
    tm.assert_series_equal(ser, expected)

    # see gh-3235
    ser = Series(np.arange(10), dtype="int64")
    below_five = ser < 5
    ser[below_five] = range(2, 7)
    expected = Series([*range(2, 7), *range(5, 10)], dtype="int64")
    tm.assert_series_equal(ser, expected)
    assert ser.dtype == expected.dtype

    ser = Series(np.arange(10), dtype="int64")
    above_five = ser > 5
    ser[above_five] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    tm.assert_series_equal(ser, expected)

    # Values whose length differs from the number of selected rows must raise.
    ser = Series(np.arange(10))
    above_five = ser > 5
    msg = "cannot set using a list-like indexer with a different length than the value"
    for wrong_length in ([5, 4, 3, 2, 1], [0] * 5):
        with pytest.raises(ValueError, match=msg):
            ser[above_five] = wrong_length

    # dtype changes
    ser = Series([1, 2, 3, 4])
    tm.assert_series_equal(
        ser.where(ser > 2, np.nan), Series([np.nan, np.nan, 3, 4])
    )

    # GH 4667
    # setting with None changes dtype
    ser = Series(range(10)).astype(float)
    ser[8] = None
    assert isna(ser[8])

    ser = Series(range(10)).astype(float)
    ser[ser > 8] = None
    tm.assert_series_equal(ser[isna(ser)], Series(np.nan, index=[9]))
def test_where():
    """Check ``Series.where`` with and without ``other`` and with a partial cond."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    tm.assert_series_equal(ser.where(positive).dropna(), ser[positive])
    tm.assert_series_equal(ser.where(positive, -ser), ser.abs())

    masked = ser.where(positive)
    assert ser.shape == masked.shape
    assert masked is not ser

    # test alignment
    cond = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())

    expected = negated[cond].reindex(negated.index[:3]).reindex(negated.index)
    tm.assert_series_equal(negated.where(cond[:3]), expected)

    expected = negated.abs()
    expected.iloc[0] = negated[0]
    tm.assert_series_equal(negated.where(cond[:3], -negated), expected)
def test_where_error():
    """Check errors for badly-shaped ``where`` conds and mismatched mask setitem."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where(1)
    with pytest.raises(ValueError, match=shape_msg):
        ser.where(positive[:3].values, -ser)

    # GH 2745
    ser = Series([1, 2])
    ser[[True, False]] = [0, 1]
    tm.assert_series_equal(ser, Series([0, 2]))

    # failures
    length_msg = (
        "cannot set using a list-like indexer with a different length than the value"
    )
    for bad_value in ([0, 2, 3], []):
        with pytest.raises(ValueError, match=length_msg):
            ser[[True, False]] = bad_value
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """``Series.where`` accepts the boolean condition in several containers."""
    # see gh-15414
    keep = klass([False, True, True])
    ser = Series([1, 2, 3])
    tm.assert_series_equal(ser.where(keep), Series([np.nan, 2, 3]))
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    """Non-boolean conditions and a too-short boolean list both raise ValueError."""
    # see gh-15414: only boolean arrays accepted
    ser = Series([1, 2, 3])

    bool_msg = "Boolean array expected for the condition"
    with pytest.raises(ValueError, match=bool_msg):
        ser.where(cond)

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where([True])
def test_where_ndframe_align():
    """A wrongly-sized raw cond raises; the same cond wrapped in a Series does not."""
    msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # Condition shorter than the Series.
    short_cond = [True]
    with pytest.raises(ValueError, match=msg):
        ser.where(short_cond)
    tm.assert_series_equal(
        ser.where(Series(short_cond)), Series([1, np.nan, np.nan])
    )

    # Condition longer than the Series.
    long_cond = np.array([False, True, False, True])
    with pytest.raises(ValueError, match=msg):
        ser.where(long_cond)
    tm.assert_series_equal(
        ser.where(Series(long_cond)), Series([np.nan, 2, np.nan])
    )
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
    """Length-mismatched slice/list setitem raises; valid and scalar setitem work."""
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    def length_mismatch_msg(indexer_kind):
        # Build the expected error text for the given kind of indexer.
        return (
            f"cannot set using a {indexer_kind} indexer with a "
            "different length than the value"
        )

    # slice
    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_mismatch_msg("slice")):
        ser[0:3] = list(range(27))

    ser[0:3] = list(range(3))
    tm.assert_series_equal(ser.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    ser = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_mismatch_msg("slice")):
        ser[0:4:2] = list(range(27))

    ser = Series(list("abcdef"))
    ser[0:4:2] = list(range(2))
    tm.assert_series_equal(ser, Series([0, "b", 1, "d", "e", "f"]))

    # neg slices
    ser = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_mismatch_msg("slice")):
        ser[:-1] = list(range(27))

    ser[-3:-1] = list(range(2))
    tm.assert_series_equal(ser, Series(["a", "b", "c", 0, 1, "f"]))

    # list
    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_mismatch_msg("list-like")):
        ser[[0, 1, 2]] = list(range(27))

    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_mismatch_msg("list-like")):
        ser[[0, 1, 2]] = list(range(2))

    # scalar
    ser = Series(list("abc"))
    ser[0] = list(range(10))
    tm.assert_series_equal(ser, Series([list(range(10)), "b", "c"]))
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    """Mask setitem, ``where`` and ``mask`` agree when one item is broadcast."""
    # GH#8801, GH#4195
    selection = np.resize(mask, size)
    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series(
        [item if use_item else original for use_item, original in zip(selection, data)]
    )

    via_setitem = Series(data)
    via_setitem[selection] = item
    tm.assert_series_equal(via_setitem, expected)

    via_where = Series(data).where(~selection, box(item))
    tm.assert_series_equal(via_where, expected)

    via_mask = Series(data).mask(selection, box(item))
    tm.assert_series_equal(via_mask, expected)
def test_where_inplace():
    """``where(..., inplace=True)`` gives the same result as the non-inplace call."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    # Without a replacement value.
    target = ser.copy()
    target.where(positive, inplace=True)
    tm.assert_series_equal(target.dropna(), ser[positive])
    tm.assert_series_equal(target, ser.where(positive))

    # With a replacement value.
    target = ser.copy()
    target.where(positive, -ser, inplace=True)
    tm.assert_series_equal(target, ser.where(positive, -ser))
def test_where_dups():
    """``where`` and mask-based setitem work on a Series with duplicate labels."""
    # GH 4550
    # where crashes with dups in index
    dup_labels = [0, 1, 2, 0, 1, 2]
    comb = pd.concat([Series(list(range(3))), Series(list(range(3)))])

    result = comb.where(comb < 2)
    tm.assert_series_equal(
        result, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_labels)
    )

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    tm.assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_labels))

    comb[comb < 2] += 10
    tm.assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_labels))
def test_where_numeric_with_string():
    """Replacing an int with a string via ``where`` gives an object-dtype result."""
    # GH 9280
    ser = Series([1, 2, 3])

    # The replacement is given as a scalar, a list and an ndarray in turn.
    replacements = ("X", ["X", "Y", "Z"], np.array(["X", "Y", "Z"]))
    for other in replacements:
        result = ser.where(ser > 1, other)

        assert not is_integer(result[0])
        assert is_integer(result[1])
        assert is_integer(result[2])

        assert isinstance(result[0], str)
        assert result.dtype == "object"
@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
def test_where_datetimelike_coerce(dtype):
    """Replace every datetimelike entry with numbers and check the result."""
    ser = Series([1, 2], dtype=dtype)
    mask = np.array([False, False])
    expected = Series([10, 10])

    msg = "Downcasting behavior in Series and DataFrame methods 'where'"
    # Each of these replacements is expected to emit the FutureWarning.
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser.where(mask, other)
        tm.assert_series_equal(result, expected)

    # This call is made without asserting a warning.
    result = ser.where(mask, [10.0, np.nan])
    tm.assert_series_equal(result, Series([10, np.nan], dtype="object"))
def test_where_datetimetz():
    """``where`` on tz-aware data fills with NaT and keeps the tz-aware dtype."""
    # GH 15701
    tz_dtype = "datetime64[ns, UTC]"
    raw = ("2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00")
    ser = Series(list(map(Timestamp, raw)), dtype=tz_dtype)

    result = ser.where(Series([False, True]))
    expected = Series([pd.NaT, ser[1]], dtype=tz_dtype)
    tm.assert_series_equal(result, expected)
def test_where_sparse():
    """``where`` with a scalar replacement works on a sparse-backed Series."""
    # GH#17198 make sure we dont get an AttributeError for sp_index
    sparse = pd.arrays.SparseArray
    ser = Series(sparse([1, 2]))
    tm.assert_series_equal(ser.where(ser >= 2, 0), Series(sparse([0, 2])))
def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    """An empty float Series with an empty list cond is returned unchanged."""
    # https://github.com/pandas-dev/pandas/issues/34592
    empty = Series([], dtype=float)
    tm.assert_series_equal(empty.where([]), empty)
def test_where_categorical(frame_or_series):
# https://github.com/pandas-dev/pandas/issues/18888
exp = frame_or_series(
pd.Categorical(["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]),
dtype="category",
)
df = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
res = df.where(df != "C")
tm.assert_equal(exp, res)
def test_where_datetimelike_categorical(tz_naive_fixture):
# GH#37682
tz = tz_naive_fixture
dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])
mask = np.array([True, True, False])
# DatetimeIndex.where
res = lvals.where(mask, rvals)
tm.assert_index_equal(res, dr)
# DatetimeArray.where
res = lvals._data._where(mask, rvals)
tm.assert_datetime_array_equal(res, dr._data)
# Series.where
res = Series(lvals).where(mask, rvals)
tm.assert_series_equal(res, Series(dr))
# DataFrame.where
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
tm.assert_frame_equal(res, pd.DataFrame(dr))

View File

@ -0,0 +1,82 @@
import numpy as np
import pytest
from pandas import (
MultiIndex,
Series,
date_range,
)
import pandas._testing as tm
def test_xs_datetimelike_wrapping():
    """``xs`` returns the stored ``np.datetime64`` object from an object Series."""
    # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp
    arr = date_range("2016-01-01", periods=3)._data._ndarray

    ser = Series(arr, dtype=object)
    # Overwrite each position with the raw element taken from the ndarray.
    for pos, raw in enumerate(arr):
        ser.iloc[pos] = raw
    assert ser.dtype == object
    assert isinstance(ser[0], np.datetime64)

    assert isinstance(ser.xs(0), np.datetime64)
class TestXSWithMultiIndex:
def test_xs_level_series(self, multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
ser = df["A"]
expected = ser[:, "two"]
result = df.xs("two", level=1)["A"]
tm.assert_series_equal(result, expected)
def test_series_getitem_multiindex_xs_by_label(self):
# GH#5684
idx = MultiIndex.from_tuples(
[("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
)
ser = Series([1, 2, 3, 4], index=idx)
return_value = ser.index.set_names(["L1", "L2"], inplace=True)
assert return_value is None
expected = Series([1, 3], index=["a", "b"])
return_value = expected.index.set_names(["L1"], inplace=True)
assert return_value is None
result = ser.xs("one", level="L2")
tm.assert_series_equal(result, expected)
def test_series_getitem_multiindex_xs(self):
# GH#6258
dt = list(date_range("20130903", periods=3))
idx = MultiIndex.from_product([list("AB"), dt])
ser = Series([1, 3, 4, 1, 3, 4], index=idx)
expected = Series([1, 1], index=list("AB"))
result = ser.xs("20130903", level=1)
tm.assert_series_equal(result, expected)
def test_series_xs_droplevel_false(self):
# GH: 19056
mi = MultiIndex.from_tuples(
[("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
)
ser = Series([1, 1, 1], index=mi)
result = ser.xs("a", axis=0, drop_level=False)
expected = Series(
[1, 1],
index=MultiIndex.from_tuples(
[("a", "x"), ("a", "y")], names=["level1", "level2"]
),
)
tm.assert_series_equal(result, expected)
def test_xs_key_as_list(self):
# GH#41760
mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
ser = Series([1], index=mi)
with pytest.raises(TypeError, match="list keys are not supported"):
ser.xs(["a", "x"], axis=0, drop_level=False)
with pytest.raises(TypeError, match="list keys are not supported"):
ser.xs(["a"], axis=0, drop_level=False)

View File

@ -0,0 +1,7 @@
"""
Test files dedicated to individual (stand-alone) Series methods.
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods.
These may also present opportunities for sharing/de-duplicating test code.
"""

Some files were not shown because too many files have changed in this diff Show More