forked from Alsan/Post_finder
venv
Binary files not shown.
@@ -0,0 +1,499 @@
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
from datetime import (
    datetime,
    timedelta,
)
import re

from dateutil.tz import (
    gettz,
    tzutc,
)
import numpy as np
import pytest
import pytz

from pandas._libs import index as libindex

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
    period_range,
)
import pandas._testing as tm


def test_fancy_getitem():
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )

    s = Series(np.arange(len(dti)), index=dti)

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert s[48] == 48
    assert s["1/2/2009"] == 48
    assert s["2009-1-2"] == 48
    assert s[datetime(2009, 1, 2)] == 48
    assert s[Timestamp(datetime(2009, 1, 2))] == 48
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        s["2009-1-3"]
    tm.assert_series_equal(
        s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    )


def test_fancy_setitem():
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )

    s = Series(np.arange(len(dti)), index=dti)

    msg = "Series.__setitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        s[48] = -1
    assert s.iloc[48] == -1
    s["1/2/2009"] = -2
    assert s.iloc[48] == -2
    s["1/2/2009":"2009-06-05"] = -3
    assert (s[48:54] == -3).all()


@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    if tz_source == "pytz":
        tzget = pytz.timezone
    else:
        # handle special case for utc in dateutil
        tzget = lambda x: tzutc() if x == "UTC" else gettz(x)

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz=tzget("US/Eastern"))
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts.copy()
    dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    dt = dt.to_pydatetime()
    result[dt] = 0
    result[dt] = ts.iloc[4]
    tm.assert_series_equal(result, ts)


def test_getitem_setitem_datetimeindex():
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz="US/Eastern")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    result = ts["1990-01-01 04:00:00"]
    expected = ts.iloc[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # But we do not give datetimes a pass on tzawareness compat
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    naive = datetime(1990, 1, 1, 4)
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with pytest.raises(KeyError, match=re.escape(repr(key))):
            # GH#36148 as of 2.0 we require tzawareness-compat
            ts[key]

    result = ts.copy()
    # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
    # so setting it as a new key casts to object rather than matching
    # rng[4]
    result[naive] = ts.iloc[4]
    assert result.index.dtype == object
    tm.assert_index_equal(result.index[:-1], rng.astype(object))
    assert result.index[-1] == naive

    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        ts[naive : datetime(1990, 1, 1, 7)]

    result = ts.copy()
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 0
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 99
    # the __setitems__ here failed, so result should still match ts
    tm.assert_series_equal(result, ts)

    lb = naive
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    lb = Timestamp(naive).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(result, ts)


def test_getitem_setitem_periodindex():
    N = 50
    rng = period_range("1/1/1990", periods=N, freq="h")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    result = ts["1990-01-01 04"]
    expected = ts.iloc[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04"] = 0
    result["1990-01-01 04"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts["1990-01-01 04":"1990-01-01 07"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04":"1990-01-01 07"] = 0
    result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # GH 2782
    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)


def test_datetime_indexing():
    index = date_range("1/1/2000", "1/7/2000")
    index = index.repeat(3)

    s = Series(len(index), index=index)
    stamp = Timestamp("1/8/2000")

    with pytest.raises(KeyError, match=re.escape(repr(stamp))):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0

    # not monotonic
    s = Series(len(index), index=index)
    s = s[::-1]

    with pytest.raises(KeyError, match=re.escape(repr(stamp))):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0


# test duplicates in time series


def test_indexing_with_duplicate_datetimeindex(
    rand_series_with_duplicate_datetimeindex,
):
    ts = rand_series_with_duplicate_datetimeindex

    uniques = ts.index.unique()
    for date in uniques:
        result = ts[date]

        mask = ts.index == date
        total = (ts.index == date).sum()
        expected = ts[mask]
        if total > 1:
            tm.assert_series_equal(result, expected)
        else:
            tm.assert_almost_equal(result, expected.iloc[0])

        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        tm.assert_series_equal(cp, expected)

    key = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=re.escape(repr(key))):
        ts[key]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0


def test_loc_getitem_over_size_cutoff(monkeypatch):
    # #1821

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    # create large list of non periodic datetime
    dates = []
    sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    d = datetime(2011, 12, 5, 20, 30)
    n = 1100
    for i in range(n):
        dates.append(d)
        dates.append(d + sec)
        dates.append(d + sec + half_sec)
        dates.append(d + sec + sec + half_sec)
        d += 3 * sec

    # duplicate some values in the list
    duplicate_positions = np.random.default_rng(2).integers(0, len(dates) - 1, 20)
    for p in duplicate_positions:
        dates[p + 1] = dates[p]

    df = DataFrame(
        np.random.default_rng(2).standard_normal((len(dates), 4)),
        index=dates,
        columns=list("ABCD"),
    )

    pos = n * 3
    timestamp = df.index[pos]
    assert timestamp in df.index

    # it works!
    df.loc[timestamp]
    assert len(df.loc[[timestamp]]) > 0


def test_indexing_over_size_cutoff_period_index(monkeypatch):
    # GH 27136

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    n = 1100
    idx = period_range("1/1/2000", freq="min", periods=n)
    assert idx._engine.over_size_threshold

    s = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)

    pos = n - 1
    timestamp = idx[pos]
    assert timestamp in s.index

    # it works!
    s[timestamp]
    assert len(s.loc[[timestamp]]) > 0


def test_indexing_unordered():
    # GH 2437
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.default_rng(2).random(len(rng)), index=rng)
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    for t in ts.index:
        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    for key in [
        slice("2011-01-01", "2011-01-15"),
        slice("2010-12-30", "2011-01-15"),
        slice("2011-01-01", "2011-01-16"),
        # partial ranges
        slice("2011-01-01", "2011-01-6"),
        slice("2011-01-06", "2011-01-8"),
        slice("2011-01-06", "2011-01-12"),
    ]:
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            compare(key)

    # single values
    result = ts2["2011"].sort_index()
    expected = ts["2011"]
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)


def test_indexing_unordered2():
    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME")
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.default_rng(2).permutation(20))

    result = ts["2005"]
    for t in result.index:
        assert t.year == 2005


def test_indexing():
    idx = date_range("2001-1-1", periods=20, freq="ME")
    ts = Series(np.random.default_rng(2).random(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    result = ts["2001"]
    tm.assert_series_equal(result, ts.iloc[:12])

    df = DataFrame({"A": ts.copy()})

    # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
    # like any other key, so raises
    with pytest.raises(KeyError, match="2001"):
        df["2001"]

    # setting
    ts = Series(np.random.default_rng(2).random(len(idx)), index=idx)
    expected = ts.copy()
    expected.iloc[:12] = 1
    ts["2001"] = 1
    tm.assert_series_equal(ts, expected)

    expected = df.copy()
    expected.iloc[:12, 0] = 1
    df.loc["2001", "A"] = 1
    tm.assert_frame_equal(df, expected)


def test_getitem_str_month_with_datetimeindex():
    # GH3546 (not including times on the last day)
    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="h")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)

    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="s")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)


def test_getitem_str_year_with_datetimeindex():
    idx = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013"]
    tm.assert_series_equal(expected, ts)


def test_getitem_str_second_with_datetimeindex():
    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.default_rng(2).random((5, 5)),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
    )

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]

    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]


def test_compare_datetime_with_all_none():
    # GH#54870
    ser = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    ser2 = Series([None, None])
    result = ser > ser2
    expected = Series([False, False])
    tm.assert_series_equal(result, expected)
@@ -0,0 +1,70 @@
import pytest

from pandas import (
    Index,
    Series,
    date_range,
)
import pandas._testing as tm


class TestSeriesDelItem:
    def test_delitem(self):
        # GH#5542
        # should delete the item inplace
        s = Series(range(5))
        del s[0]

        expected = Series(range(1, 5), index=range(1, 5))
        tm.assert_series_equal(s, expected)

        del s[1]
        expected = Series(range(2, 5), index=range(2, 5))
        tm.assert_series_equal(s, expected)

        # only 1 left, del, add, del
        s = Series(1)
        del s[0]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
        s[0] = 1
        tm.assert_series_equal(s, Series(1))
        del s[0]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))

    def test_delitem_object_index(self, using_infer_string):
        # Index(dtype=object)
        dtype = "string[pyarrow_numpy]" if using_infer_string else object
        s = Series(1, index=Index(["a"], dtype=dtype))
        del s["a"]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
        s["a"] = 1
        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
        del s["a"]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))

    def test_delitem_missing_key(self):
        # empty
        s = Series(dtype=object)

        with pytest.raises(KeyError, match=r"^0$"):
            del s[0]

    def test_delitem_extension_dtype(self):
        # GH#40386
        # DatetimeTZDtype
        dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
        ser = Series(dti)

        expected = ser[[0, 2]]
        del ser[1]
        assert ser.dtype == dti.dtype
        tm.assert_series_equal(ser, expected)

        # PeriodDtype
        pi = dti.tz_localize(None).to_period("D")
        ser = Series(pi)

        expected = ser[:2]
        del ser[2]
        assert ser.dtype == pi.dtype
        tm.assert_series_equal(ser, expected)
@@ -0,0 +1,238 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DatetimeIndex,
    Index,
    Series,
    date_range,
)
import pandas._testing as tm


def test_get():
    # GH 6383
    s = Series(
        np.array(
            [
                43,
                48,
                60,
                48,
                50,
                51,
                50,
                45,
                57,
                48,
                56,
                45,
                51,
                39,
                55,
                43,
                54,
                52,
                51,
                54,
            ]
        )
    )

    result = s.get(25, 0)
    expected = 0
    assert result == expected

    s = Series(
        np.array(
            [
                43,
                48,
                60,
                48,
                50,
                51,
                50,
                45,
                57,
                48,
                56,
                45,
                51,
                39,
                55,
                43,
                54,
                52,
                51,
                54,
            ]
        ),
        index=Index(
            [
                25.0,
                36.0,
                49.0,
                64.0,
                81.0,
                100.0,
                121.0,
                144.0,
                169.0,
                196.0,
                1225.0,
                1296.0,
                1369.0,
                1444.0,
                1521.0,
                1600.0,
                1681.0,
                1764.0,
                1849.0,
                1936.0,
            ],
            dtype=np.float64,
        ),
    )

    result = s.get(25, 0)
    expected = 43
    assert result == expected

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    vc = df.i.value_counts()
    result = vc.get(99, default="Missing")
    assert result == "Missing"

    vc = df.b.value_counts()
    result = vc.get(False, default="Missing")
    assert result == 3

    result = vc.get(True, default="Missing")
    assert result == "Missing"


def test_get_nan(float_numpy_dtype):
    # GH 8569
    s = Index(range(10), dtype=float_numpy_dtype).to_series()
    assert s.get(np.nan) is None
    assert s.get(np.nan, default="Missing") == "Missing"


def test_get_nan_multiple(float_numpy_dtype):
    # GH 8569
    # ensure that fixing "test_get_nan" above hasn't broken get
    # with multiple elements
    s = Index(range(10), dtype=float_numpy_dtype).to_series()

    idx = [2, 30]
    assert s.get(idx) is None

    idx = [2, np.nan]
    assert s.get(idx) is None

    # GH 17295 - all missing keys
    idx = [20, 30]
    assert s.get(idx) is None

    idx = [np.nan, np.nan]
    assert s.get(idx) is None


def test_get_with_default():
    # GH#7725
    d0 = ["a", "b", "c", "d"]
    d1 = np.arange(4, dtype="int64")

    for data, index in ((d0, d1), (d1, d0)):
        s = Series(data, index=index)
        for i, d in zip(index, data):
            assert s.get(i) == d
            assert s.get(i, d) == d
            assert s.get(i, "z") == d

            assert s.get("e", "z") == "z"
            assert s.get("e", "e") == "e"

            msg = "Series.__getitem__ treating keys as positions is deprecated"
            warn = None
            if index is d0:
                warn = FutureWarning
            with tm.assert_produces_warning(warn, match=msg):
                assert s.get(10, "z") == "z"
                assert s.get(10, 10) == 10


@pytest.mark.parametrize(
    "arr",
    [
        np.random.default_rng(2).standard_normal(10),
        DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize(
            tz="US/Eastern"
        ),
    ],
)
def test_get_with_ea(arr):
    # GH#21260
    ser = Series(arr, index=[2 * i for i in range(len(arr))])
    assert ser.get(4) == ser.iloc[2]

    result = ser.get([4, 6])
    expected = ser.iloc[[2, 3]]
    tm.assert_series_equal(result, expected)

    result = ser.get(slice(2))
    expected = ser.iloc[[0, 1]]
    tm.assert_series_equal(result, expected)

    assert ser.get(-1) is None
    assert ser.get(ser.index.max() + 1) is None

    ser = Series(arr[:6], index=list("abcdef"))
    assert ser.get("c") == ser.iloc[2]

    result = ser.get(slice("b", "d"))
    expected = ser.iloc[[1, 2, 3]]
    tm.assert_series_equal(result, expected)

    result = ser.get("Z")
    assert result is None

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(4) == ser.iloc[4]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(-1) == ser.iloc[-1]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(len(ser)) is None

    # GH#21257
    ser = Series(arr)
    ser2 = ser[::2]
    assert ser2.get(1) is None


def test_getitem_get(string_series, object_series):
    msg = "Series.__getitem__ treating keys as positions is deprecated"

    for obj in [string_series, object_series]:
        idx = obj.index[5]

        assert obj[idx] == obj.get(idx)
        assert obj[idx] == obj.iloc[5]

    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert string_series.get(-1) == string_series.get(string_series.index[-1])
    assert string_series.iloc[5] == string_series.get(string_series.index[5])


def test_get_none():
    # GH#5652
    s1 = Series(dtype=object)
    s2 = Series(dtype=object, index=list("abc"))
    for s in [s1, s2]:
        result = s.get(None)
        assert result is None
@@ -0,0 +1,735 @@
"""
Series.__getitem__ test classes are organized by the type of key passed.
"""
from datetime import (
    date,
    datetime,
    time,
)

import numpy as np
import pytest

from pandas._libs.tslibs import (
    conversion,
    timezones,
)

from pandas.core.dtypes.common import is_scalar

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    DatetimeIndex,
    Index,
    Series,
    Timestamp,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError

from pandas.tseries.offsets import BDay


class TestSeriesGetitemScalars:
    def test_getitem_object_index_float_string(self):
        # GH#17286
        ser = Series([1] * 4, index=Index(["a", "b", "c", 1.0]))
        assert ser["a"] == 1
        assert ser[1.0] == 1

    def test_getitem_float_keys_tuple_values(self):
        # see GH#13509

        # unique Index
        ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")
        result = ser[0.0]
        assert result == (1, 1)

        # non-unique Index
        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo")

        result = ser[0.0]
        tm.assert_series_equal(result, expected)

    def test_getitem_unrecognized_scalar(self):
        # GH#32684 a scalar key that is not recognized by lib.is_scalar

        # a series that might be produced via `frame.dtypes`
        ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])

        key = ser.index[1]

        result = ser[key]
        assert result == 2

    def test_getitem_negative_out_of_bounds(self):
        ser = Series(["a"] * 10, index=["a"] * 10)

        msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                ser[-11]

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        # don't segfault, GH#495
        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                datetime_series[len(datetime_series)]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        ser = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            ser[-1]

    def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype):
        dtype = any_int_numpy_dtype
        ser = Series(
            np.random.default_rng(2).standard_normal(6),
            index=Index([0, 0, 1, 1, 2, 2], dtype=dtype),
        )

        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

        # not monotonic
        ser = Series(
            np.random.default_rng(2).standard_normal(6), index=[2, 2, 0, 0, 1, 1]
        )

        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

    def test_getitem_int64(self, datetime_series):
        idx = np.int64(5)
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = datetime_series[idx]
        assert res == datetime_series.iloc[5]

    def test_getitem_full_range(self):
        # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
        ser = Series(range(5), index=list(range(5)))
        result = ser[list(range(5))]
        tm.assert_series_equal(result, ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        tz = timezones.maybe_get_tz(tzstr)

        index = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="h", tz=tzstr
        )
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)

        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = conversion.localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

        result = ser["1/3/2000"]
        tm.assert_almost_equal(result, ser.iloc[2])

    def test_getitem_time_object(self):
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

        mask = (rng.hour == 9) & (rng.minute == 30)
        result = ts[time(9, 30)]
        expected = ts[mask]
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        ser = Series([1, 2], index=cats)

        expected = ser.iloc[0]
        result = ser[cats[0]]
        assert result == expected

    def test_getitem_numeric_categorical_listlike_matches_scalar(self):
        # GH#15470
        ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0]))

        # 0 is treated as a label
        assert ser[0] == "c"

        # the listlike analogue should also be treated as labels
        res = ser[[0]]
        expected = ser.iloc[-1:]
        tm.assert_series_equal(res, expected)

        res2 = ser[[0, 1, 2]]
        tm.assert_series_equal(res2, ser.iloc[::-1])

    def test_getitem_integer_categorical_not_positional(self):
        # GH#14865
        ser = Series(["a", "b", "c"], index=Index([1, 2, 3], dtype="category"))
        assert ser.get(3) == "c"
        assert ser[3] == "c"

    def test_getitem_str_with_timedeltaindex(self):
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)

        key = "6 days, 23:11:12"
        indexer = rng.get_loc(key)
        assert indexer == 133

        result = ser[key]
        assert result == ser.iloc[133]

        msg = r"^Timedelta\('50 days 00:00:00'\)$"
        with pytest.raises(KeyError, match=msg):
            rng.get_loc("50 days")
        with pytest.raises(KeyError, match=msg):
            ser["50 days"]

    def test_getitem_bool_index_positional(self):
        # GH#48653
        ser = Series({True: 1, False: 0})
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser[0]
        assert result == 1


class TestSeriesGetitemSlices:
    def test_getitem_partial_str_slice_with_datetimeindex(self):
        # GH#34860
        arr = date_range("1/1/2008", "1/1/2009")
        ser = arr.to_series()
        result = ser["2008"]

        rng = date_range(start="2008-01-01", end="2008-12-31")
        expected = Series(rng, index=rng)

        tm.assert_series_equal(result, expected)

    def test_getitem_slice_strings_with_datetimeindex(self):
        idx = DatetimeIndex(
            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
        )

        ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)

        result = ts["1/2/2000":]
        expected = ts[1:]
        tm.assert_series_equal(result, expected)

        result = ts["1/2/2000":"1/3/2000"]
        expected = ts[1:4]
        tm.assert_series_equal(result, expected)

    def test_getitem_partial_str_slice_with_timedeltaindex(self):
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)

        result = ser["5 day":"6 day"]
        expected = ser.iloc[86:134]
        tm.assert_series_equal(result, expected)

        result = ser["5 day":]
        expected = ser.iloc[86:]
        tm.assert_series_equal(result, expected)

        result = ser[:"6 day"]
        expected = ser.iloc[:134]
        tm.assert_series_equal(result, expected)

    def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self):
        # higher reso
        rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
        ser = Series(np.arange(len(rng)), index=rng)

        result = ser["1 day 10:11:12":]
        expected = ser.iloc[0:]
        tm.assert_series_equal(result, expected)

        result = ser["1 day 10:11:12.001":]
        expected = ser.iloc[1000:]
        tm.assert_series_equal(result, expected)

        result = ser["1 days, 10:11:12.001001"]
        assert result == ser.iloc[1001]

    def test_getitem_slice_2d(self, datetime_series):
        # GH#30588 multi-dimensional indexing deprecated
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            datetime_series[:, np.newaxis]

    def test_getitem_median_slice_bug(self):
        index = date_range("20090415", "20090519", freq="2B")
        ser = Series(np.random.default_rng(2).standard_normal(13), index=index)

        indexer = [slice(6, 7, None)]
        msg = "Indexing with a single-item list"
        with pytest.raises(ValueError, match=msg):
            # GH#31299
            ser[indexer]
        # but we're OK with a single-element tuple
        result = ser[(indexer[0],)]
        expected = ser[indexer[0]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "slc, positions",
        [
            [slice(date(2018, 1, 1), None), [0, 1, 2]],
            [slice(date(2019, 1, 2), None), [2]],
            [slice(date(2020, 1, 1), None), []],
            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
            [slice(None, date(2019, 1, 1)), [0]],
        ],
    )
    def test_getitem_slice_date(self, slc, positions):
        # https://github.com/pandas-dev/pandas/issues/31501
        ser = Series(
            [0, 1, 2],
            DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
        )
        result = ser[slc]
        expected = ser.take(positions)
        tm.assert_series_equal(result, expected)

    def test_getitem_slice_float_raises(self, datetime_series):
        msg = (
            "cannot do slice indexing on DatetimeIndex with these indexers "
            r"\[{key}\] of type float"
        )
        with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
            datetime_series[4.0:10.0]

        with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
            datetime_series[4.5:10.0]

    def test_getitem_slice_bug(self):
        ser = Series(range(10), index=list(range(10)))
        result = ser[-12:]
        tm.assert_series_equal(result, ser)

        result = ser[-7:]
        tm.assert_series_equal(result, ser[3:])

        result = ser[:-12]
        tm.assert_series_equal(result, ser[:0])

    def test_getitem_slice_integers(self):
        ser = Series(
            np.random.default_rng(2).standard_normal(8),
            index=[2, 4, 6, 8, 10, 12, 14, 16],
        )

        result = ser[:4]
        expected = Series(ser.values[:4], index=[2, 4, 6, 8])
        tm.assert_series_equal(result, expected)


class TestSeriesGetitemListLike:
    @pytest.mark.parametrize("box", [list, np.array, Index, Series])
    def test_getitem_no_matches(self, box):
        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
        ser = Series(["A", "B"])

        key = Series(["C"], dtype=object)
        key = box(key)

        msg = (
            r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            ser[key]

    def test_getitem_intlist_intindex_periodvalues(self):
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))

        result = ser[[2, 4]]
        exp = Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(result, exp)
        assert result.dtype == "Period[D]"

    @pytest.mark.parametrize("box", [list, np.array, Index])
    def test_getitem_intlist_intervalindex_non_int(self, box):
        # GH#33404 fall back to positional since ints are unambiguous
        dti = date_range("2000-01-03", periods=3)._with_freq(None)
        ii = pd.IntervalIndex.from_breaks(dti)
        ser = Series(range(len(ii)), index=ii)

        expected = ser.iloc[:1]
        key = box([0])
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser[key]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("box", [list, np.array, Index])
    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
        # GH#33404 do _not_ fall back to positional since ints are ambiguous
        idx = Index(range(4)).astype(dtype)
        dti = date_range("2000-01-03", periods=3)
        mi = pd.MultiIndex.from_product([idx, dti])
        ser = Series(range(len(mi))[::-1], index=mi)

        key = box([5])
        with pytest.raises(KeyError, match="5"):
            ser[key]

    def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype):
        # GH #37218
        ser = Series([1, 2, 3])
        key = np.array([4], dtype=any_unsigned_int_numpy_dtype)

        with pytest.raises(KeyError, match="4"):
            ser[key]
        with pytest.raises(KeyError, match="4"):
            ser.loc[key]


class TestGetitemBooleanMask:
    def test_getitem_boolean(self, string_series):
        ser = string_series
        mask = ser > ser.median()

        # passing list is OK
        result = ser[list(mask)]
        expected = ser[mask]
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.index, ser.index[mask])

    def test_getitem_boolean_empty(self):
        ser = Series([], dtype=np.int64)
        ser.index.name = "index_name"
        ser = ser[ser.isna()]
        assert ser.index.name == "index_name"
        assert ser.dtype == np.int64

        # GH#5877
        # indexing with empty series
        ser = Series(["A", "B"], dtype=object)
        expected = Series(dtype=object, index=Index([], dtype="int64"))
        result = ser[Series([], dtype=object)]
        tm.assert_series_equal(result, expected)

        # invalid because of the boolean indexer
        # that's empty or not-aligned
        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ser[Series([], dtype=bool)]

        with pytest.raises(IndexingError, match=msg):
            ser[Series([True], dtype=bool)]

    def test_getitem_boolean_object(self, string_series):
        # using column from DataFrame

        ser = string_series
        mask = ser > ser.median()
        omask = mask.astype(object)

        # getitem
        result = ser[omask]
        expected = ser[mask]
        tm.assert_series_equal(result, expected)

        # setitem
        s2 = ser.copy()
        cop = ser.copy()
        cop[omask] = 5
        s2[mask] = 5
        tm.assert_series_equal(cop, s2)

        # nans raise exception
        omask[5:10] = np.nan
        msg = "Cannot mask with non-boolean array containing NA / NaN values"
        with pytest.raises(ValueError, match=msg):
            ser[omask]
        with pytest.raises(ValueError, match=msg):
            ser[omask] = 5

    def test_getitem_boolean_dt64_copies(self):
        # GH#36210
        dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
        key = np.array([True, True, False, False])

        ser = Series(dti._data)

        res = ser[key]
        assert res._values._ndarray.base is None

        # compare with numeric case for reference
        ser2 = Series(range(4))
        res2 = ser2[key]
        assert res2._values.base is None

    def test_getitem_boolean_corner(self, datetime_series):
        ts = datetime_series
        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ts[mask_shifted]

        with pytest.raises(IndexingError, match=msg):
            ts.loc[mask_shifted]

    def test_getitem_boolean_different_order(self, string_series):
        ordered = string_series.sort_values()

        sel = string_series[ordered > 0]
        exp = string_series[string_series > 0]
        tm.assert_series_equal(sel, exp)

    def test_getitem_boolean_contiguous_preserve_freq(self):
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        mask = np.zeros(len(rng), dtype=bool)
        mask[10:20] = True

        masked = rng[mask]
        expected = rng[10:20]
        assert expected.freq == rng.freq
        tm.assert_index_equal(masked, expected)

        mask[22] = True
        masked = rng[mask]
        assert masked.freq is None


class TestGetitemCallable:
    def test_getitem_callable(self):
        # GH#12533
        ser = Series(4, index=list("ABCD"))
        result = ser[lambda x: "A"]
        assert result == ser.loc["A"]

        result = ser[lambda x: ["A", "B"]]
        expected = ser.loc[["A", "B"]]
        tm.assert_series_equal(result, expected)

        result = ser[lambda x: [True, False, True, True]]
        expected = ser.iloc[[0, 2, 3]]
        tm.assert_series_equal(result, expected)


def test_getitem_generator(string_series):
    gen = (x > 0 for x in string_series)
    result = string_series[gen]
    result2 = string_series[iter(string_series > 0)]
    expected = string_series[string_series > 0]
    tm.assert_series_equal(result, expected)
    tm.assert_series_equal(result2, expected)


@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    with pytest.raises(ValueError, match="Multi-dimensional indexing"):
        series[:, None]


def test_getitem_multilevel_scalar_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data,
):
    # not implementing this for now
    df = multiindex_year_month_day_dataframe_random_data
    ser = df["A"]

    msg = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=msg):
        ser[2000, 3:4]


def test_getitem_dataframe_raises():
    rng = list(range(10))
    ser = Series(10, index=rng)
    df = DataFrame(rng, index=rng)
    msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=msg):
        ser[df > 5]


def test_getitem_assignment_series_alignment():
    # https://github.com/pandas-dev/pandas/issues/37427
    # with getitem, when assigning with a Series, it is not first aligned
    ser = Series(range(10))
    idx = np.array([2, 4, 9])
    ser[idx] = Series([10, 11, 12])
    expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12])
    tm.assert_series_equal(ser, expected)


def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
    with pytest.raises(KeyError, match="None"):
        ser[None]

    with pytest.raises(KeyError, match="None"):
        ser.index.get_loc(None)

    with pytest.raises(KeyError, match="None"):
        ser.index._engine.get_loc(None)


def test_getitem_1tuple_slice_without_multiindex():
    ser = Series(range(5))
    key = (slice(3),)

    result = ser[key]
    expected = ser[key[0]]
    tm.assert_series_equal(result, expected)


def test_getitem_preserve_name(datetime_series):
    result = datetime_series[datetime_series > 0]
    assert result.name == datetime_series.name

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = datetime_series[[0, 2, 4]]
    assert result.name == datetime_series.name

    result = datetime_series[5:10]
    assert result.name == datetime_series.name


def test_getitem_with_integer_labels():
    # integer indexes, be careful
    ser = Series(
        np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2))
    )
    inds = [0, 2, 5, 7, 8]
    arr_inds = np.array([0, 2, 5, 7, 8])
    with pytest.raises(KeyError, match="not in index"):
        ser[inds]

    with pytest.raises(KeyError, match="not in index"):
        ser[arr_inds]


def test_getitem_missing(datetime_series):
    # missing
    d = datetime_series.index[0] - BDay()
    msg = r"Timestamp\('1999-12-31 00:00:00'\)"
    with pytest.raises(KeyError, match=msg):
        datetime_series[d]


def test_getitem_fancy(string_series, object_series):
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        slice1 = string_series[[1, 2, 3]]
        slice2 = object_series[[1, 2, 3]]
    assert string_series.index[2] == slice1.index[1]
    assert object_series.index[2] == slice2.index[1]
    assert string_series.iloc[2] == slice1.iloc[1]
    assert object_series.iloc[2] == slice2.iloc[1]


def test_getitem_box_float64(datetime_series):
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        value = datetime_series[5]
    assert isinstance(value, np.float64)


def test_getitem_unordered_dup():
    obj = Series(range(5), index=["c", "a", "a", "b", "b"])
    assert is_scalar(obj["c"])
    assert obj["c"] == 0


def test_getitem_dups():
    ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
    expected = Series([3, 4], index=["C", "C"], dtype=np.int64)
    result = ser["C"]
    tm.assert_series_equal(result, expected)


def test_getitem_categorical_str():
    # GH#31765
    ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"]))
    result = ser["a"]
    expected = ser.iloc[[0, 3]]
    tm.assert_series_equal(result, expected)


def test_slice_can_reorder_not_uniquely_indexed():
    ser = Series(1, index=["a", "a", "b", "b", "c"])
    ser[::-1]  # it works!


@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
def test_duplicated_index_getitem_positional_indexer(index_vals):
    # GH 11747
    s = Series(range(5), index=list(index_vals))

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s[3]
    assert result == 3


class TestGetitemDeprecatedIndexers:
    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2, 3])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2, 3])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser[key] = 1
@@ -0,0 +1,518 @@
""" test get/set & misc """
from datetime import timedelta
import re

import numpy as np
import pytest

from pandas.errors import IndexingError

from pandas import (
    NA,
    DataFrame,
    Index,
    IndexSlice,
    MultiIndex,
    NaT,
    Series,
    Timedelta,
    Timestamp,
    concat,
    date_range,
    isna,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


def test_basic_indexing():
    s = Series(
        np.random.default_rng(2).standard_normal(5), index=["a", "b", "a", "a", "b"]
    )

    warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated"
    msg = "index 5 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5]
    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        s["c"]

    s = s.sort_index()

    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5]
    msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5] = 0


def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
    # GH51053
    dtype = any_numeric_dtype
    idx = Index([1, 0, 1], dtype=dtype)
    ser = Series(range(3), index=idx)
    result = ser[1]
    expected = Series([0, 2], index=Index([1, 1], dtype=dtype))
    tm.assert_series_equal(result, expected, check_exact=True)


def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
    # GH51053
    dtype = any_numeric_dtype
    idx = Index([1, 0, 1], dtype=dtype)
    ser = Series(range(3), index=idx)
    ser[1] = 10
    expected = Series([10, 1, 10], index=idx)
    tm.assert_series_equal(ser, expected, check_exact=True)


def test_basic_getitem_with_labels(datetime_series):
    indices = datetime_series.index[[5, 10, 15]]

    result = datetime_series[indices]
    expected = datetime_series.reindex(indices)
    tm.assert_series_equal(result, expected)

    result = datetime_series[indices[0] : indices[2]]
    expected = datetime_series.loc[indices[0] : indices[2]]
    tm.assert_series_equal(result, expected)


def test_basic_getitem_dt64tz_values():
    # GH12089
    # with tz for values
    ser = Series(
        date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = Timestamp("2011-01-01", tz="US/Eastern")
    result = ser.loc["a"]
    assert result == expected
    result = ser.iloc[0]
    assert result == expected
    result = ser["a"]
    assert result == expected


def test_getitem_setitem_ellipsis(using_copy_on_write, warn_copy_on_write):
    s = Series(np.random.default_rng(2).standard_normal(10))

    result = s[...]
    tm.assert_series_equal(result, s)

    with tm.assert_cow_warning(warn_copy_on_write):
        s[...] = 5
    if not using_copy_on_write:
        assert (result == 5).all()


@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            Series({1: 12, 2: [1, 2, 2, 3]}),
            Series({1: 313}),
            Series({1: 12}, dtype=object),
        ],
        [
            Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            Series({1: [1, 2, 3]}),
            Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    # GH 17610
    result = result_1._append(duplicate_item)
    expected = expected_1._append(duplicate_item)
    tm.assert_series_equal(result[1], expected)
    assert result[2] == result_1[2]


def test_getitem_setitem_integers():
    # caused bug without test
    s = Series([1, 2, 3], ["a", "b", "c"])

    assert s.iloc[0] == s["a"]
    s.iloc[0] = 5
    tm.assert_almost_equal(s["a"], 5)


def test_series_box_timestamp():
    rng = date_range("20090415", "20090519", freq="B")
    ser = Series(rng)
    assert isinstance(ser[0], Timestamp)
    assert isinstance(ser.at[1], Timestamp)
    assert isinstance(ser.iat[2], Timestamp)
    assert isinstance(ser.loc[3], Timestamp)
    assert isinstance(ser.iloc[4], Timestamp)

    ser = Series(rng, index=rng)
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert isinstance(ser[0], Timestamp)
    assert isinstance(ser.at[rng[1]], Timestamp)
    assert isinstance(ser.iat[2], Timestamp)
    assert isinstance(ser.loc[rng[3]], Timestamp)
    assert isinstance(ser.iloc[4], Timestamp)


def test_series_box_timedelta():
    rng = timedelta_range("1 day 1 s", periods=5, freq="h")
    ser = Series(rng)
    assert isinstance(ser[0], Timedelta)
    assert isinstance(ser.at[1], Timedelta)
    assert isinstance(ser.iat[2], Timedelta)
    assert isinstance(ser.loc[3], Timedelta)
    assert isinstance(ser.iloc[4], Timedelta)


def test_getitem_ambiguous_keyerror(indexer_sl):
    ser = Series(range(10), index=list(range(0, 20, 2)))
    with pytest.raises(KeyError, match=r"^1$"):
        indexer_sl(ser)[1]


def test_getitem_dups_with_missing(indexer_sl):
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    with pytest.raises(KeyError, match=re.escape("['bam'] not in index")):
        indexer_sl(ser)[["foo", "bar", "bah", "bam"]]


def test_setitem_ambiguous_keyerror(indexer_sl):
    s = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    s2 = s.copy()
    indexer_sl(s2)[1] = 5
    expected = concat([s, Series([5], index=[1])])
    tm.assert_series_equal(s2, expected)


def test_setitem(datetime_series):
    datetime_series[datetime_series.index[5]] = np.nan
    datetime_series.iloc[[1, 2, 17]] = np.nan
    datetime_series.iloc[6] = np.nan
    assert np.isnan(datetime_series.iloc[6])
    assert np.isnan(datetime_series.iloc[2])
    datetime_series[np.isnan(datetime_series)] = 5
    assert not np.isnan(datetime_series.iloc[2])


def test_setslice(datetime_series):
    sl = datetime_series[5:20]
    assert len(sl) == len(sl.index)
    assert sl.index.is_unique is True


def test_basic_getitem_setitem_corner(datetime_series):
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    msg = "key of type tuple not found and not a MultiIndex"
    with pytest.raises(KeyError, match=msg):
        datetime_series[:, 2]
    with pytest.raises(KeyError, match=msg):
        datetime_series[:, 2] = 2

    # weird lists. [slice(0, 5)] raises but not two slices
    msg = "Indexing with a single-item list"
    with pytest.raises(ValueError, match=msg):
        # GH#31299
        datetime_series[[slice(None, 5)]]

    # but we're OK with a single-element tuple
    result = datetime_series[(slice(None, 5),)]
    expected = datetime_series[:5]
    tm.assert_series_equal(result, expected)

    # OK
    msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=msg):
        datetime_series[[5, [None, None]]]
    with pytest.raises(TypeError, match=msg):
        datetime_series[[5, [None, None]]] = 2


def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_write):
    original = string_series.copy()
    numSlice = string_series[10:20]
    numSliceEnd = string_series[-10:]
    objSlice = object_series[10:20]

    assert string_series.index[9] not in numSlice.index
    assert object_series.index[9] not in objSlice.index

    assert len(numSlice) == len(numSlice.index)
    assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]]

    assert numSlice.index[1] == string_series.index[11]
    tm.assert_numpy_array_equal(np.array(numSliceEnd), np.array(string_series)[-10:])

    # Test return view.
    sl = string_series[10:20]
    with tm.assert_cow_warning(warn_copy_on_write):
        sl[:] = 0

    if using_copy_on_write:
        # Doesn't modify parent (CoW)
        tm.assert_series_equal(string_series, original)
    else:
        assert (string_series[10:20] == 0).all()


def test_timedelta_assignment():
    # GH 8209
    s = Series([], dtype=object)
    s.loc["B"] = timedelta(1)
    tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))

    s = s.reindex(s.index.insert(0, "A"))
    tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))

    s.loc["A"] = timedelta(1)
    expected = Series(Timedelta("1 days"), index=["A", "B"])
    tm.assert_series_equal(s, expected)


def test_underlying_data_conversion(using_copy_on_write):
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    return_value = df.set_index(["a", "b", "c"], inplace=True)
    assert return_value is None
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    df_original = df.copy()
    df

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["val"].update(s)
        expected = df_original
    else:
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
|
||||
df["val"].update(s)
|
||||
expected = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
|
||||
)
|
||||
return_value = expected.set_index(["a", "b", "c"], inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(datetime_series):
|
||||
seq = datetime_series.iloc[[5, 10, 15]]
|
||||
seq.iloc[1] = np.nan
|
||||
assert not np.isnan(datetime_series.iloc[10])
|
||||
|
||||
|
||||
def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
|
||||
index = lexsorted_two_level_string_multiindex
|
||||
ser = Series(
|
||||
np.random.default_rng(2).standard_normal(len(index)), index=index, name="sth"
|
||||
)
|
||||
|
||||
result = indexer_sl(ser)["foo"]
|
||||
assert result.name == ser.name
|
||||
|
||||
|
||||
# miscellaneous methods
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
date_range("2014-01-01", periods=20, freq="MS"),
|
||||
period_range("2014-01", periods=20, freq="M"),
|
||||
timedelta_range("0", periods=20, freq="h"),
|
||||
],
|
||||
)
|
||||
def test_slice_with_negative_step(index):
|
||||
keystr1 = str(index[9])
|
||||
keystr2 = str(index[13])
|
||||
|
||||
ser = Series(np.arange(20), index)
|
||||
SLC = IndexSlice
|
||||
|
||||
for key in [keystr1, index[9]]:
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[key::-1], SLC[9::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:key:-1], SLC[:8:-1])
|
||||
|
||||
for key2 in [keystr2, index[13]]:
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[key2:key:-1], SLC[13:8:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[key:key2:-1], SLC[0:0:-1])
|
||||
|
||||
|
||||
def test_tuple_index():
|
||||
# GH 35534 - Selecting values when a Series has an Index of tuples
|
||||
s = Series([1, 2], index=[("a",), ("b",)])
|
||||
assert s[("a",)] == 1
|
||||
assert s[("b",)] == 2
|
||||
s[("b",)] = 3
|
||||
assert s[("b",)] == 3
|
||||
|
||||
|
||||
def test_frozenset_index():
|
||||
# GH35747 - Selecting values when a Series has an Index of frozenset
|
||||
idx0, idx1 = frozenset("a"), frozenset("b")
|
||||
s = Series([1, 2], index=[idx0, idx1])
|
||||
assert s[idx0] == 1
|
||||
assert s[idx1] == 2
|
||||
s[idx1] = 3
|
||||
assert s[idx1] == 3
|
||||
|
||||
|
||||
def test_loc_setitem_all_false_indexer():
|
||||
# GH#45778
|
||||
ser = Series([1, 2], index=["a", "b"])
|
||||
expected = ser.copy()
|
||||
rhs = Series([6, 7], index=["a", "b"])
|
||||
ser.loc[ser > 100] = rhs
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_loc_boolean_indexer_non_matching_index():
|
||||
# GH#46551
|
||||
ser = Series([1])
|
||||
result = ser.loc[Series([NA, False], dtype="boolean")]
|
||||
expected = Series([], dtype="int64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_boolean_indexer_miss_matching_index():
|
||||
# GH#46551
|
||||
ser = Series([1])
|
||||
indexer = Series([NA, False], dtype="boolean", index=[1, 2])
|
||||
with pytest.raises(IndexingError, match="Unalignable"):
|
||||
ser.loc[indexer]
|
||||
|
||||
|
||||
def test_loc_setitem_nested_data_enlargement():
|
||||
# GH#48614
|
||||
df = DataFrame({"a": [1]})
|
||||
ser = Series({"label": df})
|
||||
ser.loc["new_label"] = df
|
||||
expected = Series({"label": df, "new_label": df})
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_loc_ea_numeric_index_oob_slice_end():
|
||||
# GH#50161
|
||||
ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))
|
||||
result = ser.loc[2:3]
|
||||
expected = Series(1, index=Index([2], dtype="Int64"))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_bool_int_key():
|
||||
# GH#48653
|
||||
ser = Series({True: 1, False: 0})
|
||||
with pytest.raises(KeyError, match="0"):
|
||||
ser.loc[0]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [{}, {"b": "x"}])
|
||||
@pytest.mark.parametrize("indexer", [[], [False, False], slice(0, -1), np.array([])])
|
||||
def test_setitem_empty_indexer(indexer, val):
|
||||
# GH#45981
|
||||
df = DataFrame({"a": [1, 2], **val})
|
||||
expected = df.copy()
|
||||
df.loc[indexer] = 1.5
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestDeprecatedIndexers:
|
||||
@pytest.mark.parametrize("key", [{1}, {1: 1}])
|
||||
def test_getitem_dict_and_set_deprecated(self, key):
|
||||
# GH#42825 enforced in 2.0
|
||||
ser = Series([1, 2])
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
ser.loc[key]
|
||||
|
||||
@pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
|
||||
def test_getitem_dict_and_set_deprecated_multiindex(self, key):
|
||||
# GH#42825 enforced in 2.0
|
||||
ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)]))
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
ser.loc[key]
|
||||
|
||||
@pytest.mark.parametrize("key", [{1}, {1: 1}])
|
||||
def test_setitem_dict_and_set_disallowed(self, key):
|
||||
# GH#42825 enforced in 2.0
|
||||
ser = Series([1, 2])
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
ser.loc[key] = 1
|
||||
|
||||
@pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
|
||||
def test_setitem_dict_and_set_disallowed_multiindex(self, key):
|
||||
# GH#42825 enforced in 2.0
|
||||
ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)]))
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
ser.loc[key] = 1
|
||||
|
||||
|
||||
class TestSetitemValidation:
|
||||
# This is adapted from pandas/tests/arrays/masked/test_indexing.py
|
||||
# but checks for warnings instead of errors.
|
||||
def _check_setitem_invalid(self, ser, invalid, indexer, warn):
|
||||
msg = "Setting an item of incompatible dtype is deprecated"
|
||||
msg = re.escape(msg)
|
||||
|
||||
orig_ser = ser.copy()
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
ser[indexer] = invalid
|
||||
ser = orig_ser.copy()
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
ser.iloc[indexer] = invalid
|
||||
ser = orig_ser.copy()
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
ser.loc[indexer] = invalid
|
||||
ser = orig_ser.copy()
|
||||
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
ser[:] = invalid
|
||||
|
||||
_invalid_scalars = [
|
||||
1 + 2j,
|
||||
"True",
|
||||
"1",
|
||||
"1.0",
|
||||
NaT,
|
||||
np.datetime64("NaT"),
|
||||
np.timedelta64("NaT"),
|
||||
]
|
||||
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
|
||||
)
|
||||
@pytest.mark.parametrize("indexer", _indexers)
|
||||
def test_setitem_validation_scalar_bool(self, invalid, indexer):
|
||||
ser = Series([True, False, False], dtype="bool")
|
||||
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)
|
||||
|
||||
@pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
|
||||
@pytest.mark.parametrize("indexer", _indexers)
|
||||
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
|
||||
ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
|
||||
if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
|
||||
warn = None
|
||||
else:
|
||||
warn = FutureWarning
|
||||
self._check_setitem_invalid(ser, invalid, indexer, warn)
|
||||
|
||||
@pytest.mark.parametrize("invalid", _invalid_scalars + [True])
|
||||
@pytest.mark.parametrize("indexer", _indexers)
|
||||
def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
|
||||
ser = Series([1, 2, None], dtype=float_numpy_dtype)
|
||||
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)
|
@ -0,0 +1,69 @@
import numpy as np
import pytest

from pandas import Series
import pandas._testing as tm


def test_mask():
    # compare with tested results in test_where
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    rs = s.where(~cond, np.nan)
    tm.assert_series_equal(rs, s.mask(cond))

    rs = s.where(~cond)
    rs2 = s.mask(cond)
    tm.assert_series_equal(rs, rs2)

    rs = s.where(~cond, -s)
    rs2 = s.mask(cond, -s)
    tm.assert_series_equal(rs, rs2)

    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())
    rs = s2.where(~cond[:3])
    rs2 = s2.mask(cond[:3])
    tm.assert_series_equal(rs, rs2)

    rs = s2.where(~cond[:3], -s2)
    rs2 = s2.mask(cond[:3], -s2)
    tm.assert_series_equal(rs, rs2)

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.mask(1)
    with pytest.raises(ValueError, match=msg):
        s.mask(cond[:3].values, -s)


def test_mask_casts():
    # dtype changes
    ser = Series([1, 2, 3, 4])
    result = ser.mask(ser > 2, np.nan)
    expected = Series([1, 2, np.nan, np.nan])
    tm.assert_series_equal(result, expected)


def test_mask_casts2():
    # see gh-21891
    ser = Series([1, 2])
    res = ser.mask([True, False])

    exp = Series([np.nan, 2])
    tm.assert_series_equal(res, exp)


def test_mask_inplace():
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    rs = s.copy()
    rs.mask(cond, inplace=True)
    tm.assert_series_equal(rs.dropna(), s[~cond])
    tm.assert_series_equal(rs, s.mask(cond))

    rs = s.copy()
    rs.mask(cond, -s, inplace=True)
    tm.assert_series_equal(rs, s.mask(cond, -s))
@ -0,0 +1,45 @@
from datetime import datetime

import numpy as np

from pandas import (
    DatetimeIndex,
    Series,
)
import pandas._testing as tm


def test_series_set_value():
    # GH#1561

    dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
    index = DatetimeIndex(dates)

    s = Series(dtype=object)
    s._set_value(dates[0], 1.0)
    s._set_value(dates[1], np.nan)

    expected = Series([1.0, np.nan], index=index)

    tm.assert_series_equal(s, expected)


def test_set_value_dt64(datetime_series):
    idx = datetime_series.index[10]
    res = datetime_series._set_value(idx, 0)
    assert res is None
    assert datetime_series[idx] == 0


def test_set_value_str_index(string_series):
    # equiv
    ser = string_series.copy()
    res = ser._set_value("foobar", 0)
    assert res is None
    assert ser.index[-1] == "foobar"
    assert ser["foobar"] == 0

    ser2 = string_series.copy()
    ser2.loc["foobar"] = 0
    assert ser2.index[-1] == "foobar"
    assert ser2["foobar"] == 0
File diff suppressed because it is too large
@ -0,0 +1,50 @@
import pytest

import pandas as pd
from pandas import Series
import pandas._testing as tm


def test_take_validate_axis():
    # GH#51022
    ser = Series([-1, 5, 6, 2, 4])

    msg = "No axis named foo for object type Series"
    with pytest.raises(ValueError, match=msg):
        ser.take([1, 2], axis="foo")


def test_take():
    ser = Series([-1, 5, 6, 2, 4])

    actual = ser.take([1, 3, 4])
    expected = Series([5, 2, 4], index=[1, 3, 4])
    tm.assert_series_equal(actual, expected)

    actual = ser.take([-1, 3, 4])
    expected = Series([4, 2, 4], index=[4, 3, 4])
    tm.assert_series_equal(actual, expected)

    msg = "indices are out-of-bounds"
    with pytest.raises(IndexError, match=msg):
        ser.take([1, 10])
    with pytest.raises(IndexError, match=msg):
        ser.take([2, 5])


def test_take_categorical():
    # https://github.com/pandas-dev/pandas/issues/20664
    ser = Series(pd.Categorical(["a", "b", "c"]))
    result = ser.take([-2, -2, 0])
    expected = Series(
        pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]
    )
    tm.assert_series_equal(result, expected)


def test_take_slice_raises():
    ser = Series([-1, 5, 6, 2, 4])

    msg = "Series.take requires a sequence of integers, not slice"
    with pytest.raises(TypeError, match=msg):
        ser.take(slice(0, 3, 1))
@ -0,0 +1,481 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.core.dtypes.common import is_integer

import pandas as pd
from pandas import (
    Series,
    Timestamp,
    date_range,
    isna,
)
import pandas._testing as tm


def test_where_unsafe_int(any_signed_int_numpy_dtype):
    s = Series(np.arange(10), dtype=any_signed_int_numpy_dtype)
    mask = s < 5

    s[mask] = range(2, 7)
    expected = Series(
        list(range(2, 7)) + list(range(5, 10)),
        dtype=any_signed_int_numpy_dtype,
    )

    tm.assert_series_equal(s, expected)


def test_where_unsafe_float(float_numpy_dtype):
    s = Series(np.arange(10), dtype=float_numpy_dtype)
    mask = s < 5

    s[mask] = range(2, 7)
    data = list(range(2, 7)) + list(range(5, 10))
    expected = Series(data, dtype=float_numpy_dtype)

    tm.assert_series_equal(s, expected)


@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    # see gh-9743
    s = Series(np.arange(10), dtype=dtype)
    values = [2.5, 3.5, 4.5, 5.5, 6.5]
    mask = s < 5
    expected = Series(values + list(range(5, 10)), dtype=expected_dtype)
    warn = (
        None
        if np.dtype(dtype).kind == np.dtype(expected_dtype).kind == "f"
        else FutureWarning
    )
    with tm.assert_produces_warning(warn, match="incompatible dtype"):
        s[mask] = values
    tm.assert_series_equal(s, expected)


def test_where_unsafe():
    # see gh-9731
    s = Series(np.arange(10), dtype="int64")
    values = [2.5, 3.5, 4.5, 5.5]

    mask = s > 5
    expected = Series(list(range(6)) + values, dtype="float64")

    with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
        s[mask] = values
    tm.assert_series_equal(s, expected)

    # see gh-3235
    s = Series(np.arange(10), dtype="int64")
    mask = s < 5
    s[mask] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    tm.assert_series_equal(s, expected)
    assert s.dtype == expected.dtype

    s = Series(np.arange(10), dtype="int64")
    mask = s > 5
    s[mask] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    tm.assert_series_equal(s, expected)

    s = Series(np.arange(10))
    mask = s > 5

    msg = "cannot set using a list-like indexer with a different length than the value"
    with pytest.raises(ValueError, match=msg):
        s[mask] = [5, 4, 3, 2, 1]

    with pytest.raises(ValueError, match=msg):
        s[mask] = [0] * 5

    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.where(s > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    tm.assert_series_equal(result, expected)

    # GH 4667
    # setting with None changes dtype
    s = Series(range(10)).astype(float)
    s[8] = None
    result = s[8]
    assert isna(result)

    s = Series(range(10)).astype(float)
    s[s > 8] = None
    result = s[isna(s)]
    expected = Series(np.nan, index=[9])
    tm.assert_series_equal(result, expected)


def test_where():
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    rs = s.where(cond).dropna()
    rs2 = s[cond]
    tm.assert_series_equal(rs, rs2)

    rs = s.where(cond, -s)
    tm.assert_series_equal(rs, s.abs())

    rs = s.where(cond)
    assert s.shape == rs.shape
    assert rs is not s

    # test alignment
    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())

    expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
    rs = s2.where(cond[:3])
    tm.assert_series_equal(rs, expected)

    expected = s2.abs()
    expected.iloc[0] = s2[0]
    rs = s2.where(cond[:3], -s2)
    tm.assert_series_equal(rs, expected)


def test_where_error():
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.where(1)
    with pytest.raises(ValueError, match=msg):
        s.where(cond[:3].values, -s)

    # GH 2745
    s = Series([1, 2])
    s[[True, False]] = [0, 1]
    expected = Series([0, 2])
    tm.assert_series_equal(s, expected)

    # failures
    msg = "cannot set using a list-like indexer with a different length than the value"
    with pytest.raises(ValueError, match=msg):
        s[[True, False]] = [0, 2, 3]

    with pytest.raises(ValueError, match=msg):
        s[[True, False]] = []


@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    # see gh-15414
    s = Series([1, 2, 3])
    cond = [False, True, True]
    expected = Series([np.nan, 2, 3])

    result = s.where(klass(cond))
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    # see gh-15414: only boolean arrays accepted
    s = Series([1, 2, 3])
    msg = "Boolean array expected for the condition"

    with pytest.raises(ValueError, match=msg):
        s.where(cond)

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.where([True])


def test_where_ndframe_align():
    msg = "Array conditional must be same shape as self"
    s = Series([1, 2, 3])

    cond = [True]
    with pytest.raises(ValueError, match=msg):
        s.where(cond)

    expected = Series([1, np.nan, np.nan])

    out = s.where(Series(cond))
    tm.assert_series_equal(out, expected)

    cond = np.array([False, True, False, True])
    with pytest.raises(ValueError, match=msg):
        s.where(cond)

    expected = Series([np.nan, 2, np.nan])

    out = s.where(Series(cond))
    tm.assert_series_equal(out, expected)


@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    msg = (
        lambda x: f"cannot set using a {x} indexer with a "
        "different length than the value"
    )
    # slice
    s = Series(list("abc"))

    with pytest.raises(ValueError, match=msg("slice")):
        s[0:3] = list(range(27))

    s[0:3] = list(range(3))
    expected = Series([0, 1, 2])
    tm.assert_series_equal(s.astype(np.int64), expected)

    # slice with step
    s = Series(list("abcdef"))

    with pytest.raises(ValueError, match=msg("slice")):
        s[0:4:2] = list(range(27))

    s = Series(list("abcdef"))
    s[0:4:2] = list(range(2))
    expected = Series([0, "b", 1, "d", "e", "f"])
    tm.assert_series_equal(s, expected)

    # neg slices
    s = Series(list("abcdef"))

    with pytest.raises(ValueError, match=msg("slice")):
        s[:-1] = list(range(27))

    s[-3:-1] = list(range(2))
    expected = Series(["a", "b", "c", 0, 1, "f"])
    tm.assert_series_equal(s, expected)

    # list
    s = Series(list("abc"))

    with pytest.raises(ValueError, match=msg("list-like")):
        s[[0, 1, 2]] = list(range(27))

    s = Series(list("abc"))

    with pytest.raises(ValueError, match=msg("list-like")):
        s[[0, 1, 2]] = list(range(2))

    # scalar
    s = Series(list("abc"))
    s[0] = list(range(10))
    expected = Series([list(range(10)), "b", "c"])
    tm.assert_series_equal(s, expected)


@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    # GH#8801, GH#4195
    selection = np.resize(mask, size)

    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series(
        [item if use_item else data[i] for i, use_item in enumerate(selection)]
    )

    s = Series(data)

    s[selection] = item
    tm.assert_series_equal(s, expected)

    s = Series(data)
    result = s.where(~selection, box(item))
    tm.assert_series_equal(result, expected)

    s = Series(data)
    result = s.mask(selection, box(item))
    tm.assert_series_equal(result, expected)


def test_where_inplace():
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    rs = s.copy()

    rs.where(cond, inplace=True)
    tm.assert_series_equal(rs.dropna(), s[cond])
    tm.assert_series_equal(rs, s.where(cond))

    rs = s.copy()
    rs.where(cond, -s, inplace=True)
    tm.assert_series_equal(rs, s.where(cond, -s))


def test_where_dups():
    # GH 4550
    # where crashes with dups in index
    s1 = Series(list(range(3)))
    s2 = Series(list(range(3)))
    comb = pd.concat([s1, s2])
    result = comb.where(comb < 2)
    expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2])
    tm.assert_series_equal(result, expected)

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
    tm.assert_series_equal(comb, expected)

    comb[comb < 2] += 10
    expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
    tm.assert_series_equal(comb, expected)


def test_where_numeric_with_string():
    # GH 9280
    s = Series([1, 2, 3])
    w = s.where(s > 1, "X")

    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"

    w = s.where(s > 1, ["X", "Y", "Z"])
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"

    w = s.where(s > 1, np.array(["X", "Y", "Z"]))
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"


@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
def test_where_datetimelike_coerce(dtype):
    ser = Series([1, 2], dtype=dtype)
    expected = Series([10, 10])
    mask = np.array([False, False])

    msg = "Downcasting behavior in Series and DataFrame methods 'where'"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, [10, 10])
    tm.assert_series_equal(rs, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, 10)
    tm.assert_series_equal(rs, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, 10.0)
    tm.assert_series_equal(rs, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, [10.0, 10.0])
    tm.assert_series_equal(rs, expected)

    rs = ser.where(mask, [10.0, np.nan])
    expected = Series([10, np.nan], dtype="object")
    tm.assert_series_equal(rs, expected)


def test_where_datetimetz():
    # GH 15701
    timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
    ser = Series([Timestamp(t) for t in timestamps], dtype="datetime64[ns, UTC]")
    rs = ser.where(Series([False, True]))
    expected = Series([pd.NaT, ser[1]], dtype="datetime64[ns, UTC]")
    tm.assert_series_equal(rs, expected)


def test_where_sparse():
    # GH#17198 make sure we dont get an AttributeError for sp_index
    ser = Series(pd.arrays.SparseArray([1, 2]))
    result = ser.where(ser >= 2, 0)
    expected = Series(pd.arrays.SparseArray([0, 2]))
    tm.assert_series_equal(result, expected)


def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    # https://github.com/pandas-dev/pandas/issues/34592
    ser = Series([], dtype=float)
    result = ser.where([])
    tm.assert_series_equal(result, ser)


def test_where_categorical(frame_or_series):
    # https://github.com/pandas-dev/pandas/issues/18888
    exp = frame_or_series(
        pd.Categorical(["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]),
        dtype="category",
    )
    df = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
    res = df.where(df != "C")
    tm.assert_equal(exp, res)


def test_where_datetimelike_categorical(tz_naive_fixture):
    # GH#37682
    tz = tz_naive_fixture

    dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
    lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
    rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])

    mask = np.array([True, True, False])

    # DatetimeIndex.where
    res = lvals.where(mask, rvals)
    tm.assert_index_equal(res, dr)

    # DatetimeArray.where
    res = lvals._data._where(mask, rvals)
    tm.assert_datetime_array_equal(res, dr._data)

    # Series.where
    res = Series(lvals).where(mask, rvals)
    tm.assert_series_equal(res, Series(dr))

    # DataFrame.where
    res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))

    tm.assert_frame_equal(res, pd.DataFrame(dr))
@ -0,0 +1,82 @@
import numpy as np
import pytest

from pandas import (
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm


def test_xs_datetimelike_wrapping():
    # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp
    arr = date_range("2016-01-01", periods=3)._data._ndarray

    ser = Series(arr, dtype=object)
    for i in range(len(ser)):
        ser.iloc[i] = arr[i]
    assert ser.dtype == object
    assert isinstance(ser[0], np.datetime64)

    result = ser.xs(0)
    assert isinstance(result, np.datetime64)


class TestXSWithMultiIndex:
    def test_xs_level_series(self, multiindex_dataframe_random_data):
        df = multiindex_dataframe_random_data
        ser = df["A"]
        expected = ser[:, "two"]
        result = df.xs("two", level=1)["A"]
        tm.assert_series_equal(result, expected)

    def test_series_getitem_multiindex_xs_by_label(self):
        # GH#5684
        idx = MultiIndex.from_tuples(
            [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
        )
        ser = Series([1, 2, 3, 4], index=idx)
        return_value = ser.index.set_names(["L1", "L2"], inplace=True)
        assert return_value is None
        expected = Series([1, 3], index=["a", "b"])
        return_value = expected.index.set_names(["L1"], inplace=True)
        assert return_value is None

        result = ser.xs("one", level="L2")
        tm.assert_series_equal(result, expected)

    def test_series_getitem_multiindex_xs(self):
        # GH#6258
        dt = list(date_range("20130903", periods=3))
        idx = MultiIndex.from_product([list("AB"), dt])
        ser = Series([1, 3, 4, 1, 3, 4], index=idx)
        expected = Series([1, 1], index=list("AB"))

        result = ser.xs("20130903", level=1)
        tm.assert_series_equal(result, expected)

    def test_series_xs_droplevel_false(self):
        # GH: 19056
        mi = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
        )
        ser = Series([1, 1, 1], index=mi)
        result = ser.xs("a", axis=0, drop_level=False)
        expected = Series(
            [1, 1],
            index=MultiIndex.from_tuples(
                [("a", "x"), ("a", "y")], names=["level1", "level2"]
            ),
        )
        tm.assert_series_equal(result, expected)

    def test_xs_key_as_list(self):
        # GH#41760
        mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
        ser = Series([1], index=mi)
        with pytest.raises(TypeError, match="list keys are not supported"):
            ser.xs(["a", "x"], axis=0, drop_level=False)

        with pytest.raises(TypeError, match="list keys are not supported"):
            ser.xs(["a"], axis=0, drop_level=False)