forked from Alsan/Post_finder
venv
This commit is contained in:
@ -0,0 +1,7 @@
|
||||
"""
|
||||
Test files dedicated to individual (stand-alone) Series methods
|
||||
|
||||
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
|
||||
|
||||
These may also present opportunities for sharing/de-duplicating test code.
|
||||
"""
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,41 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Index
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_add_prefix_suffix(string_series):
|
||||
with_prefix = string_series.add_prefix("foo#")
|
||||
expected = Index([f"foo#{c}" for c in string_series.index])
|
||||
tm.assert_index_equal(with_prefix.index, expected)
|
||||
|
||||
with_suffix = string_series.add_suffix("#foo")
|
||||
expected = Index([f"{c}#foo" for c in string_series.index])
|
||||
tm.assert_index_equal(with_suffix.index, expected)
|
||||
|
||||
with_pct_prefix = string_series.add_prefix("%")
|
||||
expected = Index([f"%{c}" for c in string_series.index])
|
||||
tm.assert_index_equal(with_pct_prefix.index, expected)
|
||||
|
||||
with_pct_suffix = string_series.add_suffix("%")
|
||||
expected = Index([f"{c}%" for c in string_series.index])
|
||||
tm.assert_index_equal(with_pct_suffix.index, expected)
|
||||
|
||||
|
||||
def test_add_prefix_suffix_axis(string_series):
|
||||
# GH 47819
|
||||
with_prefix = string_series.add_prefix("foo#", axis=0)
|
||||
expected = Index([f"foo#{c}" for c in string_series.index])
|
||||
tm.assert_index_equal(with_prefix.index, expected)
|
||||
|
||||
with_pct_suffix = string_series.add_suffix("#foo", axis=0)
|
||||
expected = Index([f"{c}#foo" for c in string_series.index])
|
||||
tm.assert_index_equal(with_pct_suffix.index, expected)
|
||||
|
||||
|
||||
def test_add_prefix_suffix_invalid_axis(string_series):
    """Check that ``axis=1`` is rejected with ``ValueError`` by both methods."""
    msg = "No axis named 1 for object type Series"

    with pytest.raises(ValueError, match=msg):
        string_series.add_prefix("foo#", axis=1)

    with pytest.raises(ValueError, match=msg):
        string_series.add_suffix("foo#", axis=1)
|
@ -0,0 +1,249 @@
|
||||
from datetime import timezone
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(datetime_series, first_slice, second_slice, join_type, fill):
    """Compare ``Series.align`` with reindexing both inputs onto the joined index.

    ``first_slice``/``second_slice`` are ``slice`` arguments applied to
    ``datetime_series``; ``fill`` is forwarded as ``fill_value``.
    """
    a = datetime_series[slice(*first_slice)]
    b = datetime_series[slice(*second_slice)]

    aa, ab = a.align(b, join=join_type, fill_value=fill)

    join_index = a.index.join(b.index, how=join_type)
    if fill is not None:
        # Any labels outside the joined index must carry the fill value.
        diff_a = aa.index.difference(join_index)
        diff_b = ab.index.difference(join_index)
        if len(diff_a) > 0:
            assert (aa.reindex(diff_a) == fill).all()
        if len(diff_b) > 0:
            assert (ab.reindex(diff_b) == fill).all()

    ea = a.reindex(join_index)
    eb = b.reindex(join_index)

    if fill is not None:
        ea = ea.fillna(fill)
        eb = eb.fillna(fill)

    tm.assert_series_equal(aa, ea)
    tm.assert_series_equal(ab, eb)
    # NOTE(review): relies on the ``datetime_series`` fixture being named "ts".
    assert aa.name == "ts"
    assert ea.name == "ts"
    assert ab.name == "ts"
    assert eb.name == "ts"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
    datetime_series, first_slice, second_slice, join_type, method, limit
):
    """Check ``align(method=..., limit=...)`` against reindex + ``fillna``.

    Both the ``align`` call and the ``fillna(method=...)`` calls are expected
    to emit a ``FutureWarning``.
    """
    a = datetime_series[slice(*first_slice)]
    b = datetime_series[slice(*second_slice)]

    msg = (
        "The 'method', 'limit', and 'fill_axis' keywords in Series.align "
        "are deprecated"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        aa, ab = a.align(b, join=join_type, method=method, limit=limit)

    join_index = a.index.join(b.index, how=join_type)
    ea = a.reindex(join_index)
    eb = b.reindex(join_index)

    msg2 = "Series.fillna with 'method' is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg2):
        ea = ea.fillna(method=method, limit=limit)
        eb = eb.fillna(method=method, limit=limit)

    tm.assert_series_equal(aa, ea)
    tm.assert_series_equal(ab, eb)
|
||||
|
||||
|
||||
def test_align_nocopy(datetime_series, using_copy_on_write):
|
||||
b = datetime_series[:5].copy()
|
||||
|
||||
# do copy
|
||||
a = datetime_series.copy()
|
||||
ra, _ = a.align(b, join="left")
|
||||
ra[:5] = 5
|
||||
assert not (a[:5] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = datetime_series.copy()
|
||||
ra, _ = a.align(b, join="left", copy=False)
|
||||
ra[:5] = 5
|
||||
if using_copy_on_write:
|
||||
assert not (a[:5] == 5).any()
|
||||
else:
|
||||
assert (a[:5] == 5).all()
|
||||
|
||||
# do copy
|
||||
a = datetime_series.copy()
|
||||
b = datetime_series[:5].copy()
|
||||
_, rb = a.align(b, join="right")
|
||||
rb[:3] = 5
|
||||
assert not (b[:3] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = datetime_series.copy()
|
||||
b = datetime_series[:5].copy()
|
||||
_, rb = a.align(b, join="right", copy=False)
|
||||
rb[:2] = 5
|
||||
if using_copy_on_write:
|
||||
assert not (b[:2] == 5).any()
|
||||
else:
|
||||
assert (b[:2] == 5).all()
|
||||
|
||||
|
||||
def test_align_same_index(datetime_series, using_copy_on_write):
|
||||
a, b = datetime_series.align(datetime_series, copy=False)
|
||||
if not using_copy_on_write:
|
||||
assert a.index is datetime_series.index
|
||||
assert b.index is datetime_series.index
|
||||
else:
|
||||
assert a.index.is_(datetime_series.index)
|
||||
assert b.index.is_(datetime_series.index)
|
||||
|
||||
a, b = datetime_series.align(datetime_series, copy=True)
|
||||
assert a.index is not datetime_series.index
|
||||
assert b.index is not datetime_series.index
|
||||
assert a.index.is_(datetime_series.index)
|
||||
assert b.index.is_(datetime_series.index)
|
||||
|
||||
|
||||
def test_align_multiindex():
    """Align a three-level MultiIndex Series with a Series indexed by level "b".

    ``s1.align(s2, join=X)`` and ``s2.align(s1, join=<opposite of X>)`` must
    produce the same pair of results with the two sides swapped.
    """
    # GH 10665

    midx = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=("a", "b", "c")
    )
    idx = pd.Index(range(2), name="b")
    s1 = Series(np.arange(12, dtype="int64"), index=midx)
    s2 = Series(np.arange(2, dtype="int64"), index=idx)

    # these must be the same results (but flipped)
    res1l, res1r = s1.align(s2, join="left")
    res2l, res2r = s2.align(s1, join="right")

    expl = s1
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)

    res1l, res1r = s1.align(s2, join="right")
    res2l, res2r = s2.align(s1, join="left")

    exp_idx = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=("a", "b", "c")
    )
    expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = Series([0, 0, 1, 1] * 2, index=exp_idx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
def test_align_with_dataframe_method(method):
    """Check that aligning a Series with a same-index DataFrame changes neither.

    Passing ``method`` is expected to emit the deprecation ``FutureWarning``.
    """
    # GH31788
    ser = Series(range(3), index=range(3))
    df = pd.DataFrame(0.0, index=range(3), columns=range(3))

    msg = (
        "The 'method', 'limit', and 'fill_axis' keywords in Series.align "
        "are deprecated"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result_ser, result_df = ser.align(df, method=method)
    tm.assert_series_equal(result_ser, ser)
    tm.assert_frame_equal(result_df, df)
|
||||
|
||||
|
||||
def test_align_dt64tzindex_mismatched_tzs():
    """Check that aligning Series with different timezones yields UTC indexes."""
    idx1 = date_range("2001", periods=5, freq="h", tz="US/Eastern")
    ser = Series(np.random.default_rng(2).standard_normal(len(idx1)), index=idx1)
    ser_central = ser.tz_convert("US/Central")
    # different timezones convert to UTC

    new1, new2 = ser.align(ser_central)
    assert new1.index.tz is timezone.utc
    assert new2.index.tz is timezone.utc
|
||||
|
||||
|
||||
def test_align_periodindex(join_type):
|
||||
rng = period_range("1/1/2000", "1/1/2010", freq="Y")
|
||||
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
|
||||
|
||||
# TODO: assert something?
|
||||
ts.align(ts[::2], join=join_type)
|
||||
|
||||
|
||||
def test_align_left_fewer_levels():
    """Align when the left MultiIndex has a subset of the right's levels.

    Both results are expected to carry levels ordered ("a", "c", "b").
    """
    # GH#45224
    left = Series([2], index=pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"]))
    right = Series(
        [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    )
    result_left, result_right = left.align(right)

    expected_right = Series(
        [1], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    )
    expected_left = Series(
        [2], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    )
    tm.assert_series_equal(result_left, expected_left)
    tm.assert_series_equal(result_right, expected_right)
|
||||
|
||||
|
||||
def test_align_left_different_named_levels():
|
||||
# GH#45224
|
||||
left = Series(
|
||||
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
|
||||
)
|
||||
right = Series(
|
||||
[1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
|
||||
)
|
||||
result_left, result_right = left.align(right)
|
||||
|
||||
expected_left = Series(
|
||||
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
|
||||
)
|
||||
expected_right = Series(
|
||||
[1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
|
||||
)
|
||||
tm.assert_series_equal(result_left, expected_left)
|
||||
tm.assert_series_equal(result_right, expected_right)
|
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesArgsort:
|
||||
def test_argsort_axis(self):
|
||||
# GH#54257
|
||||
ser = Series(range(3))
|
||||
|
||||
msg = "No axis named 2 for object type Series"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.argsort(axis=2)
|
||||
|
||||
def test_argsort_numpy(self, datetime_series):
|
||||
ser = datetime_series
|
||||
|
||||
res = np.argsort(ser).values
|
||||
expected = np.argsort(np.array(ser))
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
# with missing values
|
||||
ts = ser.copy()
|
||||
ts[::2] = np.nan
|
||||
|
||||
msg = "The behavior of Series.argsort in the presence of NA values"
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match=msg, check_stacklevel=False
|
||||
):
|
||||
result = np.argsort(ts)[1::2]
|
||||
expected = np.argsort(np.array(ts.dropna()))
|
||||
|
||||
tm.assert_numpy_array_equal(result.values, expected)
|
||||
|
||||
def test_argsort(self, datetime_series):
|
||||
argsorted = datetime_series.argsort()
|
||||
assert issubclass(argsorted.dtype.type, np.integer)
|
||||
|
||||
def test_argsort_dt64(self, unit):
|
||||
# GH#2967 (introduced bug in 0.11-dev I think)
|
||||
ser = Series(
|
||||
[Timestamp(f"201301{i:02d}") for i in range(1, 6)], dtype=f"M8[{unit}]"
|
||||
)
|
||||
assert ser.dtype == f"datetime64[{unit}]"
|
||||
shifted = ser.shift(-1)
|
||||
assert shifted.dtype == f"datetime64[{unit}]"
|
||||
assert isna(shifted[4])
|
||||
|
||||
result = ser.argsort()
|
||||
expected = Series(range(5), dtype=np.intp)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "The behavior of Series.argsort in the presence of NA values"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = shifted.argsort()
|
||||
expected = Series(list(range(4)) + [-1], dtype=np.intp)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_argsort_stable(self):
|
||||
ser = Series(np.random.default_rng(2).integers(0, 100, size=10000))
|
||||
mindexer = ser.argsort(kind="mergesort")
|
||||
qindexer = ser.argsort()
|
||||
|
||||
mexpected = np.argsort(ser.values, kind="mergesort")
|
||||
qexpected = np.argsort(ser.values, kind="quicksort")
|
||||
|
||||
tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected))
|
||||
tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected))
|
||||
msg = (
|
||||
r"ndarray Expected type <class 'numpy\.ndarray'>, "
|
||||
r"found <class 'pandas\.core\.series\.Series'> instead"
|
||||
)
|
||||
with pytest.raises(AssertionError, match=msg):
|
||||
tm.assert_numpy_array_equal(qindexer, mindexer)
|
||||
|
||||
def test_argsort_preserve_name(self, datetime_series):
|
||||
result = datetime_series.argsort()
|
||||
assert result.name == datetime_series.name
|
@ -0,0 +1,205 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import IncompatibleFrequency
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
notna,
|
||||
offsets,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesAsof:
|
||||
def test_asof_nanosecond_index_access(self):
|
||||
ts = Timestamp("20130101").as_unit("ns")._value
|
||||
dti = DatetimeIndex([ts + 50 + i for i in range(100)])
|
||||
ser = Series(np.random.default_rng(2).standard_normal(100), index=dti)
|
||||
|
||||
first_value = ser.asof(ser.index[0])
|
||||
|
||||
# GH#46903 previously incorrectly was "day"
|
||||
assert dti.resolution == "nanosecond"
|
||||
|
||||
# this used to not work bc parsing was done by dateutil that didn't
|
||||
# handle nanoseconds
|
||||
assert first_value == ser["2013-01-01 00:00:00.000000050"]
|
||||
|
||||
expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
|
||||
assert first_value == ser[Timestamp(expected_ts)]
|
||||
|
||||
def test_basic(self):
|
||||
# array or list or dates
|
||||
N = 50
|
||||
rng = date_range("1/1/1990", periods=N, freq="53s")
|
||||
ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
|
||||
ts.iloc[15:30] = np.nan
|
||||
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
|
||||
|
||||
result = ts.asof(dates)
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
result = ts.asof(list(dates))
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
mask = (result.index >= lb) & (result.index < ub)
|
||||
rs = result[mask]
|
||||
assert (rs == ts[lb]).all()
|
||||
|
||||
val = result[result.index[result.index >= ub][0]]
|
||||
assert ts[ub] == val
|
||||
|
||||
def test_scalar(self):
|
||||
N = 30
|
||||
rng = date_range("1/1/1990", periods=N, freq="53s")
|
||||
# Explicit cast to float avoid implicit cast when setting nan
|
||||
ts = Series(np.arange(N), index=rng, dtype="float")
|
||||
ts.iloc[5:10] = np.nan
|
||||
ts.iloc[15:20] = np.nan
|
||||
|
||||
val1 = ts.asof(ts.index[7])
|
||||
val2 = ts.asof(ts.index[19])
|
||||
|
||||
assert val1 == ts.iloc[4]
|
||||
assert val2 == ts.iloc[14]
|
||||
|
||||
# accepts strings
|
||||
val1 = ts.asof(str(ts.index[7]))
|
||||
assert val1 == ts.iloc[4]
|
||||
|
||||
# in there
|
||||
result = ts.asof(ts.index[3])
|
||||
assert result == ts.iloc[3]
|
||||
|
||||
# no as of value
|
||||
d = ts.index[0] - offsets.BDay()
|
||||
assert np.isnan(ts.asof(d))
|
||||
|
||||
def test_with_nan(self):
|
||||
# basic asof test
|
||||
rng = date_range("1/1/2000", "1/2/2000", freq="4h")
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
r = s.resample("2h").mean()
|
||||
|
||||
result = r.asof(r.index)
|
||||
expected = Series(
|
||||
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
|
||||
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
r.iloc[3:5] = np.nan
|
||||
result = r.asof(r.index)
|
||||
expected = Series(
|
||||
[0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
|
||||
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
r.iloc[-3:] = np.nan
|
||||
result = r.asof(r.index)
|
||||
expected = Series(
|
||||
[0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
|
||||
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_periodindex(self):
|
||||
# array or list or dates
|
||||
N = 50
|
||||
rng = period_range("1/1/1990", periods=N, freq="h")
|
||||
ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
|
||||
ts.iloc[15:30] = np.nan
|
||||
dates = date_range("1/1/1990", periods=N * 3, freq="37min")
|
||||
|
||||
result = ts.asof(dates)
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
result = ts.asof(list(dates))
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
pix = PeriodIndex(result.index.values, freq="h")
|
||||
mask = (pix >= lb) & (pix < ub)
|
||||
rs = result[mask]
|
||||
assert (rs == ts[lb]).all()
|
||||
|
||||
ts.iloc[5:10] = np.nan
|
||||
ts.iloc[15:20] = np.nan
|
||||
|
||||
val1 = ts.asof(ts.index[7])
|
||||
val2 = ts.asof(ts.index[19])
|
||||
|
||||
assert val1 == ts.iloc[4]
|
||||
assert val2 == ts.iloc[14]
|
||||
|
||||
# accepts strings
|
||||
val1 = ts.asof(str(ts.index[7]))
|
||||
assert val1 == ts.iloc[4]
|
||||
|
||||
# in there
|
||||
assert ts.asof(ts.index[3]) == ts.iloc[3]
|
||||
|
||||
# no as of value
|
||||
d = ts.index[0].to_timestamp() - offsets.BDay()
|
||||
assert isna(ts.asof(d))
|
||||
|
||||
# Mismatched freq
|
||||
msg = "Input has different freq"
|
||||
with pytest.raises(IncompatibleFrequency, match=msg):
|
||||
ts.asof(rng.asfreq("D"))
|
||||
|
||||
def test_errors(self):
|
||||
s = Series(
|
||||
[1, 2, 3],
|
||||
index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
|
||||
)
|
||||
|
||||
# non-monotonic
|
||||
assert not s.index.is_monotonic_increasing
|
||||
with pytest.raises(ValueError, match="requires a sorted index"):
|
||||
s.asof(s.index[0])
|
||||
|
||||
# subset with Series
|
||||
N = 10
|
||||
rng = date_range("1/1/1990", periods=N, freq="53s")
|
||||
s = Series(np.random.default_rng(2).standard_normal(N), index=rng)
|
||||
with pytest.raises(ValueError, match="not valid for Series"):
|
||||
s.asof(s.index[0], subset="foo")
|
||||
|
||||
def test_all_nans(self):
|
||||
# GH 15713
|
||||
# series is all nans
|
||||
|
||||
# testing non-default indexes
|
||||
N = 50
|
||||
rng = date_range("1/1/1990", periods=N, freq="53s")
|
||||
|
||||
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
|
||||
result = Series(np.nan, index=rng).asof(dates)
|
||||
expected = Series(np.nan, index=dates)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# testing scalar input
|
||||
date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
|
||||
result = Series(np.nan, index=rng).asof(date)
|
||||
assert isna(result)
|
||||
|
||||
# test name is propagated
|
||||
result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
|
||||
expected = Series(np.nan, index=[4, 5], name="test")
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,683 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
from importlib import reload
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
DatetimeTZDtype,
|
||||
Index,
|
||||
Interval,
|
||||
NaT,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
cut,
|
||||
date_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def rand_str(nchars: int) -> str:
    """
    Generate one pseudo-random string of ASCII letters and digits.

    A new generator seeded with 2 is created on every call, so the result
    depends only on ``nchars``.

    Parameters
    ----------
    nchars : int
        Number of characters to draw.

    Returns
    -------
    str
        String of length ``nchars``.
    """
    RANDS_CHARS = np.array(
        list(string.ascii_letters + string.digits), dtype=(np.str_, 1)
    )
    return "".join(np.random.default_rng(2).choice(RANDS_CHARS, nchars))
|
||||
|
||||
|
||||
class TestAstypeAPI:
    """Tests for argument validation in ``Series.astype``."""

    def test_astype_unitless_dt64_raises(self):
        """Casting to ``datetime64`` without a unit raises ``TypeError``."""
        # GH#47844
        ser = Series(["1970-01-01", "1970-01-01", "1970-01-01"], dtype="datetime64[ns]")
        df = ser.to_frame()

        msg = "Casting to unit-less dtype 'datetime64' is not supported"
        with pytest.raises(TypeError, match=msg):
            ser.astype(np.datetime64)
        with pytest.raises(TypeError, match=msg):
            df.astype(np.datetime64)
        with pytest.raises(TypeError, match=msg):
            ser.astype("datetime64")
        with pytest.raises(TypeError, match=msg):
            df.astype("datetime64")

    def test_arg_for_errors_in_astype(self):
        """An invalid ``errors`` value raises; ``errors="raise"`` is accepted."""
        # see GH#14878
        ser = Series([1, 2, 3])

        msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=msg):
            ser.astype(np.float64, errors=False)

        ser.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A dtype mapping must be keyed by exactly the Series name."""
        # see GH#7271
        ser = Series(range(0, 10, 2), name="abc")

        dt1 = dtype_class({"abc": str})
        result = ser.astype(dt1)
        expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype=object)
        tm.assert_series_equal(result, expected)

        dt2 = dtype_class({"abc": "float64"})
        result = ser.astype(dt2)
        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        tm.assert_series_equal(result, expected)

        dt3 = dtype_class({"abc": str, "def": str})
        msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )
        with pytest.raises(KeyError, match=msg):
            ser.astype(dt3)

        dt4 = dtype_class({0: str})
        with pytest.raises(KeyError, match=msg):
            ser.astype(dt4)

        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            dt5 = dtype_class({}, dtype=object)
        else:
            dt5 = dtype_class({})

        with pytest.raises(KeyError, match=msg):
            ser.astype(dt5)
|
||||
|
||||
|
||||
class TestAstype:
|
||||
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_astype_object_to_dt64_non_nano(self, tz):
    """Cast mixed object values to a microsecond datetime dtype, naive or tz-aware."""
    # GH#55756, GH#54620
    ts = Timestamp("2999-01-01")
    dtype = "M8[us]"
    if tz is not None:
        dtype = f"M8[us, {tz}]"
    vals = [ts, "2999-01-02 03:04:05.678910", 2500]
    ser = Series(vals, dtype=object)
    result = ser.astype(dtype)

    # The 2500 is interpreted as microseconds, consistent with what
    #  we would get if we created DatetimeIndexes from vals[:2] and vals[2:]
    #  and concated the results.
    pointwise = [
        vals[0].tz_localize(tz),
        Timestamp(vals[1], tz=tz),
        to_datetime(vals[2], unit="us", utc=True).tz_convert(tz),
    ]
    exp_vals = [x.as_unit("us").asm8 for x in pointwise]
    exp_arr = np.array(exp_vals, dtype="M8[us]")
    expected = Series(exp_arr, dtype="M8[us]")
    if tz is not None:
        expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_mixed_object_to_dt64tz(self):
|
||||
# pre-2.0 this raised ValueError bc of tz mismatch
|
||||
# xref GH#32581
|
||||
ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific")
|
||||
ts2 = ts.tz_convert("Asia/Tokyo")
|
||||
|
||||
ser = Series([ts, ts2], dtype=object)
|
||||
res = ser.astype("datetime64[ns, Europe/Brussels]")
|
||||
expected = Series(
|
||||
[ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")],
|
||||
dtype="datetime64[ns, Europe/Brussels]",
|
||||
)
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", np.typecodes["All"])
def test_astype_empty_constructor_equality(self, dtype):
    """An empty Series built with ``dtype`` equals an empty Series cast to it.

    The "S", "V", "M" and "m" type codes are skipped.
    """
    # see GH#15524

    if dtype not in (
        "S",
        "V",  # poor support (if any) currently
        "M",
        "m",  # Generic timestamps raise a ValueError. Already tested.
    ):
        init_empty = Series([], dtype=dtype)
        as_type_empty = Series([]).astype(dtype)
        tm.assert_series_equal(init_empty, as_type_empty)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [str, np.str_])
@pytest.mark.parametrize(
    "series",
    [
        Series([string.digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
        Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
    ],
)
def test_astype_str_map(self, dtype, series, using_infer_string):
    """``astype(str)`` gives the same values as mapping ``str`` over the Series."""
    # see GH#4405
    result = series.astype(dtype)
    expected = series.map(str)
    if using_infer_string:
        expected = expected.astype(object)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_float_to_period(self):
|
||||
result = Series([np.nan]).astype("period[D]")
|
||||
expected = Series([NaT], dtype="period[D]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_no_pandas_dtype(self):
|
||||
# https://github.com/pandas-dev/pandas/pull/24866
|
||||
ser = Series([1, 2], dtype="int64")
|
||||
# Don't have NumpyEADtype in the public API, so we use `.array.dtype`,
|
||||
# which is a NumpyEADtype.
|
||||
result = ser.astype(ser.array.dtype)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
    """Casting to a unit-less datetime64/timedelta64 raises ``ValueError``."""
    # see GH#15524, GH#15987
    data = [1]
    ser = Series(data)

    if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
        mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
        request.applymarker(mark)

    msg = (
        rf"The '{dtype.__name__}' dtype has no unit\. "
        rf"Please pass in '{dtype.__name__}\[ns\]' instead."
    )
    with pytest.raises(ValueError, match=msg):
        ser.astype(dtype)
|
||||
|
||||
def test_astype_dt64_to_str(self):
|
||||
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
|
||||
dti = date_range("2012-01-01", periods=3)
|
||||
result = Series(dti).astype(str)
|
||||
expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_dt64tz_to_str(self):
|
||||
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
|
||||
dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
|
||||
result = Series(dti_tz).astype(str)
|
||||
expected = Series(
|
||||
[
|
||||
"2012-01-01 00:00:00-05:00",
|
||||
"2012-01-02 00:00:00-05:00",
|
||||
"2012-01-03 00:00:00-05:00",
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_datetime(self, unit):
|
||||
ser = Series(iNaT, dtype=f"M8[{unit}]", index=range(5))
|
||||
|
||||
ser = ser.astype("O")
|
||||
assert ser.dtype == np.object_
|
||||
|
||||
ser = Series([datetime(2001, 1, 2, 0, 0)])
|
||||
|
||||
ser = ser.astype("O")
|
||||
assert ser.dtype == np.object_
|
||||
|
||||
ser = Series(
|
||||
[datetime(2001, 1, 2, 0, 0) for i in range(3)], dtype=f"M8[{unit}]"
|
||||
)
|
||||
|
||||
ser[1] = np.nan
|
||||
assert ser.dtype == f"M8[{unit}]"
|
||||
|
||||
ser = ser.astype("O")
|
||||
assert ser.dtype == np.object_
|
||||
|
||||
def test_astype_datetime64tz(self):
    """Round-trips and casts of a tz-aware datetime Series.

    Covers casting to object, rebuilding from naive values via
    ``tz_localize``/``tz_convert``, the ``TypeError`` for a naive -> aware
    ``astype``, and conversion to another timezone.
    """
    ser = Series(date_range("20130101", periods=3, tz="US/Eastern"))

    # astype
    result = ser.astype(object)
    expected = Series(ser.astype(object), dtype=object)
    tm.assert_series_equal(result, expected)

    result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz)
    tm.assert_series_equal(result, ser)

    # astype - object, preserves on construction
    result = Series(ser.astype(object))
    expected = ser.astype(object)
    tm.assert_series_equal(result, expected)

    # astype - datetime64[ns, tz]
    msg = "Cannot use .astype to convert from timezone-naive"
    with pytest.raises(TypeError, match=msg):
        # dt64->dt64tz astype deprecated
        Series(ser.values).astype("datetime64[ns, US/Eastern]")

    with pytest.raises(TypeError, match=msg):
        # dt64->dt64tz astype deprecated
        Series(ser.values).astype(ser.dtype)

    result = ser.astype("datetime64[ns, CET]")
    expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_str_cast_dt64(self):
|
||||
# see GH#9757
|
||||
ts = Series([Timestamp("2010-01-04 00:00:00")])
|
||||
res = ts.astype(str)
|
||||
|
||||
expected = Series(["2010-01-04"], dtype=object)
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
|
||||
res = ts.astype(str)
|
||||
|
||||
expected = Series(["2010-01-04 00:00:00-05:00"], dtype=object)
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
def test_astype_str_cast_td64(self):
|
||||
# see GH#9757
|
||||
|
||||
td = Series([Timedelta(1, unit="d")])
|
||||
ser = td.astype(str)
|
||||
|
||||
expected = Series(["1 days"], dtype=object)
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_dt64_series_astype_object(self):
|
||||
dt64ser = Series(date_range("20130101", periods=3))
|
||||
result = dt64ser.astype(object)
|
||||
assert isinstance(result.iloc[0], datetime)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
def test_td64_series_astype_object(self):
|
||||
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
|
||||
result = tdser.astype(object)
|
||||
assert isinstance(result.iloc[0], timedelta)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
@pytest.mark.parametrize(
    "data, dtype",
    [
        (["x", "y", "z"], "string[python]"),
        pytest.param(
            ["x", "y", "z"],
            "string[pyarrow]",
            marks=td.skip_if_no("pyarrow"),
        ),
        (["x", "y", "z"], "category"),
        (3 * [Timestamp("2020-01-01", tz="UTC")], None),
        (3 * [Interval(0, 1)], None),
    ],
)
@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
    """``errors="ignore"`` returns the input unchanged; otherwise the cast raises."""
    # https://github.com/pandas-dev/pandas/issues/35471
    ser = Series(data, dtype=dtype)

    if errors != "ignore":
        msg = "(Cannot cast)|(could not convert)"
        with pytest.raises((ValueError, TypeError), match=msg):
            ser.astype(float, errors=errors)
        return

    unchanged = ser.astype(float, errors="ignore")
    tm.assert_series_equal(unchanged, ser)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_to_str(self, dtype):
    """A float 0.1 of any width is rendered as the text "0.1"."""
    # https://github.com/pandas-dev/pandas/issues/36451
    as_text = Series([0.1], dtype=dtype).astype(str)
    wanted = Series(["0.1"], dtype=object)
    tm.assert_series_equal(as_text, wanted)
|
||||
|
||||
@pytest.mark.parametrize(
    "value, string_value",
    [
        (None, "None"),
        (np.nan, "nan"),
        (NA, "<NA>"),
    ],
)
def test_astype_to_str_preserves_na(self, value, string_value):
    """Each missing-value marker is cast to its own string representation."""
    # https://github.com/pandas-dev/pandas/issues/36904
    raw = ["a", "b", value]
    wanted = ["a", "b", string_value]

    converted = Series(raw, dtype=object).astype(str)
    tm.assert_series_equal(converted, Series(wanted, dtype=object))
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
    """The cast result carries the requested dtype and keeps the Series name."""
    source = Series(np.random.default_rng(2).standard_normal(5), name="foo")
    converted = source.astype(dtype)

    assert converted.dtype == dtype
    assert converted.name == source.name
|
||||
|
||||
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
    """Casting NaN or inf to an integer dtype raises ValueError."""
    # gh-14265: check NaN and inf raise error when converting to int
    non_finite = Series([value])
    msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"

    with pytest.raises(ValueError, match=msg):
        non_finite.astype(dtype)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
    """Non-numeric strings cannot be cast to an integer dtype."""
    words = Series(["car", "house", "tree", "1"])
    msg = r"invalid literal for int\(\) with base 10: 'car'"
    with pytest.raises(ValueError, match=msg):
        words.astype(dtype)
|
||||
|
||||
def test_astype_float_to_uint_negatives_raise(
    self, float_numpy_dtype, any_unsigned_int_numpy_dtype
):
    """Negative floats raise when cast to unsigned ints, for every container."""
    # GH#45151 We don't cast negative numbers to nonsense values
    # TODO: same for EA float/uint dtypes, signed integers?
    values = np.arange(5).astype(float_numpy_dtype) - 3  # includes negatives
    ser = Series(values)
    target = any_unsigned_int_numpy_dtype
    msg = "Cannot losslessly cast from .* to .*"

    # Same data wrapped as Series, DataFrame, Index and the backing array.
    # We currently catch and re-raise in Index.astype
    containers = (ser, ser.to_frame(), Index(ser), ser.array)
    for obj in containers:
        with pytest.raises(ValueError, match=msg):
            obj.astype(target)
|
||||
|
||||
def test_astype_cast_object_int(self):
|
||||
arr = Series(["1", "2", "3", "4"], dtype=object)
|
||||
result = arr.astype(int)
|
||||
|
||||
tm.assert_series_equal(result, Series(np.arange(1, 5)))
|
||||
|
||||
def test_astype_unicode(self, using_infer_string):
    """
    Cast text and UTF-8 encoded bytes to ``np.str_`` (GH#7758, GH#45326).

    The original ended with a "restore the former encoding" branch guarded by
    ``former_encoding is not None``. ``former_encoding`` was never assigned
    anything but ``None``, and the branch called ``reload`` and
    ``sys.setdefaultencoding``, which do not exist on Python 3, so that dead
    code has been removed.
    """
    digits = string.digits
    test_series = [
        Series([digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
        Series(["データーサイエンス、お前はもう死んでいる"]),
    ]

    if sys.getdefaultencoding() == "utf-8":
        # GH#45326 as of 2.0 Series.astype matches Index.astype by handling
        # bytes with obj.decode() instead of str(obj)
        item = "野菜食べないとやばい"
        ser = Series([item.encode()])
        result = ser.astype(np.str_)
        expected = Series([item], dtype=object)
        tm.assert_series_equal(result, expected)

    for ser in test_series:
        res = ser.astype(np.str_)
        expec = ser.map(str)
        if using_infer_string:
            # normalise the mapped result to object dtype before comparing
            expec = expec.astype(object)
        tm.assert_series_equal(res, expec)
|
||||
|
||||
def test_astype_bytes(self):
|
||||
# GH#39474
|
||||
result = Series(["foo", "bar", "baz"]).astype(bytes)
|
||||
assert result.dtypes == np.dtype("S3")
|
||||
|
||||
def test_astype_nan_to_bool(self):
|
||||
# GH#43018
|
||||
ser = Series(np.nan, dtype="object")
|
||||
result = ser.astype("bool")
|
||||
expected = Series(True, dtype="bool")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype",
    tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES,
)
def test_astype_ea_to_datetimetzdtype(self, dtype):
    """Masked int/float values cast to tz-aware datetimes as epoch nanoseconds."""
    # GH37553
    numbers = Series([4, 0, 9], dtype=dtype)
    converted = numbers.astype(DatetimeTZDtype(tz="US/Pacific"))

    four_ns = Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific")
    zero_ns = Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific")
    nine_ns = Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific")
    wanted = Series({0: four_ns, 1: zero_ns, 2: nine_ns})

    tm.assert_series_equal(converted, wanted)
|
||||
|
||||
def test_astype_retain_attrs(self, any_numpy_dtype):
|
||||
# GH#44414
|
||||
ser = Series([0, 1, 2, 3])
|
||||
ser.attrs["Location"] = "Michigan"
|
||||
|
||||
result = ser.astype(any_numpy_dtype).attrs
|
||||
expected = ser.attrs
|
||||
|
||||
tm.assert_dict_equal(expected, result)
|
||||
|
||||
|
||||
class TestAstypeString:
    """Round trips between extension dtypes and the nullable string dtypes."""

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([True, NA], "boolean"),
            (["A", NA], "category"),
            (["2020-10-10", "2020-10-10"], "datetime64[ns]"),
            (["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"),
            (
                ["2012-01-01 00:00:00-05:00", NaT],
                "datetime64[ns, US/Eastern]",
            ),
            ([1, None], "UInt16"),
            (["1/1/2021", "2/1/2021"], "period[M]"),
            (["1/1/2021", "2/1/2021", NaT], "period[M]"),
            (["1 Day", "59 Days", NaT], "timedelta64[ns]"),
            # currently no way to parse IntervalArray from a list of strings
        ],
    )
    def test_astype_string_to_extension_dtype_roundtrip(
        self, data, dtype, request, nullable_string_dtype
    ):
        """Cast to a nullable string dtype and back; expect the original Series."""
        if dtype == "boolean":
            request.applymarker(
                pytest.mark.xfail(
                    reason="TODO StringArray.astype() with missing values #GH40566"
                )
            )
        # GH-40351
        original = Series(data, dtype=dtype)

        # Note: just passing .astype(dtype) fails for dtype="category"
        # with bc ser.dtype.categories will be object dtype whereas
        # result.dtype.categories will have string dtype
        as_string = original.astype(nullable_string_dtype)
        restored = as_string.astype(original.dtype)
        tm.assert_series_equal(restored, original)
|
||||
|
||||
|
||||
class TestAstypeCategorical:
    """Tests for ``Series.astype`` to and from categorical dtypes."""

    def test_astype_categorical_to_other(self):
        """Cast a binned categorical Series to category, str, int and object."""
        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
        ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)

        expected = ser
        tm.assert_series_equal(ser.astype("category"), expected)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
        # Group the alternation: without parentheses the pattern reads as
        # "Cannot cast object" OR "string dtype to float64", which is looser
        # than the full message this test is meant to pin.
        msg = r"Cannot cast (object|string) dtype to float64"
        with pytest.raises(ValueError, match=msg):
            ser.astype("float64")

        cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
        tm.assert_series_equal(cat.astype("str"), exp)
        s2 = Series(Categorical(["1", "2", "3", "4"]))
        exp2 = Series([1, 2, 3, 4]).astype("int")
        tm.assert_series_equal(s2.astype("int"), exp2)

        # object don't sort correctly, so just compare that we have the same
        # values
        def cmp(a, b):
            tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))

        expected = Series(np.array(ser.values), name="value_group")
        cmp(ser.astype("object"), expected)
        cmp(ser.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(ser), np.array(ser.values))

        tm.assert_series_equal(ser.astype("category"), ser)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)

        # Going through object dtype is compared against the sorted, used-only
        # categories of the original.
        roundtrip_expected = ser.cat.set_categories(
            ser.cat.categories.sort_values()
        ).cat.remove_unused_categories()
        result = ser.astype("object").astype("category")
        tm.assert_series_equal(result, roundtrip_expected)
        result = ser.astype("object").astype(CategoricalDtype())
        tm.assert_series_equal(result, roundtrip_expected)

    def test_astype_categorical_invalid_conversions(self):
        """Passing the ``Categorical`` class itself as a dtype raises TypeError."""
        # invalid conversion (these are NOT a dtype)
        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
        ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)

        msg = (
            "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
            "not understood"
        )
        with pytest.raises(TypeError, match=msg):
            ser.astype(Categorical)
        with pytest.raises(TypeError, match=msg):
            ser.astype("object").astype(Categorical)

    def test_astype_categoricaldtype(self):
        """Cast strings to explicit CategoricalDtypes (ordered, unordered, extra)."""
        ser = Series(["a", "b", "a"])
        result = ser.astype(CategoricalDtype(["a", "b"], ordered=True))
        expected = Series(Categorical(["a", "b", "a"], ordered=True))
        tm.assert_series_equal(result, expected)

        result = ser.astype(CategoricalDtype(["a", "b"], ordered=False))
        expected = Series(Categorical(["a", "b", "a"], ordered=False))
        tm.assert_series_equal(result, expected)

        result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        expected = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """Cast between categorical dtypes with differing order and categories."""
        # GH#10696, GH#18593
        s_data = list("abcaacbab")
        s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(s_data, dtype=s_dtype, name=name)

        # unspecified categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = ser.astype(dtype)
        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
        expected = Series(s_data, name=name, dtype=exp_dtype)
        tm.assert_series_equal(result, expected)

        # different categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(dtype)
        expected = Series(s_data, name=name, dtype=dtype)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            expected = ser
            result = ser.astype("category")
            tm.assert_series_equal(result, expected)

    def test_astype_bool_missing_to_categorical(self):
        """Booleans with a missing value cast to a two-category categorical."""
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        result = ser.astype(CategoricalDtype(categories=[True, False]))
        expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
        tm.assert_series_equal(result, expected)

    def test_astype_categories_raises(self):
        """``astype`` rejects ``categories`` / ``ordered`` keyword arguments."""
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)

    @pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]])
    def test_astype_from_categorical(self, items):
        """``astype("category")`` matches wrapping the items in ``Categorical``."""
        ser = Series(items)
        exp = Series(Categorical(items))
        res = ser.astype("category")
        tm.assert_series_equal(res, exp)

    def test_astype_from_categorical_with_keywords(self):
        """Cast with a CategoricalDtype carrying ``ordered`` and categories."""
        # with keywords
        lst = ["a", "b", "c", "a"]
        ser = Series(lst)
        exp = Series(Categorical(lst, ordered=True))
        res = ser.astype(CategoricalDtype(None, ordered=True))
        tm.assert_series_equal(res, exp)

        exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
        res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True))
        tm.assert_series_equal(res, exp)

    def test_astype_timedelta64_with_np_nan(self):
        """``np.nan`` in timedelta64 construction compares equal to ``NaT``."""
        # GH45798
        result = Series([Timedelta(1), np.nan], dtype="timedelta64[ns]")
        expected = Series([Timedelta(1), NaT], dtype="timedelta64[ns]")
        tm.assert_series_equal(result, expected)

    @td.skip_if_no("pyarrow")
    def test_astype_int_na_string(self):
        """A pyarrow-backed integer Series with NA casts to ``string[pyarrow]``."""
        # GH#57418
        ser = Series([12, NA], dtype="Int64[pyarrow]")
        result = ser.astype("string[pyarrow]")
        expected = Series(["12", NA], dtype="string[pyarrow]")
        tm.assert_series_equal(result, expected)
|
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TestAutoCorr:
|
||||
def test_autocorr(self, datetime_series):
|
||||
# Just run the function
|
||||
corr1 = datetime_series.autocorr()
|
||||
|
||||
# Now run it with the lag parameter
|
||||
corr2 = datetime_series.autocorr(lag=1)
|
||||
|
||||
# corr() with lag needs Series of at least length 2
|
||||
if len(datetime_series) <= 2:
|
||||
assert np.isnan(corr1)
|
||||
assert np.isnan(corr2)
|
||||
else:
|
||||
assert corr1 == corr2
|
||||
|
||||
# Choose a random lag between 1 and length of Series - 2
|
||||
# and compare the result with the Series corr() function
|
||||
n = 1 + np.random.default_rng(2).integers(max(1, len(datetime_series) - 2))
|
||||
corr1 = datetime_series.corr(datetime_series.shift(n))
|
||||
corr2 = datetime_series.autocorr(lag=n)
|
||||
|
||||
# corr() with lag needs Series of at least length 2
|
||||
if len(datetime_series) <= 2:
|
||||
assert np.isnan(corr1)
|
||||
assert np.isnan(corr2)
|
||||
else:
|
||||
assert corr1 == corr2
|
@ -0,0 +1,75 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
bdate_range,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestBetween:
    """Tests for ``Series.between``."""

    def test_between(self):
        """Default ``between`` is inclusive on both ends."""
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        result = series.between(left, right)
        expected = (series >= left) & (series <= right)
        tm.assert_series_equal(result, expected)

    def test_between_datetime_object_dtype(self):
        """``between`` on object-dtype datetimes with NaN in every other slot."""
        ser = Series(bdate_range("1/1/2000", periods=20), dtype=object)
        ser[::2] = np.nan

        result = ser[ser.between(ser[3], ser[17])]
        expected = ser[3:18].dropna()
        tm.assert_series_equal(result, expected)

        result = ser[ser.between(ser[3], ser[17], inclusive="neither")]
        expected = ser[5:16].dropna()
        tm.assert_series_equal(result, expected)

    def test_between_period_values(self):
        """``between`` works on Period values."""
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))
        left, right = ser[[2, 7]]
        result = ser.between(left, right)
        expected = (ser >= left) & (ser <= right)
        tm.assert_series_equal(result, expected)

    def test_between_inclusive_string(self):
        """Each ``inclusive`` string selects the matching comparison operators."""
        # GH 40628
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        result = series.between(left, right, inclusive="both")
        expected = (series >= left) & (series <= right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="left")
        expected = (series >= left) & (series < right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="right")
        expected = (series > left) & (series <= right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="neither")
        expected = (series > left) & (series < right)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("inclusive", ["yes", True, False])
    def test_between_error_args(self, inclusive):
        """An unsupported ``inclusive`` value raises ValueError."""
        # GH 40628
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        value_error_msg = (
            "Inclusive has to be either string of 'both',"
            "'left', 'right', or 'neither'."
        )

        # Only the call under test sits inside the raises block, so an
        # unrelated ValueError from set-up code cannot satisfy the assertion.
        with pytest.raises(ValueError, match=value_error_msg):
            series.between(left, right, inclusive=inclusive)
|
@ -0,0 +1,148 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
array as pd_array,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """
    Two-column integer frame ("a" and "b") used by the case_when tests
    """
    columns = {"a": [1, 2, 3], "b": [4, 5, 6]}
    return DataFrame(columns)
|
||||
|
||||
|
||||
def test_case_when_caselist_is_not_a_list(df):
    """
    Raise TypeError if caselist is not a list.

    The caselist passed here is an empty tuple.
    """
    msg = "The caselist argument should be a list; "
    msg += "instead got.+"
    with pytest.raises(TypeError, match=msg):  # GH39154
        df["a"].case_when(caselist=())
|
||||
|
||||
|
||||
def test_case_when_no_caselist(df):
    """
    An empty caselist raises ValueError.
    """
    msg = (
        "provide at least one boolean condition, "
        "with a corresponding replacement."
    )
    column = df["a"]
    with pytest.raises(ValueError, match=msg):  # GH39154
        column.case_when([])
|
||||
|
||||
|
||||
def test_case_when_odd_caselist(df):
    """
    A caselist entry with three items instead of a
    (condition, replacement) pair raises ValueError.
    """
    msg = (
        "Argument 0 must have length 2; "
        "a condition and replacement; instead got length 3."
    )
    three_item_entry = (df["a"].eq(1), 1, df.a.gt(1))

    with pytest.raises(ValueError, match=msg):
        df["a"].case_when([three_item_entry])
|
||||
|
||||
|
||||
def test_case_when_raise_error_from_mask(df):
    """
    A two-item replacement for a three-row Series raises ValueError
    naming the failing condition/replacement pair.
    """
    msg = "Failed to apply condition0 and replacement0."
    caselist = [(df["a"].eq(1), [1, 2])]
    with pytest.raises(ValueError, match=msg):
        df["a"].case_when(caselist)
|
||||
|
||||
|
||||
def test_case_when_single_condition(df):
|
||||
"""
|
||||
Test output on a single condition.
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when([(df.a.eq(1), 1)])
|
||||
expected = Series([1, np.nan, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions(df):
|
||||
"""
|
||||
Test output when booleans are derived from a computation
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[(df.a.eq(1), 1), (Series([False, True, False]), 2)]
|
||||
)
|
||||
expected = Series([1, 2, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions_replacement_list(df):
|
||||
"""
|
||||
Test output when replacement is a list
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[([True, False, False], 1), (df["a"].gt(1) & df["b"].eq(5), [1, 2, 3])]
|
||||
)
|
||||
expected = Series([1, 2, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions_replacement_extension_dtype(df):
|
||||
"""
|
||||
Test output when replacement has an extension dtype
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[
|
||||
([True, False, False], 1),
|
||||
(df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")),
|
||||
],
|
||||
)
|
||||
expected = Series([1, 2, np.nan], dtype="Float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions_replacement_series(df):
|
||||
"""
|
||||
Test output when replacement is a Series
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[
|
||||
(np.array([True, False, False]), 1),
|
||||
(df["a"].gt(1) & df["b"].eq(5), Series([1, 2, 3])),
|
||||
],
|
||||
)
|
||||
expected = Series([1, 2, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_non_range_index():
|
||||
"""
|
||||
Test output if index is not RangeIndex
|
||||
"""
|
||||
rng = np.random.default_rng(seed=123)
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df = DataFrame(
|
||||
rng.standard_normal(size=(8, 4)), index=dates, columns=["A", "B", "C", "D"]
|
||||
)
|
||||
result = Series(5, index=df.index, name="A").case_when([(df.A.gt(0), df.B)])
|
||||
expected = df.A.mask(df.A.gt(0), df.B).where(df.A.gt(0), 5)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_callable():
|
||||
"""
|
||||
Test output on a callable
|
||||
"""
|
||||
# https://numpy.org/doc/stable/reference/generated/numpy.piecewise.html
|
||||
x = np.linspace(-2.5, 2.5, 6)
|
||||
ser = Series(x)
|
||||
result = ser.case_when(
|
||||
caselist=[
|
||||
(lambda df: df < 0, lambda df: -df),
|
||||
(lambda df: df >= 0, lambda df: df),
|
||||
]
|
||||
)
|
||||
expected = np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x])
|
||||
tm.assert_series_equal(result, Series(expected))
|
@ -0,0 +1,146 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesClip:
    """Tests for ``Series.clip``."""

    def test_clip(self, datetime_series):
        """Clip at the median and at fixed bounds; compare with ``np.clip``."""
        median = datetime_series.median()

        floor_clipped = datetime_series.clip(lower=median)
        assert floor_clipped.min() == median
        ceiling_clipped = datetime_series.clip(upper=median)
        assert ceiling_clipped.max() == median

        result = datetime_series.clip(-0.5, 0.5)
        expected = np.clip(datetime_series, -0.5, 0.5)
        tm.assert_series_equal(result, expected)
        assert isinstance(expected, Series)

    def test_clip_types_and_nulls(self):
        """Clipping float, string and datetime data leaves the nulls in place."""
        candidates = (
            Series([np.nan, 1.0, 2.0, 3.0]),
            Series([None, "a", "b", "c"]),
            Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
        )

        for ser in candidates:
            thresh = ser[2]
            floor_clipped = ser.clip(lower=thresh)
            ceiling_clipped = ser.clip(upper=thresh)
            assert floor_clipped[notna(floor_clipped)].min() == thresh
            assert ceiling_clipped[notna(ceiling_clipped)].max() == thresh
            null_positions = list(isna(ser))
            assert null_positions == list(isna(floor_clipped))
            assert null_positions == list(isna(ceiling_clipped))

    def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
        """Clip a nullable numeric Series that contains a missing value."""
        # Ensure that clipping method can handle NA values with out failing
        # GH#40581

        if nulls_fixture is pd.NaT:
            # constructor will raise, see
            # test_constructor_mismatched_null_nullable_dtype
            pytest.skip("See test_constructor_mismatched_null_nullable_dtype")

        def build(values):
            return Series(values, dtype=any_numeric_ea_dtype)

        ser = build([nulls_fixture, 1.0, 3.0])
        s_clipped_upper = ser.clip(upper=2.0)
        s_clipped_lower = ser.clip(lower=2.0)

        expected_upper = build([nulls_fixture, 1.0, 2.0])
        expected_lower = build([nulls_fixture, 2.0, 3.0])

        tm.assert_series_equal(s_clipped_upper, expected_upper)
        tm.assert_series_equal(s_clipped_lower, expected_lower)

    def test_clip_with_na_args(self):
        """Should process np.nan argument as None"""
        # GH#17276
        s = Series([1, 2, 3])
        untouched = Series([1, 2, 3])

        tm.assert_series_equal(s.clip(np.nan), untouched)
        tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))

        # GH#19992
        msg = "Downcasting behavior in Series and DataFrame methods 'where'"
        # TODO: avoid this warning here? seems like we should never be upcasting
        # in the first place?
        list_bound_cases = (
            ({"lower": [0, 4, np.nan]}, [1, 4, 3]),
            ({"upper": [1, np.nan, 1]}, [1, 2, 1]),
        )
        for bounds, wanted in list_bound_cases:
            with tm.assert_produces_warning(FutureWarning, match=msg):
                res = s.clip(**bounds)
            tm.assert_series_equal(res, Series(wanted))

        # GH#40420
        s = Series([1, 2, 3])
        all_nan_upper = [np.nan, np.nan, np.nan]
        result = s.clip(0, all_nan_upper)
        tm.assert_series_equal(s, result)

    def test_clip_against_series(self):
        """Bounds given as Series are applied element-wise."""
        # GH#6966
        data = Series([1.0, 1.0, 4.0])
        lower = Series([1.0, 2.0, 3.0])
        upper = Series([1.5, 2.5, 3.5])

        both_series = data.clip(lower, upper)
        tm.assert_series_equal(both_series, Series([1.0, 2.0, 3.5]))

        scalar_lower = data.clip(1.5, upper)
        tm.assert_series_equal(scalar_lower, Series([1.5, 1.5, 3.5]))

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
    def test_clip_against_list_like(self, inplace, upper):
        """An upper bound given as a list or ndarray, in place or not."""
        # GH#15390
        original = Series([5, 6, 7])
        returned = original.clip(upper=upper, inplace=inplace)
        expected = Series([1, 2, 3])

        # With inplace=True the clipped data lives in ``original`` itself.
        actual = original if inplace else returned
        tm.assert_series_equal(actual, expected, check_exact=True)

    def test_clip_with_datetimes(self):
        """Clip naive and tz-aware datetimes against a Timestamp upper bound."""
        # GH#11838
        # naive and tz-aware datetimes
        for tz in (None, "US/Eastern"):
            t = Timestamp("2015-12-01 09:30:30", tz=tz)
            s = Series(
                [
                    Timestamp("2015-12-01 09:30:00", tz=tz),
                    Timestamp("2015-12-01 09:31:00", tz=tz),
                ]
            )
            result = s.clip(upper=t)
            expected = Series(
                [
                    Timestamp("2015-12-01 09:30:00", tz=tz),
                    Timestamp("2015-12-01 09:30:30", tz=tz),
                ]
            )
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [object, "M8[us]"])
    def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
        """Datetimes beyond Timestamp.min/max are clipped to those limits."""
        # GH-42794
        extremes = [datetime(1, 1, 1), datetime(9999, 9, 9)]
        ser = Series(extremes, dtype=dtype)

        result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
        expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)

        tm.assert_series_equal(result, expected)
|
@ -0,0 +1,17 @@
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCombine:
    """Tests for ``Series.combine`` with a scalar second operand."""

    def test_combine_scalar(self):
        """Combine each element with a scalar via addition and via ``min``."""
        # GH#21248
        # Note - combine() with another Series is tested elsewhere because
        # it is used when testing operators
        multiples = [i * 10 for i in range(5)]
        ser = Series(multiples)

        added = ser.combine(3, lambda x, y: x + y)
        tm.assert_series_equal(added, Series([value + 3 for value in multiples]))

        capped = ser.combine(22, lambda x, y: min(x, y))
        tm.assert_series_equal(capped, Series([min(value, 22) for value in multiples]))
|
@ -0,0 +1,149 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Period,
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCombineFirst:
|
||||
def test_combine_first_period_datetime(self):
|
||||
# GH#3367
|
||||
didx = date_range(start="1950-01-31", end="1950-07-31", freq="ME")
|
||||
pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M")
|
||||
# check to be consistent with DatetimeIndex
|
||||
for idx in [didx, pidx]:
|
||||
a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
|
||||
b = Series([9, 9, 9, 9, 9, 9, 9], index=idx)
|
||||
|
||||
result = a.combine_first(b)
|
||||
expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_combine_first_name(self, datetime_series):
|
||||
result = datetime_series.combine_first(datetime_series[:5])
|
||||
assert result.name == datetime_series.name
|
||||
|
||||
def test_combine_first(self):
|
||||
values = np.arange(20, dtype=np.float64)
|
||||
series = Series(values, index=np.arange(20, dtype=np.int64))
|
||||
|
||||
series_copy = series * 2
|
||||
series_copy[::2] = np.nan
|
||||
|
||||
# nothing used from the input
|
||||
combined = series.combine_first(series_copy)
|
||||
|
||||
tm.assert_series_equal(combined, series)
|
||||
|
||||
# Holes filled from input
|
||||
combined = series_copy.combine_first(series)
|
||||
assert np.isfinite(combined).all()
|
||||
|
||||
tm.assert_series_equal(combined[::2], series[::2])
|
||||
tm.assert_series_equal(combined[1::2], series_copy[1::2])
|
||||
|
||||
# mixed types
|
||||
index = pd.Index([str(i) for i in range(20)])
|
||||
floats = Series(np.random.default_rng(2).standard_normal(20), index=index)
|
||||
strings = Series([str(i) for i in range(10)], index=index[::2], dtype=object)
|
||||
|
||||
combined = strings.combine_first(floats)
|
||||
|
||||
tm.assert_series_equal(strings, combined.loc[index[::2]])
|
||||
tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
|
||||
|
||||
# corner case
|
||||
ser = Series([1.0, 2, 3], index=[0, 1, 2])
|
||||
empty = Series([], index=[], dtype=object)
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.combine_first(empty)
|
||||
ser.index = ser.index.astype("O")
|
||||
tm.assert_series_equal(ser, result)
|
||||
|
||||
def test_combine_first_dt64(self, unit):
|
||||
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
|
||||
s1 = to_datetime(Series([np.nan, "2011"])).dt.as_unit(unit)
|
||||
rs = s0.combine_first(s1)
|
||||
xp = to_datetime(Series(["2010", "2011"])).dt.as_unit(unit)
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
|
||||
s1 = Series([np.nan, "2011"])
|
||||
rs = s0.combine_first(s1)
|
||||
|
||||
xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]")
|
||||
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
|
||||
ser1 = Series(
|
||||
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
|
||||
name="ser1",
|
||||
)
|
||||
ser2 = Series(
|
||||
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
|
||||
index=[2, 3, 4],
|
||||
name="ser2",
|
||||
)
|
||||
result = ser1.combine_first(ser2)
|
||||
exp_vals = pd.DatetimeIndex(
|
||||
["20150101", "20150102", "20150103", "20160515", "20160516"],
|
||||
tz=tz_naive_fixture,
|
||||
)
|
||||
exp = Series(exp_vals, name="ser1")
|
||||
tm.assert_series_equal(exp, result)
|
||||
|
||||
def test_combine_first_timezone_series_with_empty_series(self):
|
||||
# GH 41800
|
||||
time_index = date_range(
|
||||
datetime(2021, 1, 1, 1),
|
||||
datetime(2021, 1, 1, 10),
|
||||
freq="h",
|
||||
tz="Europe/Rome",
|
||||
)
|
||||
s1 = Series(range(10), index=time_index)
|
||||
s2 = Series(index=time_index)
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = s1.combine_first(s2)
|
||||
tm.assert_series_equal(result, s1)
|
||||
|
||||
def test_combine_first_preserves_dtype(self):
|
||||
# GH51764
|
||||
s1 = Series([1666880195890293744, 1666880195890293837])
|
||||
s2 = Series([1, 2, 3])
|
||||
result = s1.combine_first(s2)
|
||||
expected = Series([1666880195890293744, 1666880195890293837, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_combine_mixed_timezone(self):
|
||||
# GH 26283
|
||||
uniform_tz = Series({pd.Timestamp("2019-05-01", tz="UTC"): 1.0})
|
||||
multi_tz = Series(
|
||||
{
|
||||
pd.Timestamp("2019-05-01 01:00:00+0100", tz="Europe/London"): 2.0,
|
||||
pd.Timestamp("2019-05-02", tz="UTC"): 3.0,
|
||||
}
|
||||
)
|
||||
|
||||
result = uniform_tz.combine_first(multi_tz)
|
||||
expected = Series(
|
||||
[1.0, 3.0],
|
||||
index=pd.Index(
|
||||
[
|
||||
pd.Timestamp("2019-05-01 00:00:00+00:00", tz="UTC"),
|
||||
pd.Timestamp("2019-05-02 00:00:00+00:00", tz="UTC"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,141 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
def test_compare_axis(align_axis):
    """``Series.compare`` gives a DataFrame for axis 1/"columns", else a Series."""
    # GH#30429
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, align_axis=align_axis)

    if align_axis not in (1, "columns"):
        # Differences stacked under a (label, self/other) MultiIndex.
        stacked = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
        expected = pd.Series(["a", "x", "c", "z"], index=stacked)
        tm.assert_series_equal(result, expected)
        return

    expected = pd.DataFrame(
        [["a", "x"], ["c", "z"]],
        index=pd.Index([0, 2]),
        columns=pd.Index(["self", "other"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep_shape, keep_equal",
    [
        (True, False),
        (False, True),
        (True, True),
        # False, False case is already covered in test_compare_axis
    ],
)
def test_compare_various_formats(keep_shape, keep_equal):
    """Check the ``compare`` output for each ``keep_shape``/``keep_equal`` pair."""
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, keep_shape=keep_shape, keep_equal=keep_equal)

    if not keep_shape:
        # Only the differing rows are expected.
        labels = [0, 2]
        rows = [["a", "x"], ["c", "z"]]
    elif keep_equal:
        # All rows, with the equal row showing its values.
        labels = [0, 1, 2]
        rows = [["a", "x"], ["b", "b"], ["c", "z"]]
    else:
        # All rows, with the equal row shown as NaN.
        labels = [0, 1, 2]
        rows = [["a", "x"], [np.nan, np.nan], ["c", "z"]]

    expected = pd.DataFrame(
        rows, index=pd.Index(labels), columns=pd.Index(["self", "other"])
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_with_equal_nulls():
    """Two NaNs at the same position count as equal and are dropped."""
    # We want to make sure two NaNs are considered the same
    # and dropped where applicable
    left = pd.Series(["a", "b", np.nan])
    right = pd.Series(["x", "b", np.nan])

    expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
    tm.assert_frame_equal(left.compare(right), expected)
|
||||
|
||||
|
||||
def test_compare_with_non_equal_nulls():
    """A NaN facing a real value is a difference and stays in the output."""
    # We want to make sure the relevant NaNs do not get dropped
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", np.nan])

    result = left.compare(right, align_axis=0)

    stacked = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
    expected = pd.Series(["a", "x", "c", np.nan], index=stacked)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_multi_index():
    """With a two-level index, ``align_axis=0`` appends a self/other level."""
    two_level = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
    left = pd.Series(["a", "b", "c"], index=two_level)
    right = pd.Series(["x", "b", "z"], index=two_level)

    result = left.compare(right, align_axis=0)

    outer = [0, 0, 1, 1]
    inner = [0, 0, 2, 2]
    side = ["self", "other", "self", "other"]
    expected = pd.Series(
        ["a", "x", "c", "z"], index=pd.MultiIndex.from_arrays([outer, inner, side])
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_unaligned_objects():
    """``Series.compare`` raises ``ValueError`` for differently-labeled Series.

    The inputs are built outside each ``pytest.raises`` block so that only the
    ``compare`` call itself can satisfy the expected exception.
    """
    msg = "Can only compare identically-labeled Series objects"

    # test Series with different indices
    ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
    ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)

    # test Series with different lengths
    ser1 = pd.Series([1, 2, 3])
    ser2 = pd.Series([1, 2, 3, 4])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)
|
||||
|
||||
|
||||
def test_compare_datetime64_and_string():
    """Comparing a datetime64 column with a string column is all-False / all-True."""
    # Issue https://github.com/pandas-dev/pandas/issues/45506
    # Catch OverflowError when comparing datetime64 and string
    dates = ["2015-07-01", "2015-07-02", "2015-07-03", "2015-07-04", "2015-07-05"]
    texts = [
        "08335394550",
        "+49 (0) 0345 300033",
        "+49(0)2598 04457",
        "0741470003",
        "04181 83668",
    ]
    records = [{"a": day, "b": text} for day, text in zip(dates, texts)]
    df = pd.DataFrame(data=records).astype({"a": "datetime64[ns]", "b": "string"})

    col_a = df["a"]
    col_b = df["b"]
    result_eq1 = col_a.eq(col_b)
    result_eq2 = col_a == col_b
    result_neq = col_a != col_b

    all_false = pd.Series([False] * 5)  # For .eq and ==
    all_true = pd.Series([True] * 5)  # For !=

    tm.assert_series_equal(result_eq1, all_false)
    tm.assert_series_equal(result_eq2, all_false)
    tm.assert_series_equal(result_neq, all_true)
|
@ -0,0 +1,306 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import lib
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
# Each test case consists of a tuple with the data and dtype to create the
|
||||
# test Series, the default dtype for the expected result (which is valid
|
||||
# for most cases), and the specific cases where the result deviates from
|
||||
# this default. Those overrides are defined as a dict with (keyword, val) as
|
||||
# dictionary key. In case of multiple items, the last override takes precedence.
|
||||
|
||||
|
||||
def _naive_datetimes(unit):
    """Return the two sample timestamps as a ``DatetimeIndex`` converted to *unit*."""
    return pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit(unit)


# Each entry is (data, original dtype, default expected dtype,
# exceptions on expected dtype).
_CONVERT_DTYPES_CASES = [
    (
        [1, 2, 3],
        np.dtype("int32"),
        "Int32",
        {("convert_integer", False): np.dtype("int32")},
    ),
    (
        [1, 2, 3],
        np.dtype("int64"),
        "Int64",
        {("convert_integer", False): np.dtype("int64")},
    ),
    (
        ["x", "y", "z"],
        np.dtype("O"),
        pd.StringDtype(),
        {("convert_string", False): np.dtype("O")},
    ),
    (
        [True, False, np.nan],
        np.dtype("O"),
        pd.BooleanDtype(),
        {("convert_boolean", False): np.dtype("O")},
    ),
    (
        ["h", "i", np.nan],
        np.dtype("O"),
        pd.StringDtype(),
        {("convert_string", False): np.dtype("O")},
    ),
    # GH32117
    (["h", "i", 1], np.dtype("O"), np.dtype("O"), {}),
    (
        [10, np.nan, 20],
        np.dtype("float"),
        "Int64",
        {
            ("convert_integer", False, "convert_floating", True): "Float64",
            ("convert_integer", False, "convert_floating", False): np.dtype(
                "float"
            ),
        },
    ),
    (
        [np.nan, 100.5, 200],
        np.dtype("float"),
        "Float64",
        {("convert_floating", False): np.dtype("float")},
    ),
    ([3, 4, 5], "Int8", "Int8", {}),
    ([[1, 2], [3, 4], [5]], None, np.dtype("O"), {}),
    (
        [4, 5, 6],
        np.dtype("uint32"),
        "UInt32",
        {("convert_integer", False): np.dtype("uint32")},
    ),
    (
        [-10, 12, 13],
        np.dtype("i1"),
        "Int8",
        {("convert_integer", False): np.dtype("i1")},
    ),
    (
        [1.2, 1.3],
        np.dtype("float32"),
        "Float32",
        {("convert_floating", False): np.dtype("float32")},
    ),
    (
        [1, 2.0],
        object,
        "Int64",
        {
            ("convert_integer", False): "Float64",
            ("convert_integer", False, "convert_floating", False): np.dtype(
                "float"
            ),
            ("infer_objects", False): np.dtype("object"),
        },
    ),
    (
        [1, 2.5],
        object,
        "Float64",
        {
            ("convert_floating", False): np.dtype("float"),
            ("infer_objects", False): np.dtype("object"),
        },
    ),
    (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
    # The same timestamps in each unit, paired with a tz-aware dtype.
    *[
        (
            _naive_datetimes(unit),
            pd.DatetimeTZDtype(tz="UTC"),
            pd.DatetimeTZDtype(tz="UTC"),
            {},
        )
        for unit in ("s", "ms", "us", "ns")
    ],
    (
        _naive_datetimes("ns"),
        "datetime64[ns]",
        np.dtype("datetime64[ns]"),
        {},
    ),
    (
        _naive_datetimes("ns"),
        object,
        np.dtype("datetime64[ns]"),
        {("infer_objects", False): np.dtype("object")},
    ),
    (
        pd.period_range("1/1/2011", freq="M", periods=3),
        None,
        pd.PeriodDtype("M"),
        {},
    ),
    (
        pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
        None,
        pd.IntervalDtype("int64", "right"),
        {},
    ),
]


@pytest.fixture(params=_CONVERT_DTYPES_CASES)
def test_cases(request):
    """Yield one (data, dtype, default expected dtype, overrides) case per run."""
    return request.param
|
||||
|
||||
|
||||
class TestSeriesConvertDtypes:
    """Tests for ``convert_dtypes`` on Series and small DataFrames."""

    @pytest.mark.parametrize("params", product((True, False), repeat=5))
    def test_convert_dtypes(
        self,
        test_cases,
        params,
        using_infer_string,
    ):
        """Run one case under every combination of the five boolean flags."""
        data, maindtype, expected_default, expected_other = test_cases

        naive_data_with_tz_dtype = (
            hasattr(data, "dtype")
            and lib.is_np_dtype(data.dtype, "M")
            and isinstance(maindtype, pd.DatetimeTZDtype)
        )
        if naive_data_with_tz_dtype:
            # this astype is deprecated in favor of tz_localize
            msg = "Cannot use .astype to convert from timezone-naive dtype"
            with pytest.raises(TypeError, match=msg):
                pd.Series(data, dtype=maindtype)
            return

        if maindtype is None:
            series = pd.Series(data)
        else:
            series = pd.Series(data, dtype=maindtype)

        result = series.convert_dtypes(*params)

        # Flag names in the positional order used for the call above.
        flag_names = (
            "infer_objects",
            "convert_string",
            "convert_integer",
            "convert_boolean",
            "convert_floating",
        )
        flags = dict(zip(flag_names, params))

        # Walk the overrides in order; the last matching one is kept.
        expected_dtype = expected_default
        for spec, override in expected_other.items():
            names = spec[::2]
            wanted = spec[1::2]
            if all(flags[name] is value for name, value in zip(names, wanted)):
                expected_dtype = override

        if (
            using_infer_string
            and expected_default == "string"
            and expected_dtype == object
            and params[0]
            and not params[1]
        ):
            # If we would convert with convert strings then infer_objects converts
            # with the option
            expected_dtype = "string[pyarrow_numpy]"

        expected = pd.Series(data, dtype=expected_dtype)
        tm.assert_series_equal(result, expected)

        # Test that it is a copy
        snapshot = series.copy(deep=True)

        present = result.notna()
        if present.sum() > 0 and result.dtype in ["interval[int64, right]"]:
            with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
                result[present] = np.nan
        else:
            result[present] = np.nan

        # Make sure original not changed
        tm.assert_series_equal(series, snapshot)

    def test_convert_string_dtype(self, nullable_string_dtype):
        """A frame that is already string dtype is returned equal."""
        # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
        # that are already string dtype
        columns = {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}
        df = pd.DataFrame(columns, dtype=nullable_string_dtype)
        tm.assert_frame_equal(df, df.convert_dtypes())

    def test_convert_bool_dtype(self):
        """A frame built from ``pd.array([True])`` is returned equal."""
        # GH32287
        df = pd.DataFrame({"A": pd.array([True])})
        converted = df.convert_dtypes()
        tm.assert_frame_equal(df, converted)

    def test_convert_byte_string_dtype(self):
        """A column holding a ``bytes`` value is returned equal."""
        # GH-43183
        df = pd.DataFrame(data={"A": b"binary-string"}, index=[0])
        tm.assert_frame_equal(df.convert_dtypes(), df)

    @pytest.mark.parametrize(
        "infer_objects, dtype", [(True, "Int64"), (False, "object")]
    )
    def test_convert_dtype_object_with_na(self, infer_objects, dtype):
        """``[1, pd.NA]`` becomes Int64 with ``infer_objects``, else stays object."""
        # GH#48791
        values = [1, pd.NA]
        result = pd.Series(values).convert_dtypes(infer_objects=infer_objects)
        tm.assert_series_equal(result, pd.Series(values, dtype=dtype))

    @pytest.mark.parametrize(
        "infer_objects, dtype", [(True, "Float64"), (False, "object")]
    )
    def test_convert_dtype_object_with_na_float(self, infer_objects, dtype):
        """``[1.5, pd.NA]`` becomes Float64 with ``infer_objects``, else stays object."""
        # GH#48791
        values = [1.5, pd.NA]
        result = pd.Series(values).convert_dtypes(infer_objects=infer_objects)
        tm.assert_series_equal(result, pd.Series(values, dtype=dtype))

    def test_convert_dtypes_pyarrow_to_np_nullable(self):
        """An ``int32[pyarrow]`` Series converts to ``Int32`` on request."""
        # GH 53648
        pytest.importorskip("pyarrow")
        arrow_ser = pd.Series(range(2), dtype="int32[pyarrow]")
        result = arrow_ser.convert_dtypes(dtype_backend="numpy_nullable")
        tm.assert_series_equal(result, pd.Series(range(2), dtype="Int32"))

    def test_convert_dtypes_pyarrow_null(self):
        """An all-``None`` Series converts to the pyarrow null type."""
        # GH#55346
        pa = pytest.importorskip("pyarrow")
        result = pd.Series([None, None]).convert_dtypes(dtype_backend="pyarrow")
        expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
        tm.assert_series_equal(result, expected)
|
@ -0,0 +1,91 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCopy:
    """Tests for ``Series.copy``."""

    @pytest.mark.parametrize("deep", ["default", None, False, True])
    def test_copy(self, deep, using_copy_on_write, warn_copy_on_write):
        """Writing to the copy reaches the original only for a shallow, non-CoW copy."""
        ser = Series(np.arange(10), dtype="float64")

        # default deep is True
        if deep == "default":
            ser2 = ser.copy()
        else:
            ser2 = ser.copy(deep=deep)

        if using_copy_on_write:
            # INFO(CoW) a shallow copy doesn't yet copy the data
            # but parent will not be modified (CoW)
            if deep is None or deep is False:
                assert np.may_share_memory(ser.values, ser2.values)
            else:
                assert not np.may_share_memory(ser.values, ser2.values)

        with tm.assert_cow_warning(warn_copy_on_write and deep is False):
            ser2[::2] = np.nan

        if deep is not False or using_copy_on_write:
            # Did not modify original Series
            assert np.isnan(ser2[0])
            assert not np.isnan(ser[0])
        else:
            # we DID modify the original Series
            assert np.isnan(ser2[0])
            assert np.isnan(ser[0])

    @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
    @pytest.mark.parametrize("deep", ["default", None, False, True])
    def test_copy_tzaware(self, deep, using_copy_on_write):
        """Same check as ``test_copy`` for a tz-aware datetime Series."""
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        ser = Series([Timestamp("2012/01/01", tz="UTC")])

        if deep == "default":
            ser2 = ser.copy()
        else:
            ser2 = ser.copy(deep=deep)

        if using_copy_on_write:
            # INFO(CoW) a shallow copy doesn't yet copy the data
            # but parent will not be modified (CoW)
            if deep is None or deep is False:
                assert np.may_share_memory(ser.values, ser2.values)
            else:
                assert not np.may_share_memory(ser.values, ser2.values)

        ser2[0] = Timestamp("1999/01/01", tz="UTC")

        # default deep is True
        if deep is not False or using_copy_on_write:
            # Did not modify original Series
            tm.assert_series_equal(ser2, expected2)
            tm.assert_series_equal(ser, expected)
        else:
            # we DID modify the original Series
            tm.assert_series_equal(ser2, expected2)
            tm.assert_series_equal(ser, expected2)

    def test_copy_name(self, datetime_series):
        """The copy carries the same ``name`` as the original."""
        result = datetime_series.copy()
        assert result.name == datetime_series.name

    def test_copy_index_name_checking(self, datetime_series):
        """Renaming the copy's index must not rename the original's index."""
        # don't want to be able to modify the index stored elsewhere after
        # making a copy

        datetime_series.index.name = None
        assert datetime_series.index.name is None
        # NOTE: a former ``assert datetime_series is datetime_series`` stood
        # here; it compared the object with itself and could never fail.

        cp = datetime_series.copy()
        cp.index.name = "foo"
        assert datetime_series.index.name is None
|
@ -0,0 +1,34 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesCount:
|
||||
def test_count(self, datetime_series):
|
||||
assert datetime_series.count() == len(datetime_series)
|
||||
|
||||
datetime_series[::2] = np.nan
|
||||
|
||||
assert datetime_series.count() == np.isfinite(datetime_series).sum()
|
||||
|
||||
def test_count_inf_as_na(self):
|
||||
# GH#29478
|
||||
ser = Series([pd.Timestamp("1990/1/1")])
|
||||
msg = "use_inf_as_na option is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
with pd.option_context("use_inf_as_na", True):
|
||||
assert ser.count() == 1
|
||||
|
||||
def test_count_categorical(self):
|
||||
ser = Series(
|
||||
Categorical(
|
||||
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
|
||||
)
|
||||
)
|
||||
result = ser.count()
|
||||
assert result == 2
|
@ -0,0 +1,185 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesCov:
    """Tests for ``Series.cov``."""

    def test_cov(self, datetime_series):
        """Check ``cov`` for full, partial, no overlap, all-NA and ``min_periods``."""
        # full overlap
        tm.assert_almost_equal(
            datetime_series.cov(datetime_series), datetime_series.std() ** 2
        )

        # partial overlap
        tm.assert_almost_equal(
            datetime_series[:15].cov(datetime_series[5:]),
            datetime_series[5:15].std() ** 2,
        )

        # No overlap
        assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))

        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.cov(cp))

        # min_periods
        assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12))

        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.cov(ts2, min_periods=12))

    @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
    @pytest.mark.parametrize("dtype", ["float64", "Float64"])
    def test_cov_ddof(self, test_ddof, dtype):
        """``Series.cov`` matches ``np.cov`` for each ``ddof`` and dtype."""
        # GH#34611
        # Draw both arrays from ONE generator. Two generators seeded with the
        # same value would return identical arrays, reducing the check to a
        # variance computation.
        rng = np.random.default_rng(2)
        np_array1 = rng.random(10)
        np_array2 = rng.random(10)

        s1 = Series(np_array1, dtype=dtype)
        s2 = Series(np_array2, dtype=dtype)

        result = s1.cov(s2, ddof=test_ddof)
        # Off-diagonal entry of the 2x2 matrix is the cross-covariance.
        expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1]
        assert math.isclose(expected, result)
|
||||
|
||||
|
||||
class TestSeriesCorr:
    """Tests for ``Series.corr``."""

    @pytest.mark.parametrize("dtype", ["float64", "Float64"])
    def test_corr(self, datetime_series, dtype):
        """Check the default ``corr`` for overlap, ``min_periods`` and all-NA input."""
        stats = pytest.importorskip("scipy.stats")

        ser = datetime_series.astype(dtype)
        head = ser[:15]
        tail = ser[5:]

        # full overlap
        tm.assert_almost_equal(ser.corr(ser), 1)

        # partial overlap
        tm.assert_almost_equal(head.corr(tail), 1)

        assert isna(head.corr(tail, min_periods=12))

        ts1 = ser[:15].reindex(ser.index)
        ts2 = ser[5:].reindex(ser.index)
        assert isna(ts1.corr(ts2, min_periods=12))

        # No overlap
        assert np.isnan(ser[::2].corr(ser[1::2]))

        # all NA
        blank = ser[:10].copy()
        blank[:] = np.nan
        assert isna(blank.corr(blank))

        A = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        B = A.copy()
        expected, _ = stats.pearsonr(A, B)
        tm.assert_almost_equal(A.corr(B), expected)

    def test_corr_rank(self):
        """Check kendall and spearman against scipy and fixed reference values."""
        stats = pytest.importorskip("scipy.stats")

        # kendall and spearman
        A = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        B = A.copy()
        A[-5:] = A[:5].copy()

        tm.assert_almost_equal(A.corr(B, method="kendall"), stats.kendalltau(A, B)[0])
        tm.assert_almost_equal(A.corr(B, method="spearman"), stats.spearmanr(A, B)[0])

        # results from R
        a_values = [
            -0.89926396,
            0.94209606,
            -1.03289164,
            -0.95445587,
            0.76910310,
            -0.06430576,
            -2.09704447,
            0.40660407,
            -0.89926396,
            0.94209606,
        ]
        b_values = [
            -1.01270225,
            -0.62210117,
            -1.56895827,
            0.59592943,
            -0.01680292,
            1.17258718,
            -1.06009347,
            -0.10222060,
            -0.89076239,
            0.89372375,
        ]
        A = Series(a_values)
        B = Series(b_values)
        reference = {"kendall": 0.4319297, "spearman": 0.5853767}
        for method, value in reference.items():
            tm.assert_almost_equal(A.corr(B, method=method), value)

    def test_corr_invalid_method(self):
        """An unknown ``method`` string raises ``ValueError``."""
        # GH PR #22298
        s1 = Series(np.random.default_rng(2).standard_normal(10))
        s2 = Series(np.random.default_rng(2).standard_normal(10))
        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
        with pytest.raises(ValueError, match=msg):
            s1.corr(s2, method="____")

    def test_corr_callable_method(self, datetime_series):
        """A user-supplied callable can be passed as ``method``."""

        # simple correlation example
        # returns 1 if exact equality, 0 otherwise
        def my_corr(a, b):
            return 1.0 if (a == b).all() else 0.0

        # simple example
        s1 = Series([1, 2, 3, 4, 5])
        s2 = Series([5, 4, 3, 2, 1])
        tm.assert_almost_equal(s1.corr(s2, method=my_corr), 0)

        # full overlap
        full = datetime_series.corr(datetime_series, method=my_corr)
        tm.assert_almost_equal(full, 1.0)

        # partial overlap
        partial = datetime_series[:15].corr(datetime_series[5:], method=my_corr)
        tm.assert_almost_equal(partial, 1.0)

        # No overlap
        disjoint = datetime_series[::2].corr(datetime_series[1::2], method=my_corr)
        assert np.isnan(disjoint)

        # dataframe example
        df = pd.DataFrame([s1, s2])
        expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
        tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
|
@ -0,0 +1,203 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_complex_dtype,
|
||||
is_extension_array_dtype,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Period,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDescribe:
    """Tests for ``Series.describe``."""

    def test_describe_ints(self):
        """Integer data yields the eight numeric summary rows."""
        ser = Series([0, 1, 2, 3, 4], name="int_data")
        labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
        values = [5, 2, ser.std(), 0, 1, 2, 3, 4]
        expected = Series(values, name="int_data", index=labels)
        tm.assert_series_equal(ser.describe(), expected)

    def test_describe_bools(self):
        """Boolean data yields count/unique/top/freq."""
        ser = Series([True, True, False, False, False], name="bool_data")
        labels = ["count", "unique", "top", "freq"]
        expected = Series([5, 2, False, 3], name="bool_data", index=labels)
        tm.assert_series_equal(ser.describe(), expected)

    def test_describe_strs(self):
        """String data yields count/unique/top/freq."""
        ser = Series(["a", "a", "b", "c", "d"], name="str_data")
        labels = ["count", "unique", "top", "freq"]
        expected = Series([5, 4, "a", 2], name="str_data", index=labels)
        tm.assert_series_equal(ser.describe(), expected)

    def test_describe_timedelta64(self):
        """Timedelta data yields the eight numeric summary rows."""
        deltas = [
            Timedelta("1 days"),
            Timedelta("2 days"),
            Timedelta("3 days"),
            Timedelta("4 days"),
            Timedelta("5 days"),
        ]
        ser = Series(deltas, name="timedelta_data")
        result = ser.describe()

        labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
        values = [5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]]
        expected = Series(values, name="timedelta_data", index=labels)
        tm.assert_series_equal(result, expected)

    def test_describe_period(self):
        """Period data yields count/unique/top/freq."""
        periods = [Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")]
        ser = Series(periods, name="period_data")
        result = ser.describe()

        labels = ["count", "unique", "top", "freq"]
        expected = Series([3, 2, ser[0], 2], name="period_data", index=labels)
        tm.assert_series_equal(result, expected)

    def test_describe_empty_object(self):
        """All-None and zero-length object Series describe with NaN top/freq."""
        # https://github.com/pandas-dev/pandas/issues/27183
        s = Series([None, None], dtype=object)
        labels = ["count", "unique", "top", "freq"]
        expected = Series([0, 0, np.nan, np.nan], dtype=object, index=labels)
        tm.assert_series_equal(s.describe(), expected)

        result = s[:0].describe()
        tm.assert_series_equal(result, expected)
        # ensure NaN, not None
        for position in (2, 3):
            assert np.isnan(result.iloc[position])

    def test_describe_with_tz(self, tz_naive_fixture):
        """Datetime data in each fixture time zone yields the seven-row summary."""
        # GH 21332
        tz = tz_naive_fixture
        name = str(tz_naive_fixture)
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s = Series(date_range(start, end, tz=tz), name=name)
        result = s.describe()

        values = [
            5,
            Timestamp(2018, 1, 3).tz_localize(tz),
            start.tz_localize(tz),
            s[1],
            s[2],
            s[3],
            end.tz_localize(tz),
        ]
        labels = ["count", "mean", "min", "25%", "50%", "75%", "max"]
        expected = Series(values, name=name, index=labels)
        tm.assert_series_equal(result, expected)

    def test_describe_with_tz_numeric(self):
        """Same summary for a fixed "CET" zone, with literal expected timestamps."""
        name = tz = "CET"
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s = Series(date_range(start, end, tz=tz), name=name)

        result = s.describe()

        values = [
            5,
            Timestamp("2018-01-03 00:00:00", tz=tz),
            Timestamp("2018-01-01 00:00:00", tz=tz),
            Timestamp("2018-01-02 00:00:00", tz=tz),
            Timestamp("2018-01-03 00:00:00", tz=tz),
            Timestamp("2018-01-04 00:00:00", tz=tz),
            Timestamp("2018-01-05 00:00:00", tz=tz),
        ]
        labels = ["count", "mean", "min", "25%", "50%", "75%", "max"]
        expected = Series(values, name=name, index=labels)
        tm.assert_series_equal(result, expected)

    def test_datetime_is_numeric_includes_datetime(self):
        """Tz-naive datetime data yields the seven-row summary."""
        s = Series(date_range("2012", periods=3))
        values = [
            3,
            Timestamp("2012-01-02"),
            Timestamp("2012-01-01"),
            Timestamp("2012-01-01T12:00:00"),
            Timestamp("2012-01-02"),
            Timestamp("2012-01-02T12:00:00"),
            Timestamp("2012-01-03"),
        ]
        labels = ["count", "mean", "min", "25%", "50%", "75%", "max"]
        expected = Series(values, index=labels)
        tm.assert_series_equal(s.describe(), expected)

    @pytest.mark.filterwarnings("ignore:Casting complex values to real discards")
    def test_numeric_result_dtype(self, any_numeric_dtype):
        """Check the result dtype of ``describe`` for each numeric dtype."""
        # GH#48340 - describe should always return float on non-complex numeric input
        if is_extension_array_dtype(any_numeric_dtype):
            dtype = "Float64"
        elif is_complex_dtype(any_numeric_dtype):
            dtype = "complex128"
        else:
            dtype = None

        ser = Series([0, 1], dtype=any_numeric_dtype)
        if dtype == "complex128" and np_version_gte1p25:
            # In this configuration the call is expected to raise instead.
            with pytest.raises(
                TypeError, match=r"^a must be an array of real numbers$"
            ):
                ser.describe()
            return

        result = ser.describe()
        labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
        values = [2.0, 0.5, ser.std(), 0, 0.25, 0.5, 0.75, 1.0]
        expected = Series(values, index=labels, dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_describe_one_element_ea(self):
        """A one-element Float64 Series describes without warning; std is NA."""
        # GH#52515
        ser = Series([0.0], dtype="Float64")
        with tm.assert_produces_warning(None):
            result = ser.describe()

        labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
        expected = Series([1, 0, NA, 0, 0, 0, 0, 0], dtype="Float64", index=labels)
        tm.assert_series_equal(result, expected)
|
@ -0,0 +1,88 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDiff:
    """Tests for ``Series.diff`` on integer, float, datetime, boolean and object data."""

    def test_diff_np(self):
        """``np.diff`` applied to a Series returns a plain ndarray."""
        # TODO(__array_function__): could make np.diff return a Series
        #  matching ser.diff()

        ser = Series(np.arange(5))

        res = np.diff(ser)
        expected = np.array([1, 1, 1, 1])
        tm.assert_numpy_array_equal(res, expected)

    def test_diff_int(self):
        """A difference of one between two large integers is reported as 1."""
        # int dtype
        a = 10000000000000000
        b = a + 1
        ser = Series([a, b])

        result = ser.diff()
        assert result[1] == 1

    def test_diff_tz(self):
        """Default, negative and zero periods on a float Series with a datetime index."""
        # Combined datetime diff, normal diff and boolean diff test
        ts = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )

        # default n: previously computed and thrown away, now checked
        result = ts.diff()
        expected = ts - ts.shift(1)
        tm.assert_series_equal(result, expected)

        # neg n
        result = ts.diff(-1)
        expected = ts - ts.shift(-1)
        tm.assert_series_equal(result, expected)

        # 0
        result = ts.diff(0)
        expected = ts - ts
        tm.assert_series_equal(result, expected)

    def test_diff_dt64(self):
        """Differencing datetimes, then differencing the resulting timedeltas."""
        # datetime diff (GH#3100)
        ser = Series(date_range("20130102", periods=5))
        result = ser.diff()
        expected = ser - ser.shift(1)
        tm.assert_series_equal(result, expected)

        # timedelta diff
        result = result - result.shift(1)  # previous result
        expected = expected.diff()  # previously expected
        tm.assert_series_equal(result, expected)

    def test_diff_dt64tz(self):
        """Differencing tz-aware datetimes gives timedeltas and keeps the name."""
        # with tz
        ser = Series(
            date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
        )
        result = ser.diff()
        expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "input,output,diff",
        [([False, True, True, False, False], [np.nan, True, False, True, False], 1)],
    )
    def test_diff_bool(self, input, output, diff):
        """Differencing booleans by ``diff`` periods gives the parametrized output."""
        # boolean series (test for fixing #17294)
        ser = Series(input)
        # Pass the parametrized period through instead of relying on the default.
        result = ser.diff(diff)
        expected = Series(output)
        tm.assert_series_equal(result, expected)

    def test_diff_object_dtype(self):
        """On mixed bool/float/NaN data ``diff`` matches ``ser - ser.shift(1)``."""
        # object series
        ser = Series([False, True, 5.0, np.nan, True, False])
        result = ser.diff()
        expected = ser - ser.shift(1)
        tm.assert_series_equal(result, expected)
|
@ -0,0 +1,99 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_bool_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, expected_data, expected_index",
    [
        # Unique Index
        ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
        ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
        # GH 5248 Non-Unique Index
        ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
        ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
    ],
)
def test_drop_unique_and_non_unique_index(
    data, index, axis, drop_labels, expected_data, expected_index
):
    """Drop labels (scalar or list) from Series with unique and repeated labels."""
    original = Series(data=data, index=index)
    expected = Series(data=expected_data, index=expected_index)

    result = original.drop(drop_labels, axis=axis)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, error_type, error_desc",
    [
        # single string/tuple-like
        (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
        (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
        # bad axis
        (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
    ],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
    """Dropping a missing label, or using an invalid axis, raises the given error."""
    target = Series(data, index=index)

    with pytest.raises(error_type, match=error_desc):
        target.drop(drop_labels, axis=axis)
|
||||
|
||||
|
||||
def test_drop_with_ignore_errors():
    """Missing labels are skipped with ``errors="ignore"``; a boolean label can be dropped."""
    # errors='ignore'
    letters = Series(range(3), index=list("abc"))

    # A label that is absent altogether leaves the Series untouched.
    untouched = letters.drop("bc", errors="ignore")
    tm.assert_series_equal(untouched, letters)

    # Only the label that exists ("a") is removed.
    partially_dropped = letters.drop(["a", "d"], errors="ignore")
    tm.assert_series_equal(partially_dropped, letters.iloc[1:])

    # GH 8522
    flags = Series([2, 3], index=[True, False])
    assert is_bool_dtype(flags.index)
    assert flags.index.dtype == bool

    remaining = flags.drop(True)
    tm.assert_series_equal(remaining, Series([3], index=[False]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    """Dropping an empty or existing label list keeps exactly the other labels.

    GH 21494
    """
    surviving_labels = []
    for label in index:
        if label not in drop_labels:
            surviving_labels.append(label)

    result = Series(index=index, dtype=object).drop(drop_labels)
    expected = Series(index=surviving_labels, dtype=object)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels",
    [
        (None, [1, 2, 3], [1, 4]),
        (None, [1, 2, 2], [1, 4]),
        ([2, 3], [0, 1], [False, True]),
    ],
)
def test_drop_non_empty_list(data, index, drop_labels):
    """Dropping a list containing a missing label raises ``KeyError``.

    GH 21494 and GH 16877
    """
    # Without data an explicit object dtype is passed to the constructor.
    if data is None:
        dtype = object
    else:
        dtype = None
    target = Series(data=data, index=index, dtype=dtype)

    with pytest.raises(KeyError, match="not found in axis"):
        target.drop(drop_labels)
|
||||
|
||||
|
||||
def test_drop_index_ea_dtype(any_numeric_ea_dtype):
|
||||
# GH#45860
|
||||
df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype))
|
||||
idx = Index([df.index[1]])
|
||||
result = df.drop(idx)
|
||||
expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype))
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,267 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"keep, expected",
|
||||
[
|
||||
("first", Series([False, False, False, False, True, True, False])),
|
||||
("last", Series([False, True, True, False, False, False, False])),
|
||||
(False, Series([False, True, True, False, True, True, False])),
|
||||
],
|
||||
)
|
||||
def test_drop_duplicates(any_numpy_dtype, keep, expected):
|
||||
tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
|
||||
|
||||
if tc.dtype == "bool":
|
||||
pytest.skip("tested separately in test_drop_duplicates_bool")
|
||||
|
||||
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(keep=keep, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, True])),
        ("last", Series([True, True, False, False])),
        (False, Series([True, True, True, True])),
    ],
)
def test_drop_duplicates_bool(keep, expected):
    """Boolean counterpart of ``test_drop_duplicates``."""
    flags = Series([True, False, True, False])
    deduplicated = flags[~expected]

    tm.assert_series_equal(flags.duplicated(keep=keep), expected)
    tm.assert_series_equal(flags.drop_duplicates(keep=keep), deduplicated)

    inplace_flags = flags.copy()
    return_value = inplace_flags.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(inplace_flags, deduplicated)
    assert return_value is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[], list(range(5))])
|
||||
def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
|
||||
tc = Series(values, dtype=np.dtype(any_numpy_dtype))
|
||||
expected = Series([False] * len(tc), dtype="bool")
|
||||
|
||||
if tc.dtype == "bool":
|
||||
# 0 -> False and 1-> True
|
||||
# any other value would be duplicated
|
||||
tc = tc[:2]
|
||||
expected = expected[:2]
|
||||
|
||||
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
|
||||
|
||||
result_dropped = tc.drop_duplicates(keep=keep)
|
||||
tm.assert_series_equal(result_dropped, tc)
|
||||
|
||||
# validate shallow copy
|
||||
assert result_dropped is not tc
|
||||
|
||||
|
||||
class TestSeriesDropDuplicates:
|
||||
@pytest.fixture(
|
||||
params=["int_", "uint", "float64", "str_", "timedelta64[h]", "datetime64[D]"]
|
||||
)
|
||||
def dtype(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def cat_series_unused_category(self, dtype, ordered):
|
||||
# Test case 1
|
||||
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
|
||||
|
||||
input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype))
|
||||
cat = Categorical(input1, categories=cat_array, ordered=ordered)
|
||||
tc1 = Series(cat)
|
||||
return tc1
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category):
|
||||
tc1 = cat_series_unused_category
|
||||
|
||||
expected = Series([False, False, False, True])
|
||||
|
||||
result = tc1.duplicated()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc1.drop_duplicates()
|
||||
tm.assert_series_equal(result, tc1[~expected])
|
||||
|
||||
sc = tc1.copy()
|
||||
return_value = sc.drop_duplicates(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc1[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool_keeplast(
|
||||
self, cat_series_unused_category
|
||||
):
|
||||
tc1 = cat_series_unused_category
|
||||
|
||||
expected = Series([False, False, True, False])
|
||||
|
||||
result = tc1.duplicated(keep="last")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc1.drop_duplicates(keep="last")
|
||||
tm.assert_series_equal(result, tc1[~expected])
|
||||
|
||||
sc = tc1.copy()
|
||||
return_value = sc.drop_duplicates(keep="last", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc1[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool_keepfalse(
|
||||
self, cat_series_unused_category
|
||||
):
|
||||
tc1 = cat_series_unused_category
|
||||
|
||||
expected = Series([False, False, True, True])
|
||||
|
||||
result = tc1.duplicated(keep=False)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc1.drop_duplicates(keep=False)
|
||||
tm.assert_series_equal(result, tc1[~expected])
|
||||
|
||||
sc = tc1.copy()
|
||||
return_value = sc.drop_duplicates(keep=False, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc1[~expected])
|
||||
|
||||
@pytest.fixture
|
||||
def cat_series(self, dtype, ordered):
|
||||
# no unused categories, unlike cat_series_unused_category
|
||||
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
|
||||
|
||||
input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype))
|
||||
cat = Categorical(input2, categories=cat_array, ordered=ordered)
|
||||
tc2 = Series(cat)
|
||||
return tc2
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool2(self, cat_series):
|
||||
tc2 = cat_series
|
||||
|
||||
expected = Series([False, False, False, False, True, True, False])
|
||||
|
||||
result = tc2.duplicated()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc2.drop_duplicates()
|
||||
tm.assert_series_equal(result, tc2[~expected])
|
||||
|
||||
sc = tc2.copy()
|
||||
return_value = sc.drop_duplicates(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc2[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series):
|
||||
tc2 = cat_series
|
||||
|
||||
expected = Series([False, True, True, False, False, False, False])
|
||||
|
||||
result = tc2.duplicated(keep="last")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc2.drop_duplicates(keep="last")
|
||||
tm.assert_series_equal(result, tc2[~expected])
|
||||
|
||||
sc = tc2.copy()
|
||||
return_value = sc.drop_duplicates(keep="last", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc2[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series):
|
||||
tc2 = cat_series
|
||||
|
||||
expected = Series([False, True, True, False, True, True, False])
|
||||
|
||||
result = tc2.duplicated(keep=False)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc2.drop_duplicates(keep=False)
|
||||
tm.assert_series_equal(result, tc2[~expected])
|
||||
|
||||
sc = tc2.copy()
|
||||
return_value = sc.drop_duplicates(keep=False, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc2[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_bool(self, ordered):
|
||||
tc = Series(
|
||||
Categorical(
|
||||
[True, False, True, False], categories=[True, False], ordered=ordered
|
||||
)
|
||||
)
|
||||
|
||||
expected = Series([False, False, True, True])
|
||||
tm.assert_series_equal(tc.duplicated(), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
expected = Series([True, True, False, False])
|
||||
tm.assert_series_equal(tc.duplicated(keep="last"), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(keep="last", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
expected = Series([True, True, True, True])
|
||||
tm.assert_series_equal(tc.duplicated(keep=False), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(keep=False, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_bool_na(self, nulls_fixture):
|
||||
# GH#44351
|
||||
ser = Series(
|
||||
Categorical(
|
||||
[True, False, True, False, nulls_fixture],
|
||||
categories=[True, False],
|
||||
ordered=True,
|
||||
)
|
||||
)
|
||||
result = ser.drop_duplicates()
|
||||
expected = Series(
|
||||
Categorical([True, False, np.nan], categories=[True, False], ordered=True),
|
||||
index=[0, 1, 4],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_drop_duplicates_ignore_index(self):
|
||||
# GH#48304
|
||||
ser = Series([1, 2, 2, 3])
|
||||
result = ser.drop_duplicates(ignore_index=True)
|
||||
expected = Series([1, 2, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_duplicated_arrow_dtype(self):
|
||||
pytest.importorskip("pyarrow")
|
||||
ser = Series([True, False, None, False], dtype="bool[pyarrow]")
|
||||
result = ser.drop_duplicates()
|
||||
expected = Series([True, False, None], dtype="bool[pyarrow]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_drop_duplicates_arrow_strings(self):
|
||||
# GH#54904
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string()))
|
||||
result = ser.drop_duplicates()
|
||||
expecetd = Series(["a"], dtype=pd.ArrowDtype(pa.string()))
|
||||
tm.assert_series_equal(result, expecetd)
|
@ -0,0 +1,117 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
Period,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDropna:
|
||||
def test_dropna_empty(self):
|
||||
ser = Series([], dtype=object)
|
||||
|
||||
assert len(ser.dropna()) == 0
|
||||
return_value = ser.dropna(inplace=True)
|
||||
assert return_value is None
|
||||
assert len(ser) == 0
|
||||
|
||||
# invalid axis
|
||||
msg = "No axis named 1 for object type Series"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.dropna(axis=1)
|
||||
|
||||
def test_dropna_preserve_name(self, datetime_series):
|
||||
datetime_series[:5] = np.nan
|
||||
result = datetime_series.dropna()
|
||||
assert result.name == datetime_series.name
|
||||
name = datetime_series.name
|
||||
ts = datetime_series.copy()
|
||||
return_value = ts.dropna(inplace=True)
|
||||
assert return_value is None
|
||||
assert ts.name == name
|
||||
|
||||
def test_dropna_no_nan(self):
|
||||
for ser in [
|
||||
Series([1, 2, 3], name="x"),
|
||||
Series([False, True, False], name="x"),
|
||||
]:
|
||||
result = ser.dropna()
|
||||
tm.assert_series_equal(result, ser)
|
||||
assert result is not ser
|
||||
|
||||
s2 = ser.copy()
|
||||
return_value = s2.dropna(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(s2, ser)
|
||||
|
||||
def test_dropna_intervals(self):
|
||||
ser = Series(
|
||||
[np.nan, 1, 2, 3],
|
||||
IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]),
|
||||
)
|
||||
|
||||
result = ser.dropna()
|
||||
expected = ser.iloc[1:]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dropna_period_dtype(self):
|
||||
# GH#13737
|
||||
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
|
||||
result = ser.dropna()
|
||||
expected = Series([Period("2011-01", freq="M")])
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_datetime64_tz_dropna(self, unit):
|
||||
# DatetimeLikeBlock
|
||||
ser = Series(
|
||||
[
|
||||
Timestamp("2011-01-01 10:00"),
|
||||
NaT,
|
||||
Timestamp("2011-01-03 10:00"),
|
||||
NaT,
|
||||
],
|
||||
dtype=f"M8[{unit}]",
|
||||
)
|
||||
result = ser.dropna()
|
||||
expected = Series(
|
||||
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")],
|
||||
index=[0, 2],
|
||||
dtype=f"M8[{unit}]",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# DatetimeTZBlock
|
||||
idx = DatetimeIndex(
|
||||
["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo"
|
||||
).as_unit(unit)
|
||||
ser = Series(idx)
|
||||
assert ser.dtype == f"datetime64[{unit}, Asia/Tokyo]"
|
||||
result = ser.dropna()
|
||||
expected = Series(
|
||||
[
|
||||
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
||||
Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"),
|
||||
],
|
||||
index=[0, 2],
|
||||
dtype=f"datetime64[{unit}, Asia/Tokyo]",
|
||||
)
|
||||
assert result.dtype == f"datetime64[{unit}, Asia/Tokyo]"
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.5])
|
||||
def test_dropna_ignore_index(self, val):
|
||||
# GH#31725
|
||||
ser = Series([1, 2, val], index=[3, 2, 1])
|
||||
result = ser.dropna(ignore_index=True)
|
||||
expected = Series([1, 2, val])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser.dropna(ignore_index=True, inplace=True)
|
||||
tm.assert_series_equal(ser, expected)
|
@ -0,0 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TestSeriesDtypes:
|
||||
def test_dtype(self, datetime_series):
|
||||
assert datetime_series.dtype == np.dtype("float64")
|
||||
assert datetime_series.dtypes == np.dtype("float64")
|
@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True], name="name")),
        ("last", Series([True, True, False, False, False], name="name")),
        (False, Series([True, True, True, False, True], name="name")),
    ],
)
def test_duplicated_keep(keep, expected):
    """``duplicated`` honours ``keep`` and carries the Series name over."""
    letters = Series(["a", "b", "b", "c", "a"], name="name")

    tm.assert_series_equal(letters.duplicated(keep=keep), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True])),
        ("last", Series([True, True, False, False, False])),
        (False, Series([True, True, True, False, True])),
    ],
)
def test_duplicated_nan_none(keep, expected):
    """In object data the two NaNs duplicate each other, while None does not match them."""
    mixed_missing = Series([np.nan, 3, 3, None, np.nan], dtype=object)

    tm.assert_series_equal(mixed_missing.duplicated(keep=keep), expected)
|
||||
|
||||
|
||||
def test_duplicated_categorical_bool_na(nulls_fixture):
|
||||
# GH#44351
|
||||
ser = Series(
|
||||
Categorical(
|
||||
[True, False, True, False, nulls_fixture],
|
||||
categories=[True, False],
|
||||
ordered=True,
|
||||
)
|
||||
)
|
||||
result = ser.duplicated()
|
||||
expected = Series([False, False, True, True, False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, vals",
    [
        ("last", [True, True, False]),
        ("first", [False, True, True]),
        (False, [True, True, True]),
    ],
)
def test_duplicated_mask(keep, vals):
    """Repeated NA values in a masked Int64 Series are flagged (GH#48150)."""
    ser = Series([1, 2, NA, NA, NA], dtype="Int64")

    result = ser.duplicated(keep=keep)

    # The leading 1 and 2 are unique; ``vals`` covers the three NA entries.
    unique_prefix = [False, False]
    tm.assert_series_equal(result, Series(unique_prefix + vals))
|
||||
|
||||
|
||||
def test_duplicated_mask_no_duplicated_na(keep):
|
||||
# GH#48150
|
||||
ser = Series([1, 2, NA], dtype="Int64")
|
||||
result = ser.duplicated(keep=keep)
|
||||
expected = Series([False, False, False])
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,145 @@
|
||||
from contextlib import nullcontext
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.missing import is_matching_na
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
from pandas.core.dtypes.common import is_float
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr, idx",
    [
        ([1, 2, 3, 4], [0, 2, 1, 3]),
        ([1, np.nan, 3, np.nan], [0, 2, 1, 3]),
        (
            [1, np.nan, 3, np.nan],
            MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]),
        ),
    ],
)
def test_equals(arr, idx):
    """A copy equals its source until one element of the source is changed."""
    original = Series(arr, index=idx)
    duplicate = original.copy()
    assert original.equals(duplicate)

    original[1] = 9
    assert not original.equals(duplicate)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
|
||||
)
|
||||
def test_equals_list_array(val):
|
||||
# GH20676 Verify equals operator for list of Numpy arrays
|
||||
arr = np.array([1, 2])
|
||||
s1 = Series([arr, arr])
|
||||
s2 = s1.copy()
|
||||
assert s1.equals(s2)
|
||||
|
||||
s1[1] = val
|
||||
|
||||
cm = (
|
||||
tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
|
||||
if isinstance(val, str) and not np_version_gte1p25
|
||||
else nullcontext()
|
||||
)
|
||||
with cm:
|
||||
assert not s1.equals(s2)
|
||||
|
||||
|
||||
def test_equals_false_negative():
    """Object Series holding ``[False, NaN]`` compare equal however they were built.

    GH8437 Verify false negative behavior of equals function for dtype object
    """
    s1 = Series([False, np.nan])
    s2 = s1.copy()

    # Start from an object Series built with no data and fill position 0 in
    # two ways: through a slice (s3, s4) and through a scalar key (s5, s6).
    blank = Series(index=range(2), dtype=object)
    s3 = blank
    s4 = blank.copy()
    s5 = blank.copy()
    s6 = blank.copy()

    s3[:-1] = False
    s4[:-1] = False
    s5[0] = False
    s6[0] = False

    for other in (s1, s2, s3, s4, s5):
        assert s1.equals(other)
    assert s5.equals(s6)
|
||||
|
||||
|
||||
def test_equals_matching_nas():
|
||||
# matching but not identical NAs
|
||||
left = Series([np.datetime64("NaT")], dtype=object)
|
||||
right = Series([np.datetime64("NaT")], dtype=object)
|
||||
assert left.equals(right)
|
||||
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
|
||||
assert Index(left).equals(Index(right))
|
||||
assert left.array.equals(right.array)
|
||||
|
||||
left = Series([np.timedelta64("NaT")], dtype=object)
|
||||
right = Series([np.timedelta64("NaT")], dtype=object)
|
||||
assert left.equals(right)
|
||||
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
|
||||
assert Index(left).equals(Index(right))
|
||||
assert left.array.equals(right.array)
|
||||
|
||||
left = Series([np.float64("NaN")], dtype=object)
|
||||
right = Series([np.float64("NaN")], dtype=object)
|
||||
assert left.equals(right)
|
||||
assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype))
|
||||
assert left.array.equals(right.array)
|
||||
|
||||
|
||||
def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2):
|
||||
# GH#39650
|
||||
left = nulls_fixture
|
||||
right = nulls_fixture2
|
||||
if hasattr(right, "copy"):
|
||||
right = right.copy()
|
||||
else:
|
||||
right = copy.copy(right)
|
||||
|
||||
ser = Series([left], dtype=object)
|
||||
ser2 = Series([right], dtype=object)
|
||||
|
||||
if is_matching_na(left, right):
|
||||
assert ser.equals(ser2)
|
||||
elif (left is None and is_float(right)) or (right is None and is_float(left)):
|
||||
assert ser.equals(ser2)
|
||||
else:
|
||||
assert not ser.equals(ser2)
|
||||
|
||||
|
||||
def test_equals_none_vs_nan():
    """None and NaN are treated as equal in object data (GH#39650)."""
    with_none = Series([1, None], dtype=object)
    with_nan = Series([1, np.nan], dtype=object)

    assert with_none.equals(with_nan)

    none_index = Index(with_none, dtype=with_none.dtype)
    nan_index = Index(with_nan, dtype=with_nan.dtype)
    assert none_index.equals(nan_index)

    assert with_none.array.equals(with_nan.array)
|
||||
|
||||
|
||||
def test_equals_None_vs_float():
    """An all-None Series must not equal a Series of floats (GH#44190)."""
    floats = [-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf]
    left = Series(floats, dtype=object)
    right = Series([None] * len(left))

    # these series were found to be equal due to a bug, check that they are correctly
    # found to not equal
    for first, second in ((left, right), (right, left)):
        assert not first.equals(second)
    for first, second in ((left, right), (right, left)):
        assert not first.to_frame().equals(second.to_frame())
    for first, second in ((left, right), (right, left)):
        assert not Index(first, dtype="object").equals(Index(second, dtype="object"))
|
@ -0,0 +1,175 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_basic():
    """Lists and tuples expand element-wise; NaN and the empty list become one NaN row."""
    nested = [[0, 1, 2], np.nan, [], (3, 4)]
    ser = pd.Series(nested, index=list("abcd"), name="foo")

    result = ser.explode()

    flattened = [0, 1, 2, np.nan, np.nan, 3, 4]
    expected = pd.Series(flattened, index=list("aaabcdd"), dtype=object, name="foo")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_mixed_type():
    """Explode a Series mixing a list, NaN, None, an empty array and a nested Series."""
    nested = [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])]
    ser = pd.Series(nested, name="foo")

    result = ser.explode()

    expected = pd.Series(
        [0, 1, 2, np.nan, None, np.nan, "a", "b"],
        index=[0, 0, 0, 1, 2, 3, 4, 4],
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_empty():
    """Exploding an empty object Series returns an equal empty Series."""
    empty = pd.Series(dtype=object)

    result = empty.explode()

    tm.assert_series_equal(result, empty.copy())
|
||||
|
||||
|
||||
def test_nested_lists():
    """Only the outermost level is exploded; inner lists survive as elements."""
    ser = pd.Series([[[1, 2, 3]], [1, 2], 1])

    result = ser.explode()

    one_level_flat = [[1, 2, 3], 1, 2, 1]
    expected = pd.Series(one_level_flat, index=[0, 1, 1, 2])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_multi_index():
    """MultiIndex labels and level names are repeated for each exploded element."""
    level_names = ["foo", "bar"]
    ser = pd.Series(
        [[0, 1, 2], np.nan, [], (3, 4)],
        name="foo",
        index=pd.MultiIndex.from_product([list("ab"), range(2)], names=level_names),
    )

    result = ser.explode()

    repeated_labels = [
        ("a", 0),
        ("a", 0),
        ("a", 0),
        ("a", 1),
        ("b", 0),
        ("b", 1),
        ("b", 1),
    ]
    expected_index = pd.MultiIndex.from_tuples(repeated_labels, names=["foo", "bar"])
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=expected_index,
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_large():
    """Exploding an already-exploded 256-element Series changes nothing."""
    exploded_once = pd.Series([range(256)]).explode()

    exploded_twice = exploded_once.explode()

    tm.assert_series_equal(exploded_twice, exploded_once)
|
||||
|
||||
|
||||
def test_invert_array():
    """Exploding the row-wise arrays of a tz-aware column gives the column back."""
    frame = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})

    # One extension array per row.
    row_arrays = frame.apply(lambda row: row.array, axis=1)
    result = row_arrays.explode()

    # rename() with no argument yields an unnamed copy of the column.
    expected = frame["a"].rename()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def test_non_object_dtype(s):
    """A Series that is not object dtype is returned unchanged by ``explode``."""
    exploded = s.explode()

    tm.assert_series_equal(exploded, s)
|
||||
|
||||
|
||||
def test_typical_usecase():
    """Split a delimited string column, explode it and join it back by index."""
    records = [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}]
    df = pd.DataFrame(records, columns=["var1", "var2"])

    pieces = df.var1.str.split(",")
    exploded = pieces.explode()
    result = df[["var2"]].join(exploded)

    expected = pd.DataFrame(
        {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
        columns=["var2", "var1"],
        index=[0, 0, 0, 1, 1, 1],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_nested_EA():
    """Two nested tz-aware date ranges explode into one six-element datetime Series."""
    # a nested EA array
    first_half = pd.date_range("20170101", periods=3, tz="UTC")
    second_half = pd.date_range("20170104", periods=3, tz="UTC")
    ser = pd.Series([first_half, second_half])

    result = ser.explode()

    all_days = pd.date_range("20170101", periods=6, tz="UTC")
    expected = pd.Series(all_days, index=[0, 0, 0, 1, 1, 1])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicate_index():
    """``explode`` works when the input labels are already duplicated (GH 28005)."""
    ser = pd.Series([[1, 2], [3, 4]], index=[0, 0])

    result = ser.explode()

    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_ignore_index():
    """With ignore_index=True the exploded rows are labelled 0..3."""
    # GH 34932
    nested = pd.Series([[1, 2], [3, 4]])
    exploded = nested.explode(ignore_index=True)

    fresh_labels = [0, 1, 2, 3]
    expected = pd.Series([1, 2, 3, 4], index=fresh_labels, dtype=object)
    tm.assert_series_equal(exploded, expected)
|
||||
|
||||
|
||||
def test_explode_sets():
    """A set element is exploded into one row per member."""
    # https://github.com/pandas-dev/pandas/issues/35614
    ser = pd.Series([{"a", "b", "c"}], index=[1])

    # Set iteration order is arbitrary, so sort before comparing.
    exploded = ser.explode()
    ordered = exploded.sort_values()

    expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
    tm.assert_series_equal(ordered, expected)
|
||||
|
||||
|
||||
def test_explode_scalars_can_ignore_index():
    """ignore_index=True replaces string labels even when all values are scalars."""
    # https://github.com/pandas-dev/pandas/issues/40487
    labelled = pd.Series([1, 2, 3], index=["a", "b", "c"])
    exploded = labelled.explode(ignore_index=True)

    # Same values, but carried on the default index.
    default_indexed = pd.Series([1, 2, 3])
    tm.assert_series_equal(exploded, default_indexed)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_list_type(ignore_index):
    """Explode an Arrow list<int64> Series, with and without index reset."""
    # GH 53602
    pa = pytest.importorskip("pyarrow")

    list_dtype = pd.ArrowDtype(pa.list_(pa.int64()))
    item_dtype = pd.ArrowDtype(pa.int64())

    # Covers an all-null list, a single item, an empty list, two items, and a
    # null row; the empty list and the null row each yield one null below.
    nested = pd.Series([[None, None], [1], [], [2, 3], None], dtype=list_dtype)
    exploded = nested.explode(ignore_index=ignore_index)

    if ignore_index:
        expected_index = None
    else:
        expected_index = [0, 0, 1, 2, 3, 3, 4]
    expected = pd.Series(
        data=[None, None, 1, None, 2, 3, None],
        index=expected_index,
        dtype=item_dtype,
    )
    tm.assert_series_equal(exploded, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_non_list_type(ignore_index):
    """Explode an Arrow int64 (non-list) Series for both ignore_index settings."""
    pa = pytest.importorskip("pyarrow")

    arrow_int = pd.ArrowDtype(pa.int64())
    ser = pd.Series([1, 2, 3], dtype=arrow_int)
    exploded = ser.explode(ignore_index=ignore_index)

    # The same expectation is used regardless of ignore_index.
    expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2])
    tm.assert_series_equal(exploded, expected)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,38 @@
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetNumericData:
|
||||
def test_get_numeric_data_preserve_dtype(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# get the numeric data
|
||||
obj = Series([1, 2, 3])
|
||||
result = obj._get_numeric_data()
|
||||
tm.assert_series_equal(result, obj)
|
||||
|
||||
# returned object is a shallow copy
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
result.iloc[0] = 0
|
||||
if using_copy_on_write:
|
||||
assert obj.iloc[0] == 1
|
||||
else:
|
||||
assert obj.iloc[0] == 0
|
||||
|
||||
obj = Series([1, "2", 3.0])
|
||||
result = obj._get_numeric_data()
|
||||
expected = Series([], dtype=object, index=Index([], dtype=object))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
obj = Series([True, False, True])
|
||||
result = obj._get_numeric_data()
|
||||
tm.assert_series_equal(result, obj)
|
||||
|
||||
obj = Series(date_range("20130101", periods=3))
|
||||
result = obj._get_numeric_data()
|
||||
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,8 @@
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_head_tail(string_series):
|
||||
tm.assert_series_equal(string_series.head(), string_series[:5])
|
||||
tm.assert_series_equal(string_series.head(0), string_series[0:0])
|
||||
tm.assert_series_equal(string_series.tail(), string_series[-5:])
|
||||
tm.assert_series_equal(string_series.tail(0), string_series[0:0])
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user