venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def sort(request):
    """
    Boolean ``sort`` keyword for concat and DataFrame.append.

    Parametrized over ``True`` and ``False``; the current value is read
    from ``request.param``.
    """
    sort_flag = request.param
    return sort_flag
|
@ -0,0 +1,389 @@
|
||||
import datetime as dt
|
||||
from itertools import combinations
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
concat,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAppend:
|
||||
def test_append(self, sort, float_frame):
|
||||
mixed_frame = float_frame.copy()
|
||||
mixed_frame["foo"] = "bar"
|
||||
|
||||
begin_index = float_frame.index[:5]
|
||||
end_index = float_frame.index[5:]
|
||||
|
||||
begin_frame = float_frame.reindex(begin_index)
|
||||
end_frame = float_frame.reindex(end_index)
|
||||
|
||||
appended = begin_frame._append(end_frame)
|
||||
tm.assert_almost_equal(appended["A"], float_frame["A"])
|
||||
|
||||
del end_frame["A"]
|
||||
partial_appended = begin_frame._append(end_frame, sort=sort)
|
||||
assert "A" in partial_appended
|
||||
|
||||
partial_appended = end_frame._append(begin_frame, sort=sort)
|
||||
assert "A" in partial_appended
|
||||
|
||||
# mixed type handling
|
||||
appended = mixed_frame[:5]._append(mixed_frame[5:])
|
||||
tm.assert_frame_equal(appended, mixed_frame)
|
||||
|
||||
# what to test here
|
||||
mixed_appended = mixed_frame[:5]._append(float_frame[5:], sort=sort)
|
||||
mixed_appended2 = float_frame[:5]._append(mixed_frame[5:], sort=sort)
|
||||
|
||||
# all equal except 'foo' column
|
||||
tm.assert_frame_equal(
|
||||
mixed_appended.reindex(columns=["A", "B", "C", "D"]),
|
||||
mixed_appended2.reindex(columns=["A", "B", "C", "D"]),
|
||||
)
|
||||
|
||||
def test_append_empty(self, float_frame):
|
||||
empty = DataFrame()
|
||||
|
||||
appended = float_frame._append(empty)
|
||||
tm.assert_frame_equal(float_frame, appended)
|
||||
assert appended is not float_frame
|
||||
|
||||
appended = empty._append(float_frame)
|
||||
tm.assert_frame_equal(float_frame, appended)
|
||||
assert appended is not float_frame
|
||||
|
||||
def test_append_overlap_raises(self, float_frame):
|
||||
msg = "Indexes have overlapping values"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
float_frame._append(float_frame, verify_integrity=True)
|
||||
|
||||
def test_append_new_columns(self):
|
||||
# see gh-6129: new columns
|
||||
df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}})
|
||||
row = Series([5, 6, 7], index=["a", "b", "c"], name="z")
|
||||
expected = DataFrame(
|
||||
{
|
||||
"a": {"x": 1, "y": 2, "z": 5},
|
||||
"b": {"x": 3, "y": 4, "z": 6},
|
||||
"c": {"z": 7},
|
||||
}
|
||||
)
|
||||
result = df._append(row)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_append_length0_frame(self, sort):
|
||||
df = DataFrame(columns=["A", "B", "C"])
|
||||
df3 = DataFrame(index=[0, 1], columns=["A", "B"])
|
||||
df5 = df._append(df3, sort=sort)
|
||||
|
||||
expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
|
||||
tm.assert_frame_equal(df5, expected)
|
||||
|
||||
def test_append_records(self):
|
||||
arr1 = np.zeros((2,), dtype=("i4,f4,S10"))
|
||||
arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
|
||||
|
||||
arr2 = np.zeros((3,), dtype=("i4,f4,S10"))
|
||||
arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")]
|
||||
|
||||
df1 = DataFrame(arr1)
|
||||
df2 = DataFrame(arr2)
|
||||
|
||||
result = df1._append(df2, ignore_index=True)
|
||||
expected = DataFrame(np.concatenate((arr1, arr2)))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# rewrite sort fixture, since we also want to test default of None
|
||||
def test_append_sorts(self, sort):
|
||||
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
|
||||
df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])
|
||||
|
||||
result = df1._append(df2, sort=sort)
|
||||
|
||||
# for None / True
|
||||
expected = DataFrame(
|
||||
{"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]},
|
||||
columns=["a", "b", "c"],
|
||||
)
|
||||
if sort is False:
|
||||
expected = expected[["b", "a", "c"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_append_different_columns(self, sort):
|
||||
df = DataFrame(
|
||||
{
|
||||
"bools": np.random.default_rng(2).standard_normal(10) > 0,
|
||||
"ints": np.random.default_rng(2).integers(0, 10, 10),
|
||||
"floats": np.random.default_rng(2).standard_normal(10),
|
||||
"strings": ["foo", "bar"] * 5,
|
||||
}
|
||||
)
|
||||
|
||||
a = df[:5].loc[:, ["bools", "ints", "floats"]]
|
||||
b = df[5:].loc[:, ["strings", "ints", "floats"]]
|
||||
|
||||
appended = a._append(b, sort=sort)
|
||||
assert isna(appended["strings"][0:4]).all()
|
||||
assert isna(appended["bools"][5:]).all()
|
||||
|
||||
def test_append_many(self, sort, float_frame):
|
||||
chunks = [
|
||||
float_frame[:5],
|
||||
float_frame[5:10],
|
||||
float_frame[10:15],
|
||||
float_frame[15:],
|
||||
]
|
||||
|
||||
result = chunks[0]._append(chunks[1:])
|
||||
tm.assert_frame_equal(result, float_frame)
|
||||
|
||||
chunks[-1] = chunks[-1].copy()
|
||||
chunks[-1]["foo"] = "bar"
|
||||
result = chunks[0]._append(chunks[1:], sort=sort)
|
||||
tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
|
||||
assert (result["foo"][15:] == "bar").all()
|
||||
assert result["foo"][:15].isna().all()
|
||||
|
||||
def test_append_preserve_index_name(self):
|
||||
# #980
|
||||
df1 = DataFrame(columns=["A", "B", "C"])
|
||||
df1 = df1.set_index(["A"])
|
||||
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
|
||||
df2 = df2.set_index(["A"])
|
||||
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df1._append(df2)
|
||||
assert result.index.name == "A"
|
||||
|
||||
indexes_can_append = [
|
||||
pd.RangeIndex(3),
|
||||
Index([4, 5, 6]),
|
||||
Index([4.5, 5.5, 6.5]),
|
||||
Index(list("abc")),
|
||||
pd.CategoricalIndex("A B C".split()),
|
||||
pd.CategoricalIndex("D E F".split(), ordered=True),
|
||||
pd.IntervalIndex.from_breaks([7, 8, 9, 10]),
|
||||
pd.DatetimeIndex(
|
||||
[
|
||||
dt.datetime(2013, 1, 3, 0, 0),
|
||||
dt.datetime(2013, 1, 3, 6, 10),
|
||||
dt.datetime(2013, 1, 3, 7, 12),
|
||||
]
|
||||
),
|
||||
pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()]),
|
||||
]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index", indexes_can_append, ids=lambda x: type(x).__name__
|
||||
)
|
||||
def test_append_same_columns_type(self, index):
|
||||
# GH18359
|
||||
|
||||
# df wider than ser
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
|
||||
ser_index = index[:2]
|
||||
ser = Series([7, 8], index=ser_index, name=2)
|
||||
result = df._append(ser)
|
||||
expected = DataFrame(
|
||||
[[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
|
||||
)
|
||||
# integer dtype is preserved for columns present in ser.index
|
||||
assert expected.dtypes.iloc[0].kind == "i"
|
||||
assert expected.dtypes.iloc[1].kind == "i"
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# ser wider than df
|
||||
ser_index = index
|
||||
index = index[:2]
|
||||
df = DataFrame([[1, 2], [4, 5]], columns=index)
|
||||
ser = Series([7, 8, 9], index=ser_index, name=2)
|
||||
result = df._append(ser)
|
||||
expected = DataFrame(
|
||||
[[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
|
||||
index=[0, 1, 2],
|
||||
columns=ser_index,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"df_columns, series_index",
|
||||
combinations(indexes_can_append, r=2),
|
||||
ids=lambda x: type(x).__name__,
|
||||
)
|
||||
def test_append_different_columns_types(self, df_columns, series_index):
|
||||
# GH18359
|
||||
# See also test 'test_append_different_columns_types_raises' below
|
||||
# for errors raised when appending
|
||||
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
|
||||
ser = Series([7, 8, 9], index=series_index, name=2)
|
||||
|
||||
result = df._append(ser)
|
||||
idx_diff = ser.index.difference(df_columns)
|
||||
combined_columns = Index(df_columns.tolist()).append(idx_diff)
|
||||
expected = DataFrame(
|
||||
[
|
||||
[1.0, 2.0, 3.0, np.nan, np.nan, np.nan],
|
||||
[4, 5, 6, np.nan, np.nan, np.nan],
|
||||
[np.nan, np.nan, np.nan, 7, 8, 9],
|
||||
],
|
||||
index=[0, 1, 2],
|
||||
columns=combined_columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_append_dtype_coerce(self, sort):
|
||||
# GH 4993
|
||||
# appending with datetime will incorrectly convert datetime64
|
||||
|
||||
df1 = DataFrame(
|
||||
index=[1, 2],
|
||||
data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)],
|
||||
columns=["start_time"],
|
||||
)
|
||||
df2 = DataFrame(
|
||||
index=[4, 5],
|
||||
data=[
|
||||
[dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)],
|
||||
[dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)],
|
||||
],
|
||||
columns=["start_time", "end_time"],
|
||||
)
|
||||
|
||||
expected = concat(
|
||||
[
|
||||
Series(
|
||||
[
|
||||
pd.NaT,
|
||||
pd.NaT,
|
||||
dt.datetime(2013, 1, 3, 6, 10),
|
||||
dt.datetime(2013, 1, 4, 7, 10),
|
||||
],
|
||||
name="end_time",
|
||||
),
|
||||
Series(
|
||||
[
|
||||
dt.datetime(2013, 1, 1, 0, 0),
|
||||
dt.datetime(2013, 1, 2, 0, 0),
|
||||
dt.datetime(2013, 1, 3, 0, 0),
|
||||
dt.datetime(2013, 1, 4, 0, 0),
|
||||
],
|
||||
name="start_time",
|
||||
),
|
||||
],
|
||||
axis=1,
|
||||
sort=sort,
|
||||
)
|
||||
result = df1._append(df2, ignore_index=True, sort=sort)
|
||||
if sort:
|
||||
expected = expected[["end_time", "start_time"]]
|
||||
else:
|
||||
expected = expected[["start_time", "end_time"]]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_append_missing_column_proper_upcast(self, sort):
|
||||
df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
|
||||
df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})
|
||||
|
||||
appended = df1._append(df2, ignore_index=True, sort=sort)
|
||||
assert appended["A"].dtype == "f8"
|
||||
assert appended["B"].dtype == "O"
|
||||
|
||||
def test_append_empty_frame_to_series_with_dateutil_tz(self):
|
||||
# GH 23682
|
||||
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
|
||||
ser = Series({"a": 1.0, "b": 2.0, "date": date})
|
||||
df = DataFrame(columns=["c", "d"])
|
||||
result_a = df._append(ser, ignore_index=True)
|
||||
expected = DataFrame(
|
||||
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
|
||||
)
|
||||
# These columns get cast to object after append
|
||||
expected["c"] = expected["c"].astype(object)
|
||||
expected["d"] = expected["d"].astype(object)
|
||||
tm.assert_frame_equal(result_a, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
[[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
|
||||
)
|
||||
expected["c"] = expected["c"].astype(object)
|
||||
expected["d"] = expected["d"].astype(object)
|
||||
result_b = result_a._append(ser, ignore_index=True)
|
||||
tm.assert_frame_equal(result_b, expected)
|
||||
|
||||
result = df._append([ser, ser], ignore_index=True)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_append_empty_tz_frame_with_datetime64ns(self, using_array_manager):
|
||||
# https://github.com/pandas-dev/pandas/issues/35460
|
||||
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
|
||||
|
||||
# pd.NaT gets inferred as tz-naive, so append result is tz-naive
|
||||
result = df._append({"a": pd.NaT}, ignore_index=True)
|
||||
if using_array_manager:
|
||||
expected = DataFrame({"a": [pd.NaT]}, dtype=object)
|
||||
else:
|
||||
expected = DataFrame({"a": [np.nan]}, dtype=object)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# also test with typed value to append
|
||||
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
|
||||
other = Series({"a": pd.NaT}, dtype="datetime64[ns]")
|
||||
result = df._append(other, ignore_index=True)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# mismatched tz
|
||||
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
|
||||
result = df._append(other, ignore_index=True)
|
||||
expected = DataFrame({"a": [pd.NaT]}).astype(object)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
|
||||
)
|
||||
@pytest.mark.parametrize("val", [1, "NaT"])
|
||||
def test_append_empty_frame_with_timedelta64ns_nat(
|
||||
self, dtype_str, val, using_array_manager
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/35460
|
||||
df = DataFrame(columns=["a"]).astype(dtype_str)
|
||||
|
||||
other = DataFrame({"a": [np.timedelta64(val, "ns")]})
|
||||
result = df._append(other, ignore_index=True)
|
||||
|
||||
expected = other.astype(object)
|
||||
if isinstance(val, str) and dtype_str != "int64" and not using_array_manager:
|
||||
# TODO: expected used to be `other.astype(object)` which is a more
|
||||
# reasonable result. This was changed when tightening
|
||||
# assert_frame_equal's treatment of mismatched NAs to match the
|
||||
# existing behavior.
|
||||
expected = DataFrame({"a": [np.nan]}, dtype=object)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
|
||||
)
|
||||
@pytest.mark.parametrize("val", [1, "NaT"])
|
||||
def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val):
|
||||
# https://github.com/pandas-dev/pandas/issues/35460
|
||||
df = DataFrame({"a": pd.array([1], dtype=dtype_str)})
|
||||
|
||||
other = DataFrame({"a": [np.timedelta64(val, "ns")]})
|
||||
result = df._append(other, ignore_index=True)
|
||||
|
||||
expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object)
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,753 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        ("bool", [True, False, True]),
        ("int64", [1, 2, 3]),
        ("float64", [1.1, np.nan, 3.3]),
        ("category", Categorical(["X", "Y", "Z"])),
        ("object", ["a", "b", "c"]),
        (
            "datetime64[ns]",
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
            ],
        ),
        (
            "datetime64[ns, US/Eastern]",
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
            ],
        ),
        (
            "timedelta64[ns]",
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
            ],
        ),
        (
            "period[M]",
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Period("2011-03", freq="M"),
            ],
        ),
    ]
)
def item(request):
    """
    Pair of (dtype name, three sample values) for one dtype.

    Parametrized over the dtype names listed in ``params``; the values are
    plain lists, except for "category" where they are a ``Categorical``.
    """
    dtype_name, values = request.param
    return dtype_name, values
|
||||
|
||||
|
||||
@pytest.fixture
def item2(item):
    """
    Second handle on the ``item`` fixture value.

    NOTE(review): this fixture requests ``item`` and returns it unchanged, so
    a test asking for both ``item`` and ``item2`` presumably receives the same
    pair twice rather than a cross product -- confirm that is intended.
    """
    return item
|
||||
|
||||
|
||||
class TestConcatAppendCommon:
|
||||
"""
|
||||
Test common dtype coercion rules between concat and append.
|
||||
"""
|
||||
|
||||
def test_dtypes(self, item, index_or_series, using_infer_string):
|
||||
# to confirm test case covers intended dtypes
|
||||
typ, vals = item
|
||||
obj = index_or_series(vals)
|
||||
if typ == "object" and using_infer_string:
|
||||
typ = "string"
|
||||
if isinstance(obj, Index):
|
||||
assert obj.dtype == typ
|
||||
elif isinstance(obj, Series):
|
||||
if typ.startswith("period"):
|
||||
assert obj.dtype == "Period[M]"
|
||||
else:
|
||||
assert obj.dtype == typ
|
||||
|
||||
def test_concatlike_same_dtypes(self, item):
|
||||
# GH 13660
|
||||
typ1, vals1 = item
|
||||
|
||||
vals2 = vals1
|
||||
vals3 = vals1
|
||||
|
||||
if typ1 == "category":
|
||||
exp_data = Categorical(list(vals1) + list(vals2))
|
||||
exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
|
||||
else:
|
||||
exp_data = vals1 + vals2
|
||||
exp_data3 = vals1 + vals2 + vals3
|
||||
|
||||
# ----- Index ----- #
|
||||
|
||||
# index.append
|
||||
res = Index(vals1).append(Index(vals2))
|
||||
exp = Index(exp_data)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
# 3 elements
|
||||
res = Index(vals1).append([Index(vals2), Index(vals3)])
|
||||
exp = Index(exp_data3)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
# index.append name mismatch
|
||||
i1 = Index(vals1, name="x")
|
||||
i2 = Index(vals2, name="y")
|
||||
res = i1.append(i2)
|
||||
exp = Index(exp_data)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
# index.append name match
|
||||
i1 = Index(vals1, name="x")
|
||||
i2 = Index(vals2, name="x")
|
||||
res = i1.append(i2)
|
||||
exp = Index(exp_data, name="x")
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
# cannot append non-index
|
||||
with pytest.raises(TypeError, match="all inputs must be Index"):
|
||||
Index(vals1).append(vals2)
|
||||
|
||||
with pytest.raises(TypeError, match="all inputs must be Index"):
|
||||
Index(vals1).append([Index(vals2), vals3])
|
||||
|
||||
# ----- Series ----- #
|
||||
|
||||
# series.append
|
||||
res = Series(vals1)._append(Series(vals2), ignore_index=True)
|
||||
exp = Series(exp_data)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
# concat
|
||||
res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
# 3 elements
|
||||
res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
|
||||
exp = Series(exp_data3)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
res = pd.concat(
|
||||
[Series(vals1), Series(vals2), Series(vals3)],
|
||||
ignore_index=True,
|
||||
)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# name mismatch
|
||||
s1 = Series(vals1, name="x")
|
||||
s2 = Series(vals2, name="y")
|
||||
res = s1._append(s2, ignore_index=True)
|
||||
exp = Series(exp_data)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
res = pd.concat([s1, s2], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
# name match
|
||||
s1 = Series(vals1, name="x")
|
||||
s2 = Series(vals2, name="x")
|
||||
res = s1._append(s2, ignore_index=True)
|
||||
exp = Series(exp_data, name="x")
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
res = pd.concat([s1, s2], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
# cannot append non-index
|
||||
msg = (
|
||||
r"cannot concatenate object of type '.+'; "
|
||||
"only Series and DataFrame objs are valid"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
Series(vals1)._append(vals2)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
Series(vals1)._append([Series(vals2), vals3])
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.concat([Series(vals1), vals2])
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.concat([Series(vals1), Series(vals2), vals3])
|
||||
|
||||
def test_concatlike_dtypes_coercion(self, item, item2, request):
|
||||
# GH 13660
|
||||
typ1, vals1 = item
|
||||
typ2, vals2 = item2
|
||||
|
||||
vals3 = vals2
|
||||
|
||||
# basically infer
|
||||
exp_index_dtype = None
|
||||
exp_series_dtype = None
|
||||
|
||||
if typ1 == typ2:
|
||||
pytest.skip("same dtype is tested in test_concatlike_same_dtypes")
|
||||
elif typ1 == "category" or typ2 == "category":
|
||||
pytest.skip("categorical type tested elsewhere")
|
||||
|
||||
# specify expected dtype
|
||||
if typ1 == "bool" and typ2 in ("int64", "float64"):
|
||||
# series coerces to numeric based on numpy rule
|
||||
# index doesn't because bool is object dtype
|
||||
exp_series_dtype = typ2
|
||||
mark = pytest.mark.xfail(reason="GH#39187 casting to object")
|
||||
request.applymarker(mark)
|
||||
elif typ2 == "bool" and typ1 in ("int64", "float64"):
|
||||
exp_series_dtype = typ1
|
||||
mark = pytest.mark.xfail(reason="GH#39187 casting to object")
|
||||
request.applymarker(mark)
|
||||
elif typ1 in {"datetime64[ns, US/Eastern]", "timedelta64[ns]"} or typ2 in {
|
||||
"datetime64[ns, US/Eastern]",
|
||||
"timedelta64[ns]",
|
||||
}:
|
||||
exp_index_dtype = object
|
||||
exp_series_dtype = object
|
||||
|
||||
exp_data = vals1 + vals2
|
||||
exp_data3 = vals1 + vals2 + vals3
|
||||
|
||||
# ----- Index ----- #
|
||||
|
||||
# index.append
|
||||
# GH#39817
|
||||
res = Index(vals1).append(Index(vals2))
|
||||
exp = Index(exp_data, dtype=exp_index_dtype)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
# 3 elements
|
||||
res = Index(vals1).append([Index(vals2), Index(vals3)])
|
||||
exp = Index(exp_data3, dtype=exp_index_dtype)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
# ----- Series ----- #
|
||||
|
||||
# series._append
|
||||
# GH#39817
|
||||
res = Series(vals1)._append(Series(vals2), ignore_index=True)
|
||||
exp = Series(exp_data, dtype=exp_series_dtype)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
# concat
|
||||
# GH#39817
|
||||
res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp, check_index_type=True)
|
||||
|
||||
# 3 elements
|
||||
# GH#39817
|
||||
res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
|
||||
exp = Series(exp_data3, dtype=exp_series_dtype)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# GH#39817
|
||||
res = pd.concat(
|
||||
[Series(vals1), Series(vals2), Series(vals3)],
|
||||
ignore_index=True,
|
||||
)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_concatlike_common_coerce_to_pandas_object(self):
|
||||
# GH 13626
|
||||
# result must be Timestamp/Timedelta, not datetime.datetime/timedelta
|
||||
dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
|
||||
tdi = pd.TimedeltaIndex(["1 days", "2 days"])
|
||||
|
||||
exp = Index(
|
||||
[
|
||||
pd.Timestamp("2011-01-01"),
|
||||
pd.Timestamp("2011-01-02"),
|
||||
pd.Timedelta("1 days"),
|
||||
pd.Timedelta("2 days"),
|
||||
]
|
||||
)
|
||||
|
||||
res = dti.append(tdi)
|
||||
tm.assert_index_equal(res, exp)
|
||||
assert isinstance(res[0], pd.Timestamp)
|
||||
assert isinstance(res[-1], pd.Timedelta)
|
||||
|
||||
dts = Series(dti)
|
||||
tds = Series(tdi)
|
||||
res = dts._append(tds)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
assert isinstance(res.iloc[0], pd.Timestamp)
|
||||
assert isinstance(res.iloc[-1], pd.Timedelta)
|
||||
|
||||
res = pd.concat([dts, tds])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
assert isinstance(res.iloc[0], pd.Timestamp)
|
||||
assert isinstance(res.iloc[-1], pd.Timedelta)
|
||||
|
||||
def test_concatlike_datetimetz(self, tz_aware_fixture):
|
||||
tz = tz_aware_fixture
|
||||
# GH 7795
|
||||
dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
|
||||
dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)
|
||||
|
||||
exp = pd.DatetimeIndex(
|
||||
["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
|
||||
)
|
||||
|
||||
res = dti1.append(dti2)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
dts1 = Series(dti1)
|
||||
dts2 = Series(dti2)
|
||||
res = dts1._append(dts2)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([dts1, dts2])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
|
||||
def test_concatlike_datetimetz_short(self, tz):
|
||||
# GH#7795
|
||||
ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
|
||||
ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
|
||||
df1 = DataFrame(0, index=ix1, columns=["A", "B"])
|
||||
df2 = DataFrame(0, index=ix2, columns=["A", "B"])
|
||||
|
||||
exp_idx = pd.DatetimeIndex(
|
||||
["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
|
||||
tz=tz,
|
||||
).as_unit("ns")
|
||||
exp = DataFrame(0, index=exp_idx, columns=["A", "B"])
|
||||
|
||||
tm.assert_frame_equal(df1._append(df2), exp)
|
||||
tm.assert_frame_equal(pd.concat([df1, df2]), exp)
|
||||
|
||||
def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
|
||||
tz = tz_aware_fixture
|
||||
# GH 13660
|
||||
|
||||
# different tz coerces to object
|
||||
dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
|
||||
dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])
|
||||
|
||||
exp = Index(
|
||||
[
|
||||
pd.Timestamp("2011-01-01", tz=tz),
|
||||
pd.Timestamp("2011-01-02", tz=tz),
|
||||
pd.Timestamp("2012-01-01"),
|
||||
pd.Timestamp("2012-01-02"),
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
|
||||
res = dti1.append(dti2)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
dts1 = Series(dti1)
|
||||
dts2 = Series(dti2)
|
||||
res = dts1._append(dts2)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([dts1, dts2])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
# different tz
|
||||
dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")
|
||||
|
||||
exp = Index(
|
||||
[
|
||||
pd.Timestamp("2011-01-01", tz=tz),
|
||||
pd.Timestamp("2011-01-02", tz=tz),
|
||||
pd.Timestamp("2012-01-01", tz="US/Pacific"),
|
||||
pd.Timestamp("2012-01-02", tz="US/Pacific"),
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
|
||||
res = dti1.append(dti3)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
dts1 = Series(dti1)
|
||||
dts3 = Series(dti3)
|
||||
res = dts1._append(dts3)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([dts1, dts3])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
def test_concatlike_common_period(self):
|
||||
# GH 13660
|
||||
pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
|
||||
pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")
|
||||
|
||||
exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")
|
||||
|
||||
res = pi1.append(pi2)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
ps1 = Series(pi1)
|
||||
ps2 = Series(pi2)
|
||||
res = ps1._append(ps2)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([ps1, ps2])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
def test_concatlike_common_period_diff_freq_to_object(self):
|
||||
# GH 13221
|
||||
pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
|
||||
pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")
|
||||
|
||||
exp = Index(
|
||||
[
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2011-02", freq="M"),
|
||||
pd.Period("2012-01-01", freq="D"),
|
||||
pd.Period("2012-02-01", freq="D"),
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
|
||||
res = pi1.append(pi2)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
ps1 = Series(pi1)
|
||||
ps2 = Series(pi2)
|
||||
res = ps1._append(ps2)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([ps1, ps2])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
def test_concatlike_common_period_mixed_dt_to_object(self):
|
||||
# GH 13221
|
||||
# different datetimelike
|
||||
pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
|
||||
tdi = pd.TimedeltaIndex(["1 days", "2 days"])
|
||||
exp = Index(
|
||||
[
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2011-02", freq="M"),
|
||||
pd.Timedelta("1 days"),
|
||||
pd.Timedelta("2 days"),
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
|
||||
res = pi1.append(tdi)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
ps1 = Series(pi1)
|
||||
tds = Series(tdi)
|
||||
res = ps1._append(tds)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([ps1, tds])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
# inverse
|
||||
exp = Index(
|
||||
[
|
||||
pd.Timedelta("1 days"),
|
||||
pd.Timedelta("2 days"),
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2011-02", freq="M"),
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
|
||||
res = tdi.append(pi1)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
ps1 = Series(pi1)
|
||||
tds = Series(tdi)
|
||||
res = tds._append(ps1)
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
res = pd.concat([tds, ps1])
|
||||
tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
|
||||
|
||||
def test_concat_categorical(self):
|
||||
# GH 13524
|
||||
|
||||
# same categories -> category
|
||||
s1 = Series([1, 2, np.nan], dtype="category")
|
||||
s2 = Series([2, 1, 2], dtype="category")
|
||||
|
||||
exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
# partially different categories => not-category
|
||||
s1 = Series([3, 2], dtype="category")
|
||||
s2 = Series([2, 1], dtype="category")
|
||||
|
||||
exp = Series([3, 2, 2, 1])
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
# completely different categories (same dtype) => not-category
|
||||
s1 = Series([10, 11, np.nan], dtype="category")
|
||||
s2 = Series([np.nan, 1, 3, 2], dtype="category")
|
||||
|
||||
exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64)
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
def test_union_categorical_same_categories_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/19096
|
||||
a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
|
||||
b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
|
||||
result = pd.concat([a, b], ignore_index=True)
|
||||
expected = Series(
|
||||
Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_categorical_coercion(self):
|
||||
# GH 13524
|
||||
|
||||
# category + not-category => not-category
|
||||
s1 = Series([1, 2, np.nan], dtype="category")
|
||||
s2 = Series([2, 1, 2])
|
||||
|
||||
exp = Series([1, 2, np.nan, 2, 1, 2], dtype=np.float64)
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
# result shouldn't be affected by 1st elem dtype
|
||||
exp = Series([2, 1, 2, 1, 2, np.nan], dtype=np.float64)
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
# all values are not in category => not-category
|
||||
s1 = Series([3, 2], dtype="category")
|
||||
s2 = Series([2, 1])
|
||||
|
||||
exp = Series([3, 2, 2, 1])
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
exp = Series([2, 1, 3, 2])
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
# completely different categories => not-category
|
||||
s1 = Series([10, 11, np.nan], dtype="category")
|
||||
s2 = Series([1, 3, 2])
|
||||
|
||||
exp = Series([10, 11, np.nan, 1, 3, 2], dtype=np.float64)
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
exp = Series([1, 3, 2, 10, 11, np.nan], dtype=np.float64)
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
# different dtype => not-category
|
||||
s1 = Series([10, 11, np.nan], dtype="category")
|
||||
s2 = Series(["a", "b", "c"])
|
||||
|
||||
exp = Series([10, 11, np.nan, "a", "b", "c"])
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
exp = Series(["a", "b", "c", 10, 11, np.nan])
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
# if normal series only contains NaN-likes => not-category
|
||||
s1 = Series([10, 11], dtype="category")
|
||||
s2 = Series([np.nan, np.nan, np.nan])
|
||||
|
||||
exp = Series([10, 11, np.nan, np.nan, np.nan])
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
exp = Series([np.nan, np.nan, np.nan, 10, 11])
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
def test_concat_categorical_3elem_coercion(self):
|
||||
# GH 13524
|
||||
|
||||
# mixed dtypes => not-category
|
||||
s1 = Series([1, 2, np.nan], dtype="category")
|
||||
s2 = Series([2, 1, 2], dtype="category")
|
||||
s3 = Series([1, 2, 1, 2, np.nan])
|
||||
|
||||
exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
|
||||
tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
|
||||
|
||||
exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
|
||||
tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
|
||||
|
||||
# values are all in either category => not-category
|
||||
s1 = Series([4, 5, 6], dtype="category")
|
||||
s2 = Series([1, 2, 3], dtype="category")
|
||||
s3 = Series([1, 3, 4])
|
||||
|
||||
exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
|
||||
tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
|
||||
|
||||
exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
|
||||
tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
|
||||
|
||||
# values are all in either category => not-category
|
||||
s1 = Series([4, 5, 6], dtype="category")
|
||||
s2 = Series([1, 2, 3], dtype="category")
|
||||
s3 = Series([10, 11, 12])
|
||||
|
||||
exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
|
||||
tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
|
||||
|
||||
exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
|
||||
tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
|
||||
|
||||
def test_concat_categorical_multi_coercion(self):
|
||||
# GH 13524
|
||||
|
||||
s1 = Series([1, 3], dtype="category")
|
||||
s2 = Series([3, 4], dtype="category")
|
||||
s3 = Series([2, 3])
|
||||
s4 = Series([2, 2], dtype="category")
|
||||
s5 = Series([1, np.nan])
|
||||
s6 = Series([1, 3, 2], dtype="category")
|
||||
|
||||
# mixed dtype, values are all in categories => not-category
|
||||
exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
|
||||
res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
res = s1._append([s2, s3, s4, s5, s6], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
|
||||
res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
res = s6._append([s5, s4, s3, s2, s1], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_concat_categorical_ordered(self):
|
||||
# GH 13524
|
||||
|
||||
s1 = Series(Categorical([1, 2, np.nan], ordered=True))
|
||||
s2 = Series(Categorical([2, 1, 2], ordered=True))
|
||||
|
||||
exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
|
||||
tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp)
|
||||
|
||||
def test_concat_categorical_coercion_nan(self):
|
||||
# GH 13524
|
||||
|
||||
# some edge cases
|
||||
# category + not-category => not category
|
||||
s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
|
||||
s2 = Series([np.nan, 1])
|
||||
|
||||
exp = Series([np.nan, np.nan, np.nan, 1])
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
s1 = Series([1, np.nan], dtype="category")
|
||||
s2 = Series([np.nan, np.nan])
|
||||
|
||||
exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
# mixed dtype, all nan-likes => not-category
|
||||
s1 = Series([np.nan, np.nan], dtype="category")
|
||||
s2 = Series([np.nan, np.nan])
|
||||
|
||||
exp = Series([np.nan, np.nan, np.nan, np.nan])
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
# all category nan-likes => category
|
||||
s1 = Series([np.nan, np.nan], dtype="category")
|
||||
s2 = Series([np.nan, np.nan], dtype="category")
|
||||
|
||||
exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")
|
||||
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
def test_concat_categorical_empty(self):
|
||||
# GH 13524
|
||||
|
||||
s1 = Series([], dtype="category")
|
||||
s2 = Series([1, 2], dtype="category")
|
||||
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
|
||||
|
||||
s1 = Series([], dtype="category")
|
||||
s2 = Series([], dtype="category")
|
||||
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
|
||||
|
||||
s1 = Series([], dtype="category")
|
||||
s2 = Series([], dtype="object")
|
||||
|
||||
# different dtype => not-category
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
|
||||
|
||||
s1 = Series([], dtype="category")
|
||||
s2 = Series([np.nan, np.nan])
|
||||
|
||||
# empty Series is ignored
|
||||
exp = Series([np.nan, np.nan])
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
|
||||
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
|
||||
|
||||
def test_categorical_concat_append(self):
|
||||
cat = Categorical(["a", "b"], categories=["a", "b"])
|
||||
vals = [1, 2]
|
||||
df = DataFrame({"cats": cat, "vals": vals})
|
||||
cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
|
||||
vals2 = [1, 2, 1, 2]
|
||||
exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))
|
||||
|
||||
tm.assert_frame_equal(pd.concat([df, df]), exp)
|
||||
tm.assert_frame_equal(df._append(df), exp)
|
||||
|
||||
# GH 13524 can concat different categories
|
||||
cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
|
||||
vals3 = [1, 2]
|
||||
df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
|
||||
|
||||
res = pd.concat([df, df_different_categories], ignore_index=True)
|
||||
exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df._append(df_different_categories, ignore_index=True)
|
||||
tm.assert_frame_equal(res, exp)
|
@ -0,0 +1,273 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalConcat:
|
||||
def test_categorical_concat(self, sort):
|
||||
# See GH 10177
|
||||
df1 = DataFrame(
|
||||
np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"]
|
||||
)
|
||||
|
||||
df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"])
|
||||
|
||||
cat_values = ["one", "one", "two", "one", "two", "two", "one"]
|
||||
df2["h"] = Series(Categorical(cat_values))
|
||||
|
||||
res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
|
||||
exp = DataFrame(
|
||||
{
|
||||
"a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
|
||||
"b": [
|
||||
1,
|
||||
4,
|
||||
7,
|
||||
10,
|
||||
13,
|
||||
16,
|
||||
np.nan,
|
||||
np.nan,
|
||||
np.nan,
|
||||
np.nan,
|
||||
np.nan,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
"c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
|
||||
"h": [None] * 6 + cat_values,
|
||||
}
|
||||
)
|
||||
exp["h"] = exp["h"].astype(df2["h"].dtype)
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_categorical_concat_dtypes(self, using_infer_string):
|
||||
# GH8143
|
||||
index = ["cat", "obj", "num"]
|
||||
cat = Categorical(["a", "b", "c"])
|
||||
obj = Series(["a", "b", "c"])
|
||||
num = Series([1, 2, 3])
|
||||
df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
|
||||
|
||||
result = df.dtypes == (
|
||||
object if not using_infer_string else "string[pyarrow_numpy]"
|
||||
)
|
||||
expected = Series([False, True, False], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.dtypes == "int64"
|
||||
expected = Series([False, False, True], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.dtypes == "category"
|
||||
expected = Series([True, False, False], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_categoricalindex(self):
|
||||
# GH 16111, categories that aren't lexsorted
|
||||
categories = [9, 0, 1, 2, 3]
|
||||
|
||||
a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
|
||||
b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
|
||||
c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))
|
||||
|
||||
result = pd.concat([a, b, c], axis=1)
|
||||
|
||||
exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
|
||||
exp = DataFrame(
|
||||
{
|
||||
0: [1, 1, np.nan, np.nan],
|
||||
1: [np.nan, 2, 2, np.nan],
|
||||
2: [np.nan, np.nan, 3, 3],
|
||||
},
|
||||
columns=[0, 1, 2],
|
||||
index=exp_idx,
|
||||
)
|
||||
tm.assert_frame_equal(result, exp)
|
||||
|
||||
def test_categorical_concat_preserve(self):
|
||||
# GH 8641 series concat not preserving category dtype
|
||||
# GH 13524 can concat different categories
|
||||
s = Series(list("abc"), dtype="category")
|
||||
s2 = Series(list("abd"), dtype="category")
|
||||
|
||||
exp = Series(list("abcabd"))
|
||||
res = pd.concat([s, s2], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
exp = Series(list("abcabc"), dtype="category")
|
||||
res = pd.concat([s, s], ignore_index=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category")
|
||||
res = pd.concat([s, s])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
a = Series(np.arange(6, dtype="int64"))
|
||||
b = Series(list("aabbca"))
|
||||
|
||||
df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))})
|
||||
res = pd.concat([df2, df2])
|
||||
exp = DataFrame(
|
||||
{
|
||||
"A": pd.concat([a, a]),
|
||||
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_categorical_index_preserver(self):
|
||||
a = Series(np.arange(6, dtype="int64"))
|
||||
b = Series(list("aabbca"))
|
||||
|
||||
df2 = DataFrame(
|
||||
{"A": a, "B": b.astype(CategoricalDtype(list("cab")))}
|
||||
).set_index("B")
|
||||
result = pd.concat([df2, df2])
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": pd.concat([a, a]),
|
||||
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
|
||||
}
|
||||
).set_index("B")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# wrong categories -> uses concat_compat, which casts to object
|
||||
df3 = DataFrame(
|
||||
{"A": a, "B": Categorical(b, categories=list("abe"))}
|
||||
).set_index("B")
|
||||
result = pd.concat([df2, df3])
|
||||
expected = pd.concat(
|
||||
[
|
||||
df2.set_axis(df2.index.astype(object), axis=0),
|
||||
df3.set_axis(df3.index.astype(object), axis=0),
|
||||
]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_categorical_tz(self):
|
||||
# GH-23816
|
||||
a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific"))
|
||||
b = Series(["a", "b"], dtype="category")
|
||||
result = pd.concat([a, b], ignore_index=True)
|
||||
expected = Series(
|
||||
[
|
||||
pd.Timestamp("2017-01-01", tz="US/Pacific"),
|
||||
pd.Timestamp("2017-01-02", tz="US/Pacific"),
|
||||
"a",
|
||||
"b",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_categorical_datetime(self):
|
||||
# GH-39443
|
||||
df1 = DataFrame(
|
||||
{"x": Series(datetime(2021, 1, 1), index=[0], dtype="category")}
|
||||
)
|
||||
df2 = DataFrame(
|
||||
{"x": Series(datetime(2021, 1, 2), index=[1], dtype="category")}
|
||||
)
|
||||
|
||||
result = pd.concat([df1, df2])
|
||||
expected = DataFrame(
|
||||
{"x": Series([datetime(2021, 1, 1), datetime(2021, 1, 2)])}
|
||||
)
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_concat_categorical_unchanged(self):
|
||||
# GH-12007
|
||||
# test fix for when concat on categorical and float
|
||||
# coerces dtype categorical -> float
|
||||
df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A"))
|
||||
ser = Series([0, 1, 2], index=[0, 1, 3], name="B")
|
||||
result = pd.concat([df, ser], axis=1)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": Series(["a", "b", "c", np.nan], dtype="category"),
|
||||
"B": Series([0, 1, np.nan, 2], dtype="float"),
|
||||
}
|
||||
)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_categorical_concat_gh7864(self):
|
||||
# GH 7864
|
||||
# make sure ordering is preserved
|
||||
df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")})
|
||||
df["grade"] = Categorical(df["raw_grade"])
|
||||
df["grade"].cat.set_categories(["e", "a", "b"])
|
||||
|
||||
df1 = df[0:3]
|
||||
df2 = df[3:]
|
||||
|
||||
tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories)
|
||||
tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories)
|
||||
|
||||
dfx = pd.concat([df1, df2])
|
||||
tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories)
|
||||
|
||||
dfa = df1._append(df2)
|
||||
tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories)
|
||||
|
||||
def test_categorical_index_upcast(self):
|
||||
# GH 17629
|
||||
# test upcasting to object when concatenating on categorical indexes
|
||||
# with non-identical categories
|
||||
|
||||
a = DataFrame({"foo": [1, 2]}, index=Categorical(["foo", "bar"]))
|
||||
b = DataFrame({"foo": [4, 3]}, index=Categorical(["baz", "bar"]))
|
||||
|
||||
res = pd.concat([a, b])
|
||||
exp = DataFrame({"foo": [1, 2, 4, 3]}, index=["foo", "bar", "baz", "bar"])
|
||||
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
a = Series([1, 2], index=Categorical(["foo", "bar"]))
|
||||
b = Series([4, 3], index=Categorical(["baz", "bar"]))
|
||||
|
||||
res = pd.concat([a, b])
|
||||
exp = Series([1, 2, 4, 3], index=["foo", "bar", "baz", "bar"])
|
||||
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
def test_categorical_missing_from_one_frame(self):
|
||||
# GH 25412
|
||||
df1 = DataFrame({"f1": [1, 2, 3]})
|
||||
df2 = DataFrame({"f1": [2, 3, 1], "f2": Series([4, 4, 4]).astype("category")})
|
||||
result = pd.concat([df1, df2], sort=True)
|
||||
dtype = CategoricalDtype([4])
|
||||
expected = DataFrame(
|
||||
{
|
||||
"f1": [1, 2, 3, 2, 3, 1],
|
||||
"f2": Categorical.from_codes([-1, -1, -1, 0, 0, 0], dtype=dtype),
|
||||
},
|
||||
index=[0, 1, 2, 0, 1, 2],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_categorical_same_categories_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/24845
|
||||
|
||||
c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False)
|
||||
c3 = pd.CategoricalIndex(
|
||||
["a", "a", "b", "b"], categories=["a", "b"], ordered=False
|
||||
)
|
||||
|
||||
df1 = DataFrame({"A": [1, 2]}, index=c1)
|
||||
df2 = DataFrame({"A": [3, 4]}, index=c2)
|
||||
|
||||
result = pd.concat((df1, df2))
|
||||
expected = DataFrame({"A": [1, 2, 3, 4]}, index=c3)
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,912 @@
|
||||
from collections import (
|
||||
abc,
|
||||
deque,
|
||||
)
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import SparseArray
|
||||
from pandas.tests.extension.decimal import to_decimal
|
||||
|
||||
|
||||
class TestConcatenate:
|
||||
def test_append_concat(self):
|
||||
# GH#1815
|
||||
d1 = date_range("12/31/1990", "12/31/1999", freq="YE-DEC")
|
||||
d2 = date_range("12/31/2000", "12/31/2009", freq="YE-DEC")
|
||||
|
||||
s1 = Series(np.random.default_rng(2).standard_normal(10), d1)
|
||||
s2 = Series(np.random.default_rng(2).standard_normal(10), d2)
|
||||
|
||||
s1 = s1.to_period()
|
||||
s2 = s2.to_period()
|
||||
|
||||
# drops index
|
||||
result = concat([s1, s2])
|
||||
assert isinstance(result.index, PeriodIndex)
|
||||
assert result.index[0] == s1.index[0]
|
||||
|
||||
def test_concat_copy(self, using_array_manager, using_copy_on_write):
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
|
||||
df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1))
|
||||
df3 = DataFrame({5: "foo"}, index=range(4))
|
||||
|
||||
# These are actual copies.
|
||||
result = concat([df, df2, df3], axis=1, copy=True)
|
||||
|
||||
if not using_copy_on_write:
|
||||
for arr in result._mgr.arrays:
|
||||
assert not any(
|
||||
np.shares_memory(arr, y)
|
||||
for x in [df, df2, df3]
|
||||
for y in x._mgr.arrays
|
||||
)
|
||||
else:
|
||||
for arr in result._mgr.arrays:
|
||||
assert arr.base is not None
|
||||
|
||||
# These are the same.
|
||||
result = concat([df, df2, df3], axis=1, copy=False)
|
||||
|
||||
for arr in result._mgr.arrays:
|
||||
if arr.dtype.kind == "f":
|
||||
assert arr.base is df._mgr.arrays[0].base
|
||||
elif arr.dtype.kind in ["i", "u"]:
|
||||
assert arr.base is df2._mgr.arrays[0].base
|
||||
elif arr.dtype == object:
|
||||
if using_array_manager:
|
||||
# we get the same array object, which has no base
|
||||
assert arr is df3._mgr.arrays[0]
|
||||
else:
|
||||
assert arr.base is not None
|
||||
|
||||
# Float block was consolidated.
|
||||
df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1)))
|
||||
result = concat([df, df2, df3, df4], axis=1, copy=False)
|
||||
for arr in result._mgr.arrays:
|
||||
if arr.dtype.kind == "f":
|
||||
if using_array_manager or using_copy_on_write:
|
||||
# this is a view on some array in either df or df4
|
||||
assert any(
|
||||
np.shares_memory(arr, other)
|
||||
for other in df._mgr.arrays + df4._mgr.arrays
|
||||
)
|
||||
else:
|
||||
# the block was consolidated, so we got a copy anyway
|
||||
assert arr.base is None
|
||||
elif arr.dtype.kind in ["i", "u"]:
|
||||
assert arr.base is df2._mgr.arrays[0].base
|
||||
elif arr.dtype == object:
|
||||
# this is a view on df3
|
||||
assert any(np.shares_memory(arr, other) for other in df3._mgr.arrays)
|
||||
|
||||
def test_concat_with_group_keys(self):
|
||||
# axis=0
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((3, 4)))
|
||||
df2 = DataFrame(np.random.default_rng(2).standard_normal((4, 4)))
|
||||
|
||||
result = concat([df, df2], keys=[0, 1])
|
||||
exp_index = MultiIndex.from_arrays(
|
||||
[[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]]
|
||||
)
|
||||
expected = DataFrame(np.r_[df.values, df2.values], index=exp_index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat([df, df], keys=[0, 1])
|
||||
exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
|
||||
expected = DataFrame(np.r_[df.values, df.values], index=exp_index2)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis=1
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
|
||||
df2 = DataFrame(np.random.default_rng(2).standard_normal((4, 4)))
|
||||
|
||||
result = concat([df, df2], keys=[0, 1], axis=1)
|
||||
expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat([df, df], keys=[0, 1], axis=1)
|
||||
expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_keys_specific_levels(self):
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
|
||||
pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]]
|
||||
level = ["three", "two", "one", "zero"]
|
||||
result = concat(
|
||||
pieces,
|
||||
axis=1,
|
||||
keys=["one", "two", "three"],
|
||||
levels=[level],
|
||||
names=["group_key"],
|
||||
)
|
||||
|
||||
tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
|
||||
tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))
|
||||
|
||||
assert result.columns.names == ["group_key", None]
|
||||
|
||||
@pytest.mark.parametrize("mapping", ["mapping", "dict"])
|
||||
def test_concat_mapping(self, mapping, non_dict_mapping_subclass):
|
||||
constructor = dict if mapping == "dict" else non_dict_mapping_subclass
|
||||
frames = constructor(
|
||||
{
|
||||
"foo": DataFrame(np.random.default_rng(2).standard_normal((4, 3))),
|
||||
"bar": DataFrame(np.random.default_rng(2).standard_normal((4, 3))),
|
||||
"baz": DataFrame(np.random.default_rng(2).standard_normal((4, 3))),
|
||||
"qux": DataFrame(np.random.default_rng(2).standard_normal((4, 3))),
|
||||
}
|
||||
)
|
||||
|
||||
sorted_keys = list(frames.keys())
|
||||
|
||||
result = concat(frames)
|
||||
expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat(frames, axis=1)
|
||||
expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys, axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
keys = ["baz", "foo", "bar"]
|
||||
result = concat(frames, keys=keys)
|
||||
expected = concat([frames[k] for k in keys], keys=keys)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_keys_and_levels(self):
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)))
|
||||
df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)))
|
||||
|
||||
levels = [["foo", "baz"], ["one", "two"]]
|
||||
names = ["first", "second"]
|
||||
result = concat(
|
||||
[df, df2, df, df2],
|
||||
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
|
||||
levels=levels,
|
||||
names=names,
|
||||
)
|
||||
expected = concat([df, df2, df, df2])
|
||||
exp_index = MultiIndex(
|
||||
levels=levels + [[0]],
|
||||
codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]],
|
||||
names=names + [None],
|
||||
)
|
||||
expected.index = exp_index
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# no names
|
||||
result = concat(
|
||||
[df, df2, df, df2],
|
||||
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
|
||||
levels=levels,
|
||||
)
|
||||
assert result.index.names == (None,) * 3
|
||||
|
||||
# no levels
|
||||
result = concat(
|
||||
[df, df2, df, df2],
|
||||
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
|
||||
names=["first", "second"],
|
||||
)
|
||||
assert result.index.names == ("first", "second", None)
|
||||
tm.assert_index_equal(
|
||||
result.index.levels[0], Index(["baz", "foo"], name="first")
|
||||
)
|
||||
|
||||
def test_concat_keys_levels_no_overlap(self):
|
||||
# GH #1406
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)), index=["a"])
|
||||
df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)), index=["b"])
|
||||
|
||||
msg = "Values not found in passed level"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
concat([df, df], keys=["one", "two"], levels=[["foo", "bar", "baz"]])
|
||||
|
||||
msg = "Key one not in level"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]])
|
||||
|
||||
def test_crossed_dtypes_weird_corner(self):
|
||||
columns = ["A", "B", "C", "D"]
|
||||
df1 = DataFrame(
|
||||
{
|
||||
"A": np.array([1, 2, 3, 4], dtype="f8"),
|
||||
"B": np.array([1, 2, 3, 4], dtype="i8"),
|
||||
"C": np.array([1, 2, 3, 4], dtype="f8"),
|
||||
"D": np.array([1, 2, 3, 4], dtype="i8"),
|
||||
},
|
||||
columns=columns,
|
||||
)
|
||||
|
||||
df2 = DataFrame(
|
||||
{
|
||||
"A": np.array([1, 2, 3, 4], dtype="i8"),
|
||||
"B": np.array([1, 2, 3, 4], dtype="f8"),
|
||||
"C": np.array([1, 2, 3, 4], dtype="i8"),
|
||||
"D": np.array([1, 2, 3, 4], dtype="f8"),
|
||||
},
|
||||
columns=columns,
|
||||
)
|
||||
|
||||
appended = concat([df1, df2], ignore_index=True)
|
||||
expected = DataFrame(
|
||||
np.concatenate([df1.values, df2.values], axis=0), columns=columns
|
||||
)
|
||||
tm.assert_frame_equal(appended, expected)
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)), index=["a"])
|
||||
df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)), index=["b"])
|
||||
result = concat([df, df2], keys=["one", "two"], names=["first", "second"])
|
||||
assert result.index.names == ("first", "second")
|
||||
|
||||
def test_with_mixed_tuples(self, sort):
|
||||
# 10697
|
||||
# columns have mixed tuples, so handle properly
|
||||
df1 = DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2))
|
||||
df2 = DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2))
|
||||
|
||||
# it works
|
||||
concat([df1, df2], sort=sort)
|
||||
|
||||
def test_concat_mixed_objs_columns(self):
|
||||
# Test column-wise concat for mixed series/frames (axis=1)
|
||||
# G2385
|
||||
|
||||
index = date_range("01-Jan-2013", periods=10, freq="h")
|
||||
arr = np.arange(10, dtype="int64")
|
||||
s1 = Series(arr, index=index)
|
||||
s2 = Series(arr, index=index)
|
||||
df = DataFrame(arr.reshape(-1, 1), index=index)
|
||||
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 0]
|
||||
)
|
||||
result = concat([df, df], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 1]
|
||||
)
|
||||
result = concat([s1, s2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2]
|
||||
)
|
||||
result = concat([s1, s2, s1], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3]
|
||||
)
|
||||
result = concat([s1, df, s2, s2, s1], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with names
|
||||
s1.name = "foo"
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, 0]
|
||||
)
|
||||
result = concat([s1, df, s2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
s2.name = "bar"
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, "bar"]
|
||||
)
|
||||
result = concat([s1, df, s2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# ignore index
|
||||
expected = DataFrame(
|
||||
np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2]
|
||||
)
|
||||
result = concat([s1, df, s2], axis=1, ignore_index=True)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_mixed_objs_index(self):
|
||||
# Test row-wise concat for mixed series/frames with a common name
|
||||
# GH2385, GH15047
|
||||
|
||||
index = date_range("01-Jan-2013", periods=10, freq="h")
|
||||
arr = np.arange(10, dtype="int64")
|
||||
s1 = Series(arr, index=index)
|
||||
s2 = Series(arr, index=index)
|
||||
df = DataFrame(arr.reshape(-1, 1), index=index)
|
||||
|
||||
expected = DataFrame(
|
||||
np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0]
|
||||
)
|
||||
result = concat([s1, df, s2])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_mixed_objs_index_names(self):
|
||||
# Test row-wise concat for mixed series/frames with distinct names
|
||||
# GH2385, GH15047
|
||||
|
||||
index = date_range("01-Jan-2013", periods=10, freq="h")
|
||||
arr = np.arange(10, dtype="int64")
|
||||
s1 = Series(arr, index=index, name="foo")
|
||||
s2 = Series(arr, index=index, name="bar")
|
||||
df = DataFrame(arr.reshape(-1, 1), index=index)
|
||||
|
||||
expected = DataFrame(
|
||||
np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T,
|
||||
index=index.tolist() * 3,
|
||||
columns=["foo", 0, "bar"],
|
||||
)
|
||||
result = concat([s1, df, s2])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Rename all series to 0 when ignore_index=True
|
||||
expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
|
||||
result = concat([s1, df, s2], ignore_index=True)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dtype_coercion(self):
|
||||
# 12411
|
||||
df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]})
|
||||
|
||||
result = concat([df.iloc[[0]], df.iloc[[1]]])
|
||||
tm.assert_series_equal(result.dtypes, df.dtypes)
|
||||
|
||||
# 12045
|
||||
df = DataFrame({"date": [datetime(2012, 1, 1), datetime(1012, 1, 2)]})
|
||||
result = concat([df.iloc[[0]], df.iloc[[1]]])
|
||||
tm.assert_series_equal(result.dtypes, df.dtypes)
|
||||
|
||||
# 11594
|
||||
df = DataFrame({"text": ["some words"] + [None] * 9})
|
||||
result = concat([df.iloc[[0]], df.iloc[[1]]])
|
||||
tm.assert_series_equal(result.dtypes, df.dtypes)
|
||||
|
||||
def test_concat_single_with_key(self):
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
|
||||
|
||||
result = concat([df], keys=["foo"])
|
||||
expected = concat([df, df], keys=["foo", "bar"])
|
||||
tm.assert_frame_equal(result, expected[:10])
|
||||
|
||||
def test_concat_no_items_raises(self):
    """Check that concat of an empty list raises ValueError."""
    with pytest.raises(ValueError, match="No objects to concatenate"):
        concat([])
|
||||
|
||||
def test_concat_exclude_none(self):
    """Check that None entries are skipped and that all-None input raises."""
    df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))

    pieces = [df[:5], None, None, df[5:]]
    result = concat(pieces)
    tm.assert_frame_equal(result, df)
    with pytest.raises(ValueError, match="All objects passed were None"):
        concat([None, None])
|
||||
|
||||
def test_concat_keys_with_none(self):
|
||||
# #1649
|
||||
df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])
|
||||
|
||||
result = concat({"a": None, "b": df0, "c": df0[:2], "d": df0[:1], "e": df0})
|
||||
expected = concat({"b": df0, "c": df0[:2], "d": df0[:1], "e": df0})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat(
|
||||
[None, df0, df0[:2], df0[:1], df0], keys=["a", "b", "c", "d", "e"]
|
||||
)
|
||||
expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_bug_1719(self):
|
||||
ts1 = Series(
|
||||
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
|
||||
)
|
||||
ts2 = ts1.copy()[::2]
|
||||
|
||||
# to join with union
|
||||
# these two are of different length!
|
||||
left = concat([ts1, ts2], join="outer", axis=1)
|
||||
right = concat([ts2, ts1], join="outer", axis=1)
|
||||
|
||||
assert len(left) == len(right)
|
||||
|
||||
def test_concat_bug_2972(self):
|
||||
ts0 = Series(np.zeros(5))
|
||||
ts1 = Series(np.ones(5))
|
||||
ts0.name = ts1.name = "same name"
|
||||
result = concat([ts0, ts1], axis=1)
|
||||
|
||||
expected = DataFrame({0: ts0, 1: ts1})
|
||||
expected.columns = ["same name", "same name"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_bug_3602(self):
|
||||
# GH 3602, duplicate columns
|
||||
df1 = DataFrame(
|
||||
{
|
||||
"firmNo": [0, 0, 0, 0],
|
||||
"prc": [6, 6, 6, 6],
|
||||
"stringvar": ["rrr", "rrr", "rrr", "rrr"],
|
||||
}
|
||||
)
|
||||
df2 = DataFrame(
|
||||
{"C": [9, 10, 11, 12], "misc": [1, 2, 3, 4], "prc": [6, 6, 6, 6]}
|
||||
)
|
||||
expected = DataFrame(
|
||||
[
|
||||
[0, 6, "rrr", 9, 1, 6],
|
||||
[0, 6, "rrr", 10, 2, 6],
|
||||
[0, 6, "rrr", 11, 3, 6],
|
||||
[0, 6, "rrr", 12, 4, 6],
|
||||
]
|
||||
)
|
||||
expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"]
|
||||
|
||||
result = concat([df1, df2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_iterables(self):
|
||||
# GH8645 check concat works with tuples, list, generators, and weird
|
||||
# stuff like deque and custom iterables
|
||||
df1 = DataFrame([1, 2, 3])
|
||||
df2 = DataFrame([4, 5, 6])
|
||||
expected = DataFrame([1, 2, 3, 4, 5, 6])
|
||||
tm.assert_frame_equal(concat((df1, df2), ignore_index=True), expected)
|
||||
tm.assert_frame_equal(concat([df1, df2], ignore_index=True), expected)
|
||||
tm.assert_frame_equal(
|
||||
concat((df for df in (df1, df2)), ignore_index=True), expected
|
||||
)
|
||||
tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected)
|
||||
|
||||
class CustomIterator1:
|
||||
def __len__(self) -> int:
|
||||
return 2
|
||||
|
||||
def __getitem__(self, index):
|
||||
try:
|
||||
return {0: df1, 1: df2}[index]
|
||||
except KeyError as err:
|
||||
raise IndexError from err
|
||||
|
||||
tm.assert_frame_equal(concat(CustomIterator1(), ignore_index=True), expected)
|
||||
|
||||
class CustomIterator2(abc.Iterable):
|
||||
def __iter__(self) -> Iterator:
|
||||
yield df1
|
||||
yield df2
|
||||
|
||||
tm.assert_frame_equal(concat(CustomIterator2(), ignore_index=True), expected)
|
||||
|
||||
def test_concat_order(self):
|
||||
# GH 17344, GH#47331
|
||||
dfs = [DataFrame(index=range(3), columns=["a", 1, None])]
|
||||
dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for _ in range(100)]
|
||||
|
||||
result = concat(dfs, sort=True).columns
|
||||
expected = Index([1, "a", None])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_concat_different_extension_dtypes_upcasts(self):
|
||||
a = Series(pd.array([1, 2], dtype="Int64"))
|
||||
b = Series(to_decimal([1, 2]))
|
||||
|
||||
result = concat([a, b], ignore_index=True)
|
||||
expected = Series([1, 2, Decimal(1), Decimal(2)], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_ordered_dict(self):
|
||||
# GH 21510
|
||||
expected = concat(
|
||||
[Series(range(3)), Series(range(4))], keys=["First", "Another"]
|
||||
)
|
||||
result = concat({"First": Series(range(3)), "Another": Series(range(4))})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_duplicate_indices_raise(self):
    """Check that axis=1 concat of frames with duplicated index values raises
    InvalidIndexError.

    GH 45888: test raise for concat DataFrames with duplicate indices
    https://github.com/pandas-dev/pandas/issues/36263
    """
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal(5),
        index=[0, 1, 2, 3, 3],
        columns=["a"],
    )
    df2 = DataFrame(
        np.random.default_rng(2).standard_normal(5),
        index=[0, 1, 2, 2, 4],
        columns=["b"],
    )
    msg = "Reindexing only valid with uniquely valued Index objects"
    with pytest.raises(InvalidIndexError, match=msg):
        concat([df1, df2], axis=1)
|
||||
|
||||
|
||||
def test_concat_no_unnecessary_upcast(float_numpy_dtype, frame_or_series):
|
||||
# GH 13247
|
||||
dims = frame_or_series(dtype=object).ndim
|
||||
dt = float_numpy_dtype
|
||||
|
||||
dfs = [
|
||||
frame_or_series(np.array([1], dtype=dt, ndmin=dims)),
|
||||
frame_or_series(np.array([np.nan], dtype=dt, ndmin=dims)),
|
||||
frame_or_series(np.array([5], dtype=dt, ndmin=dims)),
|
||||
]
|
||||
x = concat(dfs)
|
||||
assert x.values.dtype == dt
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pdt", [Series, DataFrame])
def test_concat_will_upcast(pdt, any_signed_int_numpy_dtype):
    """Check that signed-int objects concatenated with a NaN object come out
    as float64."""
    dt = any_signed_int_numpy_dtype
    # ndmin makes the raw arrays 1-D for Series and 2-D for DataFrame
    dims = pdt().ndim
    dfs = [
        pdt(np.array([1], dtype=dt, ndmin=dims)),
        pdt(np.array([np.nan], ndmin=dims)),
        pdt(np.array([5], dtype=dt, ndmin=dims)),
    ]
    x = concat(dfs)
    assert x.values.dtype == "float64"
|
||||
|
||||
|
||||
def test_concat_empty_and_non_empty_frame_regression():
    """Check concat of a one-row int frame with an empty frame (GH 18178).

    The expected result holds the single value as a float (``1.0``).
    """
    df1 = DataFrame({"foo": [1]})
    df2 = DataFrame({"foo": []})
    expected = DataFrame({"foo": [1.0]})
    result = concat([df1, df2])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_sparse():
    """Check that axis=1 concat of sparse series gives a frame with
    ``SparseDtype(np.int64, 0)`` columns (GH 23557)."""
    a = Series(SparseArray([0, 1, 2]))
    expected = DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype(
        pd.SparseDtype(np.int64, 0)
    )
    result = concat([a, a], axis=1)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_dense_sparse():
    """Check that concat of a sparse float series with a dense float series
    gives the sparse dtype (GH 30668)."""
    dtype = pd.SparseDtype(np.float64, None)
    a = Series(pd.arrays.SparseArray([1, None]), dtype=dtype)
    b = Series([1], dtype=float)
    expected = Series(data=[1, None, 1], index=[0, 1, 0]).astype(dtype)
    result = concat([a, b], axis=0)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]])
def test_duplicate_keys(keys):
    """Check axis=1 concat of a frame and two series under repeated keys.

    GH 33654. Each input's columns are expected under the key at the same
    position in ``keys``.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    s1 = Series([7, 8, 9], name="c")
    s2 = Series([10, 11, 12], name="d")
    result = concat([df, s1, s2], axis=1, keys=keys)
    expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]]
    expected_columns = MultiIndex.from_tuples(
        [(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")]
    )
    expected = DataFrame(expected_values, columns=expected_columns)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicate_keys_same_frame():
    """Check axis=1 concat of the same frame twice under one repeated key
    (GH 43595)."""
    keys = ["e", "e"]
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    result = concat([df, df], axis=1, keys=keys)
    expected_values = [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]]
    expected_columns = MultiIndex.from_tuples(
        [(keys[0], "a"), (keys[0], "b"), (keys[1], "a"), (keys[1], "b")]
    )
    expected = DataFrame(expected_values, columns=expected_columns)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning"
)
@pytest.mark.parametrize(
    "obj",
    [
        tm.SubclassedDataFrame({"A": np.arange(0, 10)}),
        tm.SubclassedSeries(np.arange(0, 10), name="A"),
    ],
)
def test_concat_preserves_subclass(obj):
    """Check that concat of two subclass instances returns an instance of the
    same subclass (GH28330)."""
    result = concat([obj, obj])
    assert isinstance(result, type(obj))
|
||||
|
||||
|
||||
def test_concat_frame_axis0_extension_dtypes():
    """Check that an Int64 column concatenated with a NumPy int column stays
    Int64, in either input order."""
    # preserve extension dtype (through common_dtype mechanism)
    df1 = DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})
    df2 = DataFrame({"a": np.array([4, 5, 6])})

    result = concat([df1, df2], ignore_index=True)
    expected = DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64")
    tm.assert_frame_equal(result, expected)

    result = concat([df2, df1], ignore_index=True)
    expected = DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_preserves_extension_int64_dtype():
    """Check that Int64 frames with disjoint columns stay Int64 after concat,
    with missing cells filled by nulls (GH 24768)."""
    df_a = DataFrame({"a": [-1]}, dtype="Int64")
    df_b = DataFrame({"b": [1]}, dtype="Int64")
    result = concat([df_a, df_b], ignore_index=True)
    expected = DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype1,dtype2,expected_dtype",
    [
        ("bool", "bool", "bool"),
        ("boolean", "bool", "boolean"),
        ("bool", "boolean", "boolean"),
        ("boolean", "boolean", "boolean"),
    ],
)
def test_concat_bool_types(dtype1, dtype2, expected_dtype):
    """Check the result dtype when concatenating "bool" and "boolean" series
    (GH 42800)."""
    ser1 = Series([True, False], dtype=dtype1)
    ser2 = Series([False, True], dtype=dtype2)
    result = concat([ser1, ser2], ignore_index=True)
    expected = Series([True, False, False, True], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("keys", "integrity"),
    [
        (["red"] * 3, True),
        (["red"] * 3, False),
        (["red", "blue", "red"], False),
        (["red", "blue", "red"], True),
    ],
)
def test_concat_repeated_keys(keys, integrity):
    """Check concat of three one-element series under repeated keys, with
    ``verify_integrity`` both on and off (GH: 20816)."""
    series_list = [Series({"a": 1}), Series({"b": 2}), Series({"c": 3})]
    result = concat(series_list, keys=keys, verify_integrity=integrity)
    tuples = list(zip(keys, ["a", "b", "c"]))
    expected = Series([1, 2, 3], index=MultiIndex.from_tuples(tuples))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_null_object_with_dti():
    """Check axis="columns" concat of a frame indexed by an object ``[None]``
    index with a frame indexed by a tz-aware DatetimeIndex (GH#40841).

    The expected index is an object Index holding ``None`` and the timestamp.
    """
    dti = pd.DatetimeIndex(
        ["2021-04-08 21:21:14+00:00"], dtype="datetime64[ns, UTC]", name="Time (UTC)"
    )
    right = DataFrame(data={"C": [0.5274]}, index=dti)

    idx = Index([None], dtype="object", name="Maybe Time (UTC)")
    left = DataFrame(data={"A": [None], "B": [np.nan]}, index=idx)

    result = concat([left, right], axis="columns")

    exp_index = Index([None, dti[0]], dtype=object)
    expected = DataFrame(
        {
            "A": np.array([None, np.nan], dtype=object),
            "B": [np.nan, np.nan],
            "C": [np.nan, 0.5274],
        },
        index=exp_index,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_multiindex_with_empty_rangeindex():
    """Check concat of a MultiIndex-column frame with a frame whose columns
    are an empty RangeIndex (GH#41234).

    The expected result keeps the MultiIndex columns, with a NaN second row.
    """
    mi = MultiIndex.from_tuples([("B", 1), ("C", 1)])
    df1 = DataFrame([[1, 2]], columns=mi)
    df2 = DataFrame(index=[1], columns=pd.RangeIndex(0))

    result = concat([df1, df2])
    expected = DataFrame([[1, 2], [np.nan, np.nan]], columns=mi)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        Series(data=[1, 2]),
        DataFrame(
            data={
                "col1": [1, 2],
            }
        ),
        DataFrame(dtype=float),
        Series(dtype=float),
    ],
)
def test_concat_drop_attrs(data):
    """Check that ``attrs`` is empty after concat when the inputs' attrs
    differ (GH#41828)."""
    df1 = data.copy()
    df1.attrs = {1: 1}
    df2 = data.copy()
    df2.attrs = {1: 2}
    df = concat([df1, df2])
    assert len(df.attrs) == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        Series(data=[1, 2]),
        DataFrame(
            data={
                "col1": [1, 2],
            }
        ),
        DataFrame(dtype=float),
        Series(dtype=float),
    ],
)
def test_concat_retain_attrs(data):
    """Check that ``attrs`` is kept after concat when the inputs' attrs are
    equal (GH#41828)."""
    df1 = data.copy()
    df1.attrs = {1: 1}
    df2 = data.copy()
    df2.attrs = {1: 1}
    df = concat([df1, df2])
    assert df.attrs[1] == 1
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
@pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"])
@pytest.mark.parametrize("empty_dtype", [None, "float64", "object"])
def test_concat_ignore_empty_object_float(empty_dtype, df_dtype):
    """Check concat of an empty frame followed by a populated frame.

    https://github.com/pandas-dev/pandas/issues/45637

    A FutureWarning is expected for datetime frames, and for float64 frames
    when the empty frame is not float64. For int64 frames the expected result
    is cast to float64 or object depending on ``empty_dtype``; otherwise the
    expected result is ``df`` itself.
    """
    df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype)
    empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype)

    msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
    warn = None
    if df_dtype == "datetime64[ns]" or (
        df_dtype == "float64" and empty_dtype != "float64"
    ):
        warn = FutureWarning
    with tm.assert_produces_warning(warn, match=msg):
        result = concat([empty, df])
    expected = df
    if df_dtype == "int64":
        # TODO what exact behaviour do we want for integer eventually?
        if empty_dtype == "float64":
            expected = df.astype("float64")
        else:
            expected = df.astype("object")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
@pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"])
@pytest.mark.parametrize("empty_dtype", [None, "float64", "object"])
def test_concat_ignore_all_na_object_float(empty_dtype, df_dtype):
    """Check concat of a one-row all-NaN frame followed by a populated frame.

    ``df_dtype`` is rebound for the int64 case so that it holds the dtype the
    expected result is built with; the warning condition below therefore uses
    the rebound value.
    """
    df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype)
    empty = DataFrame({"foo": [np.nan], "bar": [np.nan]}, dtype=empty_dtype)

    if df_dtype == "int64":
        # TODO what exact behaviour do we want for integer eventually?
        if empty_dtype == "object":
            df_dtype = "object"
        else:
            df_dtype = "float64"

    msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
    warn = None
    if empty_dtype != df_dtype and empty_dtype is not None:
        warn = FutureWarning
    elif df_dtype == "datetime64[ns]":
        warn = FutureWarning

    with tm.assert_produces_warning(warn, match=msg):
        result = concat([empty, df], ignore_index=True)

    expected = DataFrame({"foo": [np.nan, 1, 2], "bar": [np.nan, 1, 2]}, dtype=df_dtype)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
def test_concat_ignore_empty_from_reindex():
    """Check concat with a frame whose extra column was added by reindexing.

    https://github.com/pandas-dev/pandas/pull/43507#issuecomment-920375856

    A FutureWarning is expected, and the "b" column of the expected result
    holds the timestamp followed by NaT.
    """
    df1 = DataFrame({"a": [1], "b": [pd.Timestamp("2012-01-01")]})
    df2 = DataFrame({"a": [2]})

    # reindexing adds a "b" column to df2 that df2 has no values for
    aligned = df2.reindex(columns=df1.columns)

    msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = concat([df1, aligned], ignore_index=True)
    # the original also rebound df1 here; it is not used afterwards
    expected = DataFrame({"a": [1, 2], "b": [pd.Timestamp("2012-01-01"), pd.NaT]})
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_mismatched_keys_length():
|
||||
# GH#43485
|
||||
ser = Series(range(5))
|
||||
sers = [ser + n for n in range(4)]
|
||||
keys = ["A", "B", "C"]
|
||||
|
||||
msg = r"The behavior of pd.concat with len\(keys\) != len\(objs\) is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
concat(sers, keys=keys, axis=1)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
concat(sers, keys=keys, axis=0)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
concat((x for x in sers), keys=(y for y in keys), axis=1)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
concat((x for x in sers), keys=(y for y in keys), axis=0)
|
||||
|
||||
|
||||
def test_concat_multiindex_with_category():
    """Check row-wise concat of two frames indexed by a MultiIndex built from
    two categorical columns."""
    df1 = DataFrame(
        {
            "c1": Series(list("abc"), dtype="category"),
            "c2": Series(list("eee"), dtype="category"),
            "i2": Series([1, 2, 3]),
        }
    )
    df1 = df1.set_index(["c1", "c2"])
    df2 = DataFrame(
        {
            "c1": Series(list("abc"), dtype="category"),
            "c2": Series(list("eee"), dtype="category"),
            "i2": Series([4, 5, 6]),
        }
    )
    df2 = df2.set_index(["c1", "c2"])
    result = concat([df1, df2])
    expected = DataFrame(
        {
            "c1": Series(list("abcabc"), dtype="category"),
            "c2": Series(list("eeeeee"), dtype="category"),
            "i2": Series([1, 2, 3, 4, 5, 6]),
        }
    )
    expected = expected.set_index(["c1", "c2"])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_ea_upcast():
    """Check concat of a "string" frame with an "Int64" frame (GH#54848).

    The expected frame is built from the plain list ``["a", 1]`` with no
    explicit dtype.
    """
    df1 = DataFrame(["a"], dtype="string")
    df2 = DataFrame([1], dtype="Int64")
    result = concat([df1, df2])
    expected = DataFrame(["a", 1], index=[0, 0])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_none_with_timezone_timestamp():
|
||||
# GH#52093
|
||||
df1 = DataFrame([{"A": None}])
|
||||
df2 = DataFrame([{"A": pd.Timestamp("1990-12-20 00:00:00+00:00")}])
|
||||
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = concat([df1, df2], ignore_index=True)
|
||||
expected = DataFrame({"A": [None, pd.Timestamp("1990-12-20 00:00:00+00:00")]})
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,230 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameConcat:
    """Tests for ``concat`` with DataFrame inputs."""

    def test_concat_multiple_frames_dtypes(self):
        """Check that axis=1 concat keeps each frame's float dtype (GH#2759)."""
        df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
        df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
        results = concat((df1, df2), axis=1).dtypes
        expected = Series(
            [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2,
            index=["foo", "bar", 0, 1],
        )
        tm.assert_series_equal(results, expected)

    def test_concat_tuple_keys(self):
        """Check that tuple keys become leading levels of the result index
        (GH#14438)."""
        df1 = DataFrame(np.ones((2, 2)), columns=list("AB"))
        df2 = DataFrame(np.ones((3, 2)) * 2, columns=list("AB"))
        results = concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")])
        expected = DataFrame(
            {
                "A": {
                    ("bee", "bah", 0): 1.0,
                    ("bee", "bah", 1): 1.0,
                    ("bee", "boo", 0): 2.0,
                    ("bee", "boo", 1): 2.0,
                    ("bee", "boo", 2): 2.0,
                },
                "B": {
                    ("bee", "bah", 0): 1.0,
                    ("bee", "bah", 1): 1.0,
                    ("bee", "boo", 0): 2.0,
                    ("bee", "boo", 1): 2.0,
                    ("bee", "boo", 2): 2.0,
                },
            }
        )
        tm.assert_frame_equal(results, expected)

    def test_concat_named_keys(self):
        """Check how the outer index level gets its name (GH#14252).

        The name may come from a named ``keys`` Index or from ``names=``;
        with neither, both levels are expected to be unnamed.
        """
        df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]})
        index = Index(["a", "b"], name="baz")
        concatted_named_from_keys = concat([df, df], keys=index)
        expected_named = DataFrame(
            {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]},
            index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]),
        )
        tm.assert_frame_equal(concatted_named_from_keys, expected_named)

        index_no_name = Index(["a", "b"], name=None)
        concatted_named_from_names = concat([df, df], keys=index_no_name, names=["baz"])
        tm.assert_frame_equal(concatted_named_from_names, expected_named)

        concatted_unnamed = concat([df, df], keys=index_no_name)
        expected_unnamed = DataFrame(
            {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]},
            index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]),
        )
        tm.assert_frame_equal(concatted_unnamed, expected_unnamed)

    def test_concat_axis_parameter(self):
        """Check the accepted spellings of ``axis`` (GH#14369).

        "index", "rows" and 0 must agree, "columns" and 1 must agree, for both
        DataFrame and Series inputs; an unknown name must raise ValueError.
        """
        df1 = DataFrame({"A": [0.1, 0.2]}, index=range(2))
        df2 = DataFrame({"A": [0.3, 0.4]}, index=range(2))

        # Index/row/0 DataFrame
        expected_index = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1])

        concatted_index = concat([df1, df2], axis="index")
        tm.assert_frame_equal(concatted_index, expected_index)

        concatted_row = concat([df1, df2], axis="rows")
        tm.assert_frame_equal(concatted_row, expected_index)

        concatted_0 = concat([df1, df2], axis=0)
        tm.assert_frame_equal(concatted_0, expected_index)

        # Columns/1 DataFrame
        expected_columns = DataFrame(
            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"]
        )

        concatted_columns = concat([df1, df2], axis="columns")
        tm.assert_frame_equal(concatted_columns, expected_columns)

        concatted_1 = concat([df1, df2], axis=1)
        tm.assert_frame_equal(concatted_1, expected_columns)

        series1 = Series([0.1, 0.2])
        series2 = Series([0.3, 0.4])

        # Index/row/0 Series
        expected_index_series = Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1])

        concatted_index_series = concat([series1, series2], axis="index")
        tm.assert_series_equal(concatted_index_series, expected_index_series)

        concatted_row_series = concat([series1, series2], axis="rows")
        tm.assert_series_equal(concatted_row_series, expected_index_series)

        concatted_0_series = concat([series1, series2], axis=0)
        tm.assert_series_equal(concatted_0_series, expected_index_series)

        # Columns/1 Series
        expected_columns_series = DataFrame(
            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1]
        )

        concatted_columns_series = concat([series1, series2], axis="columns")
        tm.assert_frame_equal(concatted_columns_series, expected_columns_series)

        concatted_1_series = concat([series1, series2], axis=1)
        tm.assert_frame_equal(concatted_1_series, expected_columns_series)

        # Testing ValueError
        with pytest.raises(ValueError, match="No axis named"):
            concat([series1, series2], axis="something")

    def test_concat_numerical_names(self):
        """Check that integer MultiIndex level names survive concat
        (GH#15262, GH#12223)."""
        df = DataFrame(
            {"col": range(9)},
            dtype="int32",
            index=(
                pd.MultiIndex.from_product(
                    [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2]
                )
            ),
        )
        result = concat((df.iloc[:2, :], df.iloc[-2:, :]))
        expected = DataFrame(
            {"col": [0, 1, 7, 8]},
            dtype="int32",
            index=pd.MultiIndex.from_tuples(
                [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")], names=[1, 2]
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_concat_astype_dup_col(self):
        """Check ``astype("category")`` on a concat result that has duplicate
        column labels (GH#23049)."""
        df = DataFrame([{"a": "b"}])
        df = concat([df, df], axis=1)

        result = df.astype("category")
        expected = DataFrame(
            np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"]
        ).astype("category")
        tm.assert_frame_equal(result, expected)

    def test_concat_dataframe_keys_bug(self, sort):
        """Check the column tuples of a keyed axis=1 concat of frames whose
        named indexes differ in length."""
        t1 = DataFrame(
            {"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))}
        )
        t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))})

        # it works
        result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort)
        assert list(result.columns) == [("t1", "value"), ("t2", "value")]

    def test_concat_bool_with_int(self):
        """Check that bool + int64 concat matches casting the bool frame to
        int64 first."""
        # GH#42092 we may want to change this to return object, but that
        #  would need a deprecation
        df1 = DataFrame(Series([True, False, True, True], dtype="bool"))
        df2 = DataFrame(Series([1, 0, 1], dtype="int64"))

        result = concat([df1, df2])
        expected = concat([df1.astype("int64"), df2])
        tm.assert_frame_equal(result, expected)

    def test_concat_duplicates_in_index_with_keys(self):
        """Check keyed concat of a frame with duplicated index values
        (GH#42651).

        The second index level is expected to hold only the unique values.
        """
        index = [1, 1, 3]
        data = [1, 2, 3]

        df = DataFrame(data=data, index=index)
        result = concat([df], keys=["A"], names=["ID", "date"])
        mi = pd.MultiIndex.from_product([["A"], index], names=["ID", "date"])
        expected = DataFrame(data=data, index=mi)
        tm.assert_frame_equal(result, expected)
        tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date"))

    @pytest.mark.parametrize("ignore_index", [True, False])
    @pytest.mark.parametrize("order", ["C", "F"])
    @pytest.mark.parametrize("axis", [0, 1])
    def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write):
        """Check that ``copy=True`` gives a result sharing no memory with the
        input; the check is skipped when ``using_copy_on_write`` is true."""
        # based on asv ConcatDataFrames
        df = DataFrame(np.zeros((10, 5), dtype=np.float32, order=order))

        res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True)

        if not using_copy_on_write:
            for arr in res._iter_column_arrays():
                for arr2 in df._iter_column_arrays():
                    assert not np.shares_memory(arr, arr2)

    def test_outer_sort_columns(self):
        """Check sorted column order for an outer join with mixed labels
        (GH#47127)."""
        df1 = DataFrame({"A": [0], "B": [1], 0: 1})
        df2 = DataFrame({"A": [100]})
        result = concat([df1, df2], ignore_index=True, join="outer", sort=True)
        expected = DataFrame({0: [1.0, np.nan], "A": [0, 100], "B": [1.0, np.nan]})
        tm.assert_frame_equal(result, expected)

    def test_inner_sort_columns(self):
        """Check sorted column order for an inner join with mixed labels
        (GH#47127)."""
        df1 = DataFrame({"A": [0], "B": [1], 0: 1})
        df2 = DataFrame({"A": [100], 0: 2})
        result = concat([df1, df2], ignore_index=True, join="inner", sort=True)
        expected = DataFrame({0: [1, 2], "A": [0, 100]})
        tm.assert_frame_equal(result, expected)

    def test_sort_columns_one_df(self):
        """Check that ``sort=True`` also sorts the columns of a single input
        frame (GH#47127)."""
        df1 = DataFrame({"A": [100], 0: 2})
        result = concat([df1], ignore_index=True, join="inner", sort=True)
        expected = DataFrame({0: [2], "A": [100]})
        tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,606 @@
|
||||
import datetime as dt
|
||||
from datetime import datetime
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
to_timedelta,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDatetimeConcat:
|
||||
def test_concat_datetime64_block(self):
|
||||
rng = date_range("1/1/2000", periods=10)
|
||||
|
||||
df = DataFrame({"time": rng})
|
||||
|
||||
result = concat([df, df])
|
||||
assert (result.iloc[:10]["time"] == rng).all()
|
||||
assert (result.iloc[10:]["time"] == rng).all()
|
||||
|
||||
def test_concat_datetime_datetime64_frame(self):
|
||||
# GH#2624
|
||||
rows = []
|
||||
rows.append([datetime(2010, 1, 1), 1])
|
||||
rows.append([datetime(2010, 1, 2), "hi"])
|
||||
|
||||
df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
|
||||
|
||||
ind = date_range(start="2000/1/1", freq="D", periods=10)
|
||||
df1 = DataFrame({"date": ind, "test": range(10)})
|
||||
|
||||
# it works!
|
||||
concat([df1, df2_obj])
|
||||
|
||||
def test_concat_datetime_timezone(self):
|
||||
# GH 18523
|
||||
idx1 = date_range("2011-01-01", periods=3, freq="h", tz="Europe/Paris")
|
||||
idx2 = date_range(start=idx1[0], end=idx1[-1], freq="h")
|
||||
df1 = DataFrame({"a": [1, 2, 3]}, index=idx1)
|
||||
df2 = DataFrame({"b": [1, 2, 3]}, index=idx2)
|
||||
result = concat([df1, df2], axis=1)
|
||||
|
||||
exp_idx = DatetimeIndex(
|
||||
[
|
||||
"2011-01-01 00:00:00+01:00",
|
||||
"2011-01-01 01:00:00+01:00",
|
||||
"2011-01-01 02:00:00+01:00",
|
||||
],
|
||||
dtype="M8[ns, Europe/Paris]",
|
||||
freq="h",
|
||||
)
|
||||
expected = DataFrame(
|
||||
[[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"]
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
idx3 = date_range("2011-01-01", periods=3, freq="h", tz="Asia/Tokyo")
|
||||
df3 = DataFrame({"b": [1, 2, 3]}, index=idx3)
|
||||
result = concat([df1, df3], axis=1)
|
||||
|
||||
exp_idx = DatetimeIndex(
|
||||
[
|
||||
"2010-12-31 15:00:00+00:00",
|
||||
"2010-12-31 16:00:00+00:00",
|
||||
"2010-12-31 17:00:00+00:00",
|
||||
"2010-12-31 23:00:00+00:00",
|
||||
"2011-01-01 00:00:00+00:00",
|
||||
"2011-01-01 01:00:00+00:00",
|
||||
]
|
||||
).as_unit("ns")
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
[np.nan, 1],
|
||||
[np.nan, 2],
|
||||
[np.nan, 3],
|
||||
[1, np.nan],
|
||||
[2, np.nan],
|
||||
[3, np.nan],
|
||||
],
|
||||
index=exp_idx,
|
||||
columns=["a", "b"],
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH 13783: Concat after resample
|
||||
result = concat([df1.resample("h").mean(), df2.resample("h").mean()], sort=True)
|
||||
expected = DataFrame(
|
||||
{"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]},
|
||||
index=idx1.append(idx1),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_datetimeindex_freq(self):
|
||||
# GH 3232
|
||||
# Monotonic index result
|
||||
dr = date_range("01-Jan-2013", periods=100, freq="50ms", tz="UTC")
|
||||
data = list(range(100))
|
||||
expected = DataFrame(data, index=dr)
|
||||
result = concat([expected[:50], expected[50:]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Non-monotonic index result
|
||||
result = concat([expected[50:], expected[:50]])
|
||||
expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50]))
|
||||
expected.index._data.freq = None
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_multiindex_datetime_object_index(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/11058
|
||||
idx = Index(
|
||||
[dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)],
|
||||
dtype="object",
|
||||
)
|
||||
|
||||
s = Series(
|
||||
["a", "b"],
|
||||
index=MultiIndex.from_arrays(
|
||||
[
|
||||
[1, 2],
|
||||
idx[:-1],
|
||||
],
|
||||
names=["first", "second"],
|
||||
),
|
||||
)
|
||||
s2 = Series(
|
||||
["a", "b"],
|
||||
index=MultiIndex.from_arrays(
|
||||
[[1, 2], idx[::2]],
|
||||
names=["first", "second"],
|
||||
),
|
||||
)
|
||||
mi = MultiIndex.from_arrays(
|
||||
[[1, 2, 2], idx],
|
||||
names=["first", "second"],
|
||||
)
|
||||
assert mi.levels[1].dtype == object
|
||||
|
||||
expected = DataFrame(
|
||||
[["a", "a"], ["b", np.nan], [np.nan, "b"]],
|
||||
index=mi,
|
||||
)
|
||||
result = concat([s, s2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_NaT_series(self):
|
||||
# GH 11693
|
||||
# test for merging NaT series with datetime series.
|
||||
x = Series(
|
||||
date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern")
|
||||
)
|
||||
y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]")
|
||||
expected = Series([x[0], x[1], pd.NaT, pd.NaT])
|
||||
|
||||
result = concat([x, y], ignore_index=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# all NaT with tz
|
||||
expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]")
|
||||
result = concat([y, y], ignore_index=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_NaT_series2(self):
|
||||
# without tz
|
||||
x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h"))
|
||||
y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h"))
|
||||
y[:] = pd.NaT
|
||||
expected = Series([x[0], x[1], pd.NaT, pd.NaT])
|
||||
result = concat([x, y], ignore_index=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# all NaT without tz
|
||||
x[:] = pd.NaT
|
||||
expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]")
|
||||
result = concat([x, y], ignore_index=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "UTC"])
|
||||
def test_concat_NaT_dataframes(self, tz):
|
||||
# GH 12396
|
||||
|
||||
dti = DatetimeIndex([pd.NaT, pd.NaT], tz=tz)
|
||||
first = DataFrame({0: dti})
|
||||
second = DataFrame(
|
||||
[[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]],
|
||||
index=[2, 3],
|
||||
)
|
||||
expected = DataFrame(
|
||||
[
|
||||
pd.NaT,
|
||||
pd.NaT,
|
||||
Timestamp("2015/01/01", tz=tz),
|
||||
Timestamp("2016/01/01", tz=tz),
|
||||
]
|
||||
)
|
||||
|
||||
result = concat([first, second], axis=0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz1", [None, "UTC"])
|
||||
@pytest.mark.parametrize("tz2", [None, "UTC"])
|
||||
@pytest.mark.parametrize("item", [pd.NaT, Timestamp("20150101")])
|
||||
def test_concat_NaT_dataframes_all_NaT_axis_0(
|
||||
self, tz1, tz2, item, using_array_manager
|
||||
):
|
||||
# GH 12396
|
||||
|
||||
# tz-naive
|
||||
first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1))
|
||||
second = DataFrame([item]).apply(lambda x: x.dt.tz_localize(tz2))
|
||||
|
||||
result = concat([first, second], axis=0)
|
||||
expected = DataFrame(Series([pd.NaT, pd.NaT, item], index=[0, 1, 0]))
|
||||
expected = expected.apply(lambda x: x.dt.tz_localize(tz2))
|
||||
if tz1 != tz2:
|
||||
expected = expected.astype(object)
|
||||
if item is pd.NaT and not using_array_manager:
|
||||
# GH#18463
|
||||
# TODO: setting nan here is to keep the test passing as we
|
||||
# make assert_frame_equal stricter, but is nan really the
|
||||
# ideal behavior here?
|
||||
if tz1 is not None:
|
||||
expected.iloc[-1, 0] = np.nan
|
||||
else:
|
||||
expected.iloc[:-1, 0] = np.nan
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz1", [None, "UTC"])
|
||||
@pytest.mark.parametrize("tz2", [None, "UTC"])
|
||||
def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
|
||||
# GH 12396
|
||||
|
||||
first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
|
||||
second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1])
|
||||
expected = DataFrame(
|
||||
{
|
||||
0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
|
||||
1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2),
|
||||
}
|
||||
)
|
||||
result = concat([first, second], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz1", [None, "UTC"])
|
||||
@pytest.mark.parametrize("tz2", [None, "UTC"])
|
||||
def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
|
||||
# GH 12396
|
||||
|
||||
# tz-naive
|
||||
first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
|
||||
second = DataFrame(
|
||||
[
|
||||
[Timestamp("2015/01/01", tz=tz2)],
|
||||
[Timestamp("2016/01/01", tz=tz2)],
|
||||
],
|
||||
index=[2, 3],
|
||||
)
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
pd.NaT,
|
||||
pd.NaT,
|
||||
Timestamp("2015/01/01", tz=tz2),
|
||||
Timestamp("2016/01/01", tz=tz2),
|
||||
]
|
||||
)
|
||||
if tz1 != tz2:
|
||||
expected = expected.astype(object)
|
||||
|
||||
result = concat([first, second])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestTimezoneConcat:
|
||||
def test_concat_tz_series(self):
|
||||
# gh-11755: tz and no tz
|
||||
x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
|
||||
y = Series(date_range("2012-01-01", "2012-01-02"))
|
||||
expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
|
||||
result = concat([x, y], ignore_index=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_tz_series2(self):
|
||||
# gh-11887: concat tz and object
|
||||
x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
|
||||
y = Series(["a", "b"])
|
||||
expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
|
||||
result = concat([x, y], ignore_index=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_tz_series3(self, unit, unit2):
|
||||
# see gh-12217 and gh-12306
|
||||
# Concatenating two UTC times
|
||||
first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]")
|
||||
first[0] = first[0].dt.tz_localize("UTC")
|
||||
|
||||
second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]")
|
||||
second[0] = second[0].dt.tz_localize("UTC")
|
||||
|
||||
result = concat([first, second])
|
||||
exp_unit = tm.get_finest_unit(unit, unit2)
|
||||
assert result[0].dtype == f"datetime64[{exp_unit}, UTC]"
|
||||
|
||||
def test_concat_tz_series4(self, unit, unit2):
|
||||
# Concatenating two London times
|
||||
first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]")
|
||||
first[0] = first[0].dt.tz_localize("Europe/London")
|
||||
|
||||
second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]")
|
||||
second[0] = second[0].dt.tz_localize("Europe/London")
|
||||
|
||||
result = concat([first, second])
|
||||
exp_unit = tm.get_finest_unit(unit, unit2)
|
||||
assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]"
|
||||
|
||||
def test_concat_tz_series5(self, unit, unit2):
|
||||
# Concatenating 2+1 London times
|
||||
first = DataFrame(
|
||||
[[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]], dtype=f"M8[{unit}]"
|
||||
)
|
||||
first[0] = first[0].dt.tz_localize("Europe/London")
|
||||
|
||||
second = DataFrame([[datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]")
|
||||
second[0] = second[0].dt.tz_localize("Europe/London")
|
||||
|
||||
result = concat([first, second])
|
||||
exp_unit = tm.get_finest_unit(unit, unit2)
|
||||
assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]"
|
||||
|
||||
def test_concat_tz_series6(self, unit, unit2):
|
||||
# Concatenating 1+2 London times
|
||||
first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]")
|
||||
first[0] = first[0].dt.tz_localize("Europe/London")
|
||||
|
||||
second = DataFrame(
|
||||
[[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]"
|
||||
)
|
||||
second[0] = second[0].dt.tz_localize("Europe/London")
|
||||
|
||||
result = concat([first, second])
|
||||
exp_unit = tm.get_finest_unit(unit, unit2)
|
||||
assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]"
|
||||
|
||||
def test_concat_tz_series_tzlocal(self):
|
||||
# see gh-13583
|
||||
x = [
|
||||
Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()),
|
||||
Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()),
|
||||
]
|
||||
y = [
|
||||
Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()),
|
||||
Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()),
|
||||
]
|
||||
|
||||
result = concat([Series(x), Series(y)], ignore_index=True)
|
||||
tm.assert_series_equal(result, Series(x + y))
|
||||
assert result.dtype == "datetime64[ns, tzlocal()]"
|
||||
|
||||
def test_concat_tz_series_with_datetimelike(self):
|
||||
# see gh-12620: tz and timedelta
|
||||
x = [
|
||||
Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
Timestamp("2011-02-01", tz="US/Eastern"),
|
||||
]
|
||||
y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
|
||||
result = concat([Series(x), Series(y)], ignore_index=True)
|
||||
tm.assert_series_equal(result, Series(x + y, dtype="object"))
|
||||
|
||||
# tz and period
|
||||
y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")]
|
||||
result = concat([Series(x), Series(y)], ignore_index=True)
|
||||
tm.assert_series_equal(result, Series(x + y, dtype="object"))
|
||||
|
||||
def test_concat_tz_frame(self):
|
||||
df2 = DataFrame(
|
||||
{
|
||||
"A": Timestamp("20130102", tz="US/Eastern"),
|
||||
"B": Timestamp("20130603", tz="CET"),
|
||||
},
|
||||
index=range(5),
|
||||
)
|
||||
|
||||
# concat
|
||||
df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
|
||||
tm.assert_frame_equal(df2, df3)
|
||||
|
||||
def test_concat_multiple_tzs(self):
|
||||
# GH#12467
|
||||
# combining datetime tz-aware and naive DataFrames
|
||||
ts1 = Timestamp("2015-01-01", tz=None)
|
||||
ts2 = Timestamp("2015-01-01", tz="UTC")
|
||||
ts3 = Timestamp("2015-01-01", tz="EST")
|
||||
|
||||
df1 = DataFrame({"time": [ts1]})
|
||||
df2 = DataFrame({"time": [ts2]})
|
||||
df3 = DataFrame({"time": [ts3]})
|
||||
|
||||
results = concat([df1, df2]).reset_index(drop=True)
|
||||
expected = DataFrame({"time": [ts1, ts2]}, dtype=object)
|
||||
tm.assert_frame_equal(results, expected)
|
||||
|
||||
results = concat([df1, df3]).reset_index(drop=True)
|
||||
expected = DataFrame({"time": [ts1, ts3]}, dtype=object)
|
||||
tm.assert_frame_equal(results, expected)
|
||||
|
||||
results = concat([df2, df3]).reset_index(drop=True)
|
||||
expected = DataFrame({"time": [ts2, ts3]})
|
||||
tm.assert_frame_equal(results, expected)
|
||||
|
||||
def test_concat_multiindex_with_tz(self):
|
||||
# GH 6606
|
||||
df = DataFrame(
|
||||
{
|
||||
"dt": DatetimeIndex(
|
||||
[
|
||||
datetime(2014, 1, 1),
|
||||
datetime(2014, 1, 2),
|
||||
datetime(2014, 1, 3),
|
||||
],
|
||||
dtype="M8[ns, US/Pacific]",
|
||||
),
|
||||
"b": ["A", "B", "C"],
|
||||
"c": [1, 2, 3],
|
||||
"d": [4, 5, 6],
|
||||
}
|
||||
)
|
||||
df = df.set_index(["dt", "b"])
|
||||
|
||||
exp_idx1 = DatetimeIndex(
|
||||
["2014-01-01", "2014-01-02", "2014-01-03"] * 2,
|
||||
dtype="M8[ns, US/Pacific]",
|
||||
name="dt",
|
||||
)
|
||||
exp_idx2 = Index(["A", "B", "C"] * 2, name="b")
|
||||
exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
|
||||
expected = DataFrame(
|
||||
{"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"]
|
||||
)
|
||||
|
||||
result = concat([df, df])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_tz_not_aligned(self):
|
||||
# GH#22796
|
||||
ts = pd.to_datetime([1, 2]).tz_localize("UTC")
|
||||
a = DataFrame({"A": ts})
|
||||
b = DataFrame({"A": ts, "B": ts})
|
||||
result = concat([a, b], sort=True, ignore_index=True)
|
||||
expected = DataFrame(
|
||||
{"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"t1",
|
||||
[
|
||||
"2015-01-01",
|
||||
pytest.param(
|
||||
pd.NaT,
|
||||
marks=pytest.mark.xfail(
|
||||
reason="GH23037 incorrect dtype when concatenating"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_concat_tz_NaT(self, t1):
|
||||
# GH#22796
|
||||
# Concatenating tz-aware multicolumn DataFrames
|
||||
ts1 = Timestamp(t1, tz="UTC")
|
||||
ts2 = Timestamp("2015-01-01", tz="UTC")
|
||||
ts3 = Timestamp("2015-01-01", tz="UTC")
|
||||
|
||||
df1 = DataFrame([[ts1, ts2]])
|
||||
df2 = DataFrame([[ts3]])
|
||||
|
||||
result = concat([df1, df2])
|
||||
expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_tz_with_empty(self):
|
||||
# GH 9188
|
||||
result = concat(
|
||||
[DataFrame(date_range("2000", periods=1, tz="UTC")), DataFrame()]
|
||||
)
|
||||
expected = DataFrame(date_range("2000", periods=1, tz="UTC"))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestPeriodConcat:
    """``concat`` checks for Period-dtype Series against various partners."""

    def test_concat_period_series(self):
        """Two daily-frequency Period Series keep the ``Period[D]`` dtype."""
        left = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        right = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D"))

        expected = Series(
            [left[0], left[1], right[0], right[1]], dtype="Period[D]"
        )
        result = concat([left, right], ignore_index=True)
        tm.assert_series_equal(result, expected)

    def test_concat_period_multiple_freq_series(self):
        """Daily + monthly Period Series concatenate to object dtype."""
        daily = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        monthly = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M"))

        expected = Series(
            [daily[0], daily[1], monthly[0], monthly[1]], dtype="object"
        )
        result = concat([daily, monthly], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series(self):
        """Same dates at daily and monthly frequency concatenate to object dtype."""
        daily = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        monthly = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M"))

        expected = Series(
            [daily[0], daily[1], monthly[0], monthly[1]], dtype="object"
        )
        result = concat([daily, monthly], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series2(self):
        """Period + datetime Series concatenate to object dtype."""
        # non-period
        periods = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        stamps = Series(DatetimeIndex(["2015-11-01", "2015-12-01"]))

        expected = Series(
            [periods[0], periods[1], stamps[0], stamps[1]], dtype="object"
        )
        result = concat([periods, stamps], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series3(self):
        """Period + string Series concatenate to object dtype."""
        periods = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        letters = Series(["A", "B"])

        expected = Series(
            [periods[0], periods[1], letters[0], letters[1]], dtype="object"
        )
        result = concat([periods, letters], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"
|
||||
|
||||
|
||||
def test_concat_timedelta64_block():
    """Both halves of a self-concatenated timedelta frame equal the source frame."""
    deltas = to_timedelta(np.arange(10), unit="s")
    frame = DataFrame({"time": deltas})

    stacked = concat([frame, frame])

    top, bottom = stacked.iloc[:10], stacked.iloc[10:]
    tm.assert_frame_equal(top, frame)
    tm.assert_frame_equal(bottom, frame)
|
||||
|
||||
|
||||
def test_concat_multiindex_datetime_nat():
    """Column-wise concat aligns MultiIndex rows whose second level is NaT."""
    # GH#44900
    one_row_index = MultiIndex.from_tuples([(1, pd.NaT)])
    two_row_index = MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])

    left = DataFrame({"a": 1}, index=one_row_index)
    right = DataFrame({"b": 2}, index=two_row_index)

    result = concat([left, right], axis="columns")

    expected = DataFrame(
        {"a": [1.0, np.nan], "b": 2},
        MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_float_datetime64(using_array_manager):
|
||||
# GH#32934
|
||||
df_time = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")})
|
||||
df_float = DataFrame({"A": pd.array([1.0], dtype="float64")})
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": [
|
||||
pd.array(["2000"], dtype="datetime64[ns]")[0],
|
||||
pd.array([1.0], dtype="float64")[0],
|
||||
]
|
||||
},
|
||||
index=[0, 0],
|
||||
)
|
||||
result = concat([df_time, df_float])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame({"A": pd.array([], dtype="object")})
|
||||
result = concat([df_time.iloc[:0], df_float.iloc[:0]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame({"A": pd.array([1.0], dtype="object")})
|
||||
result = concat([df_time.iloc[:0], df_float])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
if not using_array_manager:
|
||||
expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")})
|
||||
msg = "The behavior of DataFrame concatenation with empty or all-NA entries"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = concat([df_time, df_float.iloc[:0]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
else:
|
||||
expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}).astype(
|
||||
{"A": "object"}
|
||||
)
|
||||
result = concat([df_time, df_float.iloc[:0]])
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,295 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
RangeIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestEmptyConcat:
|
||||
def test_handle_empty_objects(self, sort, using_infer_string):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)), columns=list("abcd")
|
||||
)
|
||||
|
||||
dfcopy = df[:5].copy()
|
||||
dfcopy["foo"] = "bar"
|
||||
empty = df[5:5]
|
||||
|
||||
frames = [dfcopy, empty, empty, df[5:]]
|
||||
concatted = concat(frames, axis=0, sort=sort)
|
||||
|
||||
expected = df.reindex(columns=["a", "b", "c", "d", "foo"])
|
||||
expected["foo"] = expected["foo"].astype(
|
||||
object if not using_infer_string else "string[pyarrow_numpy]"
|
||||
)
|
||||
expected.loc[0:4, "foo"] = "bar"
|
||||
|
||||
tm.assert_frame_equal(concatted, expected)
|
||||
|
||||
# empty as first element with time series
|
||||
# GH3259
|
||||
df = DataFrame(
|
||||
{"A": range(10000)}, index=date_range("20130101", periods=10000, freq="s")
|
||||
)
|
||||
empty = DataFrame()
|
||||
result = concat([df, empty], axis=1)
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = concat([empty, df], axis=1)
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
result = concat([df, empty])
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = concat([empty, df])
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
def test_concat_empty_series(self):
|
||||
# GH 11082
|
||||
s1 = Series([1, 2, 3], name="x")
|
||||
s2 = Series(name="y", dtype="float64")
|
||||
res = concat([s1, s2], axis=1)
|
||||
exp = DataFrame(
|
||||
{"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]},
|
||||
index=RangeIndex(3),
|
||||
)
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
s1 = Series([1, 2, 3], name="x")
|
||||
s2 = Series(name="y", dtype="float64")
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = concat([s1, s2], axis=0)
|
||||
# name will be reset
|
||||
exp = Series([1, 2, 3])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# empty Series with no name
|
||||
s1 = Series([1, 2, 3], name="x")
|
||||
s2 = Series(name=None, dtype="float64")
|
||||
res = concat([s1, s2], axis=1)
|
||||
exp = DataFrame(
|
||||
{"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
|
||||
columns=["x", 0],
|
||||
index=RangeIndex(3),
|
||||
)
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "UTC"])
|
||||
@pytest.mark.parametrize("values", [[], [1, 2, 3]])
|
||||
def test_concat_empty_series_timelike(self, tz, values):
|
||||
# GH 18447
|
||||
|
||||
first = Series([], dtype="M8[ns]").dt.tz_localize(tz)
|
||||
dtype = None if values else np.float64
|
||||
second = Series(values, dtype=dtype)
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
0: Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz),
|
||||
1: values,
|
||||
}
|
||||
)
|
||||
result = concat([first, second], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"left,right,expected",
|
||||
[
|
||||
# booleans
|
||||
(np.bool_, np.int32, np.object_), # changed from int32 in 2.0 GH#39817
|
||||
(np.bool_, np.float32, np.object_),
|
||||
# datetime-like
|
||||
("m8[ns]", np.bool_, np.object_),
|
||||
("m8[ns]", np.int64, np.object_),
|
||||
("M8[ns]", np.bool_, np.object_),
|
||||
("M8[ns]", np.int64, np.object_),
|
||||
# categorical
|
||||
("category", "category", "category"),
|
||||
("category", "object", "object"),
|
||||
],
|
||||
)
|
||||
def test_concat_empty_series_dtypes(self, left, right, expected):
|
||||
# GH#39817, GH#45101
|
||||
result = concat([Series(dtype=left), Series(dtype=right)])
|
||||
assert result.dtype == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
|
||||
)
|
||||
def test_concat_empty_series_dtypes_match_roundtrips(self, dtype):
|
||||
dtype = np.dtype(dtype)
|
||||
|
||||
result = concat([Series(dtype=dtype)])
|
||||
assert result.dtype == dtype
|
||||
|
||||
result = concat([Series(dtype=dtype), Series(dtype=dtype)])
|
||||
assert result.dtype == dtype
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "int8", "uint8", "m8[ns]", "M8[ns]"])
|
||||
@pytest.mark.parametrize(
|
||||
"dtype2",
|
||||
["float64", "int8", "uint8", "m8[ns]", "M8[ns]"],
|
||||
)
|
||||
def test_concat_empty_series_dtypes_roundtrips(self, dtype, dtype2):
|
||||
# round-tripping with self & like self
|
||||
if dtype == dtype2:
|
||||
pytest.skip("same dtype is not applicable for test")
|
||||
|
||||
def int_result_type(dtype, dtype2):
|
||||
typs = {dtype.kind, dtype2.kind}
|
||||
if not len(typs - {"i", "u", "b"}) and (
|
||||
dtype.kind == "i" or dtype2.kind == "i"
|
||||
):
|
||||
return "i"
|
||||
elif not len(typs - {"u", "b"}) and (
|
||||
dtype.kind == "u" or dtype2.kind == "u"
|
||||
):
|
||||
return "u"
|
||||
return None
|
||||
|
||||
def float_result_type(dtype, dtype2):
|
||||
typs = {dtype.kind, dtype2.kind}
|
||||
if not len(typs - {"f", "i", "u"}) and (
|
||||
dtype.kind == "f" or dtype2.kind == "f"
|
||||
):
|
||||
return "f"
|
||||
return None
|
||||
|
||||
def get_result_type(dtype, dtype2):
|
||||
result = float_result_type(dtype, dtype2)
|
||||
if result is not None:
|
||||
return result
|
||||
result = int_result_type(dtype, dtype2)
|
||||
if result is not None:
|
||||
return result
|
||||
return "O"
|
||||
|
||||
dtype = np.dtype(dtype)
|
||||
dtype2 = np.dtype(dtype2)
|
||||
expected = get_result_type(dtype, dtype2)
|
||||
result = concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
|
||||
assert result.kind == expected
|
||||
|
||||
def test_concat_empty_series_dtypes_triple(self):
|
||||
assert (
|
||||
concat(
|
||||
[Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
|
||||
).dtype
|
||||
== np.object_
|
||||
)
|
||||
|
||||
def test_concat_empty_series_dtype_category_with_array(self):
|
||||
# GH#18515
|
||||
assert (
|
||||
concat(
|
||||
[Series(np.array([]), dtype="category"), Series(dtype="float64")]
|
||||
).dtype
|
||||
== "float64"
|
||||
)
|
||||
|
||||
def test_concat_empty_series_dtypes_sparse(self):
|
||||
result = concat(
|
||||
[
|
||||
Series(dtype="float64").astype("Sparse"),
|
||||
Series(dtype="float64").astype("Sparse"),
|
||||
]
|
||||
)
|
||||
assert result.dtype == "Sparse[float64]"
|
||||
|
||||
result = concat(
|
||||
[Series(dtype="float64").astype("Sparse"), Series(dtype="float64")]
|
||||
)
|
||||
expected = pd.SparseDtype(np.float64)
|
||||
assert result.dtype == expected
|
||||
|
||||
result = concat(
|
||||
[Series(dtype="float64").astype("Sparse"), Series(dtype="object")]
|
||||
)
|
||||
expected = pd.SparseDtype("object")
|
||||
assert result.dtype == expected
|
||||
|
||||
def test_concat_empty_df_object_dtype(self):
|
||||
# GH 9149
|
||||
df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
|
||||
df_2 = DataFrame(columns=df_1.columns)
|
||||
result = concat([df_1, df_2], axis=0)
|
||||
expected = df_1.astype(object)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_empty_dataframe_dtypes(self):
|
||||
df = DataFrame(columns=list("abc"))
|
||||
df["a"] = df["a"].astype(np.bool_)
|
||||
df["b"] = df["b"].astype(np.int32)
|
||||
df["c"] = df["c"].astype(np.float64)
|
||||
|
||||
result = concat([df, df])
|
||||
assert result["a"].dtype == np.bool_
|
||||
assert result["b"].dtype == np.int32
|
||||
assert result["c"].dtype == np.float64
|
||||
|
||||
result = concat([df, df.astype(np.float64)])
|
||||
assert result["a"].dtype == np.object_
|
||||
assert result["b"].dtype == np.float64
|
||||
assert result["c"].dtype == np.float64
|
||||
|
||||
def test_concat_inner_join_empty(self):
|
||||
# GH 15328
|
||||
df_empty = DataFrame()
|
||||
df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
|
||||
df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64")
|
||||
|
||||
result = concat([df_a, df_empty], axis=1, join="inner")
|
||||
tm.assert_frame_equal(result, df_expected)
|
||||
|
||||
result = concat([df_a, df_empty], axis=1, join="outer")
|
||||
tm.assert_frame_equal(result, df_a)
|
||||
|
||||
def test_empty_dtype_coerce(self):
|
||||
# xref to #12411
|
||||
# xref to #12045
|
||||
# xref to #11594
|
||||
# see below
|
||||
|
||||
# 10571
|
||||
df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"])
|
||||
df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"])
|
||||
result = concat([df1, df2])
|
||||
expected = df1.dtypes
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
def test_concat_empty_dataframe(self):
|
||||
# 39037
|
||||
df1 = DataFrame(columns=["a", "b"])
|
||||
df2 = DataFrame(columns=["b", "c"])
|
||||
result = concat([df1, df2, df1])
|
||||
expected = DataFrame(columns=["a", "b", "c"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df3 = DataFrame(columns=["a", "b"])
|
||||
df4 = DataFrame(columns=["b"])
|
||||
result = concat([df3, df4])
|
||||
expected = DataFrame(columns=["a", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_empty_dataframe_different_dtypes(self, using_infer_string):
|
||||
# 39037
|
||||
df1 = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
|
||||
df2 = DataFrame({"a": [1, 2, 3]})
|
||||
|
||||
result = concat([df1[:0], df2[:0]])
|
||||
assert result["a"].dtype == np.int64
|
||||
assert result["b"].dtype == np.object_ if not using_infer_string else "string"
|
||||
|
||||
def test_concat_to_empty_ea(self):
|
||||
"""48510 `concat` to an empty EA should maintain type EA dtype."""
|
||||
df_empty = DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())})
|
||||
df_new = DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())})
|
||||
expected = df_new.copy()
|
||||
result = concat([df_empty, df_new])
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,472 @@
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIndexConcat:
|
||||
def test_concat_ignore_index(self, sort):
|
||||
frame1 = DataFrame(
|
||||
{"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]}
|
||||
)
|
||||
frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
|
||||
frame1.index = Index(["x", "y", "z"])
|
||||
frame2.index = Index(["x", "y", "q"])
|
||||
|
||||
v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort)
|
||||
|
||||
nan = np.nan
|
||||
expected = DataFrame(
|
||||
[
|
||||
[nan, nan, nan, 4.3],
|
||||
["a", 1, 4.5, 5.2],
|
||||
["b", 2, 3.2, 2.2],
|
||||
["c", 3, 1.2, nan],
|
||||
],
|
||||
index=Index(["q", "x", "y", "z"]),
|
||||
)
|
||||
if not sort:
|
||||
expected = expected.loc[["x", "y", "z", "q"]]
|
||||
|
||||
tm.assert_frame_equal(v1, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"name_in1,name_in2,name_in3,name_out",
|
||||
[
|
||||
("idx", "idx", "idx", "idx"),
|
||||
("idx", "idx", None, None),
|
||||
("idx", None, None, None),
|
||||
("idx1", "idx2", None, None),
|
||||
("idx1", "idx1", "idx2", None),
|
||||
("idx1", "idx2", "idx3", None),
|
||||
(None, None, None, None),
|
||||
],
|
||||
)
|
||||
def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out):
|
||||
# GH13475
|
||||
indices = [
|
||||
Index(["a", "b", "c"], name=name_in1),
|
||||
Index(["b", "c", "d"], name=name_in2),
|
||||
Index(["c", "d", "e"], name=name_in3),
|
||||
]
|
||||
frames = [
|
||||
DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"])
|
||||
]
|
||||
result = concat(frames, axis=1)
|
||||
|
||||
exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"x": [0, 1, 2, np.nan, np.nan],
|
||||
"y": [np.nan, 0, 1, 2, np.nan],
|
||||
"z": [np.nan, np.nan, 0, 1, 2],
|
||||
},
|
||||
index=exp_ind,
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_rename_index(self):
|
||||
a = DataFrame(
|
||||
np.random.default_rng(2).random((3, 3)),
|
||||
columns=list("ABC"),
|
||||
index=Index(list("abc"), name="index_a"),
|
||||
)
|
||||
b = DataFrame(
|
||||
np.random.default_rng(2).random((3, 3)),
|
||||
columns=list("ABC"),
|
||||
index=Index(list("abc"), name="index_b"),
|
||||
)
|
||||
|
||||
result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"])
|
||||
|
||||
exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"])
|
||||
names = list(exp.index.names)
|
||||
names[1] = "lvl1"
|
||||
exp.index.set_names(names, inplace=True)
|
||||
|
||||
tm.assert_frame_equal(result, exp)
|
||||
assert result.index.names == exp.index.names
|
||||
|
||||
def test_concat_copy_index_series(self, axis, using_copy_on_write):
|
||||
# GH 29879
|
||||
ser = Series([1, 2])
|
||||
comb = concat([ser, ser], axis=axis, copy=True)
|
||||
if not using_copy_on_write or axis in [0, "index"]:
|
||||
assert comb.index is not ser.index
|
||||
else:
|
||||
assert comb.index is ser.index
|
||||
|
||||
def test_concat_copy_index_frame(self, axis, using_copy_on_write):
|
||||
# GH 29879
|
||||
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
|
||||
comb = concat([df, df], axis=axis, copy=True)
|
||||
if not using_copy_on_write:
|
||||
assert not comb.index.is_(df.index)
|
||||
assert not comb.columns.is_(df.columns)
|
||||
elif axis in [0, "index"]:
|
||||
assert not comb.index.is_(df.index)
|
||||
assert comb.columns.is_(df.columns)
|
||||
elif axis in [1, "columns"]:
|
||||
assert comb.index.is_(df.index)
|
||||
assert not comb.columns.is_(df.columns)
|
||||
|
||||
def test_default_index(self):
|
||||
# is_series and ignore_index
|
||||
s1 = Series([1, 2, 3], name="x")
|
||||
s2 = Series([4, 5, 6], name="y")
|
||||
res = concat([s1, s2], axis=1, ignore_index=True)
|
||||
assert isinstance(res.columns, pd.RangeIndex)
|
||||
exp = DataFrame([[1, 4], [2, 5], [3, 6]])
|
||||
# use check_index_type=True to check the result have
|
||||
# RangeIndex (default index)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
|
||||
|
||||
# is_series and all inputs have no names
|
||||
s1 = Series([1, 2, 3])
|
||||
s2 = Series([4, 5, 6])
|
||||
res = concat([s1, s2], axis=1, ignore_index=False)
|
||||
assert isinstance(res.columns, pd.RangeIndex)
|
||||
exp = DataFrame([[1, 4], [2, 5], [3, 6]])
|
||||
exp.columns = pd.RangeIndex(2)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
|
||||
|
||||
# is_dataframe and ignore_index
|
||||
df1 = DataFrame({"A": [1, 2], "B": [5, 6]})
|
||||
df2 = DataFrame({"A": [3, 4], "B": [7, 8]})
|
||||
|
||||
res = concat([df1, df2], axis=0, ignore_index=True)
|
||||
exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"])
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
|
||||
|
||||
res = concat([df1, df2], axis=1, ignore_index=True)
|
||||
exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
|
||||
|
||||
def test_dups_index(self):
|
||||
# GH 4771
|
||||
|
||||
# single dtypes
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 10, size=40).reshape(10, 4),
|
||||
columns=["A", "A", "C", "C"],
|
||||
)
|
||||
|
||||
result = concat([df, df], axis=1)
|
||||
tm.assert_frame_equal(result.iloc[:, :4], df)
|
||||
tm.assert_frame_equal(result.iloc[:, 4:], df)
|
||||
|
||||
result = concat([df, df], axis=0)
|
||||
tm.assert_frame_equal(result.iloc[:10], df)
|
||||
tm.assert_frame_equal(result.iloc[10:], df)
|
||||
|
||||
# multi dtypes
|
||||
df = concat(
|
||||
[
|
||||
DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=["A", "A", "B", "B"],
|
||||
),
|
||||
DataFrame(
|
||||
np.random.default_rng(2).integers(0, 10, size=20).reshape(10, 2),
|
||||
columns=["A", "C"],
|
||||
),
|
||||
],
|
||||
axis=1,
|
||||
)
|
||||
|
||||
result = concat([df, df], axis=1)
|
||||
tm.assert_frame_equal(result.iloc[:, :6], df)
|
||||
tm.assert_frame_equal(result.iloc[:, 6:], df)
|
||||
|
||||
result = concat([df, df], axis=0)
|
||||
tm.assert_frame_equal(result.iloc[:10], df)
|
||||
tm.assert_frame_equal(result.iloc[10:], df)
|
||||
|
||||
# append
|
||||
result = df.iloc[0:8, :]._append(df.iloc[8:])
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
result = df.iloc[0:8, :]._append(df.iloc[8:9])._append(df.iloc[9:10])
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
expected = concat([df, df], axis=0)
|
||||
result = df._append(df)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestMultiIndexConcat:
|
||||
def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
index = frame.index
|
||||
result = concat([frame, frame], keys=[0, 1], names=["iteration"])
|
||||
|
||||
assert result.index.names == ("iteration",) + index.names
|
||||
tm.assert_frame_equal(result.loc[0], frame)
|
||||
tm.assert_frame_equal(result.loc[1], frame)
|
||||
assert result.index.nlevels == 3
|
||||
|
||||
def test_concat_multiindex_with_none_in_index_names(self):
|
||||
# GH 15787
|
||||
index = MultiIndex.from_product([[1], range(5)], names=["level1", None])
|
||||
df = DataFrame({"col": range(5)}, index=index, dtype=np.int32)
|
||||
|
||||
result = concat([df, df], keys=[1, 2], names=["level2"])
|
||||
index = MultiIndex.from_product(
|
||||
[[1, 2], [1], range(5)], names=["level2", "level1", None]
|
||||
)
|
||||
expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat([df, df[:2]], keys=[1, 2], names=["level2"])
|
||||
level2 = [1] * 5 + [2] * 2
|
||||
level1 = [1] * 7
|
||||
no_name = list(range(5)) + list(range(2))
|
||||
tuples = list(zip(level2, level1, no_name))
|
||||
index = MultiIndex.from_tuples(tuples, names=["level2", "level1", None])
|
||||
expected = DataFrame({"col": no_name}, index=index, dtype=np.int32)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_multiindex_rangeindex(self):
|
||||
# GH13542
|
||||
# when multi-index levels are RangeIndex objects
|
||||
# there is a bug in concat with objects of len 1
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((9, 2)))
|
||||
df.index = MultiIndex(
|
||||
levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
|
||||
codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)],
|
||||
)
|
||||
|
||||
res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
|
||||
exp = df.iloc[[2, 3, 4, 5], :]
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_concat_multiindex_dfs_with_deepcopy(self):
|
||||
# GH 9967
|
||||
example_multiindex1 = MultiIndex.from_product([["a"], ["b"]])
|
||||
example_dataframe1 = DataFrame([0], index=example_multiindex1)
|
||||
|
||||
example_multiindex2 = MultiIndex.from_product([["a"], ["c"]])
|
||||
example_dataframe2 = DataFrame([1], index=example_multiindex2)
|
||||
|
||||
example_dict = {"s1": example_dataframe1, "s2": example_dataframe2}
|
||||
expected_index = MultiIndex(
|
||||
levels=[["s1", "s2"], ["a"], ["b", "c"]],
|
||||
codes=[[0, 1], [0, 0], [0, 1]],
|
||||
names=["testname", None, None],
|
||||
)
|
||||
expected = DataFrame([[0], [1]], index=expected_index)
|
||||
result_copy = concat(deepcopy(example_dict), names=["testname"])
|
||||
tm.assert_frame_equal(result_copy, expected)
|
||||
result_no_copy = concat(example_dict, names=["testname"])
|
||||
tm.assert_frame_equal(result_no_copy, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"mi1_list",
|
||||
[
|
||||
[["a"], range(2)],
|
||||
[["b"], np.arange(2.0, 4.0)],
|
||||
[["c"], ["A", "B"]],
|
||||
[["d"], pd.date_range(start="2017", end="2018", periods=2)],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"mi2_list",
|
||||
[
|
||||
[["a"], range(2)],
|
||||
[["b"], np.arange(2.0, 4.0)],
|
||||
[["c"], ["A", "B"]],
|
||||
[["d"], pd.date_range(start="2017", end="2018", periods=2)],
|
||||
],
|
||||
)
|
||||
def test_concat_with_various_multiindex_dtypes(
|
||||
self, mi1_list: list, mi2_list: list
|
||||
):
|
||||
# GitHub #23478
|
||||
mi1 = MultiIndex.from_product(mi1_list)
|
||||
mi2 = MultiIndex.from_product(mi2_list)
|
||||
|
||||
df1 = DataFrame(np.zeros((1, len(mi1))), columns=mi1)
|
||||
df2 = DataFrame(np.zeros((1, len(mi2))), columns=mi2)
|
||||
|
||||
if mi1_list[0] == mi2_list[0]:
|
||||
expected_mi = MultiIndex(
|
||||
levels=[mi1_list[0], list(mi1_list[1])],
|
||||
codes=[[0, 0, 0, 0], [0, 1, 0, 1]],
|
||||
)
|
||||
else:
|
||||
expected_mi = MultiIndex(
|
||||
levels=[
|
||||
mi1_list[0] + mi2_list[0],
|
||||
list(mi1_list[1]) + list(mi2_list[1]),
|
||||
],
|
||||
codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
|
||||
)
|
||||
|
||||
expected_df = DataFrame(np.zeros((1, len(expected_mi))), columns=expected_mi)
|
||||
|
||||
with tm.assert_produces_warning(None):
|
||||
result_df = concat((df1, df2), axis=1)
|
||||
|
||||
tm.assert_frame_equal(expected_df, result_df)
|
||||
|
||||
def test_concat_multiindex_(self):
|
||||
# GitHub #44786
|
||||
df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"])
|
||||
df = concat([df], keys=["X"])
|
||||
|
||||
iterables = [["X"], ["1", "2", "2"]]
|
||||
result_index = df.index
|
||||
expected_index = MultiIndex.from_product(iterables)
|
||||
|
||||
tm.assert_index_equal(result_index, expected_index)
|
||||
|
||||
result_df = df
|
||||
expected_df = DataFrame(
|
||||
{"col": ["a", "b", "c"]}, index=MultiIndex.from_product(iterables)
|
||||
)
|
||||
tm.assert_frame_equal(result_df, expected_df)
|
||||
|
||||
def test_concat_with_key_not_unique(self):
|
||||
# GitHub #46519
|
||||
df1 = DataFrame({"name": [1]})
|
||||
df2 = DataFrame({"name": [2]})
|
||||
df3 = DataFrame({"name": [3]})
|
||||
df_a = concat([df1, df2, df3], keys=["x", "y", "x"])
|
||||
# the warning is caused by indexing unsorted multi-index
|
||||
with tm.assert_produces_warning(
|
||||
PerformanceWarning, match="indexing past lexsort depth"
|
||||
):
|
||||
out_a = df_a.loc[("x", 0), :]
|
||||
|
||||
df_b = DataFrame(
|
||||
{"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)])
|
||||
)
|
||||
with tm.assert_produces_warning(
|
||||
PerformanceWarning, match="indexing past lexsort depth"
|
||||
):
|
||||
out_b = df_b.loc[("x", 0)]
|
||||
|
||||
tm.assert_frame_equal(out_a, out_b)
|
||||
|
||||
df1 = DataFrame({"name": ["a", "a", "b"]})
|
||||
df2 = DataFrame({"name": ["a", "b"]})
|
||||
df3 = DataFrame({"name": ["c", "d"]})
|
||||
df_a = concat([df1, df2, df3], keys=["x", "y", "x"])
|
||||
with tm.assert_produces_warning(
|
||||
PerformanceWarning, match="indexing past lexsort depth"
|
||||
):
|
||||
out_a = df_a.loc[("x", 0), :]
|
||||
|
||||
df_b = DataFrame(
|
||||
{
|
||||
"a": ["x", "x", "x", "y", "y", "x", "x"],
|
||||
"b": [0, 1, 2, 0, 1, 0, 1],
|
||||
"name": list("aababcd"),
|
||||
}
|
||||
).set_index(["a", "b"])
|
||||
df_b.index.names = [None, None]
|
||||
with tm.assert_produces_warning(
|
||||
PerformanceWarning, match="indexing past lexsort depth"
|
||||
):
|
||||
out_b = df_b.loc[("x", 0), :]
|
||||
|
||||
tm.assert_frame_equal(out_a, out_b)
|
||||
|
||||
def test_concat_with_duplicated_levels(self):
|
||||
# keyword levels should be unique
|
||||
df1 = DataFrame({"A": [1]}, index=["x"])
|
||||
df2 = DataFrame({"A": [1]}, index=["y"])
|
||||
msg = r"Level values not unique: \['x', 'y', 'y'\]"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]])
|
||||
|
||||
@pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]])
|
||||
def test_concat_with_levels_with_none_keys(self, levels):
|
||||
df1 = DataFrame({"A": [1]}, index=["x"])
|
||||
df2 = DataFrame({"A": [1]}, index=["y"])
|
||||
msg = "levels supported only when keys is not None"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
concat([df1, df2], levels=levels)
|
||||
|
||||
def test_concat_range_index_result(self):
|
||||
# GH#47501
|
||||
df1 = DataFrame({"a": [1, 2]})
|
||||
df2 = DataFrame({"b": [1, 2]})
|
||||
|
||||
result = concat([df1, df2], sort=True, axis=1)
|
||||
expected = DataFrame({"a": [1, 2], "b": [1, 2]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
expected_index = pd.RangeIndex(0, 2)
|
||||
tm.assert_index_equal(result.index, expected_index, exact=True)
|
||||
|
||||
def test_concat_index_keep_dtype(self):
|
||||
# GH#47329
|
||||
df1 = DataFrame([[0, 1, 1]], columns=Index([1, 2, 3], dtype="object"))
|
||||
df2 = DataFrame([[0, 1]], columns=Index([1, 2], dtype="object"))
|
||||
result = concat([df1, df2], ignore_index=True, join="outer", sort=True)
|
||||
expected = DataFrame(
|
||||
[[0, 1, 1.0], [0, 1, np.nan]], columns=Index([1, 2, 3], dtype="object")
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_index_keep_dtype_ea_numeric(self, any_numeric_ea_dtype):
|
||||
# GH#47329
|
||||
df1 = DataFrame(
|
||||
[[0, 1, 1]], columns=Index([1, 2, 3], dtype=any_numeric_ea_dtype)
|
||||
)
|
||||
df2 = DataFrame([[0, 1]], columns=Index([1, 2], dtype=any_numeric_ea_dtype))
|
||||
result = concat([df1, df2], ignore_index=True, join="outer", sort=True)
|
||||
expected = DataFrame(
|
||||
[[0, 1, 1.0], [0, 1, np.nan]],
|
||||
columns=Index([1, 2, 3], dtype=any_numeric_ea_dtype),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["Int8", "Int16", "Int32"])
|
||||
def test_concat_index_find_common(self, dtype):
|
||||
# GH#47329
|
||||
df1 = DataFrame([[0, 1, 1]], columns=Index([1, 2, 3], dtype=dtype))
|
||||
df2 = DataFrame([[0, 1]], columns=Index([1, 2], dtype="Int32"))
|
||||
result = concat([df1, df2], ignore_index=True, join="outer", sort=True)
|
||||
expected = DataFrame(
|
||||
[[0, 1, 1.0], [0, 1, np.nan]], columns=Index([1, 2, 3], dtype="Int32")
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string):
|
||||
# GH 46675
|
||||
s1 = Series(["a", "b", "c"])
|
||||
s2 = Series(["a", "b"])
|
||||
s3 = Series(["a", "b", "c", "d"])
|
||||
s4 = Series(
|
||||
[], dtype=object if not using_infer_string else "string[pyarrow_numpy]"
|
||||
)
|
||||
result = concat(
|
||||
[s1, s2, s3, s4], sort=False, join="outer", ignore_index=False, axis=1
|
||||
)
|
||||
expected = DataFrame(
|
||||
[
|
||||
["a"] * 3 + [np.nan],
|
||||
["b"] * 3 + [np.nan],
|
||||
["c", np.nan] * 2,
|
||||
[np.nan] * 2 + ["d"] + [np.nan],
|
||||
],
|
||||
dtype=object if not using_infer_string else "string[pyarrow_numpy]",
|
||||
)
|
||||
tm.assert_frame_equal(
|
||||
result, expected, check_index_type=True, check_column_type=True
|
||||
)
|
@ -0,0 +1,54 @@
|
||||
from io import StringIO
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
concat,
|
||||
read_csv,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestInvalidConcat:
    """Checks on which inputs ``concat`` rejects and which iterables it accepts."""

    @pytest.mark.parametrize("obj", [1, {}, [1, 2], (1, 2)])
    def test_concat_invalid(self, obj):
        """A non-pandas object among the inputs raises ``TypeError``."""
        # trying to concat a ndframe with a non-ndframe
        df1 = DataFrame(range(2))
        msg = (
            f"cannot concatenate object of type '{type(obj)}'; "
            "only Series and DataFrame objs are valid"
        )
        with pytest.raises(TypeError, match=msg):
            concat([df1, obj])

    def test_concat_invalid_first_argument(self):
        """Passing a bare DataFrame instead of an iterable raises ``TypeError``."""
        df1 = DataFrame(range(2))
        msg = (
            "first argument must be an iterable of pandas "
            'objects, you passed an object of type "DataFrame"'
        )
        with pytest.raises(TypeError, match=msg):
            concat(df1)

    def test_concat_generator_obj(self):
        """A generator of frames is accepted (the test only checks it runs)."""
        # generator ok though
        concat(DataFrame(np.random.default_rng(2).random((5, 5))) for _ in range(3))

    def test_concat_textreader_obj(self):
        """A chunked ``read_csv`` reader can be concatenated directly."""
        # text reader ok
        # GH6583
        data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""

        # Reading one row per chunk and concatenating must reproduce the
        # frame obtained from a single read.
        with read_csv(StringIO(data), chunksize=1) as reader:
            result = concat(reader, ignore_index=True)
        expected = read_csv(StringIO(data))
        tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,175 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesConcat:
|
||||
def test_concat_series(self):
|
||||
ts = Series(
|
||||
np.arange(20, dtype=np.float64),
|
||||
index=date_range("2020-01-01", periods=20),
|
||||
name="foo",
|
||||
)
|
||||
ts.name = "foo"
|
||||
|
||||
pieces = [ts[:5], ts[5:15], ts[15:]]
|
||||
|
||||
result = concat(pieces)
|
||||
tm.assert_series_equal(result, ts)
|
||||
assert result.name == ts.name
|
||||
|
||||
result = concat(pieces, keys=[0, 1, 2])
|
||||
expected = ts.copy()
|
||||
|
||||
ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]"))
|
||||
|
||||
exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))]
|
||||
exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes)
|
||||
expected.index = exp_index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_empty_and_non_empty_series_regression(self):
|
||||
# GH 18187 regression test
|
||||
s1 = Series([1])
|
||||
s2 = Series([], dtype=object)
|
||||
|
||||
expected = s1
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = concat([s1, s2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_series_axis1(self):
|
||||
ts = Series(
|
||||
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
|
||||
)
|
||||
|
||||
pieces = [ts[:-2], ts[2:], ts[2:-2]]
|
||||
|
||||
result = concat(pieces, axis=1)
|
||||
expected = DataFrame(pieces).T
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat(pieces, keys=["A", "B", "C"], axis=1)
|
||||
expected = DataFrame(pieces, index=["A", "B", "C"]).T
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_series_axis1_preserves_series_names(self):
|
||||
# preserve series names, #2489
|
||||
s = Series(np.random.default_rng(2).standard_normal(5), name="A")
|
||||
s2 = Series(np.random.default_rng(2).standard_normal(5), name="B")
|
||||
|
||||
result = concat([s, s2], axis=1)
|
||||
expected = DataFrame({"A": s, "B": s2})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
s2.name = None
|
||||
result = concat([s, s2], axis=1)
|
||||
tm.assert_index_equal(result.columns, Index(["A", 0], dtype="object"))
|
||||
|
||||
def test_concat_series_axis1_with_reindex(self, sort):
|
||||
# must reindex, #2603
|
||||
s = Series(
|
||||
np.random.default_rng(2).standard_normal(3), index=["c", "a", "b"], name="A"
|
||||
)
|
||||
s2 = Series(
|
||||
np.random.default_rng(2).standard_normal(4),
|
||||
index=["d", "a", "b", "c"],
|
||||
name="B",
|
||||
)
|
||||
result = concat([s, s2], axis=1, sort=sort)
|
||||
expected = DataFrame({"A": s, "B": s2}, index=["c", "a", "b", "d"])
|
||||
if sort:
|
||||
expected = expected.sort_index()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_series_axis1_names_applied(self):
|
||||
# ensure names argument is not ignored on axis=1, #23490
|
||||
s = Series([1, 2, 3])
|
||||
s2 = Series([4, 5, 6])
|
||||
result = concat([s, s2], axis=1, keys=["a", "b"], names=["A"])
|
||||
expected = DataFrame(
|
||||
[[1, 4], [2, 5], [3, 6]], columns=Index(["a", "b"], name="A")
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"])
|
||||
expected = DataFrame(
|
||||
[[1, 4], [2, 5], [3, 6]],
|
||||
columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_series_axis1_same_names_ignore_index(self):
|
||||
dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1]
|
||||
s1 = Series(
|
||||
np.random.default_rng(2).standard_normal(len(dates)),
|
||||
index=dates,
|
||||
name="value",
|
||||
)
|
||||
s2 = Series(
|
||||
np.random.default_rng(2).standard_normal(len(dates)),
|
||||
index=dates,
|
||||
name="value",
|
||||
)
|
||||
|
||||
result = concat([s1, s2], axis=1, ignore_index=True)
|
||||
expected = Index(range(2))
|
||||
|
||||
tm.assert_index_equal(result.columns, expected, exact=True)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"s1name,s2name", [(np.int64(190), (43, 0)), (190, (43, 0))]
|
||||
)
|
||||
def test_concat_series_name_npscalar_tuple(self, s1name, s2name):
|
||||
# GH21015
|
||||
s1 = Series({"a": 1, "b": 2}, name=s1name)
|
||||
s2 = Series({"c": 5, "d": 6}, name=s2name)
|
||||
result = concat([s1, s2])
|
||||
expected = Series({"a": 1, "b": 2, "c": 5, "d": 6})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_series_partial_columns_names(self):
|
||||
# GH10698
|
||||
named_series = Series([1, 2], name="foo")
|
||||
unnamed_series1 = Series([1, 2])
|
||||
unnamed_series2 = Series([4, 5])
|
||||
|
||||
result = concat([named_series, unnamed_series1, unnamed_series2], axis=1)
|
||||
expected = DataFrame(
|
||||
{"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat(
|
||||
[named_series, unnamed_series1, unnamed_series2],
|
||||
axis=1,
|
||||
keys=["red", "blue", "yellow"],
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]},
|
||||
columns=["red", "blue", "yellow"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = concat(
|
||||
[named_series, unnamed_series1, unnamed_series2], axis=1, ignore_index=True
|
||||
)
|
||||
expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_series_length_one_reversed(self, frame_or_series):
|
||||
# GH39401
|
||||
obj = frame_or_series([100])
|
||||
result = concat([obj.iloc[::-1]])
|
||||
tm.assert_equal(result, obj)
|
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestConcatSort:
|
||||
def test_concat_sorts_columns(self, sort):
|
||||
# GH-4588
|
||||
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
|
||||
df2 = DataFrame({"a": [3, 4], "c": [5, 6]})
|
||||
|
||||
# for sort=True/None
|
||||
expected = DataFrame(
|
||||
{"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]},
|
||||
columns=["a", "b", "c"],
|
||||
)
|
||||
|
||||
if sort is False:
|
||||
expected = expected[["b", "a", "c"]]
|
||||
|
||||
# default
|
||||
with tm.assert_produces_warning(None):
|
||||
result = pd.concat([df1, df2], ignore_index=True, sort=sort)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_sorts_index(self, sort):
|
||||
df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"])
|
||||
df2 = DataFrame({"b": [1, 2]}, index=["a", "b"])
|
||||
|
||||
# For True/None
|
||||
expected = DataFrame(
|
||||
{"a": [2, 3, 1], "b": [1, 2, None]},
|
||||
index=["a", "b", "c"],
|
||||
columns=["a", "b"],
|
||||
)
|
||||
if sort is False:
|
||||
expected = expected.loc[["c", "a", "b"]]
|
||||
|
||||
# Warn and sort by default
|
||||
with tm.assert_produces_warning(None):
|
||||
result = pd.concat([df1, df2], axis=1, sort=sort)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_inner_sort(self, sort):
|
||||
# https://github.com/pandas-dev/pandas/pull/20613
|
||||
df1 = DataFrame(
|
||||
{"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]
|
||||
)
|
||||
df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4])
|
||||
|
||||
with tm.assert_produces_warning(None):
|
||||
# unset sort should *not* warn for inner join
|
||||
# since that never sorted
|
||||
result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True)
|
||||
|
||||
expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"])
|
||||
if sort is True:
|
||||
expected = expected[["a", "b"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_aligned_sort(self):
|
||||
# GH-4588
|
||||
df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"])
|
||||
result = pd.concat([df, df], sort=True, ignore_index=True)
|
||||
expected = DataFrame(
|
||||
{"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]},
|
||||
columns=["a", "b", "c"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = pd.concat(
|
||||
[df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True
|
||||
)
|
||||
expected = expected[["b", "c"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_aligned_sort_does_not_raise(self):
|
||||
# GH-4588
|
||||
# We catch TypeErrors from sorting internally and do not re-raise.
|
||||
df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"])
|
||||
expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"])
|
||||
result = pd.concat([df, df], ignore_index=True, sort=True)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_frame_with_sort_false(self):
|
||||
# GH 43375
|
||||
result = pd.concat(
|
||||
[DataFrame({i: i}, index=[i]) for i in range(2, 0, -1)], sort=False
|
||||
)
|
||||
expected = DataFrame([[2, np.nan], [np.nan, 1]], index=[2, 1], columns=[2, 1])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH 37937
|
||||
df1 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[1, 2, 3])
|
||||
df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}, index=[3, 1, 6])
|
||||
result = pd.concat([df2, df1], axis=1, sort=False)
|
||||
expected = DataFrame(
|
||||
[
|
||||
[7.0, 10.0, 3.0, 6.0],
|
||||
[8.0, 11.0, 1.0, 4.0],
|
||||
[9.0, 12.0, np.nan, np.nan],
|
||||
[np.nan, np.nan, 2.0, 5.0],
|
||||
],
|
||||
index=[3, 1, 6, 2],
|
||||
columns=["c", "d", "a", "b"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_sort_none_raises(self):
|
||||
# GH#41518
|
||||
df = DataFrame({1: [1, 2], "a": [3, 4]})
|
||||
msg = "The 'sort' keyword only accepts boolean values; None was passed."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
pd.concat([df, df], sort=None)
|
Reference in New Issue
Block a user