forked from Alsan/Post_finder
venv
Binary file not shown. (20 binary files)
@@ -0,0 +1,50 @@
from collections.abc import Generator
from contextlib import contextmanager
import pathlib
import tempfile

import pytest

from pandas.io.pytables import HDFStore

tables = pytest.importorskip("tables")
# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1


def safe_close(store):
    try:
        if store is not None:
            store.close()
    except OSError:
        pass


# contextmanager to ensure the file cleanup
@contextmanager
def ensure_clean_store(
    path, mode="a", complevel=None, complib=None, fletcher32=False
) -> Generator[HDFStore, None, None]:
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_path = pathlib.Path(tmpdirname, path)
        with HDFStore(
            tmp_path,
            mode=mode,
            complevel=complevel,
            complib=complib,
            fletcher32=fletcher32,
        ) as store:
            yield store


def _maybe_remove(store, key):
    """
    For tests using tables, try removing the table to be sure there is
    no content from previous tests using the same table name.
    """
    try:
        store.remove(key)
    except (ValueError, KeyError):
        pass
@@ -0,0 +1,9 @@
import uuid

import pytest


@pytest.fixture
def setup_path():
    """Fixture for setup path"""
    return f"tmp.__{uuid.uuid4()}__.h5"
@@ -0,0 +1,986 @@
import datetime
from datetime import timedelta
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    _testing as tm,
    concat,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)

pytestmark = pytest.mark.single_cpu

tables = pytest.importorskip("tables")


@pytest.mark.filterwarnings("ignore::tables.NaturalNameWarning")
def test_append(setup_path):
    with ensure_clean_store(setup_path) as store:
        # this is allowed, but you almost always don't want to do it
        # tables.NaturalNameWarning):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((20, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=20, freq="B"),
        )
        _maybe_remove(store, "df1")
        store.append("df1", df[:10])
        store.append("df1", df[10:])
        tm.assert_frame_equal(store["df1"], df)

        _maybe_remove(store, "df2")
        store.put("df2", df[:10], format="table")
        store.append("df2", df[10:])
        tm.assert_frame_equal(store["df2"], df)

        _maybe_remove(store, "df3")
        store.append("/df3", df[:10])
        store.append("/df3", df[10:])
        tm.assert_frame_equal(store["df3"], df)

        # this is allowed, but you almost always don't want to do it
        # tables.NaturalNameWarning
        _maybe_remove(store, "/df3 foo")
        store.append("/df3 foo", df[:10])
        store.append("/df3 foo", df[10:])
        tm.assert_frame_equal(store["df3 foo"], df)

        # dtype issues - mixed type in a single object column
        df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
        df["mixed_column"] = "testing"
        df.loc[2, "mixed_column"] = np.nan
        _maybe_remove(store, "df")
        store.append("df", df)
        tm.assert_frame_equal(store["df"], df)

        # uints - test storage of uints
        uint_data = DataFrame(
            {
                "u08": Series(
                    np.random.default_rng(2).integers(0, high=255, size=5),
                    dtype=np.uint8,
                ),
                "u16": Series(
                    np.random.default_rng(2).integers(0, high=65535, size=5),
                    dtype=np.uint16,
                ),
                "u32": Series(
                    np.random.default_rng(2).integers(0, high=2**30, size=5),
                    dtype=np.uint32,
                ),
                "u64": Series(
                    [2**58, 2**59, 2**60, 2**61, 2**62],
                    dtype=np.uint64,
                ),
            },
            index=np.arange(5),
        )
        _maybe_remove(store, "uints")
        store.append("uints", uint_data)
        tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)

        # uints - test storage of uints in indexable columns
        _maybe_remove(store, "uints")
        # 64-bit indices not yet supported
        store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])
        tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)


def test_append_series(setup_path):
    with ensure_clean_store(setup_path) as store:
        # basic
        ss = Series(range(20), dtype=np.float64, index=[f"i_{i}" for i in range(20)])
        ts = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        ns = Series(np.arange(100))

        store.append("ss", ss)
        result = store["ss"]
        tm.assert_series_equal(result, ss)
        assert result.name is None

        store.append("ts", ts)
        result = store["ts"]
        tm.assert_series_equal(result, ts)
        assert result.name is None

        ns.name = "foo"
        store.append("ns", ns)
        result = store["ns"]
        tm.assert_series_equal(result, ns)
        assert result.name == ns.name

        # select on the values
        expected = ns[ns > 60]
        result = store.select("ns", "foo>60")
        tm.assert_series_equal(result, expected)

        # select on the index and values
        expected = ns[(ns > 70) & (ns.index < 90)]
        result = store.select("ns", "foo>70 and index<90")
        tm.assert_series_equal(result, expected, check_index_type=True)

        # multi-index
        mi = DataFrame(np.random.default_rng(2).standard_normal((5, 1)), columns=["A"])
        mi["B"] = np.arange(len(mi))
        mi["C"] = "foo"
        mi.loc[3:5, "C"] = "bar"
        mi.set_index(["C", "B"], inplace=True)
        s = mi.stack(future_stack=True)
        s.index = s.index.droplevel(2)
        store.append("mi", s)
        tm.assert_series_equal(store["mi"], s, check_index_type=True)


def test_append_some_nans(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            {
                "A": Series(np.random.default_rng(2).standard_normal(20)).astype(
                    "int32"
                ),
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
                "B": "foo",
                "C": "bar",
                "D": Timestamp("2001-01-01").as_unit("ns"),
                "E": Timestamp("2001-01-02").as_unit("ns"),
            },
            index=np.arange(20),
        )
        # some nans
        _maybe_remove(store, "df1")
        df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan
        store.append("df1", df[:10])
        store.append("df1", df[10:])
        tm.assert_frame_equal(store["df1"], df, check_index_type=True)

        # first column
        df1 = df.copy()
        df1["A1"] = np.nan
        _maybe_remove(store, "df1")
        store.append("df1", df1[:10])
        store.append("df1", df1[10:])
        tm.assert_frame_equal(store["df1"], df1, check_index_type=True)

        # 2nd column
        df2 = df.copy()
        df2["A2"] = np.nan
        _maybe_remove(store, "df2")
        store.append("df2", df2[:10])
        store.append("df2", df2[10:])
        tm.assert_frame_equal(store["df2"], df2, check_index_type=True)

        # datetimes
        df3 = df.copy()
        df3["E"] = np.nan
        _maybe_remove(store, "df3")
        store.append("df3", df3[:10])
        store.append("df3", df3[10:])
        tm.assert_frame_equal(store["df3"], df3, check_index_type=True)


def test_append_all_nans(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            {
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
            },
            index=np.arange(20),
        )
        df.loc[0:15, :] = np.nan

        # nan some entire rows (dropna=True)
        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df[-4:], check_index_type=True)

        # nan some entire rows (dropna=False)
        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df, check_index_type=True)

        # tests the option io.hdf.dropna_table
        with pd.option_context("io.hdf.dropna_table", False):
            _maybe_remove(store, "df3")
            store.append("df3", df[:10])
            store.append("df3", df[10:])
            tm.assert_frame_equal(store["df3"], df)

        with pd.option_context("io.hdf.dropna_table", True):
            _maybe_remove(store, "df4")
            store.append("df4", df[:10])
            store.append("df4", df[10:])
            tm.assert_frame_equal(store["df4"], df[-4:])

        # nan some entire rows (strings are still written!)
        df = DataFrame(
            {
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
                "B": "foo",
                "C": "bar",
            },
            index=np.arange(20),
        )

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df, check_index_type=True)

        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df, check_index_type=True)

        # nan some entire rows (but since we have dates they are still
        # written!)
        df = DataFrame(
            {
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
                "B": "foo",
                "C": "bar",
                "D": Timestamp("2001-01-01").as_unit("ns"),
                "E": Timestamp("2001-01-02").as_unit("ns"),
            },
            index=np.arange(20),
        )

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df, check_index_type=True)

        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df, check_index_type=True)


def test_append_frame_column_oriented(setup_path):
    with ensure_clean_store(setup_path) as store:
        # column oriented
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df.index = df.index._with_freq(None)  # freq doesn't round-trip

        _maybe_remove(store, "df1")
        store.append("df1", df.iloc[:, :2], axes=["columns"])
        store.append("df1", df.iloc[:, 2:])
        tm.assert_frame_equal(store["df1"], df)

        result = store.select("df1", "columns=A")
        expected = df.reindex(columns=["A"])
        tm.assert_frame_equal(expected, result)

        # selection on the non-indexable
        result = store.select("df1", ("columns=A", "index=df.index[0:4]"))
        expected = df.reindex(columns=["A"], index=df.index[0:4])
        tm.assert_frame_equal(expected, result)

        # this isn't supported
        msg = re.escape(
            "passing a filterable condition to a non-table indexer "
            "[Filter: Not Initialized]"
        )
        with pytest.raises(TypeError, match=msg):
            store.select("df1", "columns=A and index>df.index[4]")


def test_append_with_different_block_ordering(setup_path):
    # GH 4096; using same frames, but different block orderings
    with ensure_clean_store(setup_path) as store:
        for i in range(10):
            df = DataFrame(
                np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB")
            )
            df["index"] = range(10)
            df["index"] += i * 10
            df["int64"] = Series([1] * len(df), dtype="int64")
            df["int16"] = Series([1] * len(df), dtype="int16")

            if i % 2 == 0:
                del df["int64"]
                df["int64"] = Series([1] * len(df), dtype="int64")
            if i % 3 == 0:
                a = df.pop("A")
                df["A"] = a

            df.set_index("index", inplace=True)

            store.append("df", df)

    # test a different ordering but with more fields (like invalid
    # combinations)
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 2)),
            columns=list("AB"),
            dtype="float64",
        )
        df["int64"] = Series([1] * len(df), dtype="int64")
        df["int16"] = Series([1] * len(df), dtype="int16")
        store.append("df", df)

        # store additional fields in different blocks
        df["int16_2"] = Series([1] * len(df), dtype="int16")
        msg = re.escape(
            "cannot match existing table structure for [int16] on appending data"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # store multiple additional fields in different blocks
        df["float_3"] = Series([1.0] * len(df), dtype="float64")
        msg = re.escape(
            "cannot match existing table structure for [A,B] on appending data"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)


def test_append_with_strings(setup_path):
    with ensure_clean_store(setup_path) as store:

        def check_col(key, name, size):
            assert (
                getattr(store.get_storer(key).table.description, name).itemsize == size
            )

        # avoid truncation on elements
        df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
        store.append("df_big", df)
        tm.assert_frame_equal(store.select("df_big"), df)
        check_col("df_big", "values_block_1", 15)

        # appending smaller string ok
        df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]])
        store.append("df_big", df2)
        expected = concat([df, df2])
        tm.assert_frame_equal(store.select("df_big"), expected)
        check_col("df_big", "values_block_1", 15)

        # avoid truncation on elements
        df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
        store.append("df_big2", df, min_itemsize={"values": 50})
        tm.assert_frame_equal(store.select("df_big2"), df)
        check_col("df_big2", "values_block_1", 50)

        # bigger string on next append
        store.append("df_new", df)
        df_new = DataFrame([[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]])
        msg = (
            r"Trying to store a string with len \[26\] in "
            r"\[values_block_1\] column but\n"
            r"this column has a limit of \[15\]!\n"
            "Consider using min_itemsize to preset the sizes on these "
            "columns"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_new", df_new)

        # min_itemsize on Series index (GH 11412)
        df = DataFrame(
            {
                "A": [0.0, 1.0, 2.0, 3.0, 4.0],
                "B": [0.0, 1.0, 0.0, 1.0, 0.0],
                "C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype=object),
                "D": date_range("20130101", periods=5),
            }
        ).set_index("C")
        store.append("ss", df["B"], min_itemsize={"index": 4})
        tm.assert_series_equal(store.select("ss"), df["B"])

        # same as above, with data_columns=True
        store.append("ss2", df["B"], data_columns=True, min_itemsize={"index": 4})
        tm.assert_series_equal(store.select("ss2"), df["B"])

        # min_itemsize in index without appending (GH 10381)
        store.put("ss3", df, format="table", min_itemsize={"index": 6})
        # just make sure there is a longer string:
        df2 = df.copy().reset_index().assign(C="longer").set_index("C")
        store.append("ss3", df2)
        tm.assert_frame_equal(store.select("ss3"), concat([df, df2]))

        # same as above, with a Series
        store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})
        store.append("ss4", df2["B"])
        tm.assert_series_equal(store.select("ss4"), concat([df["B"], df2["B"]]))

        # with nans
        _maybe_remove(store, "df")
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df["string"] = "foo"
        df.loc[df.index[1:4], "string"] = np.nan
        df["string2"] = "bar"
        df.loc[df.index[4:8], "string2"] = np.nan
        df["string3"] = "bah"
        df.loc[df.index[1:], "string3"] = np.nan
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

    with ensure_clean_store(setup_path) as store:
        df = DataFrame({"A": "foo", "B": "bar"}, index=range(10))

        # a min_itemsize that creates a data_column
        _maybe_remove(store, "df")
        store.append("df", df, min_itemsize={"A": 200})
        check_col("df", "A", 200)
        assert store.get_storer("df").data_columns == ["A"]

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=["B"], min_itemsize={"A": 200})
        check_col("df", "A", 200)
        assert store.get_storer("df").data_columns == ["B", "A"]

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=["B"], min_itemsize={"values": 200})
        check_col("df", "B", 200)
        check_col("df", "values_block_0", 200)
        assert store.get_storer("df").data_columns == ["B"]

        # infer the .typ on subsequent appends
        _maybe_remove(store, "df")
        store.append("df", df[:5], min_itemsize=200)
        store.append("df", df[5:], min_itemsize=200)
        tm.assert_frame_equal(store["df"], df)

        # invalid min_itemsize keys
        df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
        _maybe_remove(store, "df")
        msg = re.escape(
            "min_itemsize has the key [foo] which is not an axis or data_column"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})


def test_append_with_empty_string(setup_path):
    with ensure_clean_store(setup_path) as store:
        # with all empty strings (GH 12242)
        df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})
        store.append("df", df[:-1], min_itemsize={"x": 1})
        store.append("df", df[-1:], min_itemsize={"x": 1})
        tm.assert_frame_equal(store.select("df"), df)


def test_append_with_data_columns(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df.iloc[0, df.columns.get_loc("B")] = 1.0
        _maybe_remove(store, "df")
        store.append("df", df[:2], data_columns=["B"])
        store.append("df", df[2:])
        tm.assert_frame_equal(store["df"], df)

        # check that we have indices created
        assert store._handle.root.df.table.cols.index.is_indexed is True
        assert store._handle.root.df.table.cols.B.is_indexed is True

        # data column searching
        result = store.select("df", "B>0")
        expected = df[df.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column searching (with an indexable and a data_columns)
        result = store.select("df", "B>0 and index>df.index[3]")
        df_new = df.reindex(index=df.index[4:])
        expected = df_new[df_new.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column selection with a string data_column
        df_new = df.copy()
        df_new["string"] = "foo"
        df_new.loc[df_new.index[1:4], "string"] = np.nan
        df_new.loc[df_new.index[5:6], "string"] = "bar"
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"])
        result = store.select("df", "string='foo'")
        expected = df_new[df_new.string == "foo"]
        tm.assert_frame_equal(result, expected)

    # using min_itemsize and a data column
    def check_col(key, name, size):
        assert (
            getattr(store.get_storer(key).table.description, name).itemsize == size
        )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize={"string": 30})
        check_col("df", "string", 30)
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize=30)
        check_col("df", "string", 30)
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize={"values": 30})
        check_col("df", "string", 30)

    with ensure_clean_store(setup_path) as store:
        df_new["string2"] = "foobarbah"
        df_new["string_block1"] = "foobarbah1"
        df_new["string_block2"] = "foobarbah2"
        _maybe_remove(store, "df")
        store.append(
            "df",
            df_new,
            data_columns=["string", "string2"],
            min_itemsize={"string": 30, "string2": 40, "values": 50},
        )
        check_col("df", "string", 30)
        check_col("df", "string2", 40)
        check_col("df", "values_block_1", 50)

    with ensure_clean_store(setup_path) as store:
        # multiple data columns
        df_new = df.copy()
        df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0
        df_new.iloc[0, df_new.columns.get_loc("B")] = -1.0
        df_new["string"] = "foo"

        sl = df_new.columns.get_loc("string")
        df_new.iloc[1:4, sl] = np.nan
        df_new.iloc[5:6, sl] = "bar"

        df_new["string2"] = "foo"
        sl = df_new.columns.get_loc("string2")
        df_new.iloc[2:5, sl] = np.nan
        df_new.iloc[7:8, sl] = "bar"
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["A", "B", "string", "string2"])
        result = store.select("df", "string='foo' and string2='foo' and A>0 and B<0")
        expected = df_new[
            (df_new.string == "foo")
            & (df_new.string2 == "foo")
            & (df_new.A > 0)
            & (df_new.B < 0)
        ]
        tm.assert_frame_equal(result, expected, check_freq=False)
        # FIXME: 2020-05-07 freq check randomly fails in the CI

        # yield an empty frame
        result = store.select("df", "string='foo' and string2='cool'")
        expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]
        tm.assert_frame_equal(result, expected)

    with ensure_clean_store(setup_path) as store:
        # doc example
        df_dc = df.copy()
        df_dc["string"] = "foo"
        df_dc.loc[df_dc.index[4:6], "string"] = np.nan
        df_dc.loc[df_dc.index[7:9], "string"] = "bar"
        df_dc["string2"] = "cool"
        df_dc["datetime"] = Timestamp("20010102").as_unit("ns")
        df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan

        _maybe_remove(store, "df_dc")
        store.append(
            "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"]
        )
        result = store.select("df_dc", "B>0")

        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
        tm.assert_frame_equal(result, expected, check_freq=False)
        # FIXME: 2020-12-07 intermittent build failures here with freq of
        # None instead of BDay(4)

    with ensure_clean_store(setup_path) as store:
        # doc example part 2

        index = date_range("1/1/2000", periods=8)
        df_dc = DataFrame(
            np.random.default_rng(2).standard_normal((8, 3)),
            index=index,
            columns=["A", "B", "C"],
        )
        df_dc["string"] = "foo"
        df_dc.loc[df_dc.index[4:6], "string"] = np.nan
        df_dc.loc[df_dc.index[7:9], "string"] = "bar"
        df_dc[["B", "C"]] = df_dc[["B", "C"]].abs()
        df_dc["string2"] = "cool"

        # on-disk operations
        store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"])

        result = store.select("df_dc", "B>0")
        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"'])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
        tm.assert_frame_equal(result, expected)


def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_data):
    df = multiindex_dataframe_random_data
    df.columns.name = None

    with ensure_clean_store(setup_path) as store:
        store.append("mi", df)
        result = store.select("mi")
        tm.assert_frame_equal(result, df)

        # GH 3748
        result = store.select("mi", columns=["A", "B"])
        expected = df.reindex(columns=["A", "B"])
        tm.assert_frame_equal(result, expected)

    path = tmp_path / "test.hdf"
    df.to_hdf(path, key="df", format="table")
    result = read_hdf(path, "df", columns=["A", "B"])
    expected = df.reindex(columns=["A", "B"])
    tm.assert_frame_equal(result, expected)


def test_append_misc(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        store.append("df", df, chunksize=1)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        store.append("df1", df, expectedrows=10)
        result = store.select("df1")
        tm.assert_frame_equal(result, df)


@pytest.mark.parametrize("chunksize", [10, 200, 1000])
def test_append_misc_chunksize(setup_path, chunksize):
    # more chunksize in append tests
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df["string"] = "foo"
    df["float322"] = 1.0
    df["float322"] = df["float322"].astype("float32")
    df["bool"] = df["float322"] > 0
    df["time1"] = Timestamp("20130101").as_unit("ns")
    df["time2"] = Timestamp("20130102").as_unit("ns")
    with ensure_clean_store(setup_path, mode="w") as store:
        store.append("obj", df, chunksize=chunksize)
        result = store.select("obj")
        tm.assert_frame_equal(result, df)


def test_append_misc_empty_frame(setup_path):
    # empty frame, GH4273
    with ensure_clean_store(setup_path) as store:
        # 0 len
        df_empty = DataFrame(columns=list("ABC"))
        store.append("df", df_empty)
        with pytest.raises(KeyError, match="'No object named df in the file'"):
            store.select("df")

        # repeated append of 0/non-zero frames
        df = DataFrame(np.random.default_rng(2).random((10, 3)), columns=list("ABC"))
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)
        store.append("df", df_empty)
        tm.assert_frame_equal(store.select("df"), df)

        # store
        df = DataFrame(columns=list("ABC"))
        store.put("df2", df)
        tm.assert_frame_equal(store.select("df2"), df)


# TODO(ArrayManager) currently we rely on falling back to BlockManager, but
# the conversion from AM->BM converts the invalid object dtype column into
# a datetime64 column no longer raising an error
@td.skip_array_manager_not_yet_implemented
def test_append_raise(setup_path):
    with ensure_clean_store(setup_path) as store:
        # test append with invalid input to get good error messages

        # list in column
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        df["invalid"] = [["a"]] * len(df)
        assert df.dtypes["invalid"] == np.object_
        msg = re.escape(
            """Cannot serialize the column [invalid]
because its data contents are not [string] but [mixed] object dtype"""
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # multiple invalid columns
        df["invalid2"] = [["a"]] * len(df)
        df["invalid3"] = [["a"]] * len(df)
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # datetime with embedded nans as object
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        s = Series(datetime.datetime(2001, 1, 2), index=df.index)
        s = s.astype(object)
        s[0:5] = np.nan
        df["invalid"] = s
        assert df.dtypes["invalid"] == np.object_
        msg = "too many timezones in this block, create separate data columns"
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # directly ndarray
        msg = "value must be None, Series, or DataFrame"
        with pytest.raises(TypeError, match=msg):
            store.append("df", np.arange(10))

        # series directly
        msg = re.escape(
            "cannot properly create the storer for: "
            "[group->df,value-><class 'pandas.core.series.Series'>]"
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", Series(np.arange(10)))

        # appending an incompatible table
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        store.append("df", df)

        df["foo"] = "foo"
        msg = re.escape(
            "invalid combination of [non_index_axes] on appending data "
            "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table "
            "[(1, ['A', 'B', 'C', 'D'])]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # incompatible type (GH 41897)
        _maybe_remove(store, "df")
        df["foo"] = Timestamp("20130101")
        store.append("df", df)
        df["foo"] = "bar"
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->bytes24,kind->string,shape->(1, 30)] "
            "vs current table "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->datetime64[s],kind->datetime64[s],shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)


def test_append_with_timedelta(setup_path):
    # GH 3577
    # append timedelta

    ts = Timestamp("20130101").as_unit("ns")
    df = DataFrame(
        {
            "A": ts,
            "B": [ts + timedelta(days=i, seconds=10) for i in range(10)],
        }
    )
    df["C"] = df["A"] - df["B"]
    df.loc[3:5, "C"] = np.nan

    with ensure_clean_store(setup_path) as store:
        # table
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=True)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        result = store.select("df", where="C<100000")
        tm.assert_frame_equal(result, df)

        result = store.select("df", where="C<pd.Timedelta('-3D')")
        tm.assert_frame_equal(result, df.iloc[3:])

        result = store.select("df", "C<'-3D'")
        tm.assert_frame_equal(result, df.iloc[3:])

        # a bit hacky here as we don't really deal with the NaT properly

        result = store.select("df", "C<'-500000s'")
        result = result.dropna(subset=["C"])
        tm.assert_frame_equal(result, df.iloc[6:])

        result = store.select("df", "C<'-3.5D'")
        result = result.iloc[1:]
        tm.assert_frame_equal(result, df.iloc[4:])

        # fixed
        _maybe_remove(store, "df2")
        store.put("df2", df)
        result = store.select("df2")
        tm.assert_frame_equal(result, df)


def test_append_to_multiple(setup_path):
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df2 = df1.copy().rename(columns="{}_2".format)
    df2["foo"] = "bar"
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # exceptions
        msg = "append_to_multiple requires a selector that is in passed dict"
        with pytest.raises(ValueError, match=msg):
            store.append_to_multiple(
                {"df1": ["A", "B"], "df2": None}, df, selector="df3"
            )

        with pytest.raises(ValueError, match=msg):
            store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3")

        msg = (
            "append_to_multiple must have a dictionary specified as the way to "
            "split the value"
        )
        with pytest.raises(ValueError, match=msg):
            store.append_to_multiple("df1", df, "df1")

        # regular operation
        store.append_to_multiple({"df1": ["A", "B"], "df2": None}, df, selector="df1")
        result = store.select_as_multiple(
            ["df1", "df2"], where=["A>0", "B>0"], selector="df1"
        )
        expected = df[(df.A > 0) & (df.B > 0)]
        tm.assert_frame_equal(result, expected)


def test_append_to_multiple_dropna(setup_path):
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df2 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    ).rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=True should guarantee rows are synchronized
        store.append_to_multiple(
            {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True
        )
        result = store.select_as_multiple(["df1", "df2"])
        expected = df.dropna()
        tm.assert_frame_equal(result, expected, check_index_type=True)
        tm.assert_index_equal(store.select("df1").index, store.select("df2").index)


def test_append_to_multiple_dropna_false(setup_path):
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df2 = df1.copy().rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store, pd.option_context(
        "io.hdf.dropna_table", True
    ):
        # dropna=False shouldn't synchronize row indexes
        store.append_to_multiple(
            {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
        )

        msg = "all tables must have exactly the same nrows!"
        with pytest.raises(ValueError, match=msg):
            store.select_as_multiple(["df1a", "df2a"])

        assert not store.select("df1a").index.equals(store.select("df2a").index)


def test_append_to_multiple_min_itemsize(setup_path):
    # GH 11238
    df = DataFrame(
        {
            "IX": np.arange(1, 21),
            "Num": np.arange(1, 21),
            "BigNum": np.arange(1, 21) * 88,
            "Str": ["a" for _ in range(20)],
            "LongStr": ["abcde" for _ in range(20)],
        }
    )
    expected = df.iloc[[0]]

    with ensure_clean_store(setup_path) as store:
        store.append_to_multiple(
            {
                "index": ["IX"],
                "nums": ["Num", "BigNum"],
                "strs": ["Str", "LongStr"],
            },
            df.iloc[[0]],
            "index",
            min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
        )
        result = store.select_as_multiple(["index", "nums", "strs"])
        tm.assert_frame_equal(result, expected, check_index_type=True)
@@ -0,0 +1,214 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    DataFrame,
    Series,
    _testing as tm,
    concat,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)

pytestmark = pytest.mark.single_cpu


def test_categorical(setup_path):
    with ensure_clean_store(setup_path) as store:
        # Basic
        _maybe_remove(store, "s")
        s = Series(
            Categorical(
                ["a", "b", "b", "a", "a", "c"],
                categories=["a", "b", "c", "d"],
                ordered=False,
            )
        )
        store.append("s", s, format="table")
        result = store.select("s")
        tm.assert_series_equal(s, result)

        _maybe_remove(store, "s_ordered")
        s = Series(
            Categorical(
                ["a", "b", "b", "a", "a", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            )
        )
        store.append("s_ordered", s, format="table")
        result = store.select("s_ordered")
        tm.assert_series_equal(s, result)

        _maybe_remove(store, "df")
        df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
        store.append("df", df, format="table")
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        # Dtypes
        _maybe_remove(store, "si")
        s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
        store.append("si", s)
        result = store.select("si")
        tm.assert_series_equal(result, s)

        _maybe_remove(store, "si2")
        s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
        store.append("si2", s)
        result = store.select("si2")
        tm.assert_series_equal(result, s)

        # Multiple
        _maybe_remove(store, "df2")
        df2 = df.copy()
        df2["s2"] = Series(list("abcdefg")).astype("category")
        store.append("df2", df2)
        result = store.select("df2")
        tm.assert_frame_equal(result, df2)

        # Make sure the metadata is OK
        info = store.info()
        assert "/df2 " in info
        # df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical
        assert "/df2/meta/values_block_0/meta" in info
        assert "/df2/meta/values_block_2/meta" in info

        # unordered
        _maybe_remove(store, "s2")
        s = Series(
            Categorical(
                ["a", "b", "b", "a", "a", "c"],
                categories=["a", "b", "c", "d"],
                ordered=False,
            )
        )
        store.append("s2", s, format="table")
        result = store.select("s2")
        tm.assert_series_equal(result, s)

        # Query
        _maybe_remove(store, "df3")
        store.append("df3", df, data_columns=["s"])
        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s in ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s = ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        expected = df[df.s.isin(["d"])]
        result = store.select("df3", where=['s in ["d"]'])
        tm.assert_frame_equal(result, expected)

        expected = df[df.s.isin(["f"])]
        result = store.select("df3", where=['s in ["f"]'])
        tm.assert_frame_equal(result, expected)

        # Appending with same categories is ok
        store.append("df3", df)

        df = concat([df, df])
        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s in ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        # Appending must have the same categories
        df3 = df.copy()
        df3["s"] = df3["s"].cat.remove_unused_categories()

        msg = "cannot append a categorical with different categories to the existing"
        with pytest.raises(ValueError, match=msg):
            store.append("df3", df3)

        # Remove, and make sure the metadata is removed (it's a recursive
        # removal so it should be).
        result = store.select("df3/meta/s/meta")
        assert result is not None
        store.remove("df3")

        with pytest.raises(
            KeyError, match="'No object named df3/meta/s/meta in the file'"
        ):
            store.select("df3/meta/s/meta")


def test_categorical_conversion(tmp_path, setup_path):
    # GH13322
    # Check that read_hdf with categorical columns doesn't return rows if
    # the where criteria isn't met.
    obsids = ["ESP_012345_6789", "ESP_987654_3210"]
    imgids = ["APF00006np", "APF0001imm"]
    data = [4.3, 9.8]

    # Test without categories
    df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})

    # We are expecting an empty DataFrame matching types of df
    expected = df.iloc[[], :]
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", data_columns=True)
    result = read_hdf(path, "df", where="obsids=B")
    tm.assert_frame_equal(result, expected)

    # Test with categories
    df.obsids = df.obsids.astype("category")
    df.imgids = df.imgids.astype("category")

    # We are expecting an empty DataFrame matching types of df
    expected = df.iloc[[], :]
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", data_columns=True)
    result = read_hdf(path, "df", where="obsids=B")
    tm.assert_frame_equal(result, expected)


def test_categorical_nan_only_columns(tmp_path, setup_path):
    # GH18413
    # Check that read_hdf with categorical columns with NaN-only values can
    # be read back.
    df = DataFrame(
        {
            "a": ["a", "b", "c", np.nan],
            "b": [np.nan, np.nan, np.nan, np.nan],
            "c": [1, 2, 3, 4],
            "d": Series([None] * 4, dtype=object),
        }
    )
    df["a"] = df.a.astype("category")
    df["b"] = df.b.astype("category")
    df["d"] = df.b.astype("category")
    expected = df
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", data_columns=True)
    result = read_hdf(path, "df")
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "where, df, expected",
    [
        ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})),
        ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})),
    ],
)
def test_convert_value(
    tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame
):
    # GH39420
    # Check that read_hdf with categorical columns can filter by where condition.
    df.col = df.col.astype("category")
    max_widths = {"col": 1}
    categorical_values = sorted(df.col.unique())
    expected.col = expected.col.astype("category")
    expected.col = expected.col.cat.set_categories(categorical_values)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
    result = read_hdf(path, where=where)
    tm.assert_frame_equal(result, expected)
@@ -0,0 +1,75 @@
import pytest

import pandas as pd
import pandas._testing as tm

tables = pytest.importorskip("tables")


@pytest.fixture
def pytables_hdf5_file(tmp_path):
    """
    Use PyTables to create a simple HDF5 file.
    """
    table_schema = {
        "c0": tables.Time64Col(pos=0),
        "c1": tables.StringCol(5, pos=1),
        "c2": tables.Int64Col(pos=2),
    }

    t0 = 1_561_105_000.0

    testsamples = [
        {"c0": t0, "c1": "aaaaa", "c2": 1},
        {"c0": t0 + 1, "c1": "bbbbb", "c2": 2},
        {"c0": t0 + 2, "c1": "ccccc", "c2": 10**5},
        {"c0": t0 + 3, "c1": "ddddd", "c2": 4_294_967_295},
    ]

    objname = "pandas_test_timeseries"

    path = tmp_path / "written_with_pytables.h5"
    with tables.open_file(path, mode="w") as f:
        t = f.create_table("/", name=objname, description=table_schema)
        for sample in testsamples:
            for key, value in sample.items():
                t.row[key] = value
            t.row.append()

    yield path, objname, pd.DataFrame(testsamples)


class TestReadPyTablesHDF5:
    """
    A group of tests which covers reading HDF5 files written by plain PyTables
    (not written by pandas).

    Was introduced for regression-testing issue 11188.
    """

    def test_read_complete(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        result = pd.read_hdf(path, key=objname)
        expected = df
        tm.assert_frame_equal(result, expected, check_index_type=True)

    def test_read_with_start(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        # This is a regression test for pandas-dev/pandas/issues/11188
        result = pd.read_hdf(path, key=objname, start=1)
        expected = df[1:].reset_index(drop=True)
        tm.assert_frame_equal(result, expected, check_index_type=True)

    def test_read_with_stop(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        # This is a regression test for pandas-dev/pandas/issues/11188
        result = pd.read_hdf(path, key=objname, stop=1)
        expected = df[:1].reset_index(drop=True)
        tm.assert_frame_equal(result, expected, check_index_type=True)

    def test_read_with_startstop(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        # This is a regression test for pandas-dev/pandas/issues/11188
        result = pd.read_hdf(path, key=objname, start=1, stop=2)
        expected = df[1:2].reset_index(drop=True)
        tm.assert_frame_equal(result, expected, check_index_type=True)
@@ -0,0 +1,195 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import ensure_clean_store

from pandas.io.pytables import read_hdf


def test_complex_fixed(tmp_path, setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_table(tmp_path, setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    reread = read_hdf(path, key="df")
    tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", mode="w")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_mixed_fixed(tmp_path, setup_path):
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_mixed_table(tmp_path, setup_path):
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["A", "B"])
        result = store.select("df", where="A>2")
        tm.assert_frame_equal(df.loc[df.A > 2], result)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_across_dimensions_fixed(tmp_path, setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))
    df = DataFrame({"A": s, "B": s})

    objs = [s, df]
    comps = [tm.assert_series_equal, tm.assert_frame_equal]
    for obj, comp in zip(objs, comps):
        path = tmp_path / setup_path
        obj.to_hdf(path, key="obj", format="fixed")
        reread = read_hdf(path, "obj")
        comp(obj, reread)


def test_complex_across_dimensions(tmp_path, setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))
    df = DataFrame({"A": s, "B": s})

    path = tmp_path / setup_path
    df.to_hdf(path, key="obj", format="table")
    reread = read_hdf(path, "obj")
    tm.assert_frame_equal(df, reread)


def test_complex_indexing_error(setup_path):
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
        index=list("abcd"),
    )

    msg = (
        "Columns containing complex values can be stored "
        "but cannot be indexed when using table format. "
        "Either use fixed format, set index=False, "
        "or do not include the columns containing complex "
        "values to data_columns when initializing the table."
    )

    with ensure_clean_store(setup_path) as store:
        with pytest.raises(TypeError, match=msg):
            store.append("df", df, data_columns=["C"])


def test_complex_series_error(tmp_path, setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))

    msg = (
        "Columns containing complex values can be stored "
        "but cannot be indexed when using table format. "
        "Either use fixed format, set index=False, "
        "or do not include the columns containing complex "
        "values to data_columns when initializing the table."
    )

    path = tmp_path / setup_path
    with pytest.raises(TypeError, match=msg):
        s.to_hdf(path, key="obj", format="t")

    path = tmp_path / setup_path
    s.to_hdf(path, key="obj", format="t", index=False)
    reread = read_hdf(path, "obj")
    tm.assert_series_equal(s, reread)


def test_complex_append(setup_path):
    df = DataFrame(
        {
            "a": np.random.default_rng(2).standard_normal(100).astype(np.complex128),
            "b": np.random.default_rng(2).standard_normal(100),
        }
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["b"])
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(pd.concat([df, df], axis=0), result)
@ -0,0 +1,251 @@
import datetime
from io import BytesIO
import re

import numpy as np
import pytest

from pandas import (
    CategoricalIndex,
    DataFrame,
    HDFStore,
    Index,
    MultiIndex,
    _testing as tm,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import ensure_clean_store

from pandas.io.pytables import (
    Term,
    _maybe_adjust_name,
)

pytestmark = pytest.mark.single_cpu


def test_pass_spec_to_storer(setup_path):
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    with ensure_clean_store(setup_path) as store:
        store.put("df", df)
        msg = (
            "cannot pass a column specification when reading a Fixed format "
            "store. this store must be selected in its entirety"
        )
        with pytest.raises(TypeError, match=msg):
            store.select("df", columns=["A"])
        msg = (
            "cannot pass a where specification when reading from a Fixed "
            "format store. this store must be selected in its entirety"
        )
        with pytest.raises(TypeError, match=msg):
            store.select("df", where=[("columns=A")])


def test_table_index_incompatible_dtypes(setup_path):
    df1 = DataFrame({"a": [1, 2, 3]})
    df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df1, format="table")
        msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
        with pytest.raises(TypeError, match=msg):
            store.put("frame", df2, format="table", append=True)


def test_unimplemented_dtypes_table_columns(setup_path):
    with ensure_clean_store(setup_path) as store:
        dtypes = [("date", datetime.date(2001, 1, 2))]

        # currently not supported dtypes ####
        for n, f in dtypes:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )
            df[n] = f
            msg = re.escape(f"[{n}] is not implemented as a table column")
            with pytest.raises(TypeError, match=msg):
                store.append(f"df1_{n}", df)

    # frame
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df["obj1"] = "foo"
    df["obj2"] = "bar"
    df["datetime1"] = datetime.date(2001, 1, 2)
    df = df._consolidate()

    with ensure_clean_store(setup_path) as store:
        # this fails because we have a date in the object block
        msg = re.escape(
            """Cannot serialize the column [datetime1]
because its data contents are not [string] but [date] object dtype"""
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df_unimplemented", df)


def test_invalid_terms(tmp_path, setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df["string"] = "foo"
        df.loc[df.index[0:4], "string"] = "bar"

        store.put("df", df, format="table")

        # some invalid terms
        msg = re.escape("__init__() missing 1 required positional argument: 'where'")
        with pytest.raises(TypeError, match=msg):
            Term()

        # more invalid
        msg = re.escape(
            "cannot process expression [df.index[3]], "
            "[2000-01-06 00:00:00] is not a valid condition"
        )
        with pytest.raises(ValueError, match=msg):
            store.select("df", "df.index[3]")

        msg = "invalid syntax"
        with pytest.raises(SyntaxError, match=msg):
            store.select("df", "index>")

    # from the docs
    path = tmp_path / setup_path
    dfq = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=list("ABCD"),
        index=date_range("20130101", periods=10),
    )
    dfq.to_hdf(path, key="dfq", format="table", data_columns=True)

    # check ok
    read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']")
    read_hdf(path, "dfq", where="A>0 or C>0")

    # catch the invalid reference
    path = tmp_path / setup_path
    dfq = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=list("ABCD"),
        index=date_range("20130101", periods=10),
    )
    dfq.to_hdf(path, key="dfq", format="table")

    msg = (
        r"The passed where expression: A>0 or C>0\n\s*"
        r"contains an invalid variable reference\n\s*"
        r"all of the variable references must be a reference to\n\s*"
        r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
        r"The currently defined references are: index,columns\n"
    )
    with pytest.raises(ValueError, match=msg):
        read_hdf(path, "dfq", where="A>0 or C>0")


def test_append_with_diff_col_name_types_raises_value_error(setup_path):
    df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)))
    df2 = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
    df3 = DataFrame({(1, 2): np.random.default_rng(2).standard_normal(10)})
    df4 = DataFrame({("1", 2): np.random.default_rng(2).standard_normal(10)})
    df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})

    with ensure_clean_store(setup_path) as store:
        name = "df_diff_valerror"
        store.append(name, df)

        for d in (df2, df3, df4, df5):
            msg = re.escape(
                "cannot match existing table structure for [0] on appending data"
            )
            with pytest.raises(ValueError, match=msg):
                store.append(name, d)


def test_invalid_complib(setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with tm.ensure_clean(setup_path) as path:
        msg = r"complib only supports \[.*\] compression."
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, key="df", complib="foolib")


@pytest.mark.parametrize(
    "idx",
    [
        date_range("2019", freq="D", periods=3, tz="UTC"),
        CategoricalIndex(list("abc")),
    ],
)
def test_to_hdf_multiindex_extension_dtype(idx, tmp_path, setup_path):
    # GH 7775
    mi = MultiIndex.from_arrays([idx, idx])
    df = DataFrame(0, index=mi, columns=["a"])
    path = tmp_path / setup_path
    with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
        df.to_hdf(path, key="df")


def test_unsupported_hdf_file_error(datapath):
    # GH 9539
    data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5")
    message = (
        r"Dataset\(s\) incompatible with Pandas data types, "
        "not table, or no datasets found in HDF5 file."
    )

    with pytest.raises(ValueError, match=message):
        read_hdf(data_path)


def test_read_hdf_errors(setup_path, tmp_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    msg = r"File [\S]* does not exist"
    with pytest.raises(OSError, match=msg):
        read_hdf(path, "key")

    df.to_hdf(path, key="df")
    store = HDFStore(path, mode="r")
    store.close()

    msg = "The HDFStore must be open for reading."
    with pytest.raises(OSError, match=msg):
        read_hdf(store, "df")


def test_read_hdf_generic_buffer_errors():
    msg = "Support for generic buffers has not been implemented."
    with pytest.raises(NotImplementedError, match=msg):
        read_hdf(BytesIO(b""), "df")


@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"])
def test_maybe_adjust_name_bad_version_raises(bad_version):
    msg = "Version is incorrect, expected sequence of 3 integers"
    with pytest.raises(ValueError, match=msg):
        _maybe_adjust_name("values_block_0", version=bad_version)
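

def test_where_valid_data_column_example(tmp_path, setup_path):
    # Illustrative counterpart to test_invalid_terms above (a sketch, assuming
    # the same fixtures, not part of the original suite): a column listed in
    # data_columns becomes a legal variable reference in a where expression.
    dfq = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=list("ABCD"),
        index=date_range("20130101", periods=10),
    )
    path = tmp_path / setup_path
    dfq.to_hdf(path, key="dfq", format="table", data_columns=["A"])
    result = read_hdf(path, "dfq", where="A>0")
    tm.assert_frame_equal(result, dfq[dfq.A > 0])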
@ -0,0 +1,495 @@
import os

import numpy as np
import pytest

from pandas.compat import (
    PY311,
    is_ci_environment,
    is_platform_linux,
    is_platform_little_endian,
)
from pandas.errors import (
    ClosedFileError,
    PossibleDataLossError,
)

from pandas import (
    DataFrame,
    HDFStore,
    Index,
    Series,
    _testing as tm,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
    tables,
)

from pandas.io import pytables
from pandas.io.pytables import Term

pytestmark = pytest.mark.single_cpu


@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
def test_mode(setup_path, tmp_path, mode):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    msg = r"[\S]* does not exist"
    path = tmp_path / setup_path

    # constructor
    if mode in ["r", "r+"]:
        with pytest.raises(OSError, match=msg):
            HDFStore(path, mode=mode)

    else:
        with HDFStore(path, mode=mode) as store:
            assert store._handle.mode == mode

    path = tmp_path / setup_path

    # context
    if mode in ["r", "r+"]:
        with pytest.raises(OSError, match=msg):
            with HDFStore(path, mode=mode) as store:
                pass
    else:
        with HDFStore(path, mode=mode) as store:
            assert store._handle.mode == mode

    path = tmp_path / setup_path

    # conv write
    if mode in ["r", "r+"]:
        with pytest.raises(OSError, match=msg):
            df.to_hdf(path, key="df", mode=mode)
        df.to_hdf(path, key="df", mode="w")
    else:
        df.to_hdf(path, key="df", mode=mode)

    # conv read
    if mode in ["w"]:
        msg = (
            "mode w is not allowed while performing a read. "
            r"Allowed modes are r, r\+ and a."
        )
        with pytest.raises(ValueError, match=msg):
            read_hdf(path, "df", mode=mode)
    else:
        result = read_hdf(path, "df", mode=mode)
        tm.assert_frame_equal(result, df)


def test_default_mode(tmp_path, setup_path):
    # read_hdf uses default mode
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w")
    result = read_hdf(path, "df")
    tm.assert_frame_equal(result, df)


def test_reopen_handle(tmp_path, setup_path):
    path = tmp_path / setup_path

    store = HDFStore(path, mode="a")
    store["a"] = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )

    msg = (
        r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the "
        "current file!"
    )
    # invalid mode change
    with pytest.raises(PossibleDataLossError, match=msg):
        store.open("w")

    store.close()
    assert not store.is_open

    # truncation ok here
    store.open("w")
    assert store.is_open
    assert len(store) == 0
    store.close()
    assert not store.is_open

    store = HDFStore(path, mode="a")
    store["a"] = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )

    # reopen as read
    store.open("r")
    assert store.is_open
    assert len(store) == 1
    assert store._mode == "r"
    store.close()
    assert not store.is_open

    # reopen as append
    store.open("a")
    assert store.is_open
    assert len(store) == 1
    assert store._mode == "a"
    store.close()
    assert not store.is_open

    # reopen as append (again)
    store.open("a")
    assert store.is_open
    assert len(store) == 1
    assert store._mode == "a"
    store.close()
    assert not store.is_open


def test_open_args(setup_path):
    with tm.ensure_clean(setup_path) as path:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )

        # create an in memory store
        store = HDFStore(
            path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0
        )
        store["df"] = df
        store.append("df2", df)

        tm.assert_frame_equal(store["df"], df)
        tm.assert_frame_equal(store["df2"], df)

        store.close()

    # the file should not have actually been written
    assert not os.path.exists(path)


def test_flush(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = Series(range(5))
        store.flush()
        store.flush(fsync=True)


def test_complibs_default_settings(tmp_path, setup_path):
    # GH15943
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    # Set complevel and check if complib is automatically set to
    # default value
    tmpfile = tmp_path / setup_path
    df.to_hdf(tmpfile, key="df", complevel=9)
    result = read_hdf(tmpfile, "df")
    tm.assert_frame_equal(result, df)

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 9
            assert node.filters.complib == "zlib"

    # Set complib and check to see if compression is disabled
    tmpfile = tmp_path / setup_path
    df.to_hdf(tmpfile, key="df", complib="zlib")
    result = read_hdf(tmpfile, "df")
    tm.assert_frame_equal(result, df)

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 0
            assert node.filters.complib is None

    # Check if not setting complib or complevel results in no compression
    tmpfile = tmp_path / setup_path
    df.to_hdf(tmpfile, key="df")
    result = read_hdf(tmpfile, "df")
    tm.assert_frame_equal(result, df)

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 0
            assert node.filters.complib is None


def test_complibs_default_settings_override(tmp_path, setup_path):
    # Check if file-defaults can be overridden on a per table basis
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    tmpfile = tmp_path / setup_path
    store = HDFStore(tmpfile)
    store.append("dfc", df, complevel=9, complib="blosc")
    store.append("df", df)
    store.close()

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 0
            assert node.filters.complib is None
        for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
            assert node.filters.complevel == 9
            assert node.filters.complib == "blosc"
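

def test_complib_round_trip_example(tmp_path, setup_path):
    # A minimal sketch (same fixtures as above, not from the original suite):
    # per-call complib/complevel settings only affect the on-disk layout, so
    # the data read back is unchanged.
    df = DataFrame({"A": np.arange(10.0)})
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", complib="zlib", complevel=5)
    tm.assert_frame_equal(read_hdf(path, "df"), df)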


@pytest.mark.parametrize("lvl", range(10))
@pytest.mark.parametrize("lib", tables.filters.all_complibs)
@pytest.mark.filterwarnings("ignore:object name is not a valid")
@pytest.mark.skipif(
    not PY311 and is_ci_environment() and is_platform_linux(),
    reason="Segfaulting in a CI environment"
    # with xfail, would sometimes raise UnicodeDecodeError
    # invalid state byte
)
def test_complibs(tmp_path, lvl, lib, request):
    # GH14478
    if PY311 and is_platform_linux() and lib == "blosc2" and lvl != 0:
        request.applymarker(
            pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11")
        )
    df = DataFrame(
        np.ones((30, 4)), columns=list("ABCD"), index=np.arange(30).astype(np.str_)
    )

    # Remove lzo if it's not available on this platform
    if not tables.which_lib_version("lzo"):
        pytest.skip("lzo not available")
    # Remove bzip2 if it's not available on this platform
    if not tables.which_lib_version("bzip2"):
        pytest.skip("bzip2 not available")

    tmpfile = tmp_path / f"{lvl}_{lib}.h5"
    gname = f"{lvl}_{lib}"

    # Write and read file to see if data is consistent
    df.to_hdf(tmpfile, key=gname, complib=lib, complevel=lvl)
    result = read_hdf(tmpfile, gname)
    tm.assert_frame_equal(result, df)

    # Open file and check metadata for correct amount of compression
    with tables.open_file(tmpfile, mode="r") as h5table:
        for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
            assert node.filters.complevel == lvl
            if lvl == 0:
                assert node.filters.complib is None
            else:
                assert node.filters.complib == lib


@pytest.mark.skipif(
    not is_platform_little_endian(), reason="platform is not little endian"
)
def test_encoding(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame({"A": "foo", "B": "bar"}, index=range(5))
        df.loc[2, "A"] = np.nan
        df.loc[3, "B"] = np.nan
        _maybe_remove(store, "df")
        store.append("df", df, encoding="ascii")
        tm.assert_frame_equal(store["df"], df)

        expected = df.reindex(columns=["A"])
        result = store.select("df", Term("columns=A", encoding="ascii"))
        tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "val",
    [
        [b"E\xc9, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"a", b"b", b"c"],
        [b"EE, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
        [b"", b"a", b"b", b"c"],
        [b"\xf8\xfc", b"a", b"b", b"c"],
        [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
        [np.nan, b"", b"b", b"c"],
        [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
    ],
)
@pytest.mark.parametrize("dtype", ["category", object])
def test_latin_encoding(tmp_path, setup_path, dtype, val):
    enc = "latin-1"
    nan_rep = ""
    key = "data"

    val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]
    ser = Series(val, dtype=dtype)

    store = tmp_path / setup_path
    ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep)
    retr = read_hdf(store, key)

    # TODO:(3.0): once Categorical replace deprecation is enforced,
    # we may be able to re-simplify the construction of s_nan
    if dtype == "category":
        if nan_rep in ser.cat.categories:
            s_nan = ser.cat.remove_categories([nan_rep])
        else:
            s_nan = ser
    else:
        s_nan = ser.replace(nan_rep, np.nan)

    tm.assert_series_equal(s_nan, retr)


def test_multiple_open_close(tmp_path, setup_path):
    # gh-4409: open & close multiple times

    path = tmp_path / setup_path

    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df.to_hdf(path, key="df", mode="w", format="table")

    # single
    store = HDFStore(path)
    assert "CLOSED" not in store.info()
    assert store.is_open

    store.close()
    assert "CLOSED" in store.info()
    assert not store.is_open

    path = tmp_path / setup_path

    if pytables._table_file_open_policy_is_strict:
        # multiples
        store1 = HDFStore(path)
        msg = (
            r"The file [\S]* is already opened\. Please close it before "
            r"reopening in write mode\."
        )
        with pytest.raises(ValueError, match=msg):
            HDFStore(path)

        store1.close()
    else:
        # multiples
        store1 = HDFStore(path)
        store2 = HDFStore(path)

        assert "CLOSED" not in store1.info()
        assert "CLOSED" not in store2.info()
        assert store1.is_open
        assert store2.is_open

        store1.close()
        assert "CLOSED" in store1.info()
        assert not store1.is_open
        assert "CLOSED" not in store2.info()
        assert store2.is_open

        store2.close()
        assert "CLOSED" in store1.info()
        assert "CLOSED" in store2.info()
        assert not store1.is_open
        assert not store2.is_open

        # nested close
        store = HDFStore(path, mode="w")
        store.append("df", df)

        store2 = HDFStore(path)
        store2.append("df2", df)
        store2.close()
        assert "CLOSED" in store2.info()
        assert not store2.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

        # double closing
        store = HDFStore(path, mode="w")
        store.append("df", df)

        store2 = HDFStore(path)
        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

        store2.close()
        assert "CLOSED" in store2.info()
        assert not store2.is_open

    # ops on a closed store
    path = tmp_path / setup_path

    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df.to_hdf(path, key="df", mode="w", format="table")

    store = HDFStore(path)
    store.close()

    msg = r"[\S]* file is not open!"
    with pytest.raises(ClosedFileError, match=msg):
        store.keys()

    with pytest.raises(ClosedFileError, match=msg):
        "df" in store

    with pytest.raises(ClosedFileError, match=msg):
        len(store)

    with pytest.raises(ClosedFileError, match=msg):
        store["df"]

    with pytest.raises(ClosedFileError, match=msg):
        store.select("df")

    with pytest.raises(ClosedFileError, match=msg):
        store.get("df")

    with pytest.raises(ClosedFileError, match=msg):
        store.append("df2", df)

    with pytest.raises(ClosedFileError, match=msg):
        store.put("df3", df)

    with pytest.raises(ClosedFileError, match=msg):
        store.get_storer("df2")

    with pytest.raises(ClosedFileError, match=msg):
        store.remove("df2")

    with pytest.raises(ClosedFileError, match=msg):
        store.select("df")

    msg = "'HDFStore' object has no attribute 'df'"
    with pytest.raises(AttributeError, match=msg):
        store.df


def test_fspath():
    with tm.ensure_clean("foo.h5") as path:
        with HDFStore(path) as store:
            assert os.fspath(store) == str(path)
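

def test_append_mode_preserves_existing_keys_example(tmp_path, setup_path):
    # Sketch (same fixtures, not from the original suite): mode="a" keeps the
    # nodes already in the file, while mode="w" truncates it, which is the
    # data-loss case that test_reopen_handle guards against.
    df = DataFrame({"A": range(3)})
    path = tmp_path / setup_path
    df.to_hdf(path, key="df1", mode="w")
    df.to_hdf(path, key="df2", mode="a")
    with HDFStore(path, mode="r") as store:
        assert set(store.keys()) == {"/df1", "/df2"}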
@ -0,0 +1,87 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    HDFStore,
    Index,
    Series,
    date_range,
)
from pandas.tests.io.pytables.common import (
    ensure_clean_store,
    tables,
)

pytestmark = pytest.mark.single_cpu


def test_keys(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        store["b"] = Series(
            range(10), dtype="float64", index=[f"i_{i}" for i in range(10)]
        )
        store["c"] = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )

        assert len(store) == 3
        expected = {"/a", "/b", "/c"}
        assert set(store.keys()) == expected
        assert set(store) == expected


def test_non_pandas_keys(tmp_path, setup_path):
    class Table1(tables.IsDescription):
        value1 = tables.Float32Col()

    class Table2(tables.IsDescription):
        value2 = tables.Float32Col()

    class Table3(tables.IsDescription):
        value3 = tables.Float32Col()

    path = tmp_path / setup_path
    with tables.open_file(path, mode="w") as h5file:
        group = h5file.create_group("/", "group")
        h5file.create_table(group, "table1", Table1, "Table 1")
        h5file.create_table(group, "table2", Table2, "Table 2")
        h5file.create_table(group, "table3", Table3, "Table 3")
    with HDFStore(path) as store:
        assert len(store.keys(include="native")) == 3
        expected = {"/group/table1", "/group/table2", "/group/table3"}
        assert set(store.keys(include="native")) == expected
        assert set(store.keys(include="pandas")) == set()
        for name in expected:
            df = store.get(name)
            assert len(df.columns) == 1


def test_keys_illegal_include_keyword_value(setup_path):
    with ensure_clean_store(setup_path) as store:
        with pytest.raises(
            ValueError,
            match="`include` should be either 'pandas' or 'native' but is 'illegal'",
        ):
            store.keys(include="illegal")


def test_keys_ignore_hdf_softlink(setup_path):
    # GH 20523
    # Puts a softlink into HDF file and rereads

    with ensure_clean_store(setup_path) as store:
        df = DataFrame({"A": range(5), "B": range(5)})
        store.put("df", df)

        assert store.keys() == ["/df"]

        store._handle.create_soft_link(store._handle.root, "symlink", "df")

        # Should ignore the softlink
        assert store.keys() == ["/df"]
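

def test_keys_leading_slash_example(setup_path):
    # Sketch (same fixtures, not from the original suite): keys are reported
    # with a leading "/", and item access works with or without it.
    with ensure_clean_store(setup_path) as store:
        store["df"] = DataFrame({"A": range(3)})
        assert store.keys() == ["/df"]
        assert store["df"].equals(store["/df"])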
@ -0,0 +1,374 @@
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp

import pandas as pd
from pandas import (
    DataFrame,
    HDFStore,
    Index,
    MultiIndex,
    Series,
    _testing as tm,
    concat,
    date_range,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

pytestmark = pytest.mark.single_cpu


def test_format_type(tmp_path, setup_path):
    df = DataFrame({"A": [1, 2]})
    with HDFStore(tmp_path / setup_path) as store:
        store.put("a", df, format="fixed")
        store.put("b", df, format="table")

        assert store.get_storer("a").format_type == "fixed"
        assert store.get_storer("b").format_type == "table"


def test_format_kwarg_in_constructor(tmp_path, setup_path):
    # GH 13291

    msg = "format is not a defined argument for HDFStore"

    with pytest.raises(ValueError, match=msg):
        HDFStore(tmp_path / setup_path, format="table")


def test_api_default_format(tmp_path, setup_path):
    # default_format option
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )

        with pd.option_context("io.hdf.default_format", "fixed"):
            _maybe_remove(store, "df")
            store.put("df", df)
            assert not store.get_storer("df").is_table

            msg = "Can only append to Tables"
            with pytest.raises(ValueError, match=msg):
                store.append("df2", df)

        with pd.option_context("io.hdf.default_format", "table"):
            _maybe_remove(store, "df")
            store.put("df", df)
            assert store.get_storer("df").is_table

            _maybe_remove(store, "df2")
            store.append("df2", df)
            assert store.get_storer("df").is_table

    path = tmp_path / setup_path
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    with pd.option_context("io.hdf.default_format", "fixed"):
        df.to_hdf(path, key="df")
        with HDFStore(path) as store:
            assert not store.get_storer("df").is_table
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, key="df2", append=True)

    with pd.option_context("io.hdf.default_format", "table"):
        df.to_hdf(path, key="df3")
        with HDFStore(path) as store:
            assert store.get_storer("df3").is_table
        df.to_hdf(path, key="df4", append=True)
        with HDFStore(path) as store:
            assert store.get_storer("df4").is_table


def test_put(setup_path):
    with ensure_clean_store(setup_path) as store:
        ts = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        df = DataFrame(
            np.random.default_rng(2).standard_normal((20, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=20, freq="B"),
        )
        store["a"] = ts
        store["b"] = df[:10]
        store["foo/bar/bah"] = df[:10]
        store["foo"] = df[:10]
        store["/foo"] = df[:10]
        store.put("c", df[:10], format="table")

        # not OK, not a table
        msg = "Can only append to Tables"
        with pytest.raises(ValueError, match=msg):
            store.put("b", df[10:], append=True)

        # node does not currently exist, test _is_table_type returns False
        # in this case
        _maybe_remove(store, "f")
        with pytest.raises(ValueError, match=msg):
            store.put("f", df[10:], append=True)

        # can't put to a table (use append instead)
        with pytest.raises(ValueError, match=msg):
            store.put("c", df[10:], append=True)

        # overwrite table
        store.put("c", df[:10], format="table", append=False)
        tm.assert_frame_equal(df[:10], store["c"])


def test_put_string_index(setup_path):
    with ensure_clean_store(setup_path) as store:
        index = Index([f"I am a very long string index: {i}" for i in range(20)])
        s = Series(np.arange(20), index=index)
        df = DataFrame({"A": s, "B": s})

        store["a"] = s
        tm.assert_series_equal(store["a"], s)

        store["b"] = df
        tm.assert_frame_equal(store["b"], df)

        # mixed length
        index = Index(
            ["abcdefghijklmnopqrstuvwxyz1234567890"]
            + [f"I am a very long string index: {i}" for i in range(20)]
        )
        s = Series(np.arange(21), index=index)
        df = DataFrame({"A": s, "B": s})
        store["a"] = s
        tm.assert_series_equal(store["a"], s)

        store["b"] = df
        tm.assert_frame_equal(store["b"], df)


def test_put_compression(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )

        store.put("c", df, format="table", complib="zlib")
        tm.assert_frame_equal(store["c"], df)

        # can't compress if format='fixed'
        msg = "Compression not supported on Fixed format stores"
        with pytest.raises(ValueError, match=msg):
            store.put("b", df, format="fixed", complib="zlib")


@td.skip_if_windows
def test_put_compression_blosc(setup_path):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )

    with ensure_clean_store(setup_path) as store:
        # can't compress if format='fixed'
        msg = "Compression not supported on Fixed format stores"
        with pytest.raises(ValueError, match=msg):
            store.put("b", df, format="fixed", complib="blosc")

        store.put("c", df, format="table", complib="blosc")
        tm.assert_frame_equal(store["c"], df)


def test_put_mixed_type(setup_path):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df["obj1"] = "foo"
    df["obj2"] = "bar"
    df["bool1"] = df["A"] > 0
    df["bool2"] = df["B"] > 0
    df["bool3"] = True
    df["int1"] = 1
    df["int2"] = 2
    df["timestamp1"] = Timestamp("20010102").as_unit("ns")
    df["timestamp2"] = Timestamp("20010103").as_unit("ns")
    df["datetime1"] = Timestamp("20010102").as_unit("ns")
    df["datetime2"] = Timestamp("20010103").as_unit("ns")
    df.loc[df.index[3:6], ["obj1"]] = np.nan
    df = df._consolidate()

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")

        with tm.assert_produces_warning(pd.errors.PerformanceWarning):
            store.put("df", df)

        expected = store.get("df")
        tm.assert_frame_equal(expected, df)


@pytest.mark.parametrize("format", ["table", "fixed"])
@pytest.mark.parametrize(
    "index",
    [
        Index([str(i) for i in range(10)]),
        Index(np.arange(10, dtype=float)),
        Index(np.arange(10)),
        date_range("2020-01-01", periods=10),
        pd.period_range("2020-01-01", periods=10),
    ],
)
def test_store_index_types(setup_path, format, index):
    # GH5386
    # test storing various index types

    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 2)),
            columns=list("AB"),
            index=index,
        )
        _maybe_remove(store, "df")
        store.put("df", df, format=format)
        tm.assert_frame_equal(df, store["df"])


def test_column_multiindex(setup_path):
    # GH 4710
    # recreate multi-indexes properly

    index = MultiIndex.from_tuples(
        [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]
    )
    df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
    expected = df.set_axis(df.index.to_numpy())

    with ensure_clean_store(setup_path) as store:
        store.put("df", df)
        tm.assert_frame_equal(
            store["df"], expected, check_index_type=True, check_column_type=True
        )

        store.put("df1", df, format="table")
        tm.assert_frame_equal(
            store["df1"], expected, check_index_type=True, check_column_type=True
        )

        msg = re.escape("cannot use a multi-index on axis [1] with data_columns ['A']")
        with pytest.raises(ValueError, match=msg):
            store.put("df2", df, format="table", data_columns=["A"])
        msg = re.escape("cannot use a multi-index on axis [1] with data_columns True")
        with pytest.raises(ValueError, match=msg):
            store.put("df3", df, format="table", data_columns=True)

    # appending multi-column on existing table (see GH 6167)
    with ensure_clean_store(setup_path) as store:
        store.append("df2", df)
        store.append("df2", df)

        tm.assert_frame_equal(store["df2"], concat((df, df)))

    # non_index_axes name
    df = DataFrame(
        np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")
    )
    expected = df.set_axis(df.index.to_numpy())

    with ensure_clean_store(setup_path) as store:
        store.put("df1", df, format="table")
        tm.assert_frame_equal(
            store["df1"], expected, check_index_type=True, check_column_type=True
        )


def test_store_multiindex(setup_path):
    # validate multi-index names
    # GH 5527
    with ensure_clean_store(setup_path) as store:

        def make_index(names=None):
            dti = date_range("2013-12-01", "2013-12-02")
            mi = MultiIndex.from_product([dti, range(2), range(3)], names=names)
            return mi

        # no names
        _maybe_remove(store, "df")
        df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)

        # partial names
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", None, None]),
        )
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)

        # series
        _maybe_remove(store, "ser")
        ser = Series(np.zeros(12), index=make_index(["date", None, None]))
        store.append("ser", ser)
        xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))
        tm.assert_series_equal(store.select("ser"), xp)

        # dup with column
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "a", "t"]),
        )
        msg = "duplicate names/columns in the multi-index when storing as a table"
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # dup within level
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "date", "date"]),
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # fully named
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "s", "t"]),
        )
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)


@pytest.mark.parametrize("format", ["fixed", "table"])
def test_store_periodindex(tmp_path, setup_path, format):
    # GH 7796
    # test of PeriodIndex in HDFStore
    df = DataFrame(
        np.random.default_rng(2).standard_normal((5, 1)),
        index=pd.period_range("20220101", freq="M", periods=5),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w", format=format)
    expected = pd.read_hdf(path, "df")
    tm.assert_frame_equal(df, expected)
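

def test_put_hierarchical_keys_example(setup_path):
    # Sketch (same fixtures, not from the original suite): keys are HDF5
    # paths, so a nested name like the "foo/bar/bah" used in test_put creates
    # intermediate groups and is retrieved with the same path.
    df = DataFrame({"A": range(3)})
    with ensure_clean_store(setup_path) as store:
        store.put("foo/bar/bah", df)
        tm.assert_frame_equal(store["foo/bar/bah"], df)
        assert store.keys() == ["/foo/bar/bah"]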
@ -0,0 +1,14 @@
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm


@td.skip_if_installed("tables")
def test_pytables_raises():
    df = pd.DataFrame({"A": [1, 2]})
    with pytest.raises(ImportError, match="tables"):
        with tm.ensure_clean("foo.h5") as path:
            df.to_hdf(path, key="df")
@ -0,0 +1,412 @@
from contextlib import closing
from pathlib import Path
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows

import pandas as pd
from pandas import (
    DataFrame,
    HDFStore,
    Index,
    Series,
    _testing as tm,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

from pandas.io.pytables import TableIterator

pytestmark = pytest.mark.single_cpu


def test_read_missing_key_close_store(tmp_path, setup_path):
    # GH 25766
    path = tmp_path / setup_path
    df = DataFrame({"a": range(2), "b": range(2)})
    df.to_hdf(path, key="k1")

    with pytest.raises(KeyError, match="'No object named k2 in the file'"):
        read_hdf(path, "k2")

    # smoke test to check that the file is properly closed after
    # a read with KeyError before another write
    df.to_hdf(path, key="k2")


def test_read_index_error_close_store(tmp_path, setup_path):
    # GH 25766
    path = tmp_path / setup_path
    df = DataFrame({"A": [], "B": []}, index=[])
    df.to_hdf(path, key="k1")

    with pytest.raises(IndexError, match=r"list index out of range"):
        read_hdf(path, "k1", stop=0)

    # smoke test to check that the file is properly closed after
    # a read with IndexError before another write
    df.to_hdf(path, key="k1")


def test_read_missing_key_opened_store(tmp_path, setup_path):
    # GH 28699
    path = tmp_path / setup_path
    df = DataFrame({"a": range(2), "b": range(2)})
    df.to_hdf(path, key="k1")

    with HDFStore(path, "r") as store:
        with pytest.raises(KeyError, match="'No object named k2 in the file'"):
            read_hdf(store, "k2")

        # Test that the file is still open after a KeyError and that we can
        # still read from it.
        read_hdf(store, "k1")


def test_read_column(setup_path):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")

        # GH 17912
        # HDFStore.select_column should raise a KeyError
        # exception if the key is not a valid store
        with pytest.raises(KeyError, match="No object named df in the file"):
            store.select_column("df", "index")

        store.append("df", df)
        # error
        with pytest.raises(
            KeyError, match=re.escape("'column [foo] not found in the table'")
        ):
            store.select_column("df", "foo")

        msg = re.escape("select_column() got an unexpected keyword argument 'where'")
        with pytest.raises(TypeError, match=msg):
            store.select_column("df", "index", where=["index>5"])

        # valid
        result = store.select_column("df", "index")
        tm.assert_almost_equal(result.values, Series(df.index).values)
        assert isinstance(result, Series)

        # not a data indexable column
        msg = re.escape(
            "column [values_block_0] can not be extracted individually; "
            "it is not data indexable"
        )
        with pytest.raises(ValueError, match=msg):
            store.select_column("df", "values_block_0")

        # a data column
        df2 = df.copy()
        df2["string"] = "foo"
        store.append("df2", df2, data_columns=["string"])
        result = store.select_column("df2", "string")
        tm.assert_almost_equal(result.values, df2["string"].values)

        # a data column with NaNs, result excludes the NaNs
        df3 = df.copy()
        df3["string"] = "foo"
        df3.loc[df3.index[4:6], "string"] = np.nan
        store.append("df3", df3, data_columns=["string"])
        result = store.select_column("df3", "string")
        tm.assert_almost_equal(result.values, df3["string"].values)

        # start/stop
        result = store.select_column("df3", "string", start=2)
        tm.assert_almost_equal(result.values, df3["string"].values[2:])

        result = store.select_column("df3", "string", start=-2)
        tm.assert_almost_equal(result.values, df3["string"].values[-2:])

        result = store.select_column("df3", "string", stop=2)
        tm.assert_almost_equal(result.values, df3["string"].values[:2])

        result = store.select_column("df3", "string", stop=-2)
        tm.assert_almost_equal(result.values, df3["string"].values[:-2])

        result = store.select_column("df3", "string", start=2, stop=-2)
        tm.assert_almost_equal(result.values, df3["string"].values[2:-2])

        result = store.select_column("df3", "string", start=-2, stop=2)
        tm.assert_almost_equal(result.values, df3["string"].values[-2:2])

        # GH 10392 - make sure column name is preserved
        df4 = DataFrame(
            {"A": np.random.default_rng(2).standard_normal(10), "B": "foo"}
        )
        store.append("df4", df4, data_columns=True)
        expected = df4["B"]
        result = store.select_column("df4", "B")
        tm.assert_series_equal(result, expected)


def test_pytables_native_read(datapath):
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
    ) as store:
        d2 = store["detector/readout"]
        assert isinstance(d2, DataFrame)


@pytest.mark.skipif(is_platform_windows(), reason="native2 read fails oddly on windows")
def test_pytables_native2_read(datapath):
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"
    ) as store:
        str(store)
        d1 = store["detector"]
        assert isinstance(d1, DataFrame)


def test_legacy_table_fixed_format_read_py2(datapath):
    # GH 24510
    # legacy table with fixed format written in Python 2
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r"
    ) as store:
        result = store.select("df")
        expected = DataFrame(
            [[1, 2, 3, "D"]],
            columns=["A", "B", "C", "D"],
            index=Index(["ABC"], name="INDEX_NAME"),
        )
        tm.assert_frame_equal(expected, result)


def test_legacy_table_fixed_format_read_datetime_py2(datapath):
    # GH 31750
    # legacy table with fixed format and datetime64 column written in Python 2
    expected = DataFrame(
        [[Timestamp("2020-02-06T18:00")]],
        columns=["A"],
        index=Index(["date"]),
        dtype="M8[ns]",
    )
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),
        mode="r",
    ) as store:
        result = store.select("df")
        tm.assert_frame_equal(expected, result)


def test_legacy_table_read_py2(datapath):
    # issue: 24925
    # legacy table written in Python 2
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"
    ) as store:
        result = store.select("table")

    expected = DataFrame({"a": ["a", "b"], "b": [2, 3]})
    tm.assert_frame_equal(expected, result)


def test_read_hdf_open_store(tmp_path, setup_path):
    # GH10330
    # No check for non-string path_or_buf, and no test of open store
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w")
    direct = read_hdf(path, "df")
    with HDFStore(path, mode="r") as store:
        indirect = read_hdf(store, "df")
        tm.assert_frame_equal(direct, indirect)
        assert store.is_open


def test_read_hdf_index_not_view(tmp_path, setup_path):
    # GH 37441
    # Ensure that the index of the DataFrame is not a view
    # into the original recarray that pytables reads in
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=[0, 1, 2, 3],
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w", format="table")

    df2 = read_hdf(path, "df")
    assert df2.index._data.base is None
    tm.assert_frame_equal(df, df2)


def test_read_hdf_iterator(tmp_path, setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w", format="t")
    direct = read_hdf(path, "df")
    iterator = read_hdf(path, "df", iterator=True)
    with closing(iterator.store):
        assert isinstance(iterator, TableIterator)
        indirect = next(iterator.__iter__())
    tm.assert_frame_equal(direct, indirect)


def test_read_nokey(tmp_path, setup_path):
    # GH10443
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    # Categorical dtype not supported for "fixed" format. So no need
    # to test with that dtype in the dataframe here.
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="a")
    reread = read_hdf(path)
    tm.assert_frame_equal(df, reread)
    df.to_hdf(path, key="df2", mode="a")

    msg = "key must be provided when HDF5 file contains multiple datasets."
    with pytest.raises(ValueError, match=msg):
        read_hdf(path)


def test_read_nokey_table(tmp_path, setup_path):
    # GH13231
    df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="a", format="table")
    reread = read_hdf(path)
    tm.assert_frame_equal(df, reread)
    df.to_hdf(path, key="df2", mode="a", format="table")

    msg = "key must be provided when HDF5 file contains multiple datasets."
    with pytest.raises(ValueError, match=msg):
        read_hdf(path)


def test_read_nokey_empty(tmp_path, setup_path):
    path = tmp_path / setup_path
    store = HDFStore(path)
    store.close()
    msg = re.escape(
        "Dataset(s) incompatible with Pandas data types, not table, or no "
        "datasets found in HDF5 file."
    )
    with pytest.raises(ValueError, match=msg):
        read_hdf(path)


def test_read_from_pathlib_path(tmp_path, setup_path):
    # GH11773
    expected = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    filename = tmp_path / setup_path
    path_obj = Path(filename)

    expected.to_hdf(path_obj, key="df", mode="a")
    actual = read_hdf(path_obj, key="df")

    tm.assert_frame_equal(expected, actual)


@td.skip_if_no("py.path")
def test_read_from_py_localpath(tmp_path, setup_path):
    # GH11773
    from py.path import local as LocalPath

    expected = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    filename = tmp_path / setup_path
    path_obj = LocalPath(filename)

    expected.to_hdf(path_obj, key="df", mode="a")
    actual = read_hdf(path_obj, key="df")

    tm.assert_frame_equal(expected, actual)


@pytest.mark.parametrize("format", ["fixed", "table"])
def test_read_hdf_series_mode_r(tmp_path, format, setup_path):
    # GH 16583
    # Tests that reading a Series saved to an HDF file
    # still works if a mode='r' argument is supplied
    series = Series(range(10), dtype=np.float64)
    path = tmp_path / setup_path
    series.to_hdf(path, key="data", format=format)
    result = read_hdf(path, key="data", mode="r")
    tm.assert_series_equal(result, series)


@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_read_py2_hdf_file_in_py3(datapath):
    # GH 16781

    # tests reading a PeriodIndex DataFrame written in Python2 in Python3

    # the file was generated in Python 2.7 like so:
    #
    # df = DataFrame([1.,2,3], index=pd.PeriodIndex(
    #     ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
    # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')

    expected = DataFrame(
        [1.0, 2, 3],
        index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),
    )

    with ensure_clean_store(
        datapath(
            "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"
        ),
        mode="r",
    ) as store:
        result = store["p"]
    tm.assert_frame_equal(result, expected)


def test_read_infer_string(tmp_path, setup_path):
    # GH#54431
    pytest.importorskip("pyarrow")
    df = DataFrame({"a": ["a", "b", None]})
    path = tmp_path / setup_path
    df.to_hdf(path, key="data", format="table")
    with pd.option_context("future.infer_string", True):
        result = read_hdf(path, key="data", mode="r")
    expected = DataFrame(
        {"a": ["a", "b", None]},
        dtype="string[pyarrow_numpy]",
        columns=Index(["a"], dtype="string[pyarrow_numpy]"),
    )
    tm.assert_frame_equal(result, expected)
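

def test_read_hdf_start_stop_example(tmp_path, setup_path):
    # Sketch (same fixtures, not from the original suite): table-format stores
    # support row-sliced reads via start/stop, complementing the iterator test
    # above.
    df = DataFrame({"a": range(10)})
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    result = read_hdf(path, "df", start=2, stop=5)
    tm.assert_frame_equal(result, df.iloc[2:5])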
@ -0,0 +1,92 @@
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    _testing as tm,
    date_range,
    errors,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)

pytestmark = pytest.mark.single_cpu


def test_retain_index_attributes(setup_path, unit):
    # GH 3499, losing frequency info on index recreation
    dti = date_range("2000-1-1", periods=3, freq="h", unit=unit)
    df = DataFrame({"A": Series(range(3), index=dti)})

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "data")
        store.put("data", df, format="table")

        result = store.get("data")
        tm.assert_frame_equal(df, result)

        for attr in ["freq", "tz", "name"]:
            for idx in ["index", "columns"]:
                assert getattr(getattr(df, idx), attr, None) == getattr(
                    getattr(result, idx), attr, None
                )

        dti2 = date_range("2002-1-1", periods=3, freq="D", unit=unit)
        # try to append a table with a different frequency
        with tm.assert_produces_warning(errors.AttributeConflictWarning):
            df2 = DataFrame({"A": Series(range(3), index=dti2)})
            store.append("data", df2)

        assert store.get_storer("data").info["index"]["freq"] is None

        # this is ok
        _maybe_remove(store, "df2")
        dti3 = DatetimeIndex(
            ["2001-01-01", "2001-01-02", "2002-01-01"], dtype=f"M8[{unit}]"
        )
        df2 = DataFrame(
            {
                "A": Series(
                    range(3),
                    index=dti3,
                )
            }
        )
        store.append("df2", df2)
        dti4 = date_range("2002-1-1", periods=3, freq="D", unit=unit)
        df3 = DataFrame({"A": Series(range(3), index=dti4)})
        store.append("df2", df3)


def test_retain_index_attributes2(tmp_path, setup_path):
    path = tmp_path / setup_path

    with tm.assert_produces_warning(errors.AttributeConflictWarning):
        df = DataFrame(
            {"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="h"))}
        )
        df.to_hdf(path, key="data", mode="w", append=True)
        df2 = DataFrame(
            {"A": Series(range(3), index=date_range("2002-1-1", periods=3, freq="D"))}
        )

        df2.to_hdf(path, key="data", append=True)

    idx = date_range("2000-1-1", periods=3, freq="h")
    idx.name = "foo"
    df = DataFrame({"A": Series(range(3), index=idx)})
    df.to_hdf(path, key="data", mode="w", append=True)

    assert read_hdf(path, key="data").index.name == "foo"

    with tm.assert_produces_warning(errors.AttributeConflictWarning):
        idx2 = date_range("2001-1-1", periods=3, freq="h")
        idx2.name = "bar"
        df2 = DataFrame({"A": Series(range(3), index=idx2)})
        df2.to_hdf(path, key="data", append=True)

    assert read_hdf(path, "data").index.name is None
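

def test_retain_freq_fixed_format_example(tmp_path, setup_path):
    # Sketch (same fixtures, not from the original suite): the freq attribute
    # that GH 3499 is about also survives a plain fixed-format round trip.
    df = DataFrame(
        {"A": range(3)}, index=date_range("2000-01-01", periods=3, freq="h")
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w")
    result = read_hdf(path, "df")
    assert result.index.freq == df.index.freq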
@ -0,0 +1,578 @@
import datetime
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Series,
    _testing as tm,
    bdate_range,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

pytestmark = pytest.mark.single_cpu


def test_conv_read_write():
    with tm.ensure_clean() as path:

        def roundtrip(key, obj, **kwargs):
            obj.to_hdf(path, key=key, **kwargs)
            return read_hdf(path, key)

        o = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        tm.assert_series_equal(o, roundtrip("series", o))

        o = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)])
        tm.assert_series_equal(o, roundtrip("string_series", o))

        o = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        tm.assert_frame_equal(o, roundtrip("frame", o))

        # table
        df = DataFrame({"A": range(5), "B": range(5)})
        df.to_hdf(path, key="table", append=True)
        result = read_hdf(path, "table", where=["index>2"])
        tm.assert_frame_equal(df[df.index > 2], result)

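
# test_conv_read_write ends by filtering at read time. A minimal sketch of
# where-based selection, assuming format="table" (the path and key below are
# illustrative); fixed-format nodes cannot be queried this way.
def _demo_where_clause():
    import pandas as pd

    df = pd.DataFrame({"A": range(5), "B": range(5)})
    df.to_hdf("demo_where.h5", key="table", format="table")
    # only rows with index > 2 are materialized from disk
    return pd.read_hdf("demo_where.h5", "table", where="index>2")
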
def test_long_strings(setup_path):
    # GH6166
    data = ["a" * 50] * 10
    df = DataFrame({"a": data}, index=data)

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["a"])

        result = store.select("df")
        tm.assert_frame_equal(df, result)


def test_api(tmp_path, setup_path):
    # GH4584
    # API issue when to_hdf doesn't accept append AND format args
    path = tmp_path / setup_path

    df = DataFrame(range(20))
    df.iloc[:10].to_hdf(path, key="df", append=True, format="table")
    df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    # append to False
    df.iloc[:10].to_hdf(path, key="df", append=False, format="table")
    df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
    tm.assert_frame_equal(read_hdf(path, "df"), df)


def test_api_append(tmp_path, setup_path):
    path = tmp_path / setup_path

    df = DataFrame(range(20))
    df.iloc[:10].to_hdf(path, key="df", append=True)
    df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    # append to False
    df.iloc[:10].to_hdf(path, key="df", append=False, format="table")
    df.iloc[10:].to_hdf(path, key="df", append=True)
    tm.assert_frame_equal(read_hdf(path, "df"), df)

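
# The two tests above pivot on one rule: append=True requires (and, when no
# format is given, implies) the "table" layout, while append=False rewrites
# the node. A short sketch of that rule under illustrative names:
def _demo_append_implies_table():
    import pandas as pd

    df = pd.DataFrame(range(20))
    df.iloc[:10].to_hdf("demo_append.h5", key="df", mode="w", format="table")
    df.iloc[10:].to_hdf("demo_append.h5", key="df", append=True)  # table inferred
    return pd.read_hdf("demo_append.h5", "df").equals(df)  # -> True
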
def test_api_2(tmp_path, setup_path):
    path = tmp_path / setup_path

    df = DataFrame(range(20))
    df.to_hdf(path, key="df", append=False, format="fixed")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    df.to_hdf(path, key="df", append=False, format="f")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    df.to_hdf(path, key="df", append=False)
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    df.to_hdf(path, key="df")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    with ensure_clean_store(setup_path) as store:
        df = DataFrame(range(20))

        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=True, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        # append to False
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        # formats
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format=None)
        tm.assert_frame_equal(store.select("df"), df)


def test_api_invalid(tmp_path, setup_path):
    path = tmp_path / setup_path
    # Invalid.
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    msg = "Can only append to Tables"

    with pytest.raises(ValueError, match=msg):
        df.to_hdf(path, key="df", append=True, format="f")

    with pytest.raises(ValueError, match=msg):
        df.to_hdf(path, key="df", append=True, format="fixed")

    msg = r"invalid HDFStore format specified \[foo\]"

    with pytest.raises(TypeError, match=msg):
        df.to_hdf(path, key="df", append=True, format="foo")

    with pytest.raises(TypeError, match=msg):
        df.to_hdf(path, key="df", append=False, format="foo")

    # File path doesn't exist
    path = ""
    msg = f"File {path} does not exist"

    with pytest.raises(FileNotFoundError, match=msg):
        read_hdf(path, "df")


def test_get(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        left = store.get("a")
        right = store["a"]
        tm.assert_series_equal(left, right)

        left = store.get("/a")
        right = store["/a"]
        tm.assert_series_equal(left, right)

        with pytest.raises(KeyError, match="'No object named b in the file'"):
            store.get("b")


def test_put_integer(setup_path):
    # non-date, non-string index
    df = DataFrame(np.random.default_rng(2).standard_normal((50, 100)))
    _check_roundtrip(df, tm.assert_frame_equal, setup_path)


def test_table_values_dtypes_roundtrip(setup_path):
    with ensure_clean_store(setup_path) as store:
        df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
        store.append("df_f8", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)

        df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")
        store.append("df_i8", df2)
        tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)

        # incompatible dtype
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_0,cname->values_block_0,"
            "dtype->float64,kind->float,shape->(1, 3)] vs "
            "current table [name->values_block_0,"
            "cname->values_block_0,dtype->int64,kind->integer,"
            "shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_i8", df1)

        # check creation/storage/retrieval of float32 (a bit hacky to
        # actually create them though)
        df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])
        store.append("df_f4", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)
        assert df1.dtypes.iloc[0] == "float32"

        # check with mixed dtypes
        df1 = DataFrame(
            {
                c: Series(np.random.default_rng(2).integers(5), dtype=c)
                for c in ["float32", "float64", "int32", "int64", "int16", "int8"]
            }
        )
        df1["string"] = "foo"
        df1["float322"] = 1.0
        df1["float322"] = df1["float322"].astype("float32")
        df1["bool"] = df1["float32"] > 0
        df1["time1"] = Timestamp("20130101")
        df1["time2"] = Timestamp("20130102")

        store.append("df_mixed_dtypes1", df1)
        result = store.select("df_mixed_dtypes1").dtypes.value_counts()
        result.index = [str(i) for i in result.index]
        expected = Series(
            {
                "float32": 2,
                "float64": 1,
                "int32": 1,
                "bool": 1,
                "int16": 1,
                "int8": 1,
                "int64": 1,
                "object": 1,
                "datetime64[ns]": 2,
            },
            name="count",
        )
        result = result.sort_index()
        expected = expected.sort_index()
        tm.assert_series_equal(result, expected)

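
# Table columns carry their dtype/kind in node metadata, which is why the test
# above can assert exact dtypes after a roundtrip and why a mismatched append
# raises instead of silently casting. Sketch with an illustrative file and key:
def _demo_dtype_guard():
    import pandas as pd

    with pd.HDFStore("demo_dtypes.h5", mode="w") as store:
        store.append("df", pd.DataFrame({"a": [1, 2, 3]}, dtype="i8"))
        try:
            store.append("df", pd.DataFrame({"a": [1.0]}, dtype="f8"))
        except ValueError:
            pass  # "invalid combination of [values_axes] ..."
        return store["df"].dtypes  # a -> int64
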
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_series(setup_path):
    s = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)])
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    ts = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )
    _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)

    ts2 = Series(ts.index, Index(ts.index, dtype=object))
    _check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)

    ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object))
    _check_roundtrip(
        ts3, tm.assert_series_equal, path=setup_path, check_index_type=False
    )


def test_float_index(setup_path):
    # GH #454
    index = np.random.default_rng(2).standard_normal(10)
    s = Series(np.random.default_rng(2).standard_normal(10), index=index)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_tuple_index(setup_path):
    # GH #492
    col = np.arange(10)
    idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
    data = np.random.default_rng(2).standard_normal(30).reshape((3, 10))
    DF = DataFrame(data, index=idx, columns=col)

    with tm.assert_produces_warning(pd.errors.PerformanceWarning):
        _check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)


@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_index_types(setup_path):
    values = np.random.default_rng(2).standard_normal(2)

    func = lambda lhs, rhs: tm.assert_series_equal(lhs, rhs, check_index_type=True)

    ser = Series(values, [0, "y"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [datetime.datetime.today(), 0])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, ["y", 0])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [datetime.date.today(), "a"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [1.23, "b"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [1, 1.53])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [1, 5])
    _check_roundtrip(ser, func, path=setup_path)

    dti = DatetimeIndex(["2012-01-01", "2012-01-02"], dtype="M8[ns]")
    ser = Series(values, index=dti)
    _check_roundtrip(ser, func, path=setup_path)

    ser.index = ser.index.as_unit("s")
    _check_roundtrip(ser, func, path=setup_path)

def test_timeseries_preepoch(setup_path, request):
    dr = bdate_range("1/1/1940", "1/1/1960")
    ts = Series(np.random.default_rng(2).standard_normal(len(dr)), index=dr)
    try:
        _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
    except OverflowError:
        if is_platform_windows():
            request.applymarker(
                pytest.mark.xfail(reason="known failure on some windows platforms")
            )
        raise

@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_frame(compression, setup_path):
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    # put in some random NAs
    df.iloc[0, 0] = np.nan
    df.iloc[5, 3] = np.nan

    _check_roundtrip_table(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )
    _check_roundtrip(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )

    tdf = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    _check_roundtrip(
        tdf, tm.assert_frame_equal, path=setup_path, compression=compression
    )

    with ensure_clean_store(setup_path) as store:
        # not consolidated
        df["foo"] = np.random.default_rng(2).standard_normal(len(df))
        store["df"] = df
        recons = store["df"]
        assert recons._mgr.is_consolidated()

    # empty
    _check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)


def test_empty_series_frame(setup_path):
    s0 = Series(dtype=object)
    s1 = Series(name="myseries", dtype=object)
    df0 = DataFrame()
    df1 = DataFrame(index=["a", "b", "c"])
    df2 = DataFrame(columns=["d", "e", "f"])

    _check_roundtrip(s0, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(s1, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)


@pytest.mark.parametrize("dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"])
def test_empty_series(dtype, setup_path):
    s = Series(dtype=dtype)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_can_serialize_dates(setup_path):
    rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)


def test_store_hierarchical(setup_path, multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data

    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)

    # check that the names are stored
    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_frame_equal(recons, frame)


@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_store_mixed(compression, setup_path):
    def _make_one():
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["int1"] = 1
        df["int2"] = 2
        return df._consolidate()

    df1 = _make_one()
    df2 = _make_one()

    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

    with ensure_clean_store(setup_path) as store:
        store["obj"] = df1
        tm.assert_frame_equal(store["obj"], df1)
        store["obj"] = df2
        tm.assert_frame_equal(store["obj"], df2)

    # check that can store Series of all of these types
    _check_roundtrip(
        df1["obj1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["bool1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["int1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )


def _check_roundtrip(obj, comparator, path, compression=False, **kwargs):
    options = {}
    if compression:
        options["complib"] = "blosc"

    with ensure_clean_store(path, "w", **options) as store:
        store["obj"] = obj
        retrieved = store["obj"]
        comparator(retrieved, obj, **kwargs)


def _check_roundtrip_table(obj, comparator, path, compression=False):
    options = {}
    if compression:
        options["complib"] = "blosc"

    with ensure_clean_store(path, "w", **options) as store:
        store.put("obj", obj, format="table")
        retrieved = store["obj"]

        comparator(retrieved, obj)

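
# Both helpers share one store/read/compare pattern; the compression flag maps
# to complib="blosc". A sketch of the same pattern outside the fixtures;
# complevel is set explicitly here so compression actually engages, and the
# file name is illustrative only:
def _demo_roundtrip(obj):
    import pandas as pd
    import pandas._testing as tm

    with pd.HDFStore("demo_rt.h5", mode="w", complib="blosc", complevel=9) as store:
        store["obj"] = obj
        tm.assert_equal(store["obj"], obj)
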
def test_unicode_index(setup_path):
    unicode_values = ["\u03c3", "\u03c3\u03c3"]

    s = Series(
        np.random.default_rng(2).standard_normal(len(unicode_values)),
        unicode_values,
    )
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_unicode_longer_encoded(setup_path):
    # GH 11234
    char = "\u0394"
    df = DataFrame({"A": [char]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)

    df = DataFrame({"A": ["a", char], "B": ["b", "b"]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)


def test_store_datetime_mixed(setup_path):
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
    ts = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )
    df["d"] = ts.index[:3]
    _check_roundtrip(df, tm.assert_frame_equal, path=setup_path)


def test_round_trip_equals(tmp_path, setup_path):
    # GH 9330
    df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    other = read_hdf(path, "df")
    tm.assert_frame_equal(df, other)
    assert df.equals(other)
    assert other.equals(df)


def test_infer_string_columns(tmp_path, setup_path):
    # GH#
    pytest.importorskip("pyarrow")
    path = tmp_path / setup_path
    with pd.option_context("future.infer_string", True):
        df = DataFrame(1, columns=list("ABCD"), index=list(range(10))).set_index(
            ["A", "B"]
        )
        expected = df.copy()
        df.to_hdf(path, key="df", format="table")

        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,52 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm

from pandas.io.pytables import (
    HDFStore,
    read_hdf,
)

pytest.importorskip("tables")


class TestHDFStoreSubclass:
    # GH 33748
    def test_supported_for_subclass_dataframe(self, tmp_path):
        data = {"a": [1, 2], "b": [3, 4]}
        sdf = tm.SubclassedDataFrame(data, dtype=np.intp)

        expected = DataFrame(data, dtype=np.intp)

        path = tmp_path / "temp.h5"
        sdf.to_hdf(path, key="df")
        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)

        path = tmp_path / "temp.h5"
        with HDFStore(path) as store:
            store.put("df", sdf)
        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)

    def test_supported_for_subclass_series(self, tmp_path):
        data = [1, 2, 3]
        sser = tm.SubclassedSeries(data, dtype=np.intp)

        expected = Series(data, dtype=np.intp)

        path = tmp_path / "temp.h5"
        sser.to_hdf(path, key="ser")
        result = read_hdf(path, "ser")
        tm.assert_series_equal(result, expected)

        path = tmp_path / "temp.h5"
        with HDFStore(path) as store:
            store.put("ser", sser)
        result = read_hdf(path, "ser")
        tm.assert_series_equal(result, expected)
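

# Note that the expected objects above are built from the plain DataFrame and
# Series: writing a subclass is supported, but read_hdf materializes
# base-class objects, so the subclass itself does not survive the roundtrip.
# A minimal sketch (illustrative file name):
def _demo_subclass_not_preserved(tmp_file="demo_subclass.h5"):
    import pandas as pd
    import pandas._testing as tm

    sdf = tm.SubclassedDataFrame({"a": [1, 2]})
    sdf.to_hdf(tmp_file, key="df")
    return type(pd.read_hdf(tmp_file, "df"))  # -> DataFrame, not the subclass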
@ -0,0 +1,72 @@
import datetime

import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    _testing as tm,
    date_range,
    period_range,
)
from pandas.tests.io.pytables.common import ensure_clean_store

pytestmark = pytest.mark.single_cpu


@pytest.mark.parametrize("unit", ["us", "ns"])
def test_store_datetime_fractional_secs(setup_path, unit):
    dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
    dti = DatetimeIndex([dt], dtype=f"M8[{unit}]")
    series = Series([0], index=dti)
    with ensure_clean_store(setup_path) as store:
        store["a"] = series
        assert store["a"].index[0] == dt


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_tseries_indices_series(setup_path):
    with ensure_clean_store(setup_path) as store:
        idx = date_range("2020-01-01", periods=10)
        ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx)
        store["a"] = ser
        result = store["a"]

        tm.assert_series_equal(result, ser)
        assert result.index.freq == ser.index.freq
        tm.assert_class_equal(result.index, ser.index, obj="series index")

        idx = period_range("2020-01-01", periods=10, freq="D")
        ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx)
        store["a"] = ser
        result = store["a"]

        tm.assert_series_equal(result, ser)
        assert result.index.freq == ser.index.freq
        tm.assert_class_equal(result.index, ser.index, obj="series index")


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_tseries_indices_frame(setup_path):
    with ensure_clean_store(setup_path) as store:
        idx = date_range("2020-01-01", periods=10)
        df = DataFrame(
            np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx
        )
        store["a"] = df
        result = store["a"]

        tm.assert_frame_equal(result, df)
        assert result.index.freq == df.index.freq
        tm.assert_class_equal(result.index, df.index, obj="dataframe index")

        idx = period_range("2020-01-01", periods=10, freq="D")
        df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 3)), idx)
        store["a"] = df
        result = store["a"]

        tm.assert_frame_equal(result, df)
        assert result.index.freq == df.index.freq
        tm.assert_class_equal(result.index, df.index, obj="dataframe index")
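

# Unlike the table-format append path (where a conflicting freq is nulled),
# fixed-format storage serializes the index object wholesale, so freq and the
# index class itself come back intact -- that is what the assertions above
# check. A minimal sketch with an illustrative file name and key:
def _demo_fixed_preserves_freq():
    import pandas as pd

    ser = pd.Series(range(10), index=pd.date_range("2020-01-01", periods=10))
    with pd.HDFStore("demo_freq_fixed.h5", mode="w") as store:
        store["ser"] = ser  # __setitem__ uses the fixed format by default
        return store["ser"].index.freq  # -> <Day>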
@ -0,0 +1,378 @@
from datetime import (
    date,
    timedelta,
)

import numpy as np
import pytest

from pandas._libs.tslibs.timezones import maybe_get_tz
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)


def _compare_with_tz(a, b):
    tm.assert_frame_equal(a, b)

    # compare the zones on each element
    for c in a.columns:
        for i in a.index:
            a_e = a.loc[i, c]
            b_e = b.loc[i, c]
            if not (a_e == b_e and a_e.tz == b_e.tz):
                raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")


# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
# filename issues.
gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x)
gettz_pytz = lambda x: x


@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones(setup_path, gettz):
    # as columns

    # Single-tzinfo, no DST transition
    df_est = DataFrame(
        {
            "A": [
                Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")).as_unit("ns")
                + timedelta(hours=1) * i
                for i in range(5)
            ]
        }
    )

    # frame with all columns having same tzinfo, but different sides
    # of DST transition
    df_crosses_dst = DataFrame(
        {
            "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
            "B": Timestamp("20130603", tz=gettz("US/Eastern")).as_unit("ns"),
        },
        index=range(5),
    )

    df_mixed_tz = DataFrame(
        {
            "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
            "B": Timestamp("20130102", tz=gettz("EET")).as_unit("ns"),
        },
        index=range(5),
    )

    df_different_tz = DataFrame(
        {
            "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
            "B": Timestamp("20130102", tz=gettz("CET")).as_unit("ns"),
        },
        index=range(5),
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_est, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df_est)
        tm.assert_frame_equal(result, df_est)

        # select with tz aware
        expected = df_est[df_est.A >= df_est.A[3]]
        result = store.select("df_tz", where="A>=df_est.A[3]")
        _compare_with_tz(result, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_crosses_dst)
        result = store["df_tz"]
        _compare_with_tz(result, df_crosses_dst)
        tm.assert_frame_equal(result, df_crosses_dst)

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?(US/Eastern|America/New_York)\] "
            r"conflicts with new value \[(dateutil/.*)?EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df_mixed_tz)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_mixed_tz, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df_mixed_tz)
        tm.assert_frame_equal(result, df_mixed_tz)

        # can't append with diff timezone
        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?EET\] "
            r"conflicts with new value \[(dateutil/.*)?CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df_different_tz)

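
# The failure mode above: without data_columns, tz-aware columns are packed
# into a single values block that records one tz, so two zones cannot coexist
# on append. Declaring each column as a data column gives it its own storage
# (and its own tz metadata). Sketch with illustrative names:
def _demo_mixed_tz_data_columns():
    import pandas as pd

    df = pd.DataFrame(
        {
            "A": pd.Timestamp("2013-01-02", tz="US/Eastern"),
            "B": pd.Timestamp("2013-01-02", tz="EET"),
        },
        index=range(5),
    )
    with pd.HDFStore("demo_tz.h5", mode="w") as store:
        store.append("df", df, data_columns=["A", "B"])  # ok: one column per tz
        return store["df"]
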
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
|
||||
def test_append_with_timezones_as_index(setup_path, gettz):
|
||||
# GH#4098 example
|
||||
|
||||
dti = date_range("2000-1-1", periods=3, freq="h", tz=gettz("US/Eastern"))
|
||||
dti = dti._with_freq(None) # freq doesn't round-trip
|
||||
|
||||
df = DataFrame({"A": Series(range(3), index=dti)})
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
_maybe_remove(store, "df")
|
||||
store.put("df", df)
|
||||
result = store.select("df")
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
_maybe_remove(store, "df")
|
||||
store.append("df", df)
|
||||
result = store.select("df")
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
def test_roundtrip_tz_aware_index(setup_path, unit):
|
||||
# GH 17618
|
||||
ts = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
|
||||
dti = DatetimeIndex([ts]).as_unit(unit)
|
||||
df = DataFrame(data=[0], index=dti)
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
store.put("frame", df, format="fixed")
|
||||
recons = store["frame"]
|
||||
tm.assert_frame_equal(recons, df)
|
||||
|
||||
value = recons.index[0]._value
|
||||
denom = {"ns": 1, "us": 1000, "ms": 10**6, "s": 10**9}[unit]
|
||||
assert value == 946706400000000000 // denom
|
||||
|
||||
|
||||
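
# Worked check of the constant above: 2000-01-01 01:00 US/Eastern (EST,
# UTC-5) is 2000-01-01 06:00 UTC, and 2000-01-01 00:00 UTC is 946_684_800 s
# after the epoch, so the timestamp is
#     946_684_800 + 6 * 3600 = 946_706_400 s == 946706400000000000 ns,
# which the test divides down for coarser resolutions.
assert (946_684_800 + 6 * 3600) * 10**9 == 946706400000000000
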
def test_store_index_name_with_tz(setup_path):
    # GH 13884
    df = DataFrame({"A": [1, 2]})
    df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
    df.index = df.index.tz_localize("UTC")
    df.index.name = "foo"

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)


def test_tseries_select_index_column(setup_path):
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    rng = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == DatetimeIndex(result.values).tz

    # check utc
    rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz

    # double check non-utc
    rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz


def test_timezones_fixed_format_frame_non_empty(setup_path):
    with ensure_clean_store(setup_path) as store:
        # index
        rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        rng = rng._with_freq(None)  # freq doesn't round-trip
        df = DataFrame(
            np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
        )
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        df = DataFrame(
            {
                "A": rng,
                "B": rng.tz_convert("UTC").tz_localize(None),
                "C": rng.tz_convert("CET"),
                "D": range(len(rng)),
            },
            index=rng,
        )
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)


def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    obj = Series(dtype=dtype, name="A")
    if frame_or_series is DataFrame:
        obj = obj.to_frame()

    with ensure_clean_store(setup_path) as store:
        store["obj"] = obj
        result = store["obj"]
        tm.assert_equal(result, obj)


def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series([0], dtype=dtype)
        store["s"] = s
        result = store["s"]
        tm.assert_series_equal(result, s)


def test_fixed_offset_tz(setup_path):
    rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_index_equal(recons.index, rng)
        assert rng.tz == recons.index.tz


@td.skip_if_windows
def test_store_timezone(setup_path):
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:
        today = date(2013, 9, 10)
        df = DataFrame([1, 2, 3], index=[today, today, today])
        store["obj1"] = df
        result = store["obj1"]
        tm.assert_frame_equal(result, df)

    # with tz setting
    with ensure_clean_store(setup_path) as store:
        with tm.set_timezone("EST5EDT"):
            today = date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[today, today, today])
            store["obj1"] = df

        with tm.set_timezone("CST6CDT"):
            result = store["obj1"]

        tm.assert_frame_equal(result, df)

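
# tm.set_timezone swaps the process-level TZ (hence the windows skip above):
# the guard is that a tz-naive datetime.date written under one process zone
# must read back as the same calendar date under another. A sketch of the
# same scenario (POSIX only; the file name is illustrative):
def _demo_process_tz_roundtrip():
    from datetime import date

    import pandas as pd
    import pandas._testing as tm

    d = date(2013, 9, 10)
    df = pd.DataFrame([1, 2, 3], index=[d, d, d])
    with tm.set_timezone("EST5EDT"):
        df.to_hdf("demo_tz_proc.h5", key="obj", mode="w")
    with tm.set_timezone("CST6CDT"):
        out = pd.read_hdf("demo_tz_proc.h5", "obj")
    return out.index[0] == d  # must stay the same calendar date
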
def test_legacy_datetimetz_object(datapath):
    # legacy from < 0.17.0
    # 8260
    expected = DataFrame(
        {
            "A": Timestamp("20130102", tz="US/Eastern").as_unit("ns"),
            "B": Timestamp("20130603", tz="CET").as_unit("ns"),
        },
        index=range(5),
    )
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
    ) as store:
        result = store["df"]
        tm.assert_frame_equal(result, expected)


def test_dst_transitions(setup_path):
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        times = date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="h",
            ambiguous="infer",
        )
        times = times._with_freq(None)  # freq doesn't round-trip

        for i in [times, times + pd.Timedelta("10min")]:
            _maybe_remove(store, "df")
            df = DataFrame({"A": range(len(i)), "B": i}, index=i)
            store.append("df", df)
            result = store.select("df")
            tm.assert_frame_equal(result, df)


def test_read_with_where_tz_aware_index(tmp_path, setup_path):
    # GH 11926
    periods = 10
    dts = date_range("20151201", periods=periods, freq="D", tz="UTC")
    mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=mi)

    key = "mykey"
    path = tmp_path / setup_path
    with pd.HDFStore(path) as store:
        store.append(key, expected, format="table", append=True)
    result = pd.read_hdf(path, key, where="DATE > 20151130")
    tm.assert_frame_equal(result, expected)


def test_py2_created_with_datetimez(datapath):
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    index = DatetimeIndex(["2019-01-01T18:00"], dtype="M8[ns, America/New_York]")
    expected = DataFrame({"data": 123}, index=index)
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
    ) as store:
        result = store["key"]
        tm.assert_frame_equal(result, expected)