venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,77 @@
|
||||
import functools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
pytest.importorskip("odf")
|
||||
|
||||
if is_platform_windows():
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def cd_and_set_engine(monkeypatch, datapath):
    """Route ``pd.read_excel`` through the odf engine for every test here.

    Also changes the working directory to the excel test-data folder so the
    tests can refer to their input files by bare file name.
    """
    odf_read_excel = functools.partial(pd.read_excel, engine="odf")
    monkeypatch.setattr(pd, "read_excel", odf_read_excel)

    excel_data_dir = datapath("io", "data", "excel")
    monkeypatch.chdir(excel_data_dir)
|
||||
|
||||
|
||||
def test_read_invalid_types_raises():
    """Reading a cell with an unknown value type raises ``ValueError``."""
    # invalid_value_type.ods was produced by manually editing the
    # content.xml file contained in the archive
    expected_msg = "Unrecognized type awesome_new_type"
    with pytest.raises(ValueError, match=expected_msg):
        pd.read_excel("invalid_value_type.ods")
|
||||
|
||||
|
||||
def test_read_writer_table():
    """Tables can also be read from a text OpenDocument file (.odt)."""
    row_labels = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header")
    cell_values = [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]]
    expected = pd.DataFrame(
        cell_values,
        index=row_labels,
        columns=["Column 1", "Unnamed: 2", "Column 3"],
    )

    result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_newlines_between_xml_elements_table():
    """Regression test for GH#45598 using the ``test_newlines.ods`` fixture."""
    cell_values = [[1.0, 4.0, 7], [np.nan, np.nan, 8], [3.0, 6.0, 9]]
    expected = pd.DataFrame(
        cell_values,
        columns=["Column 1", "Column 2", "Column 3"],
    )

    result = pd.read_excel("test_newlines.ods")

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_unempty_cells():
    """``test_unempty_cells.ods`` yields one column with NaN in the gaps."""
    column_values = [1, np.nan, 3, np.nan, 5]
    expected = pd.DataFrame(column_values, columns=["Column 1"])

    result = pd.read_excel("test_unempty_cells.ods")

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_cell_annotation():
    """``test_cell_annotation.ods`` yields the cell text with NaN in the gap."""
    column_values = ["test", np.nan, "test 3"]
    expected = pd.DataFrame(column_values, columns=["Column 1"])

    result = pd.read_excel("test_cell_annotation.ods")

    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,106 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import ExcelWriter
|
||||
|
||||
odf = pytest.importorskip("odf")
|
||||
|
||||
if is_platform_windows():
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
@pytest.fixture
def ext():
    """File extension used for the temporary files in this module."""
    return ".ods"
|
||||
|
||||
|
||||
def test_write_append_mode_raises(ext):
    """Opening an odf ``ExcelWriter`` with ``mode="a"`` raises ``ValueError``."""
    with tm.ensure_clean(ext) as path, pytest.raises(
        ValueError, match="Append mode is not supported with odf!"
    ):
        ExcelWriter(path, engine="odf", mode="a")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
    """Unknown ``engine_kwargs`` raise ``TypeError``; ``None`` is accepted."""
    # GH 42286
    # GH 43445
    # test for error: OpenDocumentSpreadsheet does not accept any arguments
    with tm.ensure_clean(ext) as path:
        if engine_kwargs is None:
            # Nothing is forwarded, so the writer opens and closes cleanly.
            with ExcelWriter(path, engine="odf", engine_kwargs=engine_kwargs) as _:
                pass
        else:
            expected_msg = re.escape(
                "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
            )
            with pytest.raises(TypeError, match=expected_msg):
                ExcelWriter(path, engine="odf", engine_kwargs=engine_kwargs)
|
||||
|
||||
|
||||
def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` reflects a table added directly to ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as path, ExcelWriter(path) as writer:
        assert writer.sheets == {}

        new_table = odf.table.Table(name="test_name")
        writer.book.spreadsheet.addElement(new_table)

        assert writer.sheets == {"test_name": new_table}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["value", "cell_value_type", "cell_value_attribute", "cell_value"],
    argvalues=[
        (True, "boolean", "boolean-value", "true"),
        ("test string", "string", "string-value", "test string"),
        (1, "float", "value", "1"),
        (1.5, "float", "value", "1.5"),
        (
            datetime(2010, 10, 10, 10, 10, 10),
            "date",
            "date-value",
            "2010-10-10T10:10:10",
        ),
        (date(2010, 10, 10), "date", "date-value", "2010-10-10"),
    ],
)
def test_cell_value_type(ext, value, cell_value_type, cell_value_attribute, cell_value):
    """Written cells carry the expected ``office:value-type`` and value attribute.

    A single value is written to a one-cell sheet, the file is re-opened and
    the raw attributes of the first table cell are inspected.
    """
    # GH#54994 ODS: cell attributes should follow specification
    # http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
    from odf.namespaces import OFFICENS
    from odf.table import (
        TableCell,
        TableRow,
    )

    # qname of a table cell element; used to pick cells out of the row's children
    table_cell_name = TableCell().qname

    with tm.ensure_clean(ext) as path:
        single_cell_frame = pd.DataFrame([[value]])
        single_cell_frame.to_excel(path, header=False, index=False)

        with pd.ExcelFile(path) as workbook:
            first_sheet = workbook._reader.get_sheet_by_index(0)
            first_row = first_sheet.getElementsByType(TableRow)[0]
            row_cells = [
                node
                for node in first_row.childNodes
                if hasattr(node, "qname") and node.qname == table_cell_name
            ]

            attributes = row_cells[0].attributes
            assert attributes.get((OFFICENS, "value-type")) == cell_value_type
            assert attributes.get((OFFICENS, cell_value_attribute)) == cell_value
|
@ -0,0 +1,432 @@
|
||||
import contextlib
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import (
|
||||
ExcelWriter,
|
||||
_OpenpyxlWriter,
|
||||
)
|
||||
from pandas.io.excel._openpyxl import OpenpyxlReader
|
||||
|
||||
openpyxl = pytest.importorskip("openpyxl")
|
||||
|
||||
if is_platform_windows():
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
@pytest.fixture
def ext():
    """File extension used for the workbooks in this module."""
    return ".xlsx"
|
||||
|
||||
|
||||
def test_to_excel_styleconverter():
    """``_convert_to_style_kwargs`` maps a style dict onto openpyxl style objects."""
    from openpyxl import styles

    hstyle = {
        "font": {"color": "00FF0000", "bold": True},
        "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
        "alignment": {"horizontal": "center", "vertical": "top"},
        "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
        "number_format": {"format_code": "0.00"},
        "protection": {"locked": True, "hidden": False},
    }

    thin_side = styles.Side(style=styles.borders.BORDER_THIN)
    # Expected openpyxl objects, keyed by the keyword name the converter emits
    # (note "borders" in the input becomes "border" in the output).
    expected_kwargs = {
        "font": styles.Font(bold=True, color=styles.Color("00FF0000")),
        "border": styles.Border(
            top=thin_side, right=thin_side, bottom=thin_side, left=thin_side
        ),
        "alignment": styles.Alignment(horizontal="center", vertical="top"),
        "fill": styles.PatternFill(
            patternType="solid", fgColor=styles.Color(rgb="006666FF", tint=0.3)
        ),
        "number_format": "0.00",
        "protection": styles.Protection(locked=True, hidden=False),
    }

    converted = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
    for keyword, wanted in expected_kwargs.items():
        assert converted[keyword] == wanted
|
||||
|
||||
|
||||
def test_write_cells_merge_styled(ext):
    """Writing a styled merged range restyles the cells it covers.

    B1 and A2 are first written with their own font colours, then a merged
    cell anchored at A1 and spanning to row 1 / column 1 is written with a
    different font; both earlier cells must end up with the merged font.
    """
    from pandas.io.formats.excel import ExcelCell

    sheet_name = "merge_styled"

    initial_cells = [
        ExcelCell(col=1, row=0, val=42, style={"font": {"color": "00FF0000"}}),
        ExcelCell(col=0, row=1, val=99, style={"font": {"color": "0000FF00"}}),
    ]

    sty_merged = {"font": {"color": "000000FF", "bold": True}}
    merged_font = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)["font"]
    merge_cells = [
        ExcelCell(
            col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
        )
    ]

    with tm.ensure_clean(ext) as path:
        with _OpenpyxlWriter(path) as writer:
            for cell_batch in (initial_cells, merge_cells):
                writer._write_cells(cell_batch, sheet_name=sheet_name)

            worksheet = writer.sheets[sheet_name]
        for coordinate in ("B1", "A2"):
            assert worksheet[coordinate].font == merged_font
|
||||
|
||||
|
||||
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
    """``engine_kwargs`` given to ``ExcelWriter`` show up on the openpyxl book."""
    # GH 42286 GH 43445
    with tm.ensure_clean(ext) as path:
        with ExcelWriter(
            path, engine="openpyxl", engine_kwargs={"iso_dates": iso_dates}
        ) as writer:
            assert writer.book.iso_dates == iso_dates
            # ExcelWriter won't allow us to close without writing something
            DataFrame().to_excel(writer)
|
||||
|
||||
|
||||
def test_engine_kwargs_append_invalid(ext):
    """An unknown engine kwarg in append mode raises ``TypeError``."""
    # GH 43445
    # test whether an invalid engine kwargs actually raises
    expected_msg = re.escape(
        "load_workbook() got an unexpected keyword argument 'apple_banana'"
    )
    bad_kwargs = {"apple_banana": "fruit"}

    with tm.ensure_clean(ext) as path:
        DataFrame(["hello", "world"]).to_excel(path)
        with pytest.raises(TypeError, match=expected_msg):
            with ExcelWriter(
                path, engine="openpyxl", mode="a", engine_kwargs=bad_kwargs
            ) as writer:
                # ExcelWriter needs us to write something to close properly
                DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(ext, data_only, expected):
    """``data_only`` is honoured both when appending and when reading back."""
    # GH 43445
    # tests whether the data_only engine_kwarg actually works well for
    # openpyxl's load_workbook
    engine_kwargs = {"data_only": data_only}

    with tm.ensure_clean(ext) as path:
        DataFrame(["=1+1"]).to_excel(path)
        with ExcelWriter(
            path, engine="openpyxl", mode="a", engine_kwargs=engine_kwargs
        ) as writer:
            formula_cell = writer.sheets["Sheet1"]["B2"]
            assert formula_cell.value == expected
            # ExcelWriter needs us to write something to close properly?
            DataFrame().to_excel(writer, sheet_name="Sheet2")

        # ensure that data_only also works for reading
        # and that formulas/values roundtrip
        roundtripped = pd.read_excel(
            path,
            sheet_name="Sheet1",
            engine="openpyxl",
            engine_kwargs=engine_kwargs,
        )
        assert roundtripped.iloc[0, 1] == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
@pytest.mark.parametrize("kwarg_value", [True, False])
def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
    """``OpenpyxlReader`` forwards ``engine_kwargs`` to the loaded workbook."""
    # GH 55027
    # test that `read_only` and `data_only` can be passed to
    # `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
    workbook_path = datapath("io", "data", "excel", "test1" + ext)
    reader = OpenpyxlReader(workbook_path, engine_kwargs={kwarg_name: kwarg_value})
    with contextlib.closing(reader):
        assert getattr(reader.book, kwarg_name) == kwarg_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
def test_write_append_mode(ext, mode, expected):
    """Write mode replaces the workbook; append mode keeps existing sheets.

    The seed workbook has sheets "foo" and "bar", each holding its own name
    in A1.  After writing a frame with column "baz" to sheet "baz", the sheet
    titles and each sheet's A1 value must equal ``expected``.
    """
    df = DataFrame([1], columns=["baz"])

    with tm.ensure_clean(ext) as path:
        seed_book = openpyxl.Workbook()
        foo_sheet = seed_book.worksheets[0]
        foo_sheet.title = "foo"
        foo_sheet["A1"].value = "foo"
        # create_sheet appends, so this is seed_book.worksheets[1]
        bar_sheet = seed_book.create_sheet("bar")
        bar_sheet["A1"].value = "bar"
        seed_book.save(path)

        with ExcelWriter(path, engine="openpyxl", mode=mode) as writer:
            df.to_excel(writer, sheet_name="baz", index=False)

        with contextlib.closing(openpyxl.load_workbook(path)) as reloaded:
            sheets = reloaded.worksheets
            assert [sheet.title for sheet in sheets] == expected

            # Lengths are equal after the assertion above, so zip drops nothing.
            for sheet, cell_value in zip(sheets, expected):
                assert sheet["A1"].value == cell_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "if_sheet_exists,num_sheets,expected",
    [
        ("new", 2, ["apple", "banana"]),
        ("replace", 1, ["pear"]),
        ("overlay", 1, ["pear", "banana"]),
    ],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
    """Each ``if_sheet_exists`` mode yields the expected sheets and contents."""
    # GH 40230
    original = DataFrame({"fruit": ["apple", "banana"]})
    appended = DataFrame({"fruit": ["pear"]})

    with tm.ensure_clean(ext) as path:
        original.to_excel(path, engine="openpyxl", sheet_name="foo", index=False)
        with ExcelWriter(
            path, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
        ) as writer:
            appended.to_excel(writer, sheet_name="foo", index=False)

        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
            sheet_names = wb.sheetnames
            assert len(sheet_names) == num_sheets
            assert sheet_names[0] == "foo"

            foo_frame = pd.read_excel(wb, "foo", engine="openpyxl")
            assert list(foo_frame["fruit"]) == expected

            if len(sheet_names) == 2:
                # "new" mode: the appended frame lives in the second sheet
                second_frame = pd.read_excel(wb, sheet_names[1], engine="openpyxl")
                tm.assert_frame_equal(second_frame, appended)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "startrow, startcol, greeting, goodbye",
    [
        (0, 0, ["poop", "world"], ["goodbye", "people"]),
        (0, 1, ["hello", "world"], ["poop", "people"]),
        (1, 0, ["hello", "poop"], ["goodbye", "people"]),
        (1, 1, ["hello", "world"], ["goodbye", "poop"]),
    ],
)
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
    """Overlay mode overwrites only the cell addressed by startrow/startcol."""
    base = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
    patch = DataFrame(["poop"])

    with tm.ensure_clean(ext) as path:
        base.to_excel(path, engine="openpyxl", sheet_name="poo", index=False)
        with ExcelWriter(
            path, engine="openpyxl", mode="a", if_sheet_exists="overlay"
        ) as writer:
            # use startrow+1 because we don't have a header
            patch.to_excel(
                writer,
                sheet_name="poo",
                startrow=startrow + 1,
                startcol=startcol,
                header=False,
                index=False,
            )

        expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
        result = pd.read_excel(path, sheet_name="poo", engine="openpyxl")
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "if_sheet_exists,msg",
    [
        (
            "invalid",
            "'invalid' is not valid for if_sheet_exists. Valid options "
            "are 'error', 'new', 'replace' and 'overlay'.",
        ),
        (
            "error",
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
        (
            None,
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
    ],
)
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
    """Invalid or "error" ``if_sheet_exists`` values raise ``ValueError``.

    A workbook containing sheet "foo" is created first; re-opening it in
    append mode and writing "foo" again must raise with ``msg``.
    """
    # GH 40230
    df = DataFrame({"fruit": ["pear"]})
    with tm.ensure_clean(ext) as f:
        # Setup stays outside pytest.raises: only the append step is expected
        # to raise, and a failure while creating the file should surface as
        # its own error instead of being matched against ``msg``.
        df.to_excel(f, sheet_name="foo", engine="openpyxl")
        with pytest.raises(ValueError, match=re.escape(msg)):
            with ExcelWriter(
                f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
            ) as writer:
                df.to_excel(writer, sheet_name="foo")
|
||||
|
||||
|
||||
def test_to_excel_with_openpyxl_engine(ext):
    """Smoke test: a styled frame can be written with the openpyxl engine."""
    # GH 29854

    def font_colour(val):
        # Same CSS the test has always produced: red for negatives, else black.
        return f"color: {'red' if val < 0 else 'black'}"

    with tm.ensure_clean(ext) as filename:
        column_a = DataFrame({"A": np.linspace(1, 10, 10)})
        column_b = DataFrame({"B": np.linspace(1, 20, 10)})
        combined = pd.concat([column_a, column_b], axis=1)

        styled = combined.style.map(font_colour).highlight_max()
        styled.to_excel(filename, engine="openpyxl")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("read_only", [True, False])
def test_read_workbook(datapath, ext, read_only):
    """``read_excel`` on an open openpyxl workbook matches reading the path."""
    # GH 39528
    filename = datapath("io", "data", "excel", "test1" + ext)
    workbook = openpyxl.load_workbook(filename, read_only=read_only)
    with contextlib.closing(workbook):
        from_workbook = pd.read_excel(workbook, engine="openpyxl")
    from_path = pd.read_excel(filename)
    tm.assert_frame_equal(from_workbook, from_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "header, expected_data",
    [
        (
            0,
            {
                "Title": [np.nan, "A", 1, 2, 3],
                "Unnamed: 1": [np.nan, "B", 4, 5, 6],
                "Unnamed: 2": [np.nan, "C", 7, 8, 9],
            },
        ),
        (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
    ],
)
@pytest.mark.parametrize(
    "filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_bad_dimension(
    datapath, ext, header, expected_data, filename, read_only
):
    """Files with missing or wrong dimension info still read correctly."""
    # GH 38956, 39001 - no/incorrect dimension information
    path = datapath("io", "data", "excel", f"{filename}{ext}")
    if read_only is not None:
        workbook = openpyxl.load_workbook(path, read_only=read_only)
        with contextlib.closing(workbook):
            result = pd.read_excel(workbook, engine="openpyxl", header=header)
    else:
        result = pd.read_excel(path, header=header)
    tm.assert_frame_equal(result, DataFrame(expected_data))
|
||||
|
||||
|
||||
def test_append_mode_file(ext):
    """Appending rewrites the archive rather than concatenating zip files."""
    # GH 39576
    empty = DataFrame()

    with tm.ensure_clean(ext) as path:
        empty.to_excel(path, engine="openpyxl")

        with ExcelWriter(
            path, mode="a", engine="openpyxl", if_sheet_exists="new"
        ) as writer:
            empty.to_excel(writer)

        # make sure that zip files are not concatenated by making sure that
        # "docProps/app.xml" only occurs twice in the file
        marker = b"docProps/app.xml"
        # The marker cannot overlap itself, so counting occurrences is the same
        # check as "a second match exists and a third does not".
        assert Path(path).read_bytes().count(marker) == 2
|
||||
|
||||
|
||||
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_empty_trailing_rows(datapath, ext, read_only):
    """``empty_trailing_rows`` reads as the five expected data rows."""
    # GH 39181
    path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
    if read_only is not None:
        workbook = openpyxl.load_workbook(path, read_only=read_only)
        with contextlib.closing(workbook):
            result = pd.read_excel(workbook, engine="openpyxl")
    else:
        result = pd.read_excel(path)

    expected_data = {
        "Title": [np.nan, "A", 1, 2, 3],
        "Unnamed: 1": [np.nan, "B", 4, 5, 6],
        "Unnamed: 2": [np.nan, "C", 7, 8, 9],
    }
    tm.assert_frame_equal(result, DataFrame(expected_data))
|
||||
|
||||
|
||||
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_empty_with_blank_row(datapath, ext, read_only):
    """An otherwise empty file containing one blank row reads as empty."""
    # GH 39547 - empty excel file with a row that has no data
    path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
    if read_only is not None:
        workbook = openpyxl.load_workbook(path, read_only=read_only)
        with contextlib.closing(workbook):
            result = pd.read_excel(workbook, engine="openpyxl")
    else:
        result = pd.read_excel(path)
    tm.assert_frame_equal(result, DataFrame())
|
||||
|
||||
|
||||
def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` reflects a sheet created directly on ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as path, ExcelWriter(
        path, engine="openpyxl"
    ) as writer:
        assert writer.sheets == {}

        created = writer.book.create_sheet("test_name", 0)

        assert writer.sheets == {"test_name": created}
|
||||
|
||||
|
||||
def test_ints_spelled_with_decimals(datapath, ext):
    """The ``ints_spelled_with_decimals`` fixture reads as the integers 2..11."""
    # GH 46988 - openpyxl returns this sheet with floats
    path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}")
    expected = DataFrame(range(2, 12), columns=[1])

    result = pd.read_excel(path)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_multiindex_header_no_index_names(datapath, ext):
    """Three header rows plus three index columns read into MultiIndexes."""
    # GH#47487
    path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")

    expected_columns = pd.MultiIndex.from_tuples(
        [("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
    )
    expected_index = pd.MultiIndex.from_tuples(
        [("A", "AA", "AAA"), ("A", "BB", "BBB")]
    )
    expected = DataFrame(
        [[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
        columns=expected_columns,
        index=expected_index,
    )

    result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])

    tm.assert_frame_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,298 @@
|
||||
import contextlib
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
read_excel,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import ExcelWriter
|
||||
from pandas.io.formats.excel import ExcelFormatter
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
|
||||
# could compute styles and render to excel without jinja2, since there is no
|
||||
# 'template' file, but this needs the import error to delayed until render time.
|
||||
|
||||
if is_platform_windows():
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
def assert_equal_cell_styles(cell1, cell2):
    """Assert that two cells carry the same style information.

    The alignment, border, fill, font and protection objects are compared
    through their ``__dict__``; ``number_format`` is compared directly.
    """
    # TODO: should find a better way to check equality
    for style_attr in ("alignment", "border", "fill", "font"):
        left = getattr(cell1, style_attr).__dict__
        right = getattr(cell2, style_attr).__dict__
        assert left == right
    assert cell1.number_format == cell2.number_format
    assert cell1.protection.__dict__ == cell2.protection.__dict__
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
def test_styler_to_excel_unstyled(engine):
    """With no styles applied, Styler.to_excel matches DataFrame.to_excel."""
    # compare DataFrame.to_excel and Styler.to_excel when no styles applied
    pytest.importorskip(engine)
    df = DataFrame(np.random.default_rng(2).standard_normal((2, 2)))
    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            df.to_excel(writer, sheet_name="dataframe")
            df.style.to_excel(writer, sheet_name="unstyled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
            plain_columns = wb["dataframe"].columns
            styler_columns = wb["unstyled"].columns
            for plain_col, styler_col in zip(plain_columns, styler_columns):
                assert len(plain_col) == len(styler_col)
                for plain_cell, styler_cell in zip(plain_col, styler_col):
                    assert plain_cell.value == styler_cell.value
                    assert_equal_cell_styles(plain_cell, styler_cell)
|
||||
|
||||
|
||||
# Parametrization shared by the styler tests: each entry is
# (css declaration, attribute path on the openpyxl cell, expected value).
# The expected value is either a plain value valid for every engine or a
# dict keyed by engine name.
shared_style_params = [
    (
        "background-color: #111222",
        ["fill", "fgColor", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    (
        "color: #111222",
        ["font", "color", "value"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("font-family: Arial;", ["font", "name"], "arial"),
    ("font-weight: bold;", ["font", "b"], True),
    ("font-style: italic;", ["font", "i"], True),
    ("text-decoration: underline;", ["font", "u"], "single"),
    ("number-format: $??,???.00;", ["number_format"], "$??,???.00"),
    ("text-align: left;", ["alignment", "horizontal"], "left"),
    (
        "vertical-align: bottom;",
        ["alignment", "vertical"],
        {"xlsxwriter": None, "openpyxl": "bottom"},  # xlsxwriter Fails
    ),
    ("vertical-align: middle;", ["alignment", "vertical"], "center"),
    # Border widths
    ("border-left: 2pt solid red", ["border", "left", "style"], "medium"),
    ("border-left: 1pt dotted red", ["border", "left", "style"], "dotted"),
    ("border-left: 2pt dotted red", ["border", "left", "style"], "mediumDashDotDot"),
    ("border-left: 1pt dashed red", ["border", "left", "style"], "dashed"),
    ("border-left: 2pt dashed red", ["border", "left", "style"], "mediumDashed"),
    ("border-left: 1pt solid red", ["border", "left", "style"], "thin"),
    ("border-left: 3pt solid red", ["border", "left", "style"], "thick"),
    # Border expansion
    (
        "border-left: 2pt solid #111222",
        ["border", "left", "color", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    # The shorthand "border" must expand to all four sides; for each side the
    # style entry comes first, followed by the colour entry.
    *(
        entry
        for side in ("top", "right", "bottom", "left")
        for entry in (
            ("border: 1pt solid red", ["border", side, "style"], "thin"),
            (
                "border: 1pt solid #111222",
                ["border", side, "color", "rgb"],
                {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
            ),
        )
    ),
    # Border styles
    (
        "border-left-style: hair; border-left-color: black",
        ["border", "left", "style"],
        "hair",
    ),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic(engine, css, attrs, expected):
    """A CSS declaration on data cells produces the expected Excel style.

    The same frame is written unstyled and styled; the attribute path
    ``attrs`` is followed on cell (2, 2) of both sheets and compared with
    ``expected`` (per-engine when ``expected`` is a dict).
    """
    pytest.importorskip(engine)
    df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
    styler = df.style.map(lambda x: css)

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            df.to_excel(writer, sheet_name="dataframe")
            styler.to_excel(writer, sheet_name="styled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
            unstyled_value = wb["dataframe"].cell(2, 2)
            styled_value = wb["styled"].cell(2, 2)
        # Walk the attribute path; a missing attribute on the unstyled side is
        # tolerated (None), on the styled side it is an error.
        for attr in attrs:
            unstyled_value = getattr(unstyled_value, attr, None)
            styled_value = getattr(styled_value, attr)

        wanted = expected[engine] if isinstance(expected, dict) else expected
        # test unstyled data cell does not have expected styles
        assert unstyled_value is None or unstyled_value != wanted
        # test styled cell has expected styles
        assert styled_value == wanted
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
    """A CSS declaration on index/column labels produces the expected style.

    Cell (2, 1) is the row-label cell and cell (1, 2) the column-label cell;
    both are checked on a null-styled sheet and on the styled sheet.
    """
    pytest.importorskip(engine)
    df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))

    styler = df.style.map_index(lambda x: css, axis=0).map_index(
        lambda x: css, axis=1
    )

    null_css = "null: css;"
    null_styler = (
        df.style.map(lambda x: null_css)
        .map_index(lambda x: null_css, axis=0)
        .map_index(lambda x: null_css, axis=1)
    )

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            null_styler.to_excel(writer, sheet_name="null_styled")
            styler.to_excel(writer, sheet_name="styled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
            null_sheet, styled_sheet = wb["null_styled"], wb["styled"]
            null_index, styled_index = null_sheet.cell(2, 1), styled_sheet.cell(2, 1)
            null_column, styled_column = null_sheet.cell(1, 2), styled_sheet.cell(1, 2)
        for attr in attrs:
            null_index = getattr(null_index, attr, None)
            styled_index = getattr(styled_index, attr)
            null_column = getattr(null_column, attr, None)
            styled_column = getattr(styled_column, attr)

        wanted = expected[engine] if isinstance(expected, dict) else expected
        # test null styled index cells does not have expected styles
        # test styled cell has expected styles
        assert null_index is None or null_index != wanted
        assert styled_index == wanted
        assert null_column is None or null_column != wanted
        assert styled_column == wanted
|
||||
|
||||
|
||||
# Border style names, taken from
# https://openpyxl.readthedocs.io/en/stable/api/openpyxl.styles.borders.html
# Note: the "width"-type styles ("thin", "medium", "thick") are deliberately
# left out; their behavior is undefined here and users should use
# border-width instead.
excel_border_styles = [
    # "thin",
    "dashed",
    "mediumDashDot",
    "dashDotDot",
    "hair",
    "dotted",
    "mediumDashDotDot",
    # "medium",
    "double",
    "dashDot",
    "slantDashDot",
    # "thick",
    "mediumDashed",
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("border_style", excel_border_styles)
def test_styler_to_excel_border_style(engine, border_style):
    """Each named border style round-trips into the cell's left border.

    The frame is written unstyled and styled with
    ``border-left: <border_style> black thin``; the left border style of
    cell (2, 2) must equal ``border_style`` on the styled sheet only.
    """
    css = f"border-left: {border_style} black thin"
    attrs = ["border", "left", "style"]
    # The expected value is always the plain style name, so no per-engine
    # dict lookup is needed here.
    expected = border_style

    pytest.importorskip(engine)
    df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
    styler = df.style.map(lambda x: css)

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            df.to_excel(writer, sheet_name="dataframe")
            styler.to_excel(writer, sheet_name="styled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
            u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
        # Walk the attribute path; a missing attribute on the unstyled side is
        # tolerated (None), on the styled side it is an error.
        for attr in attrs:
            u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr)

        # test unstyled data cell does not have expected styles
        assert u_cell is None or u_cell != expected
        # test styled cell has expected styles
        assert s_cell == expected
|
||||
|
||||
|
||||
def test_styler_custom_converter():
    """
    The style produced by a user-supplied ``style_converter`` is what gets
    written: the cell's font colour matches the converter's output rather
    than the colour named in the CSS.
    """
    openpyxl = pytest.importorskip("openpyxl")

    def custom_converter(css):
        # Always returns the same font colour, whatever CSS is passed in.
        return {"font": {"color": {"rgb": "111222"}}}

    frame = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
    styled = frame.style.map(lambda x: "color: #888999")

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine="openpyxl") as writer:
            formatter = ExcelFormatter(styled, style_converter=custom_converter)
            formatter.write(writer, sheet_name="custom")

        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
            data_cell = wb["custom"].cell(2, 2)
            assert data_cell.font.color.value == "00111222"
|
||||
|
||||
|
||||
@pytest.mark.single_cpu
@td.skip_if_not_us_locale
def test_styler_to_s3(s3_public_bucket, s3so):
    """Write a styled frame to the S3 test bucket and read it back unchanged."""
    # GH#46381
    bucket_name = s3_public_bucket.name
    target_file = "test.xlsx"
    url = f"s3://{bucket_name}/{target_file}"

    df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
    df.style.set_sticky(axis="index").to_excel(url, storage_options=s3so)

    # Poll the bucket listing until the uploaded key shows up.  The budget is
    # reduced by the sleep interval on every miss and the test fails as soon
    # as it is used up.
    remaining = 5
    while target_file not in (obj.key for obj in s3_public_bucket.objects.all()):
        time.sleep(0.1)
        remaining -= 0.1
        assert remaining > 0, "Timed out waiting for file to appear on moto"

    result = read_excel(url, index_col=0, storage_options=s3so)
    tm.assert_frame_equal(result, df)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,76 @@
|
||||
import io
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import ExcelFile
|
||||
from pandas.io.excel._base import inspect_excel_format
|
||||
|
||||
# Skip every test in this module when xlrd cannot be imported; the imported
# module object is kept because tests below call xlrd directly.
xlrd = pytest.importorskip("xlrd")

if is_platform_windows():
    # On Windows the whole module is marked single_cpu.
    # NOTE(review): presumably to keep these file-based tests out of parallel
    # workers -- confirm the motivation.
    pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
@pytest.fixture(params=[".xls"])
def read_ext_xlrd(request):
    """
    Valid extensions for reading Excel files with xlrd.

    Similar to read_ext, but excludes .ods, .xlsb, and for xlrd>2 .xlsx, .xlsm

    Returns the current fixture parameter (at present only ``".xls"``).
    """
    return request.param
|
||||
|
||||
|
||||
def test_read_xlrd_book(read_ext_xlrd, datapath):
    """
    Reading from an already-open xlrd workbook gives the same frame whether
    the workbook is first wrapped in ``ExcelFile`` or passed straight to
    ``read_excel``.
    """
    engine = "xlrd"
    sheet_name = "Sheet1"
    # Build the file name from the fixture value so that every extension the
    # fixture is parametrized with is actually exercised (previously the
    # fixture was requested but the extension was hard-coded).
    pth = datapath("io", "data", "excel", "test1" + read_ext_xlrd)
    with xlrd.open_workbook(pth) as book:
        with ExcelFile(book, engine=engine) as xl:
            result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)

        expected = pd.read_excel(
            book, sheet_name=sheet_name, engine=engine, index_col=0
        )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_xlsx_fails(datapath):
    """Reading an ``.xlsx`` file with ``engine="xlrd"`` raises ``XLRDError``."""
    # GH 29375
    from xlrd.biffh import XLRDError

    xlsx_file = datapath("io", "data", "excel", "test1.xlsx")
    unsupported = "Excel xlsx file; not supported"
    with pytest.raises(XLRDError, match=unsupported):
        pd.read_excel(xlsx_file, engine="xlrd")
|
||||
|
||||
|
||||
def test_nan_in_xls(datapath):
    """
    ``test6.xls`` read without a header yields an int64 column ``[0, 2]`` and
    a float column ``[1, NaN]``.
    """
    # GH 54564
    xls_file = datapath("io", "data", "excel", "test6.xls")

    first_col = np.array([0, 2], dtype="int64")
    second_col = np.array([1, np.nan])
    expected = pd.DataFrame({0: first_col, 1: second_col})

    result = pd.read_excel(xls_file, header=None)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "file_header",
    [
        b"\x09\x00\x04\x00\x07\x00\x10\x00",
        b"\x09\x02\x06\x00\x00\x00\x10\x00",
        b"\x09\x04\x06\x00\x00\x00\x10\x00",
        b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
    ],
)
def test_read_old_xls_files(file_header):
    """
    Each 8-byte header is classified as ``"xls"`` by ``inspect_excel_format``.

    NOTE(review): the headers look like legacy BIFF start-of-file records plus
    the compound-document signature -- confirm against the detection code.
    """
    # GH 41226
    stream = io.BytesIO(file_header)
    detected = inspect_excel_format(stream)
    assert detected == "xls"
|
@ -0,0 +1,86 @@
|
||||
import contextlib
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import ExcelWriter
|
||||
|
||||
# Skip every test in this module when xlsxwriter cannot be imported.
xlsxwriter = pytest.importorskip("xlsxwriter")

if is_platform_windows():
    # On Windows the whole module is marked single_cpu.
    # NOTE(review): presumably to keep these file-based tests out of parallel
    # workers -- confirm the motivation.
    pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
@pytest.fixture
def ext():
    """File extension passed to ``tm.ensure_clean`` by the tests in this module."""
    return ".xlsx"
|
||||
|
||||
|
||||
def test_column_format(ext):
    """
    Check that a column number format set through the xlsxwriter workbook is
    applied to the cells of that column.

    The file is written with xlsxwriter and read back with openpyxl, which is
    why openpyxl is required as well.
    """
    # Test that column formats are applied to cells. Test for issue #9167.
    # Applicable to xlsxwriter only.
    openpyxl = pytest.importorskip("openpyxl")

    with tm.ensure_clean(ext) as path:
        frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})

        # The workbook calls below (worksheets(), add_format, set_column) are
        # xlsxwriter API, so request that engine explicitly instead of
        # relying on whatever the default writer happens to be.
        with ExcelWriter(path, engine="xlsxwriter") as writer:
            frame.to_excel(writer)

            # Add a number format to col B and ensure it is applied to cells.
            num_format = "#,##0"
            write_workbook = writer.book
            write_worksheet = write_workbook.worksheets()[0]
            col_format = write_workbook.add_format({"num_format": num_format})
            write_worksheet.set_column("B:B", None, col_format)

        with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook:
            try:
                read_worksheet = read_workbook["Sheet1"]
            except TypeError:
                # compat
                read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")

            # Get the number format from the cell.
            try:
                cell = read_worksheet["B2"]
            except TypeError:
                # compat
                cell = read_worksheet.cell("B2")

            try:
                read_num_format = cell.number_format
            except AttributeError:
                # compat: fall back to the format code stored on the style
                read_num_format = cell.style.number_format._format_code

            assert read_num_format == num_format
|
||||
|
||||
|
||||
def test_write_append_mode_raises(ext):
    """Creating an xlsxwriter ``ExcelWriter`` with ``mode="a"`` raises ``ValueError``."""
    pattern = "Append mode is not supported with xlsxwriter!"

    with tm.ensure_clean(ext) as tmp, pytest.raises(ValueError, match=pattern):
        ExcelWriter(tmp, engine="xlsxwriter", mode="a")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
    """Options passed via ``engine_kwargs`` are visible on ``writer.book``."""
    # GH 42286
    workbook_options = {"nan_inf_to_errors": nan_inf_to_errors}
    kwargs = {"options": workbook_options}
    with tm.ensure_clean(ext) as tmp:
        with ExcelWriter(tmp, engine="xlsxwriter", engine_kwargs=kwargs) as writer:
            assert writer.book.nan_inf_to_errors == nan_inf_to_errors
|
||||
|
||||
|
||||
def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` picks up worksheets added directly on ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as tmp, ExcelWriter(tmp, engine="xlsxwriter") as writer:
        assert writer.sheets == {}
        added = writer.book.add_worksheet("test_name")
        assert writer.sheets == {"test_name": added}
|
Reference in New Issue
Block a user