This commit is contained in:
2024-12-04 13:35:57 +05:00
parent d346bf4b2a
commit 73ce681a55
7059 changed files with 1196501 additions and 0 deletions

View File

@ -0,0 +1,242 @@
import shlex
import subprocess
import time
import uuid
import pytest
from pandas.compat import (
is_ci_environment,
is_platform_arm,
is_platform_mac,
is_platform_windows,
)
import pandas.util._test_decorators as td
import pandas.io.common as icom
from pandas.io.parsers import read_csv
@pytest.fixture
def compression_to_extension():
return {value: key for key, value in icom.extension_to_compression.items()}
@pytest.fixture
def tips_file(datapath):
"""Path to the tips dataset"""
return datapath("io", "data", "csv", "tips.csv")
@pytest.fixture
def jsonl_file(datapath):
"""Path to a JSONL dataset"""
return datapath("io", "parser", "data", "items.jsonl")
@pytest.fixture
def salaries_table(datapath):
"""DataFrame with the salaries dataset"""
return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t")
@pytest.fixture
def feather_file(datapath):
return datapath("io", "data", "feather", "feather-0_3_1.feather")
@pytest.fixture
def xml_file(datapath):
return datapath("io", "data", "xml", "books.xml")
@pytest.fixture
def s3_base(worker_id, monkeypatch):
"""
Fixture for mocking S3 interaction.
Sets up moto server in separate process locally
Return url for motoserver/moto CI service
"""
pytest.importorskip("s3fs")
pytest.importorskip("boto3")
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
# see https://github.com/spulec/moto/issues/1924 & 1952
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
if is_ci_environment():
if is_platform_arm() or is_platform_mac() or is_platform_windows():
# NOT RUN on Windows/macOS/ARM, only Ubuntu
# - subprocess in CI can cause timeouts
# - GitHub Actions do not support
# container services for the above OSs
# - CircleCI will probably hit the Docker rate pull limit
pytest.skip(
"S3 tests do not have a corresponding service in "
"Windows, macOS or ARM platforms"
)
else:
# set in .github/workflows/unit-tests.yml
yield "http://localhost:5000"
else:
requests = pytest.importorskip("requests")
pytest.importorskip("moto")
pytest.importorskip("flask") # server mode needs flask too
# Launching moto in server mode, i.e., as a separate process
# with an S3 endpoint on localhost
worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
endpoint_port = f"555{worker_id}"
endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"
# pipe to null to avoid logging in terminal
with subprocess.Popen(
shlex.split(f"moto_server s3 -p {endpoint_port}"),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
) as proc:
timeout = 5
while timeout > 0:
try:
# OK to go once server is accepting connections
r = requests.get(endpoint_uri)
if r.ok:
break
except Exception:
pass
timeout -= 0.1
time.sleep(0.1)
yield endpoint_uri
proc.terminate()
@pytest.fixture
def s3so(s3_base):
return {"client_kwargs": {"endpoint_url": s3_base}}
@pytest.fixture
def s3_resource(s3_base):
import boto3
s3 = boto3.resource("s3", endpoint_url=s3_base)
return s3
@pytest.fixture
def s3_public_bucket(s3_resource):
bucket = s3_resource.Bucket(f"pandas-test-{uuid.uuid4()}")
bucket.create()
yield bucket
bucket.objects.delete()
bucket.delete()
@pytest.fixture
def s3_public_bucket_with_data(
s3_public_bucket, tips_file, jsonl_file, feather_file, xml_file
):
"""
The following datasets
are loaded.
- tips.csv
- tips.csv.gz
- tips.csv.bz2
- items.jsonl
"""
test_s3_files = [
("tips#1.csv", tips_file),
("tips.csv", tips_file),
("tips.csv.gz", tips_file + ".gz"),
("tips.csv.bz2", tips_file + ".bz2"),
("items.jsonl", jsonl_file),
("simple_dataset.feather", feather_file),
("books.xml", xml_file),
]
for s3_key, file_name in test_s3_files:
with open(file_name, "rb") as f:
s3_public_bucket.put_object(Key=s3_key, Body=f)
return s3_public_bucket
@pytest.fixture
def s3_private_bucket(s3_resource):
bucket = s3_resource.Bucket(f"cant_get_it-{uuid.uuid4()}")
bucket.create(ACL="private")
yield bucket
bucket.objects.delete()
bucket.delete()
@pytest.fixture
def s3_private_bucket_with_data(
s3_private_bucket, tips_file, jsonl_file, feather_file, xml_file
):
"""
The following datasets
are loaded.
- tips.csv
- tips.csv.gz
- tips.csv.bz2
- items.jsonl
"""
test_s3_files = [
("tips#1.csv", tips_file),
("tips.csv", tips_file),
("tips.csv.gz", tips_file + ".gz"),
("tips.csv.bz2", tips_file + ".bz2"),
("items.jsonl", jsonl_file),
("simple_dataset.feather", feather_file),
("books.xml", xml_file),
]
for s3_key, file_name in test_s3_files:
with open(file_name, "rb") as f:
s3_private_bucket.put_object(Key=s3_key, Body=f)
return s3_private_bucket
_compression_formats_params = [
(".no_compress", None),
("", None),
(".gz", "gzip"),
(".GZ", "gzip"),
(".bz2", "bz2"),
(".BZ2", "bz2"),
(".zip", "zip"),
(".ZIP", "zip"),
(".xz", "xz"),
(".XZ", "xz"),
pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")),
pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")),
]
@pytest.fixture(params=_compression_formats_params[1:])
def compression_format(request):
return request.param
@pytest.fixture(params=_compression_formats_params)
def compression_ext(request):
return request.param[0]
@pytest.fixture(
params=[
"python",
pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
]
)
def string_storage(request):
"""
Parametrized fixture for pd.options.mode.string_storage.
* 'python'
* 'pyarrow'
"""
return request.param

View File

@ -0,0 +1,77 @@
import functools
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
import pandas._testing as tm
pytest.importorskip("odf")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture(autouse=True)
def cd_and_set_engine(monkeypatch, datapath):
func = functools.partial(pd.read_excel, engine="odf")
monkeypatch.setattr(pd, "read_excel", func)
monkeypatch.chdir(datapath("io", "data", "excel"))
def test_read_invalid_types_raises():
# the invalid_value_type.ods required manually editing
# of the included content.xml file
with pytest.raises(ValueError, match="Unrecognized type awesome_new_type"):
pd.read_excel("invalid_value_type.ods")
def test_read_writer_table():
# Also test reading tables from an text OpenDocument file
# (.odt)
index = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header")
expected = pd.DataFrame(
[[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]],
index=index,
columns=["Column 1", "Unnamed: 2", "Column 3"],
)
result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0)
tm.assert_frame_equal(result, expected)
def test_read_newlines_between_xml_elements_table():
# GH#45598
expected = pd.DataFrame(
[[1.0, 4.0, 7], [np.nan, np.nan, 8], [3.0, 6.0, 9]],
columns=["Column 1", "Column 2", "Column 3"],
)
result = pd.read_excel("test_newlines.ods")
tm.assert_frame_equal(result, expected)
def test_read_unempty_cells():
expected = pd.DataFrame(
[1, np.nan, 3, np.nan, 5],
columns=["Column 1"],
)
result = pd.read_excel("test_unempty_cells.ods")
tm.assert_frame_equal(result, expected)
def test_read_cell_annotation():
expected = pd.DataFrame(
["test", np.nan, "test 3"],
columns=["Column 1"],
)
result = pd.read_excel("test_cell_annotation.ods")
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,106 @@
from datetime import (
date,
datetime,
)
import re
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
odf = pytest.importorskip("odf")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture
def ext():
return ".ods"
def test_write_append_mode_raises(ext):
msg = "Append mode is not supported with odf!"
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=msg):
ExcelWriter(f, engine="odf", mode="a")
@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
# GH 42286
# GH 43445
# test for error: OpenDocumentSpreadsheet does not accept any arguments
with tm.ensure_clean(ext) as f:
if engine_kwargs is not None:
error = re.escape(
"OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
)
with pytest.raises(
TypeError,
match=error,
):
ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs)
else:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass
def test_book_and_sheets_consistent(ext):
# GH#45687 - Ensure sheets is updated if user modifies book
with tm.ensure_clean(ext) as f:
with ExcelWriter(f) as writer:
assert writer.sheets == {}
table = odf.table.Table(name="test_name")
writer.book.spreadsheet.addElement(table)
assert writer.sheets == {"test_name": table}
@pytest.mark.parametrize(
["value", "cell_value_type", "cell_value_attribute", "cell_value"],
argvalues=[
(True, "boolean", "boolean-value", "true"),
("test string", "string", "string-value", "test string"),
(1, "float", "value", "1"),
(1.5, "float", "value", "1.5"),
(
datetime(2010, 10, 10, 10, 10, 10),
"date",
"date-value",
"2010-10-10T10:10:10",
),
(date(2010, 10, 10), "date", "date-value", "2010-10-10"),
],
)
def test_cell_value_type(ext, value, cell_value_type, cell_value_attribute, cell_value):
# GH#54994 ODS: cell attributes should follow specification
# http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
from odf.namespaces import OFFICENS
from odf.table import (
TableCell,
TableRow,
)
table_cell_name = TableCell().qname
with tm.ensure_clean(ext) as f:
pd.DataFrame([[value]]).to_excel(f, header=False, index=False)
with pd.ExcelFile(f) as wb:
sheet = wb._reader.get_sheet_by_index(0)
sheet_rows = sheet.getElementsByType(TableRow)
sheet_cells = [
x
for x in sheet_rows[0].childNodes
if hasattr(x, "qname") and x.qname == table_cell_name
]
cell = sheet_cells[0]
assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type
assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value

View File

@ -0,0 +1,432 @@
import contextlib
from pathlib import Path
import re
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import (
ExcelWriter,
_OpenpyxlWriter,
)
from pandas.io.excel._openpyxl import OpenpyxlReader
openpyxl = pytest.importorskip("openpyxl")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture
def ext():
return ".xlsx"
def test_to_excel_styleconverter():
from openpyxl import styles
hstyle = {
"font": {"color": "00FF0000", "bold": True},
"borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
"alignment": {"horizontal": "center", "vertical": "top"},
"fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
"number_format": {"format_code": "0.00"},
"protection": {"locked": True, "hidden": False},
}
font_color = styles.Color("00FF0000")
font = styles.Font(bold=True, color=font_color)
side = styles.Side(style=styles.borders.BORDER_THIN)
border = styles.Border(top=side, right=side, bottom=side, left=side)
alignment = styles.Alignment(horizontal="center", vertical="top")
fill_color = styles.Color(rgb="006666FF", tint=0.3)
fill = styles.PatternFill(patternType="solid", fgColor=fill_color)
number_format = "0.00"
protection = styles.Protection(locked=True, hidden=False)
kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
assert kw["font"] == font
assert kw["border"] == border
assert kw["alignment"] == alignment
assert kw["fill"] == fill
assert kw["number_format"] == number_format
assert kw["protection"] == protection
def test_write_cells_merge_styled(ext):
from pandas.io.formats.excel import ExcelCell
sheet_name = "merge_styled"
sty_b1 = {"font": {"color": "00FF0000"}}
sty_a2 = {"font": {"color": "0000FF00"}}
initial_cells = [
ExcelCell(col=1, row=0, val=42, style=sty_b1),
ExcelCell(col=0, row=1, val=99, style=sty_a2),
]
sty_merged = {"font": {"color": "000000FF", "bold": True}}
sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)
openpyxl_sty_merged = sty_kwargs["font"]
merge_cells = [
ExcelCell(
col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
)
]
with tm.ensure_clean(ext) as path:
with _OpenpyxlWriter(path) as writer:
writer._write_cells(initial_cells, sheet_name=sheet_name)
writer._write_cells(merge_cells, sheet_name=sheet_name)
wks = writer.sheets[sheet_name]
xcell_b1 = wks["B1"]
xcell_a2 = wks["A2"]
assert xcell_b1.font == openpyxl_sty_merged
assert xcell_a2.font == openpyxl_sty_merged
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
# GH 42286 GH 43445
engine_kwargs = {"iso_dates": iso_dates}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
assert writer.book.iso_dates == iso_dates
# ExcelWriter won't allow us to close without writing something
DataFrame().to_excel(writer)
def test_engine_kwargs_append_invalid(ext):
# GH 43445
# test whether an invalid engine kwargs actually raises
with tm.ensure_clean(ext) as f:
DataFrame(["hello", "world"]).to_excel(f)
with pytest.raises(
TypeError,
match=re.escape(
"load_workbook() got an unexpected keyword argument 'apple_banana'"
),
):
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
) as writer:
# ExcelWriter needs us to write something to close properly
DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(ext, data_only, expected):
# GH 43445
# tests whether the data_only engine_kwarg actually works well for
# openpyxl's load_workbook
with tm.ensure_clean(ext) as f:
DataFrame(["=1+1"]).to_excel(f)
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
) as writer:
assert writer.sheets["Sheet1"]["B2"].value == expected
# ExcelWriter needs us to writer something to close properly?
DataFrame().to_excel(writer, sheet_name="Sheet2")
# ensure that data_only also works for reading
# and that formulas/values roundtrip
assert (
pd.read_excel(
f,
sheet_name="Sheet1",
engine="openpyxl",
engine_kwargs={"data_only": data_only},
).iloc[0, 1]
== expected
)
@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
@pytest.mark.parametrize("kwarg_value", [True, False])
def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
# GH 55027
# test that `read_only` and `data_only` can be passed to
# `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
filename = datapath("io", "data", "excel", "test1" + ext)
with contextlib.closing(
OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value})
) as reader:
assert getattr(reader.book, kwarg_name) == kwarg_value
@pytest.mark.parametrize(
"mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
def test_write_append_mode(ext, mode, expected):
df = DataFrame([1], columns=["baz"])
with tm.ensure_clean(ext) as f:
wb = openpyxl.Workbook()
wb.worksheets[0].title = "foo"
wb.worksheets[0]["A1"].value = "foo"
wb.create_sheet("bar")
wb.worksheets[1]["A1"].value = "bar"
wb.save(f)
with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
df.to_excel(writer, sheet_name="baz", index=False)
with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
result = [sheet.title for sheet in wb2.worksheets]
assert result == expected
for index, cell_value in enumerate(expected):
assert wb2.worksheets[index]["A1"].value == cell_value
@pytest.mark.parametrize(
"if_sheet_exists,num_sheets,expected",
[
("new", 2, ["apple", "banana"]),
("replace", 1, ["pear"]),
("overlay", 1, ["pear", "banana"]),
],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
# GH 40230
df1 = DataFrame({"fruit": ["apple", "banana"]})
df2 = DataFrame({"fruit": ["pear"]})
with tm.ensure_clean(ext) as f:
df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
) as writer:
df2.to_excel(writer, sheet_name="foo", index=False)
with contextlib.closing(openpyxl.load_workbook(f)) as wb:
assert len(wb.sheetnames) == num_sheets
assert wb.sheetnames[0] == "foo"
result = pd.read_excel(wb, "foo", engine="openpyxl")
assert list(result["fruit"]) == expected
if len(wb.sheetnames) == 2:
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
tm.assert_frame_equal(result, df2)
@pytest.mark.parametrize(
"startrow, startcol, greeting, goodbye",
[
(0, 0, ["poop", "world"], ["goodbye", "people"]),
(0, 1, ["hello", "world"], ["poop", "people"]),
(1, 0, ["hello", "poop"], ["goodbye", "people"]),
(1, 1, ["hello", "world"], ["goodbye", "poop"]),
],
)
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
df2 = DataFrame(["poop"])
with tm.ensure_clean(ext) as f:
df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False)
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists="overlay"
) as writer:
# use startrow+1 because we don't have a header
df2.to_excel(
writer,
index=False,
header=False,
startrow=startrow + 1,
startcol=startcol,
sheet_name="poo",
)
result = pd.read_excel(f, sheet_name="poo", engine="openpyxl")
expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"if_sheet_exists,msg",
[
(
"invalid",
"'invalid' is not valid for if_sheet_exists. Valid options "
"are 'error', 'new', 'replace' and 'overlay'.",
),
(
"error",
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
),
(
None,
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
),
],
)
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
# GH 40230
df = DataFrame({"fruit": ["pear"]})
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=re.escape(msg)):
df.to_excel(f, sheet_name="foo", engine="openpyxl")
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
) as writer:
df.to_excel(writer, sheet_name="foo")
def test_to_excel_with_openpyxl_engine(ext):
# GH 29854
with tm.ensure_clean(ext) as filename:
df1 = DataFrame({"A": np.linspace(1, 10, 10)})
df2 = DataFrame({"B": np.linspace(1, 20, 10)})
df = pd.concat([df1, df2], axis=1)
styled = df.style.map(
lambda val: f"color: {'red' if val < 0 else 'black'}"
).highlight_max()
styled.to_excel(filename, engine="openpyxl")
@pytest.mark.parametrize("read_only", [True, False])
def test_read_workbook(datapath, ext, read_only):
# GH 39528
filename = datapath("io", "data", "excel", "test1" + ext)
with contextlib.closing(
openpyxl.load_workbook(filename, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = pd.read_excel(filename)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"header, expected_data",
[
(
0,
{
"Title": [np.nan, "A", 1, 2, 3],
"Unnamed: 1": [np.nan, "B", 4, 5, 6],
"Unnamed: 2": [np.nan, "C", 7, 8, 9],
},
),
(2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
],
)
@pytest.mark.parametrize(
"filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_bad_dimension(
datapath, ext, header, expected_data, filename, read_only
):
# GH 38956, 39001 - no/incorrect dimension information
path = datapath("io", "data", "excel", f"{filename}{ext}")
if read_only is None:
result = pd.read_excel(path, header=header)
else:
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl", header=header)
expected = DataFrame(expected_data)
tm.assert_frame_equal(result, expected)
def test_append_mode_file(ext):
# GH 39576
df = DataFrame()
with tm.ensure_clean(ext) as f:
df.to_excel(f, engine="openpyxl")
with ExcelWriter(
f, mode="a", engine="openpyxl", if_sheet_exists="new"
) as writer:
df.to_excel(writer)
# make sure that zip files are not concatenated by making sure that
# "docProps/app.xml" only occurs twice in the file
data = Path(f).read_bytes()
first = data.find(b"docProps/app.xml")
second = data.find(b"docProps/app.xml", first + 1)
third = data.find(b"docProps/app.xml", second + 1)
assert second != -1 and third == -1
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_empty_trailing_rows(datapath, ext, read_only):
# GH 39181
path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
if read_only is None:
result = pd.read_excel(path)
else:
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame(
{
"Title": [np.nan, "A", 1, 2, 3],
"Unnamed: 1": [np.nan, "B", 4, 5, 6],
"Unnamed: 2": [np.nan, "C", 7, 8, 9],
}
)
tm.assert_frame_equal(result, expected)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_empty_with_blank_row(datapath, ext, read_only):
# GH 39547 - empty excel file with a row that has no data
path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
if read_only is None:
result = pd.read_excel(path)
else:
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame()
tm.assert_frame_equal(result, expected)
def test_book_and_sheets_consistent(ext):
# GH#45687 - Ensure sheets is updated if user modifies book
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl") as writer:
assert writer.sheets == {}
sheet = writer.book.create_sheet("test_name", 0)
assert writer.sheets == {"test_name": sheet}
def test_ints_spelled_with_decimals(datapath, ext):
# GH 46988 - openpyxl returns this sheet with floats
path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}")
result = pd.read_excel(path)
expected = DataFrame(range(2, 12), columns=[1])
tm.assert_frame_equal(result, expected)
def test_read_multiindex_header_no_index_names(datapath, ext):
# GH#47487
path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])
expected = DataFrame(
[[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
columns=pd.MultiIndex.from_tuples(
[("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
),
index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
)
tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,298 @@
import contextlib
import time
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
read_excel,
)
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
from pandas.io.formats.excel import ExcelFormatter
pytest.importorskip("jinja2")
# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
# could compute styles and render to excel without jinja2, since there is no
# 'template' file, but this needs the import error to delayed until render time.
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
def assert_equal_cell_styles(cell1, cell2):
# TODO: should find a better way to check equality
assert cell1.alignment.__dict__ == cell2.alignment.__dict__
assert cell1.border.__dict__ == cell2.border.__dict__
assert cell1.fill.__dict__ == cell2.fill.__dict__
assert cell1.font.__dict__ == cell2.font.__dict__
assert cell1.number_format == cell2.number_format
assert cell1.protection.__dict__ == cell2.protection.__dict__
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
def test_styler_to_excel_unstyled(engine):
# compare DataFrame.to_excel and Styler.to_excel when no styles applied
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((2, 2)))
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="dataframe")
df.style.to_excel(writer, sheet_name="unstyled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns):
assert len(col1) == len(col2)
for cell1, cell2 in zip(col1, col2):
assert cell1.value == cell2.value
assert_equal_cell_styles(cell1, cell2)
shared_style_params = [
(
"background-color: #111222",
["fill", "fgColor", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
(
"color: #111222",
["font", "color", "value"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("font-family: Arial;", ["font", "name"], "arial"),
("font-weight: bold;", ["font", "b"], True),
("font-style: italic;", ["font", "i"], True),
("text-decoration: underline;", ["font", "u"], "single"),
("number-format: $??,???.00;", ["number_format"], "$??,???.00"),
("text-align: left;", ["alignment", "horizontal"], "left"),
(
"vertical-align: bottom;",
["alignment", "vertical"],
{"xlsxwriter": None, "openpyxl": "bottom"}, # xlsxwriter Fails
),
("vertical-align: middle;", ["alignment", "vertical"], "center"),
# Border widths
("border-left: 2pt solid red", ["border", "left", "style"], "medium"),
("border-left: 1pt dotted red", ["border", "left", "style"], "dotted"),
("border-left: 2pt dotted red", ["border", "left", "style"], "mediumDashDotDot"),
("border-left: 1pt dashed red", ["border", "left", "style"], "dashed"),
("border-left: 2pt dashed red", ["border", "left", "style"], "mediumDashed"),
("border-left: 1pt solid red", ["border", "left", "style"], "thin"),
("border-left: 3pt solid red", ["border", "left", "style"], "thick"),
# Border expansion
(
"border-left: 2pt solid #111222",
["border", "left", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "top", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "top", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "right", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "right", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "bottom", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "bottom", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "left", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "left", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
# Border styles
(
"border-left-style: hair; border-left-color: black",
["border", "left", "style"],
"hair",
),
]
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic(engine, css, attrs, expected):
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style.map(lambda x: css)
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="dataframe")
styler.to_excel(writer, sheet_name="styled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
# test unstyled data cell does not have expected styles
# test styled cell has expected styles
u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
for attr in attrs:
u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr)
if isinstance(expected, dict):
assert u_cell is None or u_cell != expected[engine]
assert s_cell == expected[engine]
else:
assert u_cell is None or u_cell != expected
assert s_cell == expected
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style
styler.map_index(lambda x: css, axis=0)
styler.map_index(lambda x: css, axis=1)
null_styler = df.style
null_styler.map(lambda x: "null: css;")
null_styler.map_index(lambda x: "null: css;", axis=0)
null_styler.map_index(lambda x: "null: css;", axis=1)
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
null_styler.to_excel(writer, sheet_name="null_styled")
styler.to_excel(writer, sheet_name="styled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
# test null styled index cells does not have expected styles
# test styled cell has expected styles
ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1)
uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2)
for attr in attrs:
ui_cell, si_cell = getattr(ui_cell, attr, None), getattr(si_cell, attr)
uc_cell, sc_cell = getattr(uc_cell, attr, None), getattr(sc_cell, attr)
if isinstance(expected, dict):
assert ui_cell is None or ui_cell != expected[engine]
assert si_cell == expected[engine]
assert uc_cell is None or uc_cell != expected[engine]
assert sc_cell == expected[engine]
else:
assert ui_cell is None or ui_cell != expected
assert si_cell == expected
assert uc_cell is None or uc_cell != expected
assert sc_cell == expected
# From https://openpyxl.readthedocs.io/en/stable/api/openpyxl.styles.borders.html
# Note: Leaving behavior of "width"-type styles undefined; user should use border-width
# instead
excel_border_styles = [
# "thin",
"dashed",
"mediumDashDot",
"dashDotDot",
"hair",
"dotted",
"mediumDashDotDot",
# "medium",
"double",
"dashDot",
"slantDashDot",
# "thick",
"mediumDashed",
]
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("border_style", excel_border_styles)
def test_styler_to_excel_border_style(engine, border_style):
css = f"border-left: {border_style} black thin"
attrs = ["border", "left", "style"]
expected = border_style
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style.map(lambda x: css)
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="dataframe")
styler.to_excel(writer, sheet_name="styled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
# test unstyled data cell does not have expected styles
# test styled cell has expected styles
u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
for attr in attrs:
u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr)
if isinstance(expected, dict):
assert u_cell is None or u_cell != expected[engine]
assert s_cell == expected[engine]
else:
assert u_cell is None or u_cell != expected
assert s_cell == expected
def test_styler_custom_converter():
openpyxl = pytest.importorskip("openpyxl")
def custom_converter(css):
return {"font": {"color": {"rgb": "111222"}}}
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style.map(lambda x: "color: #888999")
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine="openpyxl") as writer:
ExcelFormatter(styler, style_converter=custom_converter).write(
writer, sheet_name="custom"
)
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
assert wb["custom"].cell(2, 2).font.color.value == "00111222"
@pytest.mark.single_cpu
@td.skip_if_not_us_locale
def test_styler_to_s3(s3_public_bucket, s3so):
# GH#46381
mock_bucket_name, target_file = s3_public_bucket.name, "test.xlsx"
df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
styler = df.style.set_sticky(axis="index")
styler.to_excel(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
timeout = 5
while True:
if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
break
time.sleep(0.1)
timeout -= 0.1
assert timeout > 0, "Timed out waiting for file to appear on moto"
result = read_excel(
f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so
)
tm.assert_frame_equal(result, df)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
import io
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
import pandas._testing as tm
from pandas.io.excel import ExcelFile
from pandas.io.excel._base import inspect_excel_format
xlrd = pytest.importorskip("xlrd")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture(params=[".xls"])
def read_ext_xlrd(request):
"""
Valid extensions for reading Excel files with xlrd.
Similar to read_ext, but excludes .ods, .xlsb, and for xlrd>2 .xlsx, .xlsm
"""
return request.param
def test_read_xlrd_book(read_ext_xlrd, datapath):
engine = "xlrd"
sheet_name = "Sheet1"
pth = datapath("io", "data", "excel", "test1.xls")
with xlrd.open_workbook(pth) as book:
with ExcelFile(book, engine=engine) as xl:
result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
expected = pd.read_excel(
book, sheet_name=sheet_name, engine=engine, index_col=0
)
tm.assert_frame_equal(result, expected)
def test_read_xlsx_fails(datapath):
# GH 29375
from xlrd.biffh import XLRDError
path = datapath("io", "data", "excel", "test1.xlsx")
with pytest.raises(XLRDError, match="Excel xlsx file; not supported"):
pd.read_excel(path, engine="xlrd")
def test_nan_in_xls(datapath):
# GH 54564
path = datapath("io", "data", "excel", "test6.xls")
expected = pd.DataFrame({0: np.r_[0, 2].astype("int64"), 1: np.r_[1, np.nan]})
result = pd.read_excel(path, header=None)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"file_header",
[
b"\x09\x00\x04\x00\x07\x00\x10\x00",
b"\x09\x02\x06\x00\x00\x00\x10\x00",
b"\x09\x04\x06\x00\x00\x00\x10\x00",
b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
],
)
def test_read_old_xls_files(file_header):
# GH 41226
f = io.BytesIO(file_header)
assert inspect_excel_format(f) == "xls"

View File

@ -0,0 +1,86 @@
import contextlib
import pytest
from pandas.compat import is_platform_windows
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
xlsxwriter = pytest.importorskip("xlsxwriter")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture
def ext():
return ".xlsx"
def test_column_format(ext):
# Test that column formats are applied to cells. Test for issue #9167.
# Applicable to xlsxwriter only.
openpyxl = pytest.importorskip("openpyxl")
with tm.ensure_clean(ext) as path:
frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})
with ExcelWriter(path) as writer:
frame.to_excel(writer)
# Add a number format to col B and ensure it is applied to cells.
num_format = "#,##0"
write_workbook = writer.book
write_worksheet = write_workbook.worksheets()[0]
col_format = write_workbook.add_format({"num_format": num_format})
write_worksheet.set_column("B:B", None, col_format)
with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook:
try:
read_worksheet = read_workbook["Sheet1"]
except TypeError:
# compat
read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")
# Get the number format from the cell.
try:
cell = read_worksheet["B2"]
except TypeError:
# compat
cell = read_worksheet.cell("B2")
try:
read_num_format = cell.number_format
except AttributeError:
read_num_format = cell.style.number_format._format_code
assert read_num_format == num_format
def test_write_append_mode_raises(ext):
msg = "Append mode is not supported with xlsxwriter!"
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=msg):
ExcelWriter(f, engine="xlsxwriter", mode="a")
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
# GH 42286
engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="xlsxwriter", engine_kwargs=engine_kwargs) as writer:
assert writer.book.nan_inf_to_errors == nan_inf_to_errors
def test_book_and_sheets_consistent(ext):
# GH#45687 - Ensure sheets is updated if user modifies book
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="xlsxwriter") as writer:
assert writer.sheets == {}
sheet = writer.book.add_worksheet("test_name")
assert writer.sheets == {"test_name": sheet}

View File

@ -0,0 +1,358 @@
import io
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
read_csv,
)
pytest.importorskip("jinja2")
def bar_grad(a=None, b=None, c=None, d=None):
"""Used in multiple tests to simplify formatting of expected result"""
ret = [("width", "10em")]
if all(x is None for x in [a, b, c, d]):
return ret
return ret + [
(
"background",
f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})",
)
]
def no_bar():
return bar_grad()
def bar_to(x, color="#d65f5f"):
return bar_grad(f" {color} {x:.1f}%", f" transparent {x:.1f}%")
def bar_from_to(x, y, color="#d65f5f"):
return bar_grad(
f" transparent {x:.1f}%",
f" {color} {x:.1f}%",
f" {color} {y:.1f}%",
f" transparent {y:.1f}%",
)
@pytest.fixture
def df_pos():
return DataFrame([[1], [2], [3]])
@pytest.fixture
def df_neg():
return DataFrame([[-1], [-2], [-3]])
@pytest.fixture
def df_mix():
return DataFrame([[-3], [1], [2]])
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(50), bar_to(100)]),
("right", [bar_to(100), bar_from_to(50, 100), no_bar()]),
("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]),
("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]),
("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]),
(2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
(np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
],
)
def test_align_positive_cases(df_pos, align, exp):
# test different align cases for all positive values
result = df_pos.style.bar(align=align)._compute().ctx
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
("left", [bar_to(100), bar_to(50), no_bar()]),
("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]),
("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]),
("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]),
("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]),
(-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
(np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
],
)
def test_align_negative_cases(df_neg, align, exp):
# test different align cases for all negative values
result = df_neg.style.bar(align=align)._compute().ctx
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(80), bar_to(100)]),
("right", [bar_to(100), bar_from_to(80, 100), no_bar()]),
("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]),
("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
(-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
(np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]),
],
)
@pytest.mark.parametrize("nans", [True, False])
def test_align_mixed_cases(df_mix, align, exp, nans):
# test different align cases for mixed positive and negative values
# also test no impact of NaNs and no_bar
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
if nans:
df_mix.loc[3, :] = np.nan
expected.update({(3, 0): no_bar()})
result = df_mix.style.bar(align=align)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
(
"left",
{
"index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]],
"columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]],
"none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]],
},
),
(
"mid",
{
"index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]],
"columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]],
"none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]],
},
),
(
"zero",
{
"index": [
[bar_from_to(50, 66.66), bar_from_to(50, 75)],
[bar_from_to(50, 100), bar_from_to(50, 100)],
],
"columns": [
[bar_from_to(50, 75), bar_from_to(50, 100)],
[bar_from_to(50, 87.5), bar_from_to(50, 100)],
],
"none": [
[bar_from_to(50, 62.5), bar_from_to(50, 75)],
[bar_from_to(50, 87.5), bar_from_to(50, 100)],
],
},
),
(
2,
{
"index": [
[bar_to(50), no_bar()],
[bar_from_to(50, 100), bar_from_to(50, 100)],
],
"columns": [
[bar_to(50), no_bar()],
[bar_from_to(50, 75), bar_from_to(50, 100)],
],
"none": [
[bar_from_to(25, 50), no_bar()],
[bar_from_to(50, 75), bar_from_to(50, 100)],
],
},
),
],
)
@pytest.mark.parametrize("axis", ["index", "columns", "none"])
def test_align_axis(align, exp, axis):
# test all axis combinations with positive values and different aligns
data = DataFrame([[1, 2], [3, 4]])
result = (
data.style.bar(align=align, axis=None if axis == "none" else axis)
._compute()
.ctx
)
expected = {
(0, 0): exp[axis][0][0],
(0, 1): exp[axis][0][1],
(1, 0): exp[axis][1][0],
(1, 1): exp[axis][1][1],
}
assert result == expected
@pytest.mark.parametrize(
"values, vmin, vmax",
[
("positive", 1.5, 2.5),
("negative", -2.5, -1.5),
("mixed", -2.5, 1.5),
],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
# test that clipping occurs if any vmin > data_values or vmax < data_values
if align == "mid": # mid acts as left or right in each case
if values == "positive":
align = "left"
elif values == "negative":
align = "right"
df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
vmin = None if nullify == "vmin" else vmin
vmax = None if nullify == "vmax" else vmax
clip_df = df.where(df <= (vmax if vmax else 999), other=vmax)
clip_df = clip_df.where(clip_df >= (vmin if vmin else -999), other=vmin)
result = (
df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
._compute()
.ctx
)
expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"values, vmin, vmax",
[
("positive", 0.5, 4.5),
("negative", -4.5, -0.5),
("mixed", -4.5, 4.5),
],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
# test that widening occurs if any vmax > data_values or vmin < data_values
if align == "mid": # mid acts as left or right in each case
if values == "positive":
align = "left"
elif values == "negative":
align = "right"
df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
vmin = None if nullify == "vmin" else vmin
vmax = None if nullify == "vmax" else vmax
expand_df = df.copy()
expand_df.loc[3, :], expand_df.loc[4, :] = vmin, vmax
result = (
df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
._compute()
.ctx
)
expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result.items() <= expected.items()
def test_numerics():
# test data is pre-selected for numeric values
data = DataFrame([[1, "a"], [2, "b"]])
result = data.style.bar()._compute().ctx
assert (0, 1) not in result
assert (1, 1) not in result
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(100, "green")]),
("right", [bar_to(100, "red"), no_bar()]),
("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]),
("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]),
],
)
def test_colors_mixed(align, exp):
data = DataFrame([[-1], [3]])
result = data.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result == {(0, 0): exp[0], (1, 0): exp[1]}
def test_bar_align_height():
# test when keyword height is used 'no-repeat center' and 'background-size' present
data = DataFrame([[1], [2]])
result = data.style.bar(align="left", height=50)._compute().ctx
bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center"
expected = {
(0, 0): [("width", "10em")],
(1, 0): [
("width", "10em"),
("background", bg_s),
("background-size", "100% 50.0%"),
],
}
assert result == expected
def test_bar_value_error_raises():
df = DataFrame({"A": [-100, -60, -30, -20]})
msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or"
with pytest.raises(ValueError, match=msg):
df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html()
msg = r"`width` must be a value in \[0, 100\]"
with pytest.raises(ValueError, match=msg):
df.style.bar(width=200).to_html()
msg = r"`height` must be a value in \[0, 100\]"
with pytest.raises(ValueError, match=msg):
df.style.bar(height=200).to_html()
def test_bar_color_and_cmap_error_raises():
df = DataFrame({"A": [1, 2, 3, 4]})
msg = "`color` and `cmap` cannot both be given"
# Test that providing both color and cmap raises a ValueError
with pytest.raises(ValueError, match=msg):
df.style.bar(color="#d65f5f", cmap="viridis").to_html()
def test_bar_invalid_color_type_error_raises():
df = DataFrame({"A": [1, 2, 3, 4]})
msg = (
r"`color` must be string or list or tuple of 2 strings,"
r"\(eg: color=\['#d65f5f', '#5fba7d'\]\)"
)
# Test that providing an invalid color type raises a ValueError
with pytest.raises(ValueError, match=msg):
df.style.bar(color=123).to_html()
# Test that providing a color list with more than two elements raises a ValueError
with pytest.raises(ValueError, match=msg):
df.style.bar(color=["#d65f5f", "#5fba7d", "#abcdef"]).to_html()
def test_styler_bar_with_NA_values():
df1 = DataFrame({"A": [1, 2, NA, 4]})
df2 = DataFrame([[NA, NA], [NA, NA]])
expected_substring = "style type="
html_output1 = df1.style.bar(subset="A").to_html()
html_output2 = df2.style.bar(align="left", axis=None).to_html()
assert expected_substring in html_output1
assert expected_substring in html_output2
def test_style_bar_with_pyarrow_NA_values():
data = """name,age,test1,test2,teacher
Adam,15,95.0,80,Ashby
Bob,16,81.0,82,Ashby
Dave,16,89.0,84,Jones
Fred,15,,88,Jones"""
df = read_csv(io.StringIO(data), dtype_backend="pyarrow")
expected_substring = "style type="
html_output = df.style.bar(subset="test1").to_html()
assert expected_substring in html_output

View File

@ -0,0 +1,44 @@
import pytest
jinja2 = pytest.importorskip("jinja2")
from pandas import (
DataFrame,
MultiIndex,
)
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
data=[[0, -0.609], [1, -1.228]],
columns=["A", "B"],
index=["x", "y"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_concat_bad_columns(styler):
msg = "`other.data` must have same columns as `Styler.data"
with pytest.raises(ValueError, match=msg):
styler.concat(DataFrame([[1, 2]]).style)
def test_concat_bad_type(styler):
msg = "`other` must be of type `Styler`"
with pytest.raises(TypeError, match=msg):
styler.concat(DataFrame([[1, 2]]))
def test_concat_bad_index_levels(styler, df):
df = df.copy()
df.index = MultiIndex.from_tuples([(0, 0), (1, 1)])
msg = "number of index levels must be same in `other`"
with pytest.raises(ValueError, match=msg):
styler.concat(df.style)

View File

@ -0,0 +1,562 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
MultiIndex,
NaT,
Timestamp,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape
@pytest.fixture
def df():
return DataFrame(
data=[[0, -0.609], [1, -1.228]],
columns=["A", "B"],
index=["x", "y"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.fixture
def df_multi():
return DataFrame(
data=np.arange(16).reshape(4, 4),
columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]),
)
@pytest.fixture
def styler_multi(df_multi):
return Styler(df_multi, uuid_len=0)
def test_display_format(styler):
ctx = styler.format("{:0.1f}")._translate(True, True)
assert all(["display_value" in c for c in row] for row in ctx["body"])
assert all([len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"])
assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_display_format_index(styler, index, columns):
exp_index = ["x", "y"]
if index:
styler.format_index(lambda v: v.upper(), axis=0) # test callable
exp_index = ["X", "Y"]
exp_columns = ["A", "B"]
if columns:
styler.format_index("*{}*", axis=1) # test string
exp_columns = ["*A*", "*B*"]
ctx = styler._translate(True, True)
for r, row in enumerate(ctx["body"]):
assert row[0]["display_value"] == exp_index[r]
for c, col in enumerate(ctx["head"][1:]):
assert col["display_value"] == exp_columns[c]
def test_format_dict(styler):
ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "0.0"
assert ctx["body"][0][2]["display_value"] == "-60.90%"
def test_format_index_dict(styler):
ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True)
for i, val in enumerate(["X", "Y"]):
assert ctx["body"][i][0]["display_value"] == val
def test_format_string(styler):
ctx = styler.format("{:.2f}")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "0.00"
assert ctx["body"][0][2]["display_value"] == "-0.61"
assert ctx["body"][1][1]["display_value"] == "1.00"
assert ctx["body"][1][2]["display_value"] == "-1.23"
def test_format_callable(styler):
ctx = styler.format(lambda v: "neg" if v < 0 else "pos")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "pos"
assert ctx["body"][0][2]["display_value"] == "neg"
assert ctx["body"][1][1]["display_value"] == "pos"
assert ctx["body"][1][2]["display_value"] == "neg"
def test_format_with_na_rep():
# GH 21527 28358
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "110.00%"
assert ctx["body"][1][2]["display_value"] == "120.00%"
ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "120.00%"
def test_format_index_with_na_rep():
df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA])
ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "A"
for i in [2, 3, 4, 5]:
assert ctx["head"][0][i]["display_value"] == "--"
def test_format_non_numeric_na():
# GH 21527 28358
df = DataFrame(
{
"object": [None, np.nan, "foo"],
"datetime": [None, NaT, Timestamp("20120101")],
}
)
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "-"
@pytest.mark.parametrize(
"func, attr, kwargs",
[
("format", "_display_funcs", {}),
("format_index", "_display_funcs_index", {"axis": 0}),
("format_index", "_display_funcs_columns", {"axis": 1}),
],
)
def test_format_clear(styler, func, attr, kwargs):
assert (0, 0) not in getattr(styler, attr) # using default
getattr(styler, func)("{:.2f}", **kwargs)
assert (0, 0) in getattr(styler, attr) # formatter is specified
getattr(styler, func)(**kwargs)
assert (0, 0) not in getattr(styler, attr) # formatter cleared to default
@pytest.mark.parametrize(
"escape, exp",
[
("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
(
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash \\space ",
),
],
)
def test_format_escape_html(escape, exp):
chars = '<>&"%$#_{}~^\\~ ^ \\ '
df = DataFrame([[chars]])
s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
assert expected in s.to_html()
# only the value should be escaped before passing to the formatter
s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
assert expected in s.to_html()
# also test format_index()
styler = Styler(DataFrame(columns=[chars]), uuid_len=0)
styler.format_index("&{0}&", escape=None, axis=1)
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&"
styler.format_index("&{0}&", escape=escape, axis=1)
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"
@pytest.mark.parametrize(
"chars, expected",
[
(
r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
"".join(
[
r"$ \$&%#_{}~^\ $ ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \space \$",
]
),
),
(
r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
"".join(
[
r"\( &%#_{}~^\ \) ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \space \textbackslash (",
]
),
),
(
r"$\&%#_{}^\$",
r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$",
),
(
r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
"".join(
[
r"$ \frac{1}{2} $",
r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
]
),
),
],
)
def test_format_escape_latex_math(chars, expected):
# GH 51903
# latex-math escape works for each DataFrame cell separately. If we have
# a combination of dollar signs and brackets, the dollar sign would apply.
df = DataFrame([[chars]])
s = df.style.format("{0}", escape="latex-math")
assert s._translate(True, True)["body"][0][1]["display_value"] == expected
def test_format_escape_na_rep():
# tests the na_rep is not escaped
df = DataFrame([['<>&"', None]])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
assert ex in s.to_html()
assert expected2 in s.to_html()
# also test for format_index()
df = DataFrame(columns=['<>&"', None])
styler = Styler(df, uuid_len=0)
styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1)
ctx = styler._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "X&&lt;&gt;&amp;&#34;>X"
assert ctx["head"][0][2]["display_value"] == "&"
def test_format_escape_floats(styler):
# test given formatter for number format is not impacted by escape
s = styler.format("{:.1f}", escape="html")
for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
assert expected in s.to_html()
# tests precision of floats is not impacted by escape
s = styler.format(precision=1, escape="html")
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
assert expected in s.to_html()
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
@pytest.mark.parametrize("func", ["format", "format_index"])
def test_format_raises(styler, formatter, func):
with pytest.raises(TypeError, match="expected str or callable"):
getattr(styler, func)(formatter)
@pytest.mark.parametrize(
"precision, expected",
[
(1, ["1.0", "2.0", "3.2", "4.6"]),
(2, ["1.00", "2.01", "3.21", "4.57"]),
(3, ["1.000", "2.009", "3.212", "4.566"]),
],
)
def test_format_with_precision(precision, expected):
# Issue #13257
df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566])
styler = Styler(df)
styler.format(precision=precision)
styler.format_index(precision=precision, axis=1)
ctx = styler._translate(True, True)
for col, exp in enumerate(expected):
assert ctx["body"][0][col + 1]["display_value"] == exp # format test
assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
"level, expected",
[
(0, ["X", "X", "_", "_"]), # level int
("zero", ["X", "X", "_", "_"]), # level name
(1, ["_", "_", "X", "X"]), # other level int
("one", ["_", "_", "X", "X"]), # other level name
([0, 1], ["X", "X", "X", "X"]), # both levels
([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous
([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name
(["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed
],
)
def test_format_index_level(axis, level, expected):
midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
df = DataFrame([[1, 2], [3, 4]])
if axis == 0:
df.index = midx
else:
df.columns = midx
styler = df.style.format_index(lambda v: "X", level=level, axis=axis)
ctx = styler._translate(True, True)
if axis == 0: # compare index
result = [ctx["body"][s][0]["display_value"] for s in range(2)]
result += [ctx["body"][s][1]["display_value"] for s in range(2)]
else: # compare columns
result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)]
result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)]
assert expected == result
def test_format_subset():
df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
ctx = df.style.format(
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :]
)._translate(True, True)
expected = "0.1"
raw_11 = "1.123400"
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
assert ctx["body"][0][2]["display_value"] == "12.34%"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][0][2]["display_value"] == "0.123400"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate(
True, True
)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == "1.1"
assert ctx["body"][0][2]["display_value"] == "0.123400"
assert ctx["body"][1][2]["display_value"] == raw_11
@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_thousands(formatter, decimal, precision, func, col):
styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style
result = getattr(styler, func)( # testing float
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
styler = DataFrame([[1000000]], index=[1000000]).style
result = getattr(styler, func)( # testing int
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style
result = getattr(styler, func)( # testing complex
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_decimal(formatter, thousands, precision, func, col):
styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style
result = getattr(styler, func)( # testing float
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][col]["display_value"]
styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style
result = getattr(styler, func)( # testing complex
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][col]["display_value"]
def test_str_escape_error():
msg = "`escape` only permitted in {'html', 'latex', 'latex-math'}, got "
with pytest.raises(ValueError, match=msg):
_str_escape("text", "bad_escape")
with pytest.raises(ValueError, match=msg):
_str_escape("text", [])
_str_escape(2.00, "bad_escape") # OK since dtype is float
def test_long_int_formatting():
df = DataFrame(data=[[1234567890123456789]], columns=["test"])
styler = df.style
ctx = styler._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "1234567890123456789"
styler = df.style.format(thousands="_")
ctx = styler._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "1_234_567_890_123_456_789"
def test_format_options():
df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]})
ctx = df.style._translate(True, True)
# test option: na_rep
assert ctx["body"][1][2]["display_value"] == "nan"
with option_context("styler.format.na_rep", "MISSING"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][2]["display_value"] == "MISSING"
# test option: decimal and precision
assert ctx["body"][0][2]["display_value"] == "1.009000"
with option_context("styler.format.decimal", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1_009000"
with option_context("styler.format.precision", 2):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1.01"
# test option: thousands
assert ctx["body"][0][1]["display_value"] == "2000"
with option_context("styler.format.thousands", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2_000"
# test option: escape
assert ctx["body"][0][3]["display_value"] == "&<"
assert ctx["body"][1][3]["display_value"] == "&~"
with option_context("styler.format.escape", "html"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][3]["display_value"] == "&amp;&lt;"
with option_context("styler.format.escape", "latex"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
with option_context("styler.format.escape", "latex-math"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
# test option: formatter
with option_context("styler.format.formatter", {"int": "{:,.2f}"}):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00"
def test_precision_zero(df):
styler = Styler(df, precision=0)
ctx = styler._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-1"
assert ctx["body"][1][2]["display_value"] == "-1"
@pytest.mark.parametrize(
"formatter, exp",
[
(lambda x: f"{x:.3f}", "9.000"),
("{:.2f}", "9.00"),
({0: "{:.1f}"}, "9.0"),
(None, "9"),
],
)
def test_formatter_options_validator(formatter, exp):
df = DataFrame([[9]])
with option_context("styler.format.formatter", formatter):
assert f" {exp} " in df.style.to_latex()
def test_formatter_options_raises():
msg = "Value must be an instance of"
with pytest.raises(ValueError, match=msg):
with option_context("styler.format.formatter", ["bad", "type"]):
DataFrame().style.to_latex()
def test_1level_multiindex():
# GH 43383
midx = MultiIndex.from_product([[1, 2]], names=[""])
df = DataFrame(-1, index=midx, columns=[0, 1])
ctx = df.style._translate(True, True)
assert ctx["body"][0][0]["display_value"] == "1"
assert ctx["body"][0][0]["is_visible"] is True
assert ctx["body"][1][0]["display_value"] == "2"
assert ctx["body"][1][0]["is_visible"] is True
def test_boolean_format():
# gh 46384: booleans do not collapse to integer representation on display
df = DataFrame([[True, False]])
ctx = df.style._translate(True, True)
assert ctx["body"][0][1]["display_value"] is True
assert ctx["body"][0][2]["display_value"] is False
@pytest.mark.parametrize(
"hide, labels",
[
(False, [1, 2]),
(True, [1, 2, 3, 4]),
],
)
def test_relabel_raise_length(styler_multi, hide, labels):
if hide:
styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")])
with pytest.raises(ValueError, match="``labels`` must be of length equal"):
styler_multi.relabel_index(labels=labels)
def test_relabel_index(styler_multi):
labels = [(1, 2), (3, 4)]
styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")])
styler_multi.relabel_index(labels=labels)
ctx = styler_multi._translate(True, True)
assert {"value": "X", "display_value": 1}.items() <= ctx["body"][0][0].items()
assert {"value": "y", "display_value": 2}.items() <= ctx["body"][0][1].items()
assert {"value": "Y", "display_value": 3}.items() <= ctx["body"][1][0].items()
assert {"value": "x", "display_value": 4}.items() <= ctx["body"][1][1].items()
def test_relabel_columns(styler_multi):
labels = [(1, 2), (3, 4)]
styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")])
styler_multi.relabel_index(axis=1, labels=labels)
ctx = styler_multi._translate(True, True)
assert {"value": "A", "display_value": 1}.items() <= ctx["head"][0][3].items()
assert {"value": "B", "display_value": 3}.items() <= ctx["head"][0][4].items()
assert {"value": "b", "display_value": 2}.items() <= ctx["head"][1][3].items()
assert {"value": "a", "display_value": 4}.items() <= ctx["head"][1][4].items()
def test_relabel_roundtrip(styler):
styler.relabel_index(["{}", "{}"])
ctx = styler._translate(True, True)
assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items()
assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items()

View File

@ -0,0 +1,218 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture(params=[(None, "float64"), (NA, "Int64")])
def df(request):
# GH 45804
return DataFrame(
{"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1]
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_highlight_null(styler):
result = styler.highlight_null()._compute().ctx
expected = {
(1, 0): [("background-color", "red")],
(1, 1): [("background-color", "red")],
}
assert result == expected
def test_highlight_null_subset(styler):
# GH 31345
result = (
styler.highlight_null(color="red", subset=["A"])
.highlight_null(color="green", subset=["B"])
._compute()
.ctx
)
expected = {
(1, 0): [("background-color", "red")],
(1, 1): [("background-color", "green")],
}
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
def test_highlight_minmax_basic(df, f):
expected = {
(0, 1): [("background-color", "red")],
# ignores NaN row,
(2, 0): [("background-color", "red")],
}
if f == "highlight_min":
df = -df
result = getattr(df.style, f)(axis=1, color="red")._compute().ctx
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize(
"kwargs",
[
{"axis": None, "color": "red"}, # test axis
{"axis": 0, "subset": ["A"], "color": "red"}, # test subset and ignores NaN
{"axis": None, "props": "background-color: red"}, # test props
],
)
def test_highlight_minmax_ext(df, f, kwargs):
expected = {(2, 0): [("background-color", "red")]}
if f == "highlight_min":
df = -df
result = getattr(df.style, f)(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_highlight_minmax_nulls(f, axis):
# GH 42750
expected = {
(1, 0): [("background-color", "yellow")],
(1, 1): [("background-color", "yellow")],
}
if axis == 1:
expected.update({(2, 1): [("background-color", "yellow")]})
if f == "highlight_max":
df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]})
else:
df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]})
result = getattr(df.style, f)(axis=axis)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"kwargs",
[
{"left": 0, "right": 1}, # test basic range
{"left": 0, "right": 1, "props": "background-color: yellow"}, # test props
{"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]}, # test subset
{"left": 0, "subset": IndexSlice[[0, 1], :]}, # test no right
{"right": 1}, # test no left
{"left": [0, 0, 11], "axis": 0}, # test left as sequence
{"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None}, # axis
{"left": 0, "right": [0, 1], "axis": 1}, # test sequence right
],
)
def test_highlight_between(styler, kwargs):
expected = {
(0, 0): [("background-color", "yellow")],
(0, 1): [("background-color", "yellow")],
}
result = styler.highlight_between(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"arg, map, axis",
[
("left", [1, 2], 0), # 0 axis has 3 elements not 2
("left", [1, 2, 3], 1), # 1 axis has 2 elements not 3
("left", np.array([[1, 2], [1, 2]]), None), # df is (2,3) not (2,2)
("right", [1, 2], 0), # same tests as above for 'right' not 'left'
("right", [1, 2, 3], 1), # ..
("right", np.array([[1, 2], [1, 2]]), None), # ..
],
)
def test_highlight_between_raises(arg, styler, map, axis):
msg = f"supplied '{arg}' is not correct shape"
with pytest.raises(ValueError, match=msg):
styler.highlight_between(**{arg: map, "axis": axis})._compute()
def test_highlight_between_raises2(styler):
msg = "values can be 'both', 'left', 'right', or 'neither'"
with pytest.raises(ValueError, match=msg):
styler.highlight_between(inclusive="badstring")._compute()
with pytest.raises(ValueError, match=msg):
styler.highlight_between(inclusive=1)._compute()
@pytest.mark.parametrize(
"inclusive, expected",
[
(
"both",
{
(0, 0): [("background-color", "yellow")],
(0, 1): [("background-color", "yellow")],
},
),
("neither", {}),
("left", {(0, 0): [("background-color", "yellow")]}),
("right", {(0, 1): [("background-color", "yellow")]}),
],
)
def test_highlight_between_inclusive(styler, inclusive, expected):
kwargs = {"left": 0, "right": 1, "subset": IndexSlice[[0, 1], :]}
result = styler.highlight_between(**kwargs, inclusive=inclusive)._compute()
assert result.ctx == expected
@pytest.mark.parametrize(
"kwargs",
[
{"q_left": 0.5, "q_right": 1, "axis": 0}, # base case
{"q_left": 0.5, "q_right": 1, "axis": None}, # test axis
{"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]}, # test subset
{"q_left": 0.5, "axis": 0}, # test no high
{"q_right": 1, "subset": IndexSlice[2, :], "axis": 1}, # test no low
{"q_left": 0.5, "axis": 0, "props": "background-color: yellow"}, # tst prop
],
)
def test_highlight_quantile(styler, kwargs):
expected = {
(2, 0): [("background-color", "yellow")],
(2, 1): [("background-color", "yellow")],
}
result = styler.highlight_quantile(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"f,kwargs",
[
("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}),
("highlight_max", {"axis": 0, "subset": [0]}),
("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}),
("highlight_between", {"subset": [0]}),
],
)
@pytest.mark.parametrize(
"df",
[
DataFrame([[0, 10], [20, 30]], dtype=int),
DataFrame([[0, 10], [20, 30]], dtype=float),
DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"),
DataFrame([[0, 10], [20, 30]], dtype=str),
DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"),
],
)
def test_all_highlight_dtypes(f, kwargs, df):
if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)):
return None # quantile incompatible with str
if f == "highlight_between":
kwargs["left"] = df.iloc[1, 0] # set the range low for testing
expected = {(1, 0): [("background-color", "yellow")]}
result = getattr(df.style, f)(**kwargs)._compute().ctx
assert result == expected

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,335 @@
import gc
import numpy as np
import pytest
from pandas import (
DataFrame,
IndexSlice,
Series,
)
pytest.importorskip("matplotlib")
pytest.importorskip("jinja2")
import matplotlib as mpl
from pandas.io.formats.style import Styler
@pytest.fixture(autouse=True)
def mpl_cleanup():
# matplotlib/testing/decorators.py#L24
# 1) Resets units registry
# 2) Resets rc_context
# 3) Closes all figures
mpl = pytest.importorskip("matplotlib")
mpl_units = pytest.importorskip("matplotlib.units")
plt = pytest.importorskip("matplotlib.pyplot")
orig_units_registry = mpl_units.registry.copy()
with mpl.rc_context():
mpl.use("template")
yield
mpl_units.registry.clear()
mpl_units.registry.update(orig_units_registry)
plt.close("all")
# https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501
gc.collect(1)
@pytest.fixture
def df():
return DataFrame([[1, 2], [2, 4]], columns=["A", "B"])
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.fixture
def df_blank():
return DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"])
@pytest.fixture
def styler_blank(df_blank):
return Styler(df_blank, uuid_len=0)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_function_gradient(styler, f):
for c_map in [None, "YlOrRd"]:
result = getattr(styler, f)(cmap=c_map)._compute().ctx
assert all("#" in x[0][1] for x in result.values())
assert result[(0, 0)] == result[(0, 1)]
assert result[(1, 0)] == result[(1, 1)]
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_color(styler, f):
result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx
if f == "background_gradient":
assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")]
elif f == "text_gradient":
assert result[(1, 0)] == [("color", "#fff7fb")]
@pytest.mark.parametrize(
"axis, expected",
[
(0, ["low", "low", "high", "high"]),
(1, ["low", "high", "low", "high"]),
(None, ["low", "mid", "mid", "high"]),
],
)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_axis(styler, axis, expected, f):
if f == "background_gradient":
colors = {
"low": [("background-color", "#f7fbff"), ("color", "#000000")],
"mid": [("background-color", "#abd0e6"), ("color", "#000000")],
"high": [("background-color", "#08306b"), ("color", "#f1f1f1")],
}
elif f == "text_gradient":
colors = {
"low": [("color", "#f7fbff")],
"mid": [("color", "#abd0e6")],
"high": [("color", "#08306b")],
}
result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx
for i, cell in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]):
assert result[cell] == colors[expected[i]]
@pytest.mark.parametrize(
"cmap, expected",
[
(
"PuBu",
{
(4, 5): [("background-color", "#86b0d3"), ("color", "#000000")],
(4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")],
},
),
(
"YlOrRd",
{
(4, 8): [("background-color", "#fd913e"), ("color", "#000000")],
(4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")],
},
),
(
None,
{
(7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")],
(7, 1): [("background-color", "#4cc26c"), ("color", "#000000")],
},
),
],
)
def test_text_color_threshold(cmap, expected):
# GH 39888
df = DataFrame(np.arange(100).reshape(10, 10))
result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx
for k in expected.keys():
assert result[k] == expected[k]
def test_background_gradient_vmin_vmax():
# GH 12145
df = DataFrame(range(5))
ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx
assert ctx[(0, 0)] == ctx[(1, 0)]
assert ctx[(4, 0)] == ctx[(3, 0)]
def test_background_gradient_int64():
# GH 28869
df1 = Series(range(3)).to_frame()
df2 = Series(range(3), dtype="Int64").to_frame()
ctx1 = df1.style.background_gradient()._compute().ctx
ctx2 = df2.style.background_gradient()._compute().ctx
assert ctx2[(0, 0)] == ctx1[(0, 0)]
assert ctx2[(1, 0)] == ctx1[(1, 0)]
assert ctx2[(2, 0)] == ctx1[(2, 0)]
@pytest.mark.parametrize(
"axis, gmap, expected",
[
(
0,
[1, 2],
{
(0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
(0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
(
1,
[1, 2],
{
(0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
(
None,
np.array([[2, 1], [1, 2]]),
{
(0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
(1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
],
)
def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected):
# tests when gmap is given as a sequence and converted to ndarray
result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)]
)
def test_background_gradient_gmap_array_raises(gmap, axis):
# test when gmap as converted ndarray is bad shape
df = DataFrame([[0, 0, 0], [0, 0, 0]])
msg = "supplied 'gmap' is not correct shape"
with pytest.raises(ValueError, match=msg):
df.style.background_gradient(gmap=gmap, axis=axis)._compute()
@pytest.mark.parametrize(
"gmap",
[
DataFrame( # reverse the columns
[[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"]
),
DataFrame( # reverse the index
[[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"]
),
DataFrame( # reverse the index and columns
[[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"]
),
DataFrame( # add unnecessary columns
[[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"]
),
DataFrame( # add unnecessary index
[[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"]
),
],
)
@pytest.mark.parametrize(
"subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to
[
(None, [[1, 2], [2, 1]]),
(["A"], [[1], [2]]), # slice only column "A" in data and gmap
(["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data
(IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap
(IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data
],
)
def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap):
# test gmap given as DataFrame that it aligns to the data including subset
expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset)
result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset)
assert expected._compute().ctx == result._compute().ctx
@pytest.mark.parametrize(
"gmap, axis, exp_gmap",
[
(Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # revrse the index
(Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # revrse the cols
(Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx
(Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col
],
)
def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap):
# test gmap given as Series that it aligns to the data including subset
expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute()
result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute()
assert expected.ctx == result.ctx
@pytest.mark.parametrize(
"gmap, axis",
[
(DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1),
(DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0),
],
)
def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis):
# test giving a gmap in DataFrame but with wrong axis
msg = "'gmap' is a DataFrame but underlying data for operations is a Series"
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=axis)._compute()
def test_background_gradient_gmap_wrong_series(styler_blank):
# test giving a gmap in Series form but with wrong axis
msg = "'gmap' is a Series but underlying data for operations is a DataFrame"
gmap = Series([1, 2], index=["X", "Y"])
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=None)._compute()
def test_background_gradient_nullable_dtypes():
# GH 50712
df1 = DataFrame([[1], [0], [np.nan]], dtype=float)
df2 = DataFrame([[1], [0], [None]], dtype="Int64")
ctx1 = df1.style.background_gradient()._compute().ctx
ctx2 = df2.style.background_gradient()._compute().ctx
assert ctx1 == ctx2
@pytest.mark.parametrize(
"cmap",
["PuBu", mpl.colormaps["PuBu"]],
)
def test_bar_colormap(cmap):
data = DataFrame([[1, 2], [3, 4]])
ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx
pubu_colors = {
(0, 0): "#d0d1e6",
(1, 0): "#056faf",
(0, 1): "#73a9cf",
(1, 1): "#023858",
}
for k, v in pubu_colors.items():
assert v in ctx[k][1][1]
def test_bar_color_raises(df):
msg = "`color` must be string or list or tuple of 2 strings"
with pytest.raises(ValueError, match=msg):
df.style.bar(color={"a", "b"}).to_html()
with pytest.raises(ValueError, match=msg):
df.style.bar(color=["a", "b", "c"]).to_html()
msg = "`color` and `cmap` cannot both be given"
with pytest.raises(ValueError, match=msg):
df.style.bar(color="something", cmap="something else").to_html()
@pytest.mark.parametrize(
"plot_method",
["scatter", "hexbin"],
)
def test_pass_colormap_instance(df, plot_method):
# https://github.com/pandas-dev/pandas/issues/49374
cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]])
df["c"] = df.A + df.B
kwargs = {"x": "A", "y": "B", "c": "c", "colormap": cmap}
if plot_method == "hexbin":
kwargs["C"] = kwargs.pop("c")
getattr(df.plot, plot_method)(**kwargs)

View File

@ -0,0 +1,140 @@
from textwrap import dedent
import pytest
from pandas import (
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["i", "j", "j"],
columns=["c", "d", "d"],
dtype=float,
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_format_non_unique(df):
# GH 41269
# test dict
html = df.style.format({"d": "{:.1f}"}).to_html()
for val in ["1.000000<", "4.000000<", "7.000000<"]:
assert val in html
for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]:
assert val in html
# test subset
html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html()
for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]:
assert val in html
for val in ["5.0<", "6.0<", "8.0<", "9.0<"]:
assert val in html
@pytest.mark.parametrize("func", ["apply", "map"])
def test_apply_map_non_unique_raises(df, func):
# GH 41269
if func == "apply":
op = lambda s: ["color: red;"] * len(s)
else:
op = lambda v: "color: red;"
with pytest.raises(KeyError, match="`Styler.apply` and `.map` are not"):
getattr(df.style, func)(op)._compute()
def test_table_styles_dict_non_unique_index(styler):
styles = styler.set_table_styles(
{"j": [{"selector": "td", "props": "a: v;"}]}, axis=1
).table_styles
assert styles == [
{"selector": "td.row1", "props": [("a", "v")]},
{"selector": "td.row2", "props": [("a", "v")]},
]
def test_table_styles_dict_non_unique_columns(styler):
styles = styler.set_table_styles(
{"d": [{"selector": "td", "props": "a: v;"}]}, axis=0
).table_styles
assert styles == [
{"selector": "td.col1", "props": [("a", "v")]},
{"selector": "td.col2", "props": [("a", "v")]},
]
def test_tooltips_non_unique_raises(styler):
# ttips has unique keys
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"])
styler.set_tooltips(ttips=ttips) # OK
# ttips has non-unique columns
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"])
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"):
styler.set_tooltips(ttips=ttips)
# ttips has non-unique index
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"])
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"):
styler.set_tooltips(ttips=ttips)
def test_set_td_classes_non_unique_raises(styler):
# classes has unique keys
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"])
styler.set_td_classes(classes=classes) # OK
# classes has non-unique columns
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"])
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"):
styler.set_td_classes(classes=classes)
# classes has non-unique index
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"])
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"):
styler.set_td_classes(classes=classes)
def test_hide_columns_non_unique(styler):
ctx = styler.hide(["d"], axis="columns")._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "c"
assert ctx["head"][0][1]["is_visible"] is True
assert ctx["head"][0][2]["display_value"] == "d"
assert ctx["head"][0][2]["is_visible"] is False
assert ctx["head"][0][3]["display_value"] == "d"
assert ctx["head"][0][3]["is_visible"] is False
assert ctx["body"][0][1]["is_visible"] is True
assert ctx["body"][0][2]["is_visible"] is False
assert ctx["body"][0][3]["is_visible"] is False
def test_latex_non_unique(styler):
result = styler.to_latex()
assert result == dedent(
"""\
\\begin{tabular}{lrrr}
& c & d & d \\\\
i & 1.000000 & 2.000000 & 3.000000 \\\\
j & 4.000000 & 5.000000 & 6.000000 \\\\
j & 7.000000 & 8.000000 & 9.000000 \\\\
\\end{tabular}
"""
)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,96 @@
from textwrap import dedent
import pytest
from pandas import (
DataFrame,
Series,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
{"A": [0, 1], "B": [-0.61, -1.22], "C": Series(["ab", "cd"], dtype=object)}
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0, precision=2)
def test_basic_string(styler):
result = styler.to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
"""
)
assert result == expected
def test_string_delimiter(styler):
result = styler.to_string(delimiter=";")
expected = dedent(
"""\
;A;B;C
0;0;-0.61;ab
1;1;-1.22;cd
"""
)
assert result == expected
def test_concat(styler):
result = styler.concat(styler.data.agg(["sum"]).style).to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
sum 1 -1.830000 abcd
"""
)
assert result == expected
def test_concat_recursion(styler):
df = styler.data
styler1 = styler
styler2 = Styler(df.agg(["sum"]), uuid_len=0, precision=3)
styler3 = Styler(df.agg(["sum"]), uuid_len=0, precision=4)
result = styler1.concat(styler2.concat(styler3)).to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
sum 1 -1.830 abcd
sum 1 -1.8300 abcd
"""
)
assert result == expected
def test_concat_chain(styler):
df = styler.data
styler1 = styler
styler2 = Styler(df.agg(["sum"]), uuid_len=0, precision=3)
styler3 = Styler(df.agg(["sum"]), uuid_len=0, precision=4)
result = styler1.concat(styler2).concat(styler3).to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
sum 1 -1.830 abcd
sum 1 -1.8300 abcd
"""
)
assert result == expected

View File

@ -0,0 +1,85 @@
import numpy as np
import pytest
from pandas import DataFrame
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
columns=["A", "B", "C"],
index=["x", "y", "z"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.mark.parametrize(
"ttips",
[
DataFrame( # Test basic reindex and ignoring blank
data=[["Min", "Max"], [np.nan, ""]],
columns=["A", "C"],
index=["x", "y"],
),
DataFrame( # Test non-referenced columns, reversed col names, short index
data=[["Max", "Min", "Bad-Col"]], columns=["C", "A", "D"], index=["x"]
),
],
)
def test_tooltip_render(ttips, styler):
# GH 21266
result = styler.set_tooltips(ttips).to_html()
# test tooltip table level class
assert "#T_ .pd-t {\n visibility: hidden;\n" in result
# test 'Min' tooltip added
assert "#T_ #T__row0_col0:hover .pd-t {\n visibility: visible;\n}" in result
assert '#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}' in result
assert 'class="data row0 col0" >0<span class="pd-t"></span></td>' in result
# test 'Max' tooltip added
assert "#T_ #T__row0_col2:hover .pd-t {\n visibility: visible;\n}" in result
assert '#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}' in result
assert 'class="data row0 col2" >2<span class="pd-t"></span></td>' in result
# test Nan, empty string and bad column ignored
assert "#T_ #T__row1_col0:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row1_col1:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row0_col1:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row1_col2:hover .pd-t {\n visibility: visible;\n}" not in result
assert "Bad-Col" not in result
def test_tooltip_ignored(styler):
# GH 21266
result = styler.to_html() # no set_tooltips() creates no <span>
assert '<style type="text/css">\n</style>' in result
assert '<span class="pd-t"></span>' not in result
def test_tooltip_css_class(styler):
# GH 21266
result = styler.set_tooltips(
DataFrame([["tooltip"]], index=["x"], columns=["A"]),
css_class="other-class",
props=[("color", "green")],
).to_html()
assert "#T_ .other-class {\n color: green;\n" in result
assert '#T_ #T__row0_col0 .other-class::after {\n content: "tooltip";\n' in result
# GH 39563
result = styler.set_tooltips( # set_tooltips overwrites previous
DataFrame([["tooltip"]], index=["x"], columns=["A"]),
css_class="another-class",
props="color:green;color:red;",
).to_html()
assert "#T_ .another-class {\n color: green;\n color: red;\n}" in result

View File

@ -0,0 +1,72 @@
import locale
import pytest
from pandas._config import detect_console_encoding
class MockEncoding:
"""
Used to add a side effect when accessing the 'encoding' property. If the
side effect is a str in nature, the value will be returned. Otherwise, the
side effect should be an exception that will be raised.
"""
def __init__(self, encoding) -> None:
super().__init__()
self.val = encoding
@property
def encoding(self):
return self.raise_or_return(self.val)
@staticmethod
def raise_or_return(val):
if isinstance(val, str):
return val
else:
raise val
@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
# Ensures that when sys.stdout.encoding or sys.stdin.encoding is used when
# they have values filled.
# GH 21552
with monkeypatch.context() as context:
context.setattr(f"sys.{empty}", MockEncoding(""))
context.setattr(f"sys.{filled}", MockEncoding(filled))
assert detect_console_encoding() == filled
@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
# GH 21552
with monkeypatch.context() as context:
context.setattr("locale.getpreferredencoding", lambda: "foo")
context.setattr("sys.stdout", MockEncoding(encoding))
assert detect_console_encoding() == "foo"
@pytest.mark.parametrize(
"std,locale",
[
["ascii", "ascii"],
["ascii", locale.Error],
[AttributeError, "ascii"],
[AttributeError, locale.Error],
[OSError, "ascii"],
[OSError, locale.Error],
],
)
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
# When both the stdout/stdin encoding and locale preferred encoding checks
# fail (or return 'ascii', we should default to the sys default encoding.
# GH 21552
with monkeypatch.context() as context:
context.setattr(
"locale.getpreferredencoding", lambda: MockEncoding.raise_or_return(locale)
)
context.setattr("sys.stdout", MockEncoding(std))
context.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding")
assert detect_console_encoding() == "sysDefaultEncoding"

View File

@ -0,0 +1,289 @@
import pytest
from pandas.errors import CSSWarning
import pandas._testing as tm
from pandas.io.formats.css import CSSResolver
def assert_resolves(css, props, inherited=None):
resolve = CSSResolver()
actual = resolve(css, inherited=inherited)
assert props == actual
def assert_same_resolution(css1, css2, inherited=None):
resolve = CSSResolver()
resolved1 = resolve(css1, inherited=inherited)
resolved2 = resolve(css2, inherited=inherited)
assert resolved1 == resolved2
@pytest.mark.parametrize(
"name,norm,abnorm",
[
(
"whitespace",
"hello: world; foo: bar",
" \t hello \t :\n world \n ; \n foo: \tbar\n\n",
),
("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"),
("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"),
("empty-list", "", ";"),
],
)
def test_css_parse_normalisation(name, norm, abnorm):
assert_same_resolution(norm, abnorm)
@pytest.mark.parametrize(
"invalid_css,remainder",
[
# No colon
("hello-world", ""),
("border-style: solid; hello-world", "border-style: solid"),
(
"border-style: solid; hello-world; font-weight: bold",
"border-style: solid; font-weight: bold",
),
# Unclosed string fail
# Invalid size
("font-size: blah", "font-size: 1em"),
("font-size: 1a2b", "font-size: 1em"),
("font-size: 1e5pt", "font-size: 1em"),
("font-size: 1+6pt", "font-size: 1em"),
("font-size: 1unknownunit", "font-size: 1em"),
("font-size: 10", "font-size: 1em"),
("font-size: 10 pt", "font-size: 1em"),
# Too many args
("border-top: 1pt solid red green", "border-top: 1pt solid green"),
],
)
def test_css_parse_invalid(invalid_css, remainder):
with tm.assert_produces_warning(CSSWarning):
assert_same_resolution(invalid_css, remainder)
@pytest.mark.parametrize(
"shorthand,expansions",
[
("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]),
("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]),
(
"border-width",
[
"border-top-width",
"border-right-width",
"border-bottom-width",
"border-left-width",
],
),
(
"border-color",
[
"border-top-color",
"border-right-color",
"border-bottom-color",
"border-left-color",
],
),
(
"border-style",
[
"border-top-style",
"border-right-style",
"border-bottom-style",
"border-left-style",
],
),
],
)
def test_css_side_shorthands(shorthand, expansions):
top, right, bottom, left = expansions
assert_resolves(
f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"}
)
assert_resolves(
f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"}
)
assert_resolves(
f"{shorthand}: 1pt 4pt 2pt",
{top: "1pt", right: "4pt", bottom: "2pt", left: "4pt"},
)
assert_resolves(
f"{shorthand}: 1pt 4pt 2pt 0pt",
{top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"},
)
with tm.assert_produces_warning(CSSWarning):
assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {})
@pytest.mark.parametrize(
"shorthand,sides",
[
("border-top", ["top"]),
("border-right", ["right"]),
("border-bottom", ["bottom"]),
("border-left", ["left"]),
("border", ["top", "right", "bottom", "left"]),
],
)
def test_css_border_shorthand_sides(shorthand, sides):
def create_border_dict(sides, color=None, style=None, width=None):
resolved = {}
for side in sides:
if color:
resolved[f"border-{side}-color"] = color
if style:
resolved[f"border-{side}-style"] = style
if width:
resolved[f"border-{side}-width"] = width
return resolved
assert_resolves(
f"{shorthand}: 1pt red solid", create_border_dict(sides, "red", "solid", "1pt")
)
@pytest.mark.parametrize(
"prop, expected",
[
("1pt red solid", ("red", "solid", "1pt")),
("red 1pt solid", ("red", "solid", "1pt")),
("red solid 1pt", ("red", "solid", "1pt")),
("solid 1pt red", ("red", "solid", "1pt")),
("red solid", ("red", "solid", "1.500000pt")),
# Note: color=black is not CSS conforming
# (See https://drafts.csswg.org/css-backgrounds/#border-shorthands)
("1pt solid", ("black", "solid", "1pt")),
("1pt red", ("red", "none", "1pt")),
("red", ("red", "none", "1.500000pt")),
("1pt", ("black", "none", "1pt")),
("solid", ("black", "solid", "1.500000pt")),
# Sizes
("1em", ("black", "none", "12pt")),
],
)
def test_css_border_shorthands(prop, expected):
color, style, width = expected
assert_resolves(
f"border-left: {prop}",
{
"border-left-color": color,
"border-left-style": style,
"border-left-width": width,
},
)
@pytest.mark.parametrize(
"style,inherited,equiv",
[
("margin: 1px; margin: 2px", "", "margin: 2px"),
("margin: 1px", "margin: 2px", "margin: 1px"),
("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"),
(
"margin: 1px; margin-top: 2px",
"",
"margin-left: 1px; margin-right: 1px; "
"margin-bottom: 1px; margin-top: 2px",
),
("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"),
("margin: 1px", "margin-top: 2px", "margin: 1px"),
(
"margin: 1px; margin-top: inherit",
"margin: 2px",
"margin: 1px; margin-top: 2px",
),
],
)
def test_css_precedence(style, inherited, equiv):
resolve = CSSResolver()
inherited_props = resolve(inherited)
style_props = resolve(style, inherited=inherited_props)
equiv_props = resolve(equiv)
assert style_props == equiv_props
@pytest.mark.parametrize(
"style,equiv",
[
(
"margin: 1px; margin-top: inherit",
"margin-bottom: 1px; margin-right: 1px; margin-left: 1px",
),
("margin-top: inherit", ""),
("margin-top: initial", ""),
],
)
def test_css_none_absent(style, equiv):
assert_same_resolution(style, equiv)
@pytest.mark.parametrize(
"size,resolved",
[
("xx-small", "6pt"),
("x-small", f"{7.5:f}pt"),
("small", f"{9.6:f}pt"),
("medium", "12pt"),
("large", f"{13.5:f}pt"),
("x-large", "18pt"),
("xx-large", "24pt"),
("8px", "6pt"),
("1.25pc", "15pt"),
(".25in", "18pt"),
("02.54cm", "72pt"),
("25.4mm", "72pt"),
("101.6q", "72pt"),
("101.6q", "72pt"),
],
)
@pytest.mark.parametrize("relative_to", [None, "16pt"]) # invariant to inherited size
def test_css_absolute_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)
@pytest.mark.parametrize(
"size,relative_to,resolved",
[
("1em", None, "12pt"),
("1.0em", None, "12pt"),
("1.25em", None, "15pt"),
("1em", "16pt", "16pt"),
("1.0em", "16pt", "16pt"),
("1.25em", "16pt", "20pt"),
("1rem", "16pt", "12pt"),
("1.0rem", "16pt", "12pt"),
("1.25rem", "16pt", "15pt"),
("100%", None, "12pt"),
("125%", None, "15pt"),
("100%", "16pt", "16pt"),
("125%", "16pt", "20pt"),
("2ex", None, "12pt"),
("2.0ex", None, "12pt"),
("2.50ex", None, "15pt"),
("inherit", "16pt", "16pt"),
("smaller", None, "10pt"),
("smaller", "18pt", "15pt"),
("larger", None, f"{14.4:f}pt"),
("larger", "15pt", "18pt"),
],
)
def test_css_relative_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)

View File

@ -0,0 +1,254 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
reset_option,
set_eng_float_format,
)
from pandas.io.formats.format import EngFormatter
@pytest.fixture(autouse=True)
def reset_float_format():
yield
reset_option("display.float_format")
class TestEngFormatter:
def test_eng_float_formatter2(self, float_frame):
df = float_frame
df.loc[5] = 0
set_eng_float_format()
repr(df)
set_eng_float_format(use_eng_prefix=True)
repr(df)
set_eng_float_format(accuracy=0)
repr(df)
def test_eng_float_formatter(self):
df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})
set_eng_float_format()
result = df.to_string()
expected = (
" A\n"
"0 1.410E+00\n"
"1 141.000E+00\n"
"2 14.100E+03\n"
"3 1.410E+06"
)
assert result == expected
set_eng_float_format(use_eng_prefix=True)
result = df.to_string()
expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M"
assert result == expected
set_eng_float_format(accuracy=0)
result = df.to_string()
expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06"
assert result == expected
def compare(self, formatter, input, output):
formatted_input = formatter(input)
assert formatted_input == output
def compare_all(self, formatter, in_out):
"""
Parameters:
-----------
formatter: EngFormatter under test
in_out: list of tuples. Each tuple = (number, expected_formatting)
It is tested if 'formatter(number) == expected_formatting'.
*number* should be >= 0 because formatter(-number) == fmt is also
tested. *fmt* is derived from *expected_formatting*
"""
for input, output in in_out:
self.compare(formatter, input, output)
self.compare(formatter, -input, "-" + output[1:])
def test_exponents_with_eng_prefix(self):
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
f = np.sqrt(2)
in_out = [
(f * 10**-24, " 1.414y"),
(f * 10**-23, " 14.142y"),
(f * 10**-22, " 141.421y"),
(f * 10**-21, " 1.414z"),
(f * 10**-20, " 14.142z"),
(f * 10**-19, " 141.421z"),
(f * 10**-18, " 1.414a"),
(f * 10**-17, " 14.142a"),
(f * 10**-16, " 141.421a"),
(f * 10**-15, " 1.414f"),
(f * 10**-14, " 14.142f"),
(f * 10**-13, " 141.421f"),
(f * 10**-12, " 1.414p"),
(f * 10**-11, " 14.142p"),
(f * 10**-10, " 141.421p"),
(f * 10**-9, " 1.414n"),
(f * 10**-8, " 14.142n"),
(f * 10**-7, " 141.421n"),
(f * 10**-6, " 1.414u"),
(f * 10**-5, " 14.142u"),
(f * 10**-4, " 141.421u"),
(f * 10**-3, " 1.414m"),
(f * 10**-2, " 14.142m"),
(f * 10**-1, " 141.421m"),
(f * 10**0, " 1.414"),
(f * 10**1, " 14.142"),
(f * 10**2, " 141.421"),
(f * 10**3, " 1.414k"),
(f * 10**4, " 14.142k"),
(f * 10**5, " 141.421k"),
(f * 10**6, " 1.414M"),
(f * 10**7, " 14.142M"),
(f * 10**8, " 141.421M"),
(f * 10**9, " 1.414G"),
(f * 10**10, " 14.142G"),
(f * 10**11, " 141.421G"),
(f * 10**12, " 1.414T"),
(f * 10**13, " 14.142T"),
(f * 10**14, " 141.421T"),
(f * 10**15, " 1.414P"),
(f * 10**16, " 14.142P"),
(f * 10**17, " 141.421P"),
(f * 10**18, " 1.414E"),
(f * 10**19, " 14.142E"),
(f * 10**20, " 141.421E"),
(f * 10**21, " 1.414Z"),
(f * 10**22, " 14.142Z"),
(f * 10**23, " 141.421Z"),
(f * 10**24, " 1.414Y"),
(f * 10**25, " 14.142Y"),
(f * 10**26, " 141.421Y"),
]
self.compare_all(formatter, in_out)
def test_exponents_without_eng_prefix(self):
formatter = EngFormatter(accuracy=4, use_eng_prefix=False)
f = np.pi
in_out = [
(f * 10**-24, " 3.1416E-24"),
(f * 10**-23, " 31.4159E-24"),
(f * 10**-22, " 314.1593E-24"),
(f * 10**-21, " 3.1416E-21"),
(f * 10**-20, " 31.4159E-21"),
(f * 10**-19, " 314.1593E-21"),
(f * 10**-18, " 3.1416E-18"),
(f * 10**-17, " 31.4159E-18"),
(f * 10**-16, " 314.1593E-18"),
(f * 10**-15, " 3.1416E-15"),
(f * 10**-14, " 31.4159E-15"),
(f * 10**-13, " 314.1593E-15"),
(f * 10**-12, " 3.1416E-12"),
(f * 10**-11, " 31.4159E-12"),
(f * 10**-10, " 314.1593E-12"),
(f * 10**-9, " 3.1416E-09"),
(f * 10**-8, " 31.4159E-09"),
(f * 10**-7, " 314.1593E-09"),
(f * 10**-6, " 3.1416E-06"),
(f * 10**-5, " 31.4159E-06"),
(f * 10**-4, " 314.1593E-06"),
(f * 10**-3, " 3.1416E-03"),
(f * 10**-2, " 31.4159E-03"),
(f * 10**-1, " 314.1593E-03"),
(f * 10**0, " 3.1416E+00"),
(f * 10**1, " 31.4159E+00"),
(f * 10**2, " 314.1593E+00"),
(f * 10**3, " 3.1416E+03"),
(f * 10**4, " 31.4159E+03"),
(f * 10**5, " 314.1593E+03"),
(f * 10**6, " 3.1416E+06"),
(f * 10**7, " 31.4159E+06"),
(f * 10**8, " 314.1593E+06"),
(f * 10**9, " 3.1416E+09"),
(f * 10**10, " 31.4159E+09"),
(f * 10**11, " 314.1593E+09"),
(f * 10**12, " 3.1416E+12"),
(f * 10**13, " 31.4159E+12"),
(f * 10**14, " 314.1593E+12"),
(f * 10**15, " 3.1416E+15"),
(f * 10**16, " 31.4159E+15"),
(f * 10**17, " 314.1593E+15"),
(f * 10**18, " 3.1416E+18"),
(f * 10**19, " 31.4159E+18"),
(f * 10**20, " 314.1593E+18"),
(f * 10**21, " 3.1416E+21"),
(f * 10**22, " 31.4159E+21"),
(f * 10**23, " 314.1593E+21"),
(f * 10**24, " 3.1416E+24"),
(f * 10**25, " 31.4159E+24"),
(f * 10**26, " 314.1593E+24"),
]
self.compare_all(formatter, in_out)
def test_rounding(self):
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
in_out = [
(5.55555, " 5.556"),
(55.5555, " 55.556"),
(555.555, " 555.555"),
(5555.55, " 5.556k"),
(55555.5, " 55.556k"),
(555555, " 555.555k"),
]
self.compare_all(formatter, in_out)
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
in_out = [
(5.55555, " 5.6"),
(55.5555, " 55.6"),
(555.555, " 555.6"),
(5555.55, " 5.6k"),
(55555.5, " 55.6k"),
(555555, " 555.6k"),
]
self.compare_all(formatter, in_out)
formatter = EngFormatter(accuracy=0, use_eng_prefix=True)
in_out = [
(5.55555, " 6"),
(55.5555, " 56"),
(555.555, " 556"),
(5555.55, " 6k"),
(55555.5, " 56k"),
(555555, " 556k"),
]
self.compare_all(formatter, in_out)
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
result = formatter(0)
assert result == " 0.000"
def test_nan(self):
# Issue #11981
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.nan)
assert result == "NaN"
df = DataFrame(
{
"a": [1.5, 10.3, 20.5],
"b": [50.3, 60.67, 70.12],
"c": [100.2, 101.33, 120.33],
}
)
pt = df.pivot_table(values="a", index="b", columns="c")
set_eng_float_format(accuracy=1)
result = pt.to_string()
assert "NaN" in result
def test_inf(self):
# Issue #11981
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.inf)
assert result == "inf"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,90 @@
import numpy as np
import pandas._config.config as cf
from pandas import (
DataFrame,
MultiIndex,
)
class TestTableSchemaRepr:
def test_publishes(self, ip):
ipython = ip.instance(config=ip.config)
df = DataFrame({"A": [1, 2]})
objects = [df["A"], df] # dataframe / series
expected_keys = [
{"text/plain", "application/vnd.dataresource+json"},
{"text/plain", "text/html", "application/vnd.dataresource+json"},
]
opt = cf.option_context("display.html.table_schema", True)
last_obj = None
for obj, expected in zip(objects, expected_keys):
last_obj = obj
with opt:
formatted = ipython.display_formatter.format(obj)
assert set(formatted[0].keys()) == expected
with_latex = cf.option_context("styler.render.repr", "latex")
with opt, with_latex:
formatted = ipython.display_formatter.format(last_obj)
expected = {
"text/plain",
"text/html",
"text/latex",
"application/vnd.dataresource+json",
}
assert set(formatted[0].keys()) == expected
def test_publishes_not_implemented(self, ip):
# column MultiIndex
# GH#15996
midx = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = DataFrame(
np.random.default_rng(2).standard_normal((5, len(midx))), columns=midx
)
opt = cf.option_context("display.html.table_schema", True)
with opt:
formatted = ip.instance(config=ip.config).display_formatter.format(df)
expected = {"text/plain", "text/html"}
assert set(formatted[0].keys()) == expected
def test_config_on(self):
df = DataFrame({"A": [1, 2]})
with cf.option_context("display.html.table_schema", True):
result = df._repr_data_resource_()
assert result is not None
def test_config_default_off(self):
df = DataFrame({"A": [1, 2]})
with cf.option_context("display.html.table_schema", False):
result = df._repr_data_resource_()
assert result is None
def test_enable_data_resource_formatter(self, ip):
# GH#10491
formatters = ip.instance(config=ip.config).display_formatter.formatters
mimetype = "application/vnd.dataresource+json"
with cf.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# still there, just disabled
assert "application/vnd.dataresource+json" in formatters
assert not formatters[mimetype].enabled
# able to re-set
with cf.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# smoke test that it works
ip.instance(config=ip.config).display_formatter.format(cf)

View File

@ -0,0 +1,129 @@
# Note! This file is aimed specifically at pandas.io.formats.printing utility
# functions, not the general printing of pandas objects.
import string
import pandas._config.config as cf
from pandas.io.formats import printing
def test_adjoin():
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
class TestPPrintThing:
def test_repr_binary_type(self):
letters = string.ascii_letters
try:
raw = bytes(letters, encoding=cf.get_option("display.encoding"))
except TypeError:
raw = bytes(letters)
b = str(raw.decode("utf-8"))
res = printing.pprint_thing(b, quote_strings=True)
assert res == repr(b)
res = printing.pprint_thing(b, quote_strings=False)
assert res == b
def test_repr_obeys_max_seq_limit(self):
with cf.option_context("display.max_seq_items", 2000):
assert len(printing.pprint_thing(list(range(1000)))) > 1000
with cf.option_context("display.max_seq_items", 5):
assert len(printing.pprint_thing(list(range(1000)))) < 100
with cf.option_context("display.max_seq_items", 1):
assert len(printing.pprint_thing(list(range(1000)))) < 9
def test_repr_set(self):
assert printing.pprint_thing({1}) == "{1}"
class TestFormatBase:
def test_adjoin(self):
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_adjoin_unicode(self):
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
expected = "あ dd ggg\nb ええ hhh\nc ff いいい"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
adj = printing._EastAsianTextAdjustment()
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 13
assert adj.len(cols[1]) == 13
assert adj.len(cols[2]) == 16
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(7, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 23
assert adj.len(cols[1]) == 23
assert adj.len(cols[2]) == 26
def test_justify(self):
adj = printing._EastAsianTextAdjustment()
def just(x, *args, **kwargs):
# wrapper to test single str
return adj.justify([x], *args, **kwargs)[0]
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("パンダ", 5, mode="left") == "パンダ"
assert just("パンダ", 5, mode="center") == "パンダ"
assert just("パンダ", 5, mode="right") == "パンダ"
assert just("パンダ", 10, mode="left") == "パンダ "
assert just("パンダ", 10, mode="center") == " パンダ "
assert just("パンダ", 10, mode="right") == " パンダ"
def test_east_asian_len(self):
adj = printing._EastAsianTextAdjustment()
assert adj.len("abc") == 3
assert adj.len("abc") == 3
assert adj.len("パンダ") == 6
assert adj.len("パンダ") == 5
assert adj.len("パンダpanda") == 11
assert adj.len("パンダpanda") == 10
def test_ambiguous_width(self):
adj = printing._EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 4
with cf.option_context("display.unicode.ambiguous_as_wide", True):
adj = printing._EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 6
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]]
expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい"
adjoined = adj.adjoin(2, *data)
assert adjoined == expected

View File

@ -0,0 +1,758 @@
import io
import os
import sys
from zipfile import ZipFile
from _csv import Error
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
compat,
)
import pandas._testing as tm
class TestToCSV:
def test_to_csv_with_single_column(self):
# see gh-18676, https://bugs.python.org/issue32255
#
# Python's CSV library adds an extraneous '""'
# before the newline when the NaN-value is in
# the first row. Otherwise, only the newline
# character is added. This behavior is inconsistent
# and was patched in https://bugs.python.org/pull_request4672.
df1 = DataFrame([None, 1])
expected1 = """\
""
1.0
"""
with tm.ensure_clean("test.csv") as path:
df1.to_csv(path, header=None, index=None)
with open(path, encoding="utf-8") as f:
assert f.read() == expected1
df2 = DataFrame([1, None])
expected2 = """\
1.0
""
"""
with tm.ensure_clean("test.csv") as path:
df2.to_csv(path, header=None, index=None)
with open(path, encoding="utf-8") as f:
assert f.read() == expected2
def test_to_csv_default_encoding(self):
# GH17097
df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
with tm.ensure_clean("test.csv") as path:
# the default to_csv encoding is uft-8.
df.to_csv(path)
tm.assert_frame_equal(pd.read_csv(path, index_col=0), df)
def test_to_csv_quotechar(self):
df = DataFrame({"col": [1, 2]})
expected = """\
"","col"
"0","1"
"1","2"
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
with open(path, encoding="utf-8") as f:
assert f.read() == expected
expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, quotechar="$")
with open(path, encoding="utf-8") as f:
assert f.read() == expected
with tm.ensure_clean("test.csv") as path:
with pytest.raises(TypeError, match="quotechar"):
df.to_csv(path, quoting=1, quotechar=None)
def test_to_csv_doublequote(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
with open(path, encoding="utf-8") as f:
assert f.read() == expected
with tm.ensure_clean("test.csv") as path:
with pytest.raises(Error, match="escapechar"):
df.to_csv(path, doublequote=False) # no escapechar set
def test_to_csv_escapechar(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = """\
"","col"
"0","a\\"a"
"1","\\"bb\\""
"""
with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
with open(path, encoding="utf-8") as f:
assert f.read() == expected
df = DataFrame({"col": ["a,a", ",bb,"]})
expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
with open(path, encoding="utf-8") as f:
assert f.read() == expected
def test_csv_to_string(self):
df = DataFrame({"col": [1, 2]})
expected_rows = [",col", "0,1", "1,2"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected
def test_to_csv_decimal(self):
# see gh-781
df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})
expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected_default
expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(decimal=",", sep=";") == expected_european_excel
expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(float_format="%.2f") == expected_float_format_default
expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
assert (
df.to_csv(decimal=",", sep=";", float_format="%.2f")
== expected_float_format
)
# see gh-11553: testing if decimal is taken into account for '0.0'
df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False, decimal="^") == expected
# same but for an index
assert df.set_index("a").to_csv(decimal="^") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(float_format="%.2f") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
def test_to_csv_na_rep(self):
# see gh-11553
#
# Testing if NaN values are correctly represented in the index.
df = DataFrame({"a": [0, np.nan], "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# now with an index containing only NaNs
df = DataFrame({"a": np.nan, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# check if na_rep parameter does not break anything when no NaN
df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
assert expected == csv
def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
# GH 29975
# Make sure full na_rep shows up when a dtype is provided
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv(
na_rep="ZZZZZ"
)
assert expected == csv
def test_to_csv_date_format(self):
# GH 10209
df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")})
df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")})
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-01 00:00:01",
"2,2013-01-01 00:00:02",
"3,2013-01-01 00:00:03",
"4,2013-01-01 00:00:04",
]
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv() == expected_default_sec
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-02 00:00:00",
"2,2013-01-03 00:00:00",
"3,2013-01-04 00:00:00",
"4,2013-01-05 00:00:00",
]
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-01",
"2,2013-01-01",
"3,2013-01-01",
"4,2013-01-01",
]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-02",
"2,2013-01-03",
"3,2013-01-04",
"4,2013-01-05",
]
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day
# see gh-7791
#
# Testing if date_format parameter is taken into account
# for multi-indexed DataFrames.
df_sec["B"] = 0
df_sec["C"] = 1
expected_rows = ["A,B,C", "2013-01-01,0,1.0"]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
def test_to_csv_different_datetime_formats(self):
# GH#21734
df = DataFrame(
{
"date": pd.to_datetime("1970-01-01"),
"datetime": pd.date_range("1970-01-01", periods=2, freq="h"),
}
)
expected_rows = [
"date,datetime",
"1970-01-01,1970-01-01 00:00:00",
"1970-01-01,1970-01-01 01:00:00",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False) == expected
def test_to_csv_date_format_in_categorical(self):
# GH#40754
ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
ser = ser.astype("category")
expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
assert ser.to_csv(index=False) == expected
ser = pd.Series(
pd.date_range(
start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin"
).append(pd.DatetimeIndex([pd.NaT]))
)
ser = ser.astype("category")
assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
def test_to_csv_float_ea_float_format(self):
# GH#45991
df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
df["a"] = df["a"].astype("Float64")
result = df.to_csv(index=False, float_format="%.5f")
expected = tm.convert_rows_list_to_csv_str(
["a,b", "1.10000,c", "2.02000,c", ",c", "6.00001,c"]
)
assert result == expected
def test_to_csv_float_ea_no_float_format(self):
# GH#45991
df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
df["a"] = df["a"].astype("Float64")
result = df.to_csv(index=False)
expected = tm.convert_rows_list_to_csv_str(
["a,b", "1.1,c", "2.02,c", ",c", "6.000006,c"]
)
assert result == expected
def test_to_csv_multi_index(self):
# see gh-6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [",1", ",2", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame(
[1],
columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]),
)
exp_rows = [",,1", ",,2", "1,2,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))
exp_rows = [",foo", ",bar", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["foo", "bar", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
@pytest.mark.parametrize(
"ind,expected",
[
(
pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]),
"x,data\n1.0,1\n",
),
(
pd.MultiIndex(
levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"]
),
"x,y,data\n1.0,2.0,1\n",
),
],
)
def test_to_csv_single_level_multi_index(self, ind, expected, frame_or_series):
# see gh-19589
obj = frame_or_series(pd.Series([1], ind, name="data"))
result = obj.to_csv(lineterminator="\n", header=True)
assert result == expected
def test_to_csv_string_array_ascii(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_ascii = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("str_test.csv") as path:
df.to_csv(path, encoding="ascii")
with open(path, encoding="utf-8") as f:
assert f.read() == expected_ascii
def test_to_csv_string_array_utf8(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_utf8 = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("unicode_test.csv") as path:
df.to_csv(path, encoding="utf-8")
with open(path, encoding="utf-8") as f:
assert f.read() == expected_utf8
def test_to_csv_string_with_lf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
df = DataFrame(data)
with tm.ensure_clean("lf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_lf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\nef"'
+ os_linesep
+ b'3,"g\nh\n\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("lf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
df.to_csv(path, lineterminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("lf_test.csv") as path:
# case 3: CRLF as line terminator
# 'lineterminator' should not change inner element
expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
df.to_csv(path, lineterminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_string_with_crlf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]}
df = DataFrame(data)
with tm.ensure_clean("crlf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_crlf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\r\nef"'
+ os_linesep
+ b'3,"g\r\nh\r\n\r\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("crlf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
df.to_csv(path, lineterminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("crlf_test.csv") as path:
# case 3: CRLF as line terminator
# 'lineterminator' should not change inner element
expected_crlf = (
b"int,str_crlf\r\n"
b"1,abc\r\n"
b'2,"d\r\nef"\r\n'
b'3,"g\r\nh\r\n\r\ni"\r\n'
)
df.to_csv(path, lineterminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_stdout_file(self, capsys):
# GH 21561
df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"])
expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows)
df.to_csv(sys.stdout, encoding="ascii")
captured = capsys.readouterr()
assert captured.out == expected_ascii
assert not sys.stdout.closed
@pytest.mark.xfail(
compat.is_platform_windows(),
reason=(
"Especially in Windows, file stream should not be passed"
"to csv writer without newline='' option."
"(https://docs.python.org/3/library/csv.html#csv.writer)"
),
)
def test_to_csv_write_to_open_file(self):
# GH 21696
df = DataFrame({"a": ["x", "y", "z"]})
expected = """\
manual header
x
y
z
"""
with tm.ensure_clean("test.txt") as path:
with open(path, "w", encoding="utf-8") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, encoding="utf-8") as f:
assert f.read() == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
# see gh-21696
# see gh-20353
df = DataFrame({"a": ["x", "y", "z"]})
expected_rows = ["x", "y", "z"]
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
with tm.ensure_clean("test.txt") as path:
with open(path, "w", newline="", encoding="utf-8") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, "rb") as f:
assert f.read() == bytes(expected, "utf-8")
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(
self, compression_only, read_infer, to_infer, compression_to_extension
):
# see gh-15008
compression = compression_only
# We'll complete file extension subsequently.
filename = "test."
filename += compression_to_extension[compression]
df = DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression=to_compression)
result = pd.read_csv(path, index_col=0, compression=read_compression)
tm.assert_frame_equal(result, df)
def test_to_csv_compression_dict(self, compression_only):
# GH 26023
method = compression_only
df = DataFrame({"ABC": [1]})
filename = "to_csv_compress_as_dict."
extension = {
"gzip": "gz",
"zstd": "zst",
}.get(method, method)
filename += extension
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression={"method": method})
read_df = pd.read_csv(path, index_col=0)
tm.assert_frame_equal(read_df, df)
def test_to_csv_compression_dict_no_method_raises(self):
# GH 26023
df = DataFrame({"ABC": [1]})
compression = {"some_option": True}
msg = "must have key 'method'"
with tm.ensure_clean("out.zip") as path:
with pytest.raises(ValueError, match=msg):
df.to_csv(path, compression=compression)
@pytest.mark.parametrize("compression", ["zip", "infer"])
@pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
def test_to_csv_zip_arguments(self, compression, archive_name):
# GH 26023
df = DataFrame({"ABC": [1]})
with tm.ensure_clean("to_csv_archive_name.zip") as path:
df.to_csv(
path, compression={"method": compression, "archive_name": archive_name}
)
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == archive_name
@pytest.mark.parametrize(
"filename,expected_arcname",
[
("archive.csv", "archive.csv"),
("archive.tsv", "archive.tsv"),
("archive.csv.zip", "archive.csv"),
("archive.tsv.zip", "archive.tsv"),
("archive.zip", "archive"),
],
)
def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname):
# GH 39465
df = DataFrame({"ABC": [1]})
path = tmp_path / filename
df.to_csv(path, compression="zip")
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == expected_arcname
@pytest.mark.parametrize("df_new_type", ["Int64"])
def test_to_csv_na_rep_long_string(self, df_new_type):
# see gh-25099
df = DataFrame({"c": [float("nan")] * 3})
df = df.astype(df_new_type)
expected_rows = ["c", "mynull", "mynull", "mynull"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")
assert expected == result
def test_to_csv_timedelta_precision(self):
# GH 6783
s = pd.Series([1, 1]).astype("timedelta64[ns]")
buf = io.StringIO()
s.to_csv(buf)
result = buf.getvalue()
expected_rows = [
",0",
"0,0 days 00:00:00.000000001",
"1,0 days 00:00:00.000000001",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected
def test_na_rep_truncated(self):
# https://github.com/pandas-dev/pandas/issues/31447
result = pd.Series(range(8, 12)).to_csv(na_rep="-")
expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"])
assert result == expected
result = pd.Series([True, False]).to_csv(na_rep="nan")
expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"])
assert result == expected
result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
assert result == expected
@pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"])
def test_to_csv_errors(self, errors):
# GH 22610
data = ["\ud800foo"]
ser = pd.Series(data, index=Index(data, dtype=object), dtype=object)
with tm.ensure_clean("test.csv") as path:
ser.to_csv(path, errors=errors)
# No use in reading back the data as it is not the same anymore
# due to the error handling
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_binary_handle(self, mode):
"""
Binary file objects should work (if 'mode' contains a 'b') or even without
it in most cases.
GH 35058 and GH 19827
"""
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
with open(path, mode="w+b") as handle:
df.to_csv(handle, mode=mode)
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_encoding_binary_handle(self, mode):
"""
Binary file objects should honor a specified encoding.
GH 23854 and GH 13068 with binary handles
"""
# example from GH 23854
content = "a, b, 🐟".encode("utf-8-sig")
buffer = io.BytesIO(content)
df = pd.read_csv(buffer, encoding="utf-8-sig")
buffer = io.BytesIO()
df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False)
buffer.seek(0) # tests whether file handle wasn't closed
assert buffer.getvalue().startswith(content)
# example from GH 13068
with tm.ensure_clean() as path:
with open(path, "w+b") as handle:
DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig")
handle.seek(0)
assert handle.read().startswith(b'\xef\xbb\xbf""')
def test_to_csv_iterative_compression_name(compression):
# GH 38714
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
df.to_csv(path, compression=compression, chunksize=1)
tm.assert_frame_equal(
pd.read_csv(path, compression=compression, index_col=0), df
)
def test_to_csv_iterative_compression_buffer(compression):
# GH 38714
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
with io.BytesIO() as buffer:
df.to_csv(buffer, compression=compression, chunksize=1)
buffer.seek(0)
tm.assert_frame_equal(
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed
def test_to_csv_pos_args_deprecation():
# GH-54229
df = DataFrame({"a": [1, 2, 3]})
msg = (
r"Starting with pandas version 3.0 all arguments of to_csv except for the "
r"argument 'path_or_buf' will be keyword-only."
)
with tm.assert_produces_warning(FutureWarning, match=msg):
buffer = io.BytesIO()
df.to_csv(buffer, ";")

View File

@ -0,0 +1,429 @@
"""Tests formatting as writer-agnostic ExcelCells
ExcelFormatter is tested implicitly in pandas/tests/io/excel
"""
import string
import pytest
from pandas.errors import CSSWarning
import pandas._testing as tm
from pandas.io.formats.excel import (
CssExcelCell,
CSSToExcelConverter,
)
@pytest.mark.parametrize(
"css,expected",
[
# FONT
# - name
("font-family: foo,bar", {"font": {"name": "foo"}}),
('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}),
("font-family: foo,\nbar", {"font": {"name": "foo"}}),
("font-family: foo, bar, baz", {"font": {"name": "foo"}}),
("font-family: bar, foo", {"font": {"name": "bar"}}),
("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}),
("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}),
('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}),
('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}),
# - family
("font-family: serif", {"font": {"name": "serif", "family": 1}}),
("font-family: Serif", {"font": {"name": "serif", "family": 1}}),
("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}),
("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}),
("font-family: roman, sans serif", {"font": {"name": "roman"}}),
("font-family: roman, sansserif", {"font": {"name": "roman"}}),
("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}),
("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}),
# - size
("font-size: 1em", {"font": {"size": 12}}),
("font-size: xx-small", {"font": {"size": 6}}),
("font-size: x-small", {"font": {"size": 7.5}}),
("font-size: small", {"font": {"size": 9.6}}),
("font-size: medium", {"font": {"size": 12}}),
("font-size: large", {"font": {"size": 13.5}}),
("font-size: x-large", {"font": {"size": 18}}),
("font-size: xx-large", {"font": {"size": 24}}),
("font-size: 50%", {"font": {"size": 6}}),
# - bold
("font-weight: 100", {"font": {"bold": False}}),
("font-weight: 200", {"font": {"bold": False}}),
("font-weight: 300", {"font": {"bold": False}}),
("font-weight: 400", {"font": {"bold": False}}),
("font-weight: normal", {"font": {"bold": False}}),
("font-weight: lighter", {"font": {"bold": False}}),
("font-weight: bold", {"font": {"bold": True}}),
("font-weight: bolder", {"font": {"bold": True}}),
("font-weight: 700", {"font": {"bold": True}}),
("font-weight: 800", {"font": {"bold": True}}),
("font-weight: 900", {"font": {"bold": True}}),
# - italic
("font-style: italic", {"font": {"italic": True}}),
("font-style: oblique", {"font": {"italic": True}}),
# - underline
("text-decoration: underline", {"font": {"underline": "single"}}),
("text-decoration: overline", {}),
("text-decoration: none", {}),
# - strike
("text-decoration: line-through", {"font": {"strike": True}}),
(
"text-decoration: underline line-through",
{"font": {"strike": True, "underline": "single"}},
),
(
"text-decoration: underline; text-decoration: line-through",
{"font": {"strike": True}},
),
# - color
("color: red", {"font": {"color": "FF0000"}}),
("color: #ff0000", {"font": {"color": "FF0000"}}),
("color: #f0a", {"font": {"color": "FF00AA"}}),
# - shadow
("text-shadow: none", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}),
("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}),
("text-shadow: 0px -2em", {"font": {"shadow": True}}),
# FILL
# - color, fillType
(
"background-color: red",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #ff0000",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #f0a",
{"fill": {"fgColor": "FF00AA", "patternType": "solid"}},
),
# BORDER
# - style
(
"border-style: solid",
{
"border": {
"top": {"style": "medium"},
"bottom": {"style": "medium"},
"left": {"style": "medium"},
"right": {"style": "medium"},
}
},
),
(
"border-style: solid; border-width: thin",
{
"border": {
"top": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
"right": {"style": "thin"},
}
},
),
(
"border-top-style: solid; border-top-width: thin",
{"border": {"top": {"style": "thin"}}},
),
(
"border-top-style: solid; border-top-width: 1pt",
{"border": {"top": {"style": "thin"}}},
),
("border-top-style: solid", {"border": {"top": {"style": "medium"}}}),
(
"border-top-style: solid; border-top-width: medium",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: 2pt",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: thick",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: solid; border-top-width: 4pt",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: dotted",
{"border": {"top": {"style": "mediumDashDotDot"}}},
),
(
"border-top-style: dotted; border-top-width: thin",
{"border": {"top": {"style": "dotted"}}},
),
("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}),
(
"border-top-style: dashed; border-top-width: thin",
{"border": {"top": {"style": "dashed"}}},
),
("border-top-style: double", {"border": {"top": {"style": "double"}}}),
# - color
(
"border-style: solid; border-color: #0000ff",
{
"border": {
"top": {"style": "medium", "color": "0000FF"},
"right": {"style": "medium", "color": "0000FF"},
"bottom": {"style": "medium", "color": "0000FF"},
"left": {"style": "medium", "color": "0000FF"},
}
},
),
(
"border-top-style: double; border-top-color: blue",
{"border": {"top": {"style": "double", "color": "0000FF"}}},
),
(
"border-top-style: solid; border-top-color: #06c",
{"border": {"top": {"style": "medium", "color": "0066CC"}}},
),
(
"border-top-color: blue",
{"border": {"top": {"color": "0000FF", "style": "none"}}},
),
# ALIGNMENT
# - horizontal
("text-align: center", {"alignment": {"horizontal": "center"}}),
("text-align: left", {"alignment": {"horizontal": "left"}}),
("text-align: right", {"alignment": {"horizontal": "right"}}),
("text-align: justify", {"alignment": {"horizontal": "justify"}}),
# - vertical
("vertical-align: top", {"alignment": {"vertical": "top"}}),
("vertical-align: text-top", {"alignment": {"vertical": "top"}}),
("vertical-align: middle", {"alignment": {"vertical": "center"}}),
("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}),
("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}),
# - wrap_text
("white-space: nowrap", {"alignment": {"wrap_text": False}}),
("white-space: pre", {"alignment": {"wrap_text": False}}),
("white-space: pre-line", {"alignment": {"wrap_text": False}}),
("white-space: normal", {"alignment": {"wrap_text": True}}),
# NUMBER FORMAT
("number-format: 0%", {"number_format": {"format_code": "0%"}}),
(
"number-format: 0§[Red](0)§-§@;",
{"number_format": {"format_code": "0;[red](0);-;@"}}, # GH 46152
),
],
)
def test_css_to_excel(css, expected):
convert = CSSToExcelConverter()
assert expected == convert(css)
def test_css_to_excel_multiple():
convert = CSSToExcelConverter()
actual = convert(
"""
font-weight: bold;
text-decoration: underline;
color: red;
border-width: thin;
text-align: center;
vertical-align: top;
unused: something;
"""
)
assert {
"font": {"bold": True, "underline": "single", "color": "FF0000"},
"border": {
"top": {"style": "thin"},
"right": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
},
"alignment": {"horizontal": "center", "vertical": "top"},
} == actual
@pytest.mark.parametrize(
"css,inherited,expected",
[
("font-weight: bold", "", {"font": {"bold": True}}),
("", "font-weight: bold", {"font": {"bold": True}}),
(
"font-weight: bold",
"font-style: italic",
{"font": {"bold": True, "italic": True}},
),
("font-style: normal", "font-style: italic", {"font": {"italic": False}}),
("font-style: inherit", "", {}),
(
"font-style: normal; font-style: inherit",
"font-style: italic",
{"font": {"italic": True}},
),
],
)
def test_css_to_excel_inherited(css, inherited, expected):
convert = CSSToExcelConverter(inherited)
assert expected == convert(css)
@pytest.mark.parametrize(
"input_color,output_color",
(
list(CSSToExcelConverter.NAMED_COLORS.items())
+ [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()]
+ [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]
),
)
def test_css_to_excel_good_colors(input_color, output_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
expected["fill"] = {"patternType": "solid", "fgColor": output_color}
expected["font"] = {"color": output_color}
expected["border"] = {
k: {"color": output_color, "style": "none"}
for k in ("top", "right", "bottom", "left")
}
with tm.assert_produces_warning(None):
convert = CSSToExcelConverter()
assert expected == convert(css)
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
if input_color is not None:
expected["fill"] = {"patternType": "solid"}
with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)
def tests_css_named_colors_valid():
upper_hexs = set(map(str.upper, string.hexdigits))
for color in CSSToExcelConverter.NAMED_COLORS.values():
assert len(color) == 6 and all(c in upper_hexs for c in color)
def test_css_named_colors_from_mpl_present():
mpl_colors = pytest.importorskip("matplotlib.colors")
pd_colors = CSSToExcelConverter.NAMED_COLORS
for name, color in mpl_colors.CSS4_COLORS.items():
assert name in pd_colors and pd_colors[name] == color[1:]
@pytest.mark.parametrize(
"styles,expected",
[
([("color", "green"), ("color", "red")], "color: red;"),
([("font-weight", "bold"), ("font-weight", "normal")], "font-weight: normal;"),
([("text-align", "center"), ("TEXT-ALIGN", "right")], "text-align: right;"),
],
)
def test_css_excel_cell_precedence(styles, expected):
"""It applies favors latter declarations over former declarations"""
# See GH 47371
converter = CSSToExcelConverter()
converter._call_cached.cache_clear()
css_styles = {(0, 0): styles}
cell = CssExcelCell(
row=0,
col=0,
val="",
style=None,
css_styles=css_styles,
css_row=0,
css_col=0,
css_converter=converter,
)
converter._call_cached.cache_clear()
assert cell.style == converter(expected)
@pytest.mark.parametrize(
"styles,cache_hits,cache_misses",
[
([[("color", "green"), ("color", "red"), ("color", "green")]], 0, 1),
(
[
[("font-weight", "bold")],
[("font-weight", "normal"), ("font-weight", "bold")],
],
1,
1,
),
([[("text-align", "center")], [("TEXT-ALIGN", "center")]], 1, 1),
(
[
[("font-weight", "bold"), ("text-align", "center")],
[("font-weight", "bold"), ("text-align", "left")],
],
0,
2,
),
(
[
[("font-weight", "bold"), ("text-align", "center")],
[("font-weight", "bold"), ("text-align", "left")],
[("font-weight", "bold"), ("text-align", "center")],
],
1,
2,
),
],
)
def test_css_excel_cell_cache(styles, cache_hits, cache_misses):
"""It caches unique cell styles"""
# See GH 47371
converter = CSSToExcelConverter()
converter._call_cached.cache_clear()
css_styles = {(0, i): _style for i, _style in enumerate(styles)}
for css_row, css_col in css_styles:
CssExcelCell(
row=0,
col=0,
val="",
style=None,
css_styles=css_styles,
css_row=css_row,
css_col=css_col,
css_converter=converter,
)
cache_info = converter._call_cached.cache_info()
converter._call_cached.cache_clear()
assert cache_info.hits == cache_hits
assert cache_info.misses == cache_misses

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
from io import (
BytesIO,
StringIO,
)
import pytest
import pandas as pd
import pandas._testing as tm
pytest.importorskip("tabulate")
def test_simple():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf)
result = buf.getvalue()
assert (
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
def test_empty_frame():
buf = StringIO()
df = pd.DataFrame({"id": [], "first_name": [], "last_name": []}).set_index("id")
df.to_markdown(buf=buf)
result = buf.getvalue()
assert result == (
"| id | first_name | last_name |\n"
"|------|--------------|-------------|"
)
def test_other_tablefmt():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf, tablefmt="jira")
result = buf.getvalue()
assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
def test_other_headers():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf, headers=["foo", "bar"])
result = buf.getvalue()
assert result == (
"| foo | bar |\n|------:|------:|\n| 0 "
"| 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
def test_series():
buf = StringIO()
s = pd.Series([1, 2, 3], name="foo")
s.to_markdown(buf=buf)
result = buf.getvalue()
assert result == (
"| | foo |\n|---:|------:|\n| 0 | 1 "
"|\n| 1 | 2 |\n| 2 | 3 |"
)
def test_no_buf():
df = pd.DataFrame([1, 2, 3])
result = df.to_markdown()
assert (
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
@pytest.mark.parametrize("index", [True, False])
def test_index(index):
# GH 32667
df = pd.DataFrame([1, 2, 3])
result = df.to_markdown(index=index)
if index:
expected = (
"| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
else:
expected = "| 0 |\n|----:|\n| 1 |\n| 2 |\n| 3 |"
assert result == expected
def test_showindex_disallowed_in_kwargs():
# GH 32667; disallowing showindex in kwargs enforced in 2.0
df = pd.DataFrame([1, 2, 3])
with pytest.raises(ValueError, match="Pass 'index' instead of 'showindex"):
df.to_markdown(index=True, showindex=True)
def test_markdown_pos_args_deprecatation():
# GH-54229
df = pd.DataFrame({"a": [1, 2, 3]})
msg = (
r"Starting with pandas version 3.0 all arguments of to_markdown except for the "
r"argument 'buf' will be keyword-only."
)
with tm.assert_produces_warning(FutureWarning, match=msg):
buffer = BytesIO()
df.to_markdown(buffer, "grid")

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,342 @@
"""
self-contained to write legacy storage pickle files
To use this script. Create an environment where you want
generate pickles, say its for 0.20.3, with your pandas clone
in ~/pandas
. activate pandas_0.20.3
cd ~/pandas/pandas
$ python -m tests.io.generate_legacy_storage_files \
tests/io/data/legacy_pickle/0.20.3/ pickle
This script generates a storage file for the current arch, system,
and python version
pandas version: 0.20.3
output dir : pandas/pandas/tests/io/data/legacy_pickle/0.20.3/
storage format: pickle
created pickle file: 0.20.3_x86_64_darwin_3.5.2.pickle
The idea here is you are using the *current* version of the
generate_legacy_storage_files with an *older* version of pandas to
generate a pickle file. We will then check this file into a current
branch, and test using test_pickle.py. This will load the *older*
pickles and test versus the current data that is generated
(with main). These are then compared.
If we have cases where we changed the signature (e.g. we renamed
offset -> freq in Timestamp). Then we have to conditionally execute
in the generate_legacy_storage_files.py to make it
run under the older AND the newer version.
"""
from datetime import timedelta
import os
import pickle
import platform as pl
import sys
# Remove script directory from path, otherwise Python will try to
# import the JSON test directory as the json module
sys.path.pop(0)
import numpy as np
import pandas
from pandas import (
Categorical,
DataFrame,
Index,
MultiIndex,
NaT,
Period,
RangeIndex,
Series,
Timestamp,
bdate_range,
date_range,
interval_range,
period_range,
timedelta_range,
)
from pandas.arrays import SparseArray
from pandas.tseries.offsets import (
FY5253,
BusinessDay,
BusinessHour,
CustomBusinessDay,
DateOffset,
Day,
Easter,
Hour,
LastWeekOfMonth,
Minute,
MonthBegin,
MonthEnd,
QuarterBegin,
QuarterEnd,
SemiMonthBegin,
SemiMonthEnd,
Week,
WeekOfMonth,
YearBegin,
YearEnd,
)
def _create_sp_series():
nan = np.nan
# nan-based
arr = np.arange(15, dtype=np.float64)
arr[7:12] = nan
arr[-1:] = nan
bseries = Series(SparseArray(arr, kind="block"))
bseries.name = "bseries"
return bseries
def _create_sp_tsseries():
nan = np.nan
# nan-based
arr = np.arange(15, dtype=np.float64)
arr[7:12] = nan
arr[-1:] = nan
date_index = bdate_range("1/1/2011", periods=len(arr))
bseries = Series(SparseArray(arr, kind="block"), index=date_index)
bseries.name = "btsseries"
return bseries
def _create_sp_frame():
nan = np.nan
data = {
"A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
"B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
"C": np.arange(10).astype(np.int64),
"D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
}
dates = bdate_range("1/1/2011", periods=10)
return DataFrame(data, index=dates).apply(SparseArray)
def create_pickle_data():
"""create the pickle data"""
data = {
"A": [0.0, 1.0, 2.0, 3.0, np.nan],
"B": [0, 1, 0, 1, 0],
"C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
"D": date_range("1/1/2009", periods=5),
"E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
}
scalars = {"timestamp": Timestamp("20130101"), "period": Period("2012", "M")}
index = {
"int": Index(np.arange(10)),
"date": date_range("20130101", periods=10),
"period": period_range("2013-01-01", freq="M", periods=10),
"float": Index(np.arange(10, dtype=np.float64)),
"uint": Index(np.arange(10, dtype=np.uint64)),
"timedelta": timedelta_range("00:00:00", freq="30min", periods=10),
}
index["range"] = RangeIndex(10)
index["interval"] = interval_range(0, periods=10)
mi = {
"reg2": MultiIndex.from_tuples(
tuple(
zip(
*[
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
)
),
names=["first", "second"],
)
}
series = {
"float": Series(data["A"]),
"int": Series(data["B"]),
"mixed": Series(data["E"]),
"ts": Series(
np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)
),
"mi": Series(
np.arange(5).astype(np.float64),
index=MultiIndex.from_tuples(
tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]
),
),
"dup": Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]),
"cat": Series(Categorical(["foo", "bar", "baz"])),
"dt": Series(date_range("20130101", periods=5)),
"dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
"period": Series([Period("2000Q1")] * 5),
}
mixed_dup_df = DataFrame(data)
mixed_dup_df.columns = list("ABCDA")
frame = {
"float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
"int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
"mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
"mi": DataFrame(
{"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)},
index=MultiIndex.from_tuples(
tuple(
zip(
*[
["bar", "bar", "baz", "baz", "baz"],
["one", "two", "one", "two", "three"],
]
)
),
names=["first", "second"],
),
),
"dup": DataFrame(
np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]
),
"cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
"cat_and_float": DataFrame(
{
"A": Categorical(["foo", "bar", "baz"]),
"B": np.arange(3).astype(np.int64),
}
),
"mixed_dup": mixed_dup_df,
"dt_mixed_tzs": DataFrame(
{
"A": Timestamp("20130102", tz="US/Eastern"),
"B": Timestamp("20130603", tz="CET"),
},
index=range(5),
),
"dt_mixed2_tzs": DataFrame(
{
"A": Timestamp("20130102", tz="US/Eastern"),
"B": Timestamp("20130603", tz="CET"),
"C": Timestamp("20130603", tz="UTC"),
},
index=range(5),
),
}
cat = {
"int8": Categorical(list("abcdefg")),
"int16": Categorical(np.arange(1000)),
"int32": Categorical(np.arange(10000)),
}
timestamp = {
"normal": Timestamp("2011-01-01"),
"nat": NaT,
"tz": Timestamp("2011-01-01", tz="US/Eastern"),
}
off = {
"DateOffset": DateOffset(years=1),
"DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
"BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
"BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
"CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
"SemiMonthBegin": SemiMonthBegin(day_of_month=9),
"SemiMonthEnd": SemiMonthEnd(day_of_month=24),
"MonthBegin": MonthBegin(1),
"MonthEnd": MonthEnd(1),
"QuarterBegin": QuarterBegin(1),
"QuarterEnd": QuarterEnd(1),
"Day": Day(1),
"YearBegin": YearBegin(1),
"YearEnd": YearEnd(1),
"Week": Week(1),
"Week_Tues": Week(2, normalize=False, weekday=1),
"WeekOfMonth": WeekOfMonth(week=3, weekday=4),
"LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
"FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
"Easter": Easter(),
"Hour": Hour(1),
"Minute": Minute(1),
}
return {
"series": series,
"frame": frame,
"index": index,
"scalars": scalars,
"mi": mi,
"sp_series": {"float": _create_sp_series(), "ts": _create_sp_tsseries()},
"sp_frame": {"float": _create_sp_frame()},
"cat": cat,
"timestamp": timestamp,
"offsets": off,
}
def platform_name():
return "_".join(
[
str(pandas.__version__),
str(pl.machine()),
str(pl.system().lower()),
str(pl.python_version()),
]
)
def write_legacy_pickles(output_dir):
version = pandas.__version__
print(
"This script generates a storage file for the current arch, system, "
"and python version"
)
print(f" pandas version: {version}")
print(f" output dir : {output_dir}")
print(" storage format: pickle")
pth = f"{platform_name()}.pickle"
with open(os.path.join(output_dir, pth), "wb") as fh:
pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)
print(f"created pickle file: {pth}")
def write_legacy_file():
# force our cwd to be the first searched
sys.path.insert(0, "")
if not 3 <= len(sys.argv) <= 4:
sys.exit(
"Specify output directory and storage type: generate_legacy_"
"storage_files.py <output_dir> <storage_type> "
)
output_dir = str(sys.argv[1])
storage_type = str(sys.argv[2])
if not os.path.exists(output_dir):
os.mkdir(output_dir)
if storage_type == "pickle":
write_legacy_pickles(output_dir=output_dir)
else:
sys.exit("storage_type must be one of {'pickle'}")
if __name__ == "__main__":
write_legacy_file()

Some files were not shown because too many files have changed in this diff Show More