2024-12-04 13:35:57 +05:00

88 lines
2.9 KiB
Python

# Copyright (c) 2010-2024 openpyxl
from itertools import accumulate
import operator
import numpy
from openpyxl.compat.product import prod
def dataframe_to_rows(df, index=True, header=True):
"""
Convert a Pandas dataframe into something suitable for passing into a worksheet.
If index is True then the index will be included, starting one row below the header.
If header is True then column headers will be included starting one column to the right.
Formatting should be done by client code.
"""
from pandas import Timestamp
if header:
if df.columns.nlevels > 1:
rows = expand_index(df.columns, header)
else:
rows = [list(df.columns.values)]
for row in rows:
n = []
for v in row:
if isinstance(v, numpy.datetime64):
v = Timestamp(v)
n.append(v)
row = n
if index:
row = [None]*df.index.nlevels + row
yield row
if index:
yield df.index.names
expanded = ([v] for v in df.index)
if df.index.nlevels > 1:
expanded = expand_index(df.index)
# Using the expanded index is preferable to df.itertuples(index=True) so that we have 'None' inserted where applicable
for (df_index, row) in zip(expanded, df.itertuples(index=False)):
row = list(row)
if index:
row = df_index + row
yield row
def expand_index(index, header=False):
"""
Expand axis or column Multiindex
For columns use header = True
For axes use header = False (default)
"""
# For each element of the index, zip the members with the previous row
# If the 2 elements of the zipped list do not match, we can insert the new value into the row
# or if an earlier member was different, all later members should be added to the row
values = list(index.values)
previous_value = [None] * len(values[0])
result = []
for value in values:
row = [None] * len(value)
# Once there's a difference in member of an index with the prior index, we need to store all subsequent members in the row
prior_change = False
for idx, (current_index_member, previous_index_member) in enumerate(zip(value, previous_value)):
if current_index_member != previous_index_member or prior_change:
row[idx] = current_index_member
prior_change = True
previous_value = value
# If this is for a row index, we're already returning a row so just yield
if not header:
yield row
else:
result.append(row)
# If it's for a header, we need to transpose to get it in row order
# Example: result = [['A', 'A'], [None, 'B']] -> [['A', None], ['A', 'B']]
if header:
result = numpy.array(result).transpose().tolist()
for row in result:
yield row