Source code for pykern.pkcli.xlsx
"""manipulate Excel spreadsheets
:copyright: Copyright (c) 2025 RadiaSoft LLC. All Rights Reserved.
:license: http://www.apache.org/licenses/LICENSE-2.0.html
"""
from pykern.pkcollections import PKDict
from pykern.pkdebug import pkdc, pkdlog, pkdp
import pandas
import pykern.pkcli
import pykern.pkio
[docs]
def to_csv(xlsx_path, sheet=None, csv_path=None):
"""Dump sheets from xlsx_path
Args:
xlsx_path (str or py.path): what to parse
sheet (str or int): dump a specfic sheet [None: all]
csv_path (str or py.path): where to write [base#N.csv]
Returns:
tuple: list of csv files created
"""
return ToCSV(xlsx_path, sheet, csv_path).result
[docs]
class ToCSV:
def __init__(self, xlsx_path, sheet, csv_path):
if csv_path is not None:
csv_path = pykern.pkio.py_path(csv_path)
self._xlsx_path = pykern.pkio.py_path(xlsx_path)
self._xlsx = pandas.ExcelFile(str(self._xlsx_path))
self._csv_path = csv_path or self._xlsx_path
self.result = (
tuple(self._multiple()) if sheet is None else (self._one(sheet, csv_path),)
)
def _multiple(self):
for i in range(len(self._xlsx.sheet_names)):
yield self._one(i, None)
def _one(self, sheet, csv_path):
if csv_path is None:
csv_path = self._csv_path.new(
purebasename=f"{self._csv_path.purebasename}#{sheet}",
ext=".csv",
)
try:
sheet = int(sheet)
except ValueError:
pass
d = self._xlsx.parse(index_col=None, sheet_name=sheet)
d.columns = d.columns.map(lambda c: "" if "Unnamed" in str(c) else str(c))
d.to_csv(
str(csv_path),
encoding="utf-8",
index=False,
lineterminator="\r\n",
)
return csv_path