Source code for pykern.pkcli.xlsx

"""manipulate Excel spreadsheets

:copyright: Copyright (c) 2025 RadiaSoft LLC.  All Rights Reserved.
:license: http://www.apache.org/licenses/LICENSE-2.0.html
"""

from pykern.pkcollections import PKDict
from pykern.pkdebug import pkdc, pkdlog, pkdp
import pandas
import pykern.pkcli
import pykern.pkio



[docs]
def to_csv(xlsx_path, sheet=None, csv_path=None):
    """Dump sheets from xlsx_path

    Args:
        xlsx_path (str or py.path): what to parse
        sheet (str or int): dump a specfic sheet [None: all]
        csv_path (str or py.path): where to write [base#N.csv]
    Returns:
        tuple: list of csv files created
    """
    return ToCSV(xlsx_path, sheet, csv_path).result




[docs]
class ToCSV:

    def __init__(self, xlsx_path, sheet, csv_path):
        if csv_path is not None:
            csv_path = pykern.pkio.py_path(csv_path)
        self._xlsx_path = pykern.pkio.py_path(xlsx_path)
        self._xlsx = pandas.ExcelFile(str(self._xlsx_path))
        self._csv_path = csv_path or self._xlsx_path
        self.result = (
            tuple(self._multiple()) if sheet is None else (self._one(sheet, csv_path),)
        )

    def _multiple(self):
        for i in range(len(self._xlsx.sheet_names)):
            yield self._one(i, None)

    def _one(self, sheet, csv_path):
        if csv_path is None:
            csv_path = self._csv_path.new(
                purebasename=f"{self._csv_path.purebasename}#{sheet}",
                ext=".csv",
            )
        try:
            sheet = int(sheet)
        except ValueError:
            pass
        d = self._xlsx.parse(index_col=None, sheet_name=sheet)
        d.columns = d.columns.map(lambda c: "" if "Unnamed" in str(c) else str(c))
        d.to_csv(
            str(csv_path),
            encoding="utf-8",
            index=False,
            lineterminator="\r\n",
        )
        return csv_path
Source code for pykern.pkcli.xlsx

pykern

Navigation

Related Topics