使用 Pandas 和 Openpyxl 读取 Excel 文件时出现问题

Problem reading an Excel file using Pandas and Openpyxl

提问人:Ostlimpa 提问时间:10/21/2023 最后编辑:Lee TaylorOstlimpa 更新时间:10/21/2023 访问量:39

问:

我正在尝试将特定数据(例如收入)从下载的财务报表移动到我的估值模板的过程自动化。我觉得这应该是可行的,虽然我对 Python 没有那么经验,但我在 chatgpt 的帮助下进行了尝试。

根据我收到的错误消息,由于我的理解中的样式问题,似乎软件包无法读取 Excel 文件。

Cell In [19], line 15
     12     return None  # If item not found
     14 # Load Twilio financial document with data_only=True
---> 15 twilio_wb = pd.read_excel('C:/Users/coldp/OneDrive/Documents/Twilio Inc NYSE TWLO Financials.xlsx')
     16 twilio_ws = twilio_wb.active
     18 # Load DCF template

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:457, in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
    455 if not isinstance(io, ExcelFile):
    456     should_close = True
--> 457     io = ExcelFile(io, storage_options=storage_options, engine=engine)
    458 elif engine and engine != io.engine:
    459     raise ValueError(
    460         "Engine should not be specified when passing "
    461         "an ExcelFile - ExcelFile already has the engine set"
    462     )

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:1419, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
   1416 self.engine = engine
   1417 self.storage_options = storage_options
-> 1419 self._reader = self._engines[engine](self._io, storage_options=storage_options)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_openpyxl.py:525, in OpenpyxlReader.__init__(self, filepath_or_buffer, storage_options)
    514 """
    515 Reader using openpyxl engine.
    516 
   (...)
    522     passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
    523 """
    524 import_optional_dependency("openpyxl")
--> 525 super().__init__(filepath_or_buffer, storage_options=storage_options)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_base.py:518, in BaseExcelReader.__init__(self, filepath_or_buffer, storage_options)
    516 self.handles.handle.seek(0)
    517 try:
--> 518     self.book = self.load_workbook(self.handles.handle)
    519 except Exception:
    520     self.close()

File ~\OneDrive\Documents\Anaconda\lib\site-packages\pandas\io\excel\_openpyxl.py:536, in OpenpyxlReader.load_workbook(self, filepath_or_buffer)
    533 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
    534     from openpyxl import load_workbook
--> 536     return load_workbook(
    537         filepath_or_buffer, read_only=True, data_only=True, keep_links=False
    538     )

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\reader\excel.py:317, in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
    290 """Open the given filename and return the workbook
    291 
    292 :param filename: the path to open or a file-like object
   (...)
    313 
    314 """
    315 reader = ExcelReader(filename, read_only, keep_vba,
    316                     data_only, keep_links)
--> 317 reader.read()
    318 return reader.wb

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\reader\excel.py:281, in ExcelReader.read(self)
    279 self.read_properties()
    280 self.read_theme()
--> 281 apply_stylesheet(self.archive, self.wb)
    282 self.read_worksheets()
    283 self.parser.assign_names()

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:198, in apply_stylesheet(archive, wb)
    195     return wb
    197 node = fromstring(src)
--> 198 stylesheet = Stylesheet.from_tree(node)
    200 if stylesheet.cell_styles:
    202     wb._borders = IndexedList(stylesheet.borders)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:103, in Stylesheet.from_tree(cls, node)
    101 for k in attrs:
    102     del node.attrib[k]
--> 103 return super(Stylesheet, cls).from_tree(node)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\descriptors\serialisable.py:103, in Serialisable.from_tree(cls, node)
    100     else:
    101         attrib[tag] = obj
--> 103 return cls(**attrib)

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:94, in Stylesheet.__init__(self, numFmts, fonts, fills, borders, cellStyleXfs, cellXfs, cellStyles, dxfs, tableStyles, colors, extLst)
     92 self.protections = self.cellXfs.prots
     93 self._normalise_numbers()
---> 94 self.named_styles = self._merge_named_styles()

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:114, in Stylesheet._merge_named_styles(self)
    111 named_styles = self.cellStyles.names
    113 for style in named_styles:
--> 114     self._expand_named_style(style)
    116 return named_styles

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\stylesheet.py:124, in Stylesheet._expand_named_style(self, named_style)
    119 def _expand_named_style(self, named_style):
    120     """
    121     Bind format definitions for a named style from the associated style
    122     record
    123     """
--> 124     xf = self.cellStyleXfs[named_style.xfId]
    125     named_style.font = self.fonts[xf.fontId]
    126     named_style.fill = self.fills[xf.fillId]

File ~\OneDrive\Documents\Anaconda\lib\site-packages\openpyxl\styles\cell_style.py:185, in CellStyleList.__getitem__(self, idx)
    184 def __getitem__(self, idx):
--> 185     return self.xf[idx]

    IndexError: list index out of range

我确保 excel 文件没有样式、奇怪的图像等,但即便如此,问题仍然存在。我尝试了 openpyxl 和 pandas,但它仍然无法读取文件。

但是,excel文件可能仍然存在我不知道的问题,我一直在尽我所能对其进行筛选。

pandas excel 列表 jupyter-notebook openpyxl

评论


答: 暂无答案