mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-20 00:06:56 +08:00
feat: support xlsx file parsing (#304)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
bbe58327c8
commit
0abd67288b
31
api/core/index/readers/xlsx_parser.py
Normal file
31
api/core/index/readers/xlsx_parser.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from pathlib import Path
|
||||
import json
|
||||
from typing import Dict
|
||||
from openpyxl import load_workbook
|
||||
|
||||
from llama_index.readers.file.base_parser import BaseParser
|
||||
from flask import current_app
|
||||
|
||||
|
||||
class XLSXParser(BaseParser):
|
||||
"""XLSX parser."""
|
||||
|
||||
def _init_parser(self) -> Dict:
|
||||
"""Init parser"""
|
||||
return {}
|
||||
|
||||
def parse_file(self, file: Path, errors: str = "ignore") -> str:
|
||||
data = []
|
||||
keys = []
|
||||
with open(file, "r") as fp:
|
||||
wb = load_workbook(filename=file, read_only=True)
|
||||
# loop over all sheets
|
||||
for sheet in wb:
|
||||
for row in sheet.iter_rows(values_only=True):
|
||||
if all(v is None for v in row):
|
||||
continue
|
||||
if keys == []:
|
||||
keys = row
|
||||
else:
|
||||
data.append(json.dumps(dict(zip(keys, row)), ensure_ascii=False))
|
||||
return data
|
||||
Reference in New Issue
Block a user