feat: support xlsx file parsing (#304)

Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
lisaifei@cvte.com
2023-06-09 15:57:19 +08:00
committed by GitHub
parent bbe58327c8
commit 0abd67288b
4 changed files with 41 additions and 2 deletions

View File

@@ -0,0 +1,31 @@
import json
from pathlib import Path
from typing import Dict, List

from flask import current_app
from llama_index.readers.file.base_parser import BaseParser
from openpyxl import load_workbook
class XLSXParser(BaseParser):
    """Parser that converts the rows of an XLSX workbook into JSON strings."""

    def _init_parser(self) -> Dict:
        """Return the parser configuration; this parser needs none."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> List[str]:
        """Parse every sheet of the workbook at ``file``.

        Within each sheet the first non-empty row is taken as the header.
        Every later non-empty row is paired with that header and serialised
        as one JSON object string. Rows whose cells are all ``None`` are
        skipped.

        Args:
            file: Path of the ``.xlsx`` workbook to read.
            errors: Not used by this parser.

        Returns:
            One JSON string per data row, in sheet order then row order.
        """
        data: List[str] = []
        # load_workbook opens the file itself; no separate open() is needed.
        wb = load_workbook(filename=file, read_only=True)
        try:
            # loop over all sheets
            for sheet in wb:
                # Each sheet has its own header row, so the keys are reset
                # per sheet instead of reusing the first sheet's header.
                keys = None
                for row in sheet.iter_rows(values_only=True):
                    if all(v is None for v in row):
                        continue
                    if keys is None:
                        keys = row
                        continue
                    # default=str is deliberate: date/time cells come back as
                    # datetime objects, which json cannot serialise natively.
                    data.append(
                        json.dumps(
                            dict(zip(keys, row)),
                            ensure_ascii=False,
                            default=str,
                        )
                    )
        finally:
            # Read-only workbooks keep the file open until explicitly closed.
            wb.close()
        return data