fix: filter empty values in xlsx rows to improve vector similarity hits (#422)

This commit is contained in:
lisaifei@cvte.com
2023-06-21 11:25:52 +08:00
committed by GitHub
parent d637a147ee
commit 23ef2262bd
3 changed files with 6 additions and 3 deletions

View File

@@ -27,5 +27,7 @@ class XLSXParser(BaseParser):
if keys == []:
keys = list(map(str, row))
else:
data.append(json.dumps(dict(zip(keys, list(map(str, row)))), ensure_ascii=False))
row_dict = dict(zip(keys, row))
row_dict = {k: v for k, v in row_dict.items() if v}
data.append(json.dumps(row_dict, ensure_ascii=False))
return '\n\n'.join(data)