feat:api Add support for extracting EPUB files in ExtractProcessor (#3254)

Co-authored-by: crazywoola <427733928@qq.com>
This commit is contained in:
LiuVaayne
2024-04-12 11:25:02 +08:00
committed by GitHub
parent 44448ba68d
commit b00466f025
4 changed files with 44 additions and 2 deletions

View File

@@ -22,7 +22,7 @@ IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
- 'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
+ 'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml', 'epub']
PREVIEW_WORDS_LIMIT = 3000