install pandoc (#16825)

This commit is contained in:
Jyong
2025-03-26 22:34:10 +08:00
committed by GitHub
parent 91db2207b3
commit 30792a1e1a
4 changed files with 66 additions and 52 deletions

View File

@@ -1,6 +1,8 @@
import logging
from typing import Optional
import pypandoc # type: ignore
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
@@ -34,6 +36,7 @@ class UnstructuredEpubExtractor(BaseExtractor):
else:
from unstructured.partition.epub import partition_epub
pypandoc.download_pandoc()
elements = partition_epub(filename=self._file_path, xml_keep_tags=True)
from unstructured.chunking.title import chunk_by_title