mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-09 02:46:52 +08:00
fix unstructured setting (#12116)
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import base64
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from bs4 import BeautifulSoup # type: ignore
|
||||
|
||||
@@ -15,7 +16,7 @@ class UnstructuredEmailExtractor(BaseExtractor):
|
||||
file_path: Path to the file to load.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
||||
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
|
||||
@@ -19,7 +19,7 @@ class UnstructuredEpubExtractor(BaseExtractor):
|
||||
self,
|
||||
file_path: str,
|
||||
api_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_key: str = "",
|
||||
):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
@@ -30,9 +30,6 @@ class UnstructuredEpubExtractor(BaseExtractor):
|
||||
if self._api_url:
|
||||
from unstructured.partition.api import partition_via_api
|
||||
|
||||
if self._api_key is None:
|
||||
raise ValueError("api_key is required")
|
||||
|
||||
elements = partition_via_api(filename=self._file_path, api_url=self._api_url, api_key=self._api_key)
|
||||
else:
|
||||
from unstructured.partition.epub import partition_epub
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
@@ -24,7 +25,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor):
|
||||
if the specified encoding fails.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
||||
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
@@ -14,7 +15,7 @@ class UnstructuredMsgExtractor(BaseExtractor):
|
||||
file_path: Path to the file to load.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
||||
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
@@ -14,7 +15,7 @@ class UnstructuredPPTExtractor(BaseExtractor):
|
||||
file_path: Path to the file to load.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
||||
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
@@ -14,7 +15,7 @@ class UnstructuredPPTXExtractor(BaseExtractor):
|
||||
file_path: Path to the file to load.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
||||
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.models.document import Document
|
||||
@@ -14,7 +15,7 @@ class UnstructuredXmlExtractor(BaseExtractor):
|
||||
file_path: Path to the file to load.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, api_url: str, api_key: str):
|
||||
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
|
||||
Reference in New Issue
Block a user