feat: support legacy doc (#2100)

This commit is contained in:
crazywoola
2024-01-20 22:21:51 +08:00
committed by GitHub
parent 0113627d7b
commit 1f48e3d44a
8 changed files with 17 additions and 46 deletions

View File

@@ -1,9 +1,7 @@
import logging
import re
from typing import List, Optional, Tuple, cast
from typing import List
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.helpers import detect_file_encodings
from langchain.schema import Document
logger = logging.getLogger(__name__)

View File

@@ -1,14 +1,10 @@
import logging
import re
from typing import List, Optional, Tuple, cast
from typing import List
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.helpers import detect_file_encodings
from langchain.schema import Document
logger = logging.getLogger(__name__)
class UnstructuredPPTLoader(BaseLoader):
"""Load ppt files.

View File

@@ -1,14 +1,10 @@
import logging
import re
from typing import List, Optional, Tuple, cast
from typing import List
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.helpers import detect_file_encodings
from langchain.schema import Document
logger = logging.getLogger(__name__)
class UnstructuredPPTXLoader(BaseLoader):
"""Load pptx files.

View File

@@ -1,9 +1,7 @@
import logging
import re
from typing import List, Optional, Tuple, cast
from typing import List
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.helpers import detect_file_encodings
from langchain.schema import Document
logger = logging.getLogger(__name__)

View File

@@ -1,9 +1,7 @@
import logging
import re
from typing import List, Optional, Tuple, cast
from typing import List
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.helpers import detect_file_encodings
from langchain.schema import Document
logger = logging.getLogger(__name__)