feat: support single run doc extractor node (#11318)

This commit is contained in:
非法操作
2025-01-08 15:20:15 +08:00
committed by GitHub
parent 0a49d3dd52
commit d649037c3e
5 changed files with 103 additions and 3 deletions

View File

@@ -5,7 +5,8 @@ import logging
import operator
import os
import tempfile
from typing import cast
from collections.abc import Mapping, Sequence
from typing import Any, cast
import docx
import pandas as pd
@@ -81,6 +82,23 @@ class DocumentExtractorNode(BaseNode[DocumentExtractorNodeData]):
process_data=process_data,
)
@classmethod
def _extract_variable_selector_to_variable_mapping(
    cls,
    *,
    graph_config: Mapping[str, Any],
    node_id: str,
    node_data: DocumentExtractorNodeData,
) -> Mapping[str, Sequence[str]]:
    """
    Build the variable-selector mapping for this node.

    :param graph_config: graph config (not read by this implementation)
    :param node_id: node id; used as the prefix of the mapping key
    :param node_data: node data; its ``variable_selector`` becomes the value
    :return: a single-entry mapping of ``"<node_id>.files"`` to
        ``node_data.variable_selector``
    """
    # The key is the node id with a fixed ".files" suffix appended.
    files_key = node_id + ".files"
    selector = node_data.variable_selector
    return {files_key: selector}
def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
"""Extract text from a file based on its MIME type."""