chore: remove Langchain tools import (#3407)

2025-12-09 19:06:51 +08:00 · 2024-04-12 16:26:09 +08:00
parent c227f3d985
commit 0737e930cb
9 changed files with 98 additions and 73 deletions
--- a/api/core/rag/extractor/blod/blod.py
+++ b/api/core/rag/extractor/blod/blod.py
@@ -159,7 +159,7 @@ class BlobLoader(ABC):
    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
-        """A lazy loader for raw data represented by LangChain's Blob object.
+        """A lazy loader for raw data represented by Blob object.

        Returns:
            A generator over blobs
--- a/api/core/rag/retrieval/dataset_retrieval.py
+++ b/api/core/rag/retrieval/dataset_retrieval.py
@@ -2,7 +2,6 @@ import threading
 from typing import Optional, cast

 from flask import Flask, current_app
-from langchain.tools import BaseTool

 from core.app.app_config.entities import DatasetEntity, DatasetRetrieveConfigEntity
 from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity
@@ -19,6 +18,7 @@ from core.rag.retrieval.router.multi_dataset_function_call_router import Functio
 from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
 from core.rerank.rerank import RerankRunner
 from core.tools.tool.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool
+from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
 from core.tools.tool.dataset_retriever.dataset_retriever_tool import DatasetRetrieverTool
 from extensions.ext_database import db
 from models.dataset import Dataset, DatasetQuery, DocumentSegment
@@ -383,7 +383,7 @@ class DatasetRetrieval:
                                  return_resource: bool,
                                  invoke_from: InvokeFrom,
                                  hit_callback: DatasetIndexToolCallbackHandler) \
-            -> Optional[list[BaseTool]]:
+            -> Optional[list[DatasetRetrieverBaseTool]]:
        """
        A dataset tool is a tool that can be used to retrieve information from a dataset
        :param tenant_id: tenant id
--- a/api/core/rag/retrieval/output_parser/react_output.py
+++ b/api/core/rag/retrieval/output_parser/react_output.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import NamedTuple, Union
+
+
+@dataclass
+class ReactAction:
+    """A full description of an action for an ReactAction to execute."""
+
+    tool: str
+    """The name of the Tool to execute."""
+    tool_input: Union[str, dict]
+    """The input to pass in to the Tool."""
+    log: str
+    """Additional information to log about the action."""
+
+
+class ReactFinish(NamedTuple):
+    """The final return value of an ReactFinish."""
+
+    return_values: dict
+    """Dictionary of return values."""
+    log: str
+    """Additional information to log about the return value"""
--- a/api/core/rag/retrieval/output_parser/structured_chat.py
+++ b/api/core/rag/retrieval/output_parser/structured_chat.py
@@ -2,28 +2,24 @@ import json
 import re
 from typing import Union

-from langchain.agents.structured_chat.output_parser import StructuredChatOutputParser as LCStructuredChatOutputParser
-from langchain.agents.structured_chat.output_parser import logger
-from langchain.schema import AgentAction, AgentFinish, OutputParserException
+from core.rag.retrieval.output_parser.react_output import ReactAction, ReactFinish


-class StructuredChatOutputParser(LCStructuredChatOutputParser):
-    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
+class StructuredChatOutputParser:
+    def parse(self, text: str) -> Union[ReactAction, ReactFinish]:
        try:
            action_match = re.search(r"```(\w*)\n?({.*?)```", text, re.DOTALL)
            if action_match is not None:
                response = json.loads(action_match.group(2).strip(), strict=False)
                if isinstance(response, list):
-                    # gpt turbo frequently ignores the directive to emit a single action
-                    logger.warning("Got multiple action responses: %s", response)
                    response = response[0]
                if response["action"] == "Final Answer":
-                    return AgentFinish({"output": response["action_input"]}, text)
+                    return ReactFinish({"output": response["action_input"]}, text)
                else:
-                    return AgentAction(
+                    return ReactAction(
                        response["action"], response.get("action_input", {}), text
                    )
            else:
-                return AgentFinish({"output": text}, text)
+                return ReactFinish({"output": text}, text)
        except Exception as e:
-            raise OutputParserException(f"Could not parse LLM output: {text}")
+            raise ValueError(f"Could not parse LLM output: {text}")
--- a/api/core/rag/retrieval/router/multi_dataset_react_route.py
+++ b/api/core/rag/retrieval/router/multi_dataset_react_route.py
@@ -1,20 +1,21 @@
 from collections.abc import Generator, Sequence
-from typing import Optional, Union
-
-from langchain import PromptTemplate
-from langchain.agents.structured_chat.base import HUMAN_MESSAGE_TEMPLATE
-from langchain.agents.structured_chat.prompt import PREFIX, SUFFIX
-from langchain.schema import AgentAction
+from typing import Union

 from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool
 from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
-from core.prompt.entities.advanced_prompt_entities import ChatModelMessage
+from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate
+from core.rag.retrieval.output_parser.react_output import ReactAction
 from core.rag.retrieval.output_parser.structured_chat import StructuredChatOutputParser
 from core.workflow.nodes.llm.llm_node import LLMNode

+PREFIX = """Respond to the human as helpfully and accurately as possible. You have access to the following tools:"""
+
+SUFFIX = """Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
+Thought:"""
+
 FORMAT_INSTRUCTIONS = """Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
 The nouns in the format of "Thought", "Action", "Action Input", "Final Answer" must be expressed in English.
 Valid "action" values: "Final Answer" or {tool_names}
@@ -86,7 +87,6 @@ class ReactMultiDatasetRouter:
            tenant_id: str,
            prefix: str = PREFIX,
            suffix: str = SUFFIX,
-            human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
            format_instructions: str = FORMAT_INSTRUCTIONS,
    ) -> Union[str, None]:
        if model_config.mode == "chat":
@@ -95,7 +95,6 @@ class ReactMultiDatasetRouter:
                tools=tools,
                prefix=prefix,
                suffix=suffix,
-                human_message_template=human_message_template,
                format_instructions=format_instructions,
            )
        else:
@@ -103,7 +102,6 @@ class ReactMultiDatasetRouter:
                tools=tools,
                prefix=prefix,
                format_instructions=format_instructions,
-                input_variables=None
            )
        stop = ['Observation:']
        # handle invoke result
@@ -127,9 +125,9 @@ class ReactMultiDatasetRouter:
            tenant_id=tenant_id
        )
        output_parser = StructuredChatOutputParser()
-        agent_decision = output_parser.parse(result_text)
-        if isinstance(agent_decision, AgentAction):
-            return agent_decision.tool
+        react_decision = output_parser.parse(result_text)
+        if isinstance(react_decision, ReactAction):
+            return react_decision.tool
        return None

    def _invoke_llm(self, completion_param: dict,
@@ -139,7 +137,6 @@ class ReactMultiDatasetRouter:
                    ) -> tuple[str, LLMUsage]:
        """
            Invoke large language model
-            :param node_data: node data
            :param model_instance: model instance
            :param prompt_messages: prompt messages
            :param stop: stop
@@ -197,7 +194,6 @@ class ReactMultiDatasetRouter:
            tools: Sequence[PromptMessageTool],
            prefix: str = PREFIX,
            suffix: str = SUFFIX,
-            human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
            format_instructions: str = FORMAT_INSTRUCTIONS,
    ) -> list[ChatModelMessage]:
        tool_strings = []
@@ -227,16 +223,13 @@ class ReactMultiDatasetRouter:
            tools: Sequence[PromptMessageTool],
            prefix: str = PREFIX,
            format_instructions: str = FORMAT_INSTRUCTIONS,
-            input_variables: Optional[list[str]] = None,
-    ) -> PromptTemplate:
+    ) -> CompletionModelPromptTemplate:
        """Create prompt in the style of the zero shot agent.

        Args:
            tools: List of tools the agent will have access to, used to format the
                prompt.
            prefix: String to put before the list of tools.
-            input_variables: List of input variables the final prompt will expect.
-
        Returns:
            A PromptTemplate with the template assembled from the pieces here.
        """
@@ -249,6 +242,4 @@ Thought: {agent_scratchpad}
        tool_names = ", ".join([tool.name for tool in tools])
        format_instructions = format_instructions.format(tool_names=tool_names)
        template = "\n\n".join([prefix, tool_strings, format_instructions, suffix])
-        if input_variables is None:
-            input_variables = ["input", "agent_scratchpad"]
-        return PromptTemplate(template=template, input_variables=input_variables)
+        return CompletionModelPromptTemplate(text=template)