Remove langchain dataset retrieval agent logic (#3311)

2025-12-15 22:06:52 +08:00 · 2024-04-10 20:37:22 +08:00
parent 8cefa6b82e
commit b6de97ad53
14 changed files with 341 additions and 966 deletions
--- a/api/core/rag/retrieval/router/multi_dataset_function_call_router.py
+++ b/api/core/rag/retrieval/router/multi_dataset_function_call_router.py
@@ -0,0 +1,47 @@
+from typing import Union
+
+from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
+from core.model_manager import ModelInstance
+from core.model_runtime.entities.message_entities import PromptMessageTool, SystemPromptMessage, UserPromptMessage
+
+
+class FunctionCallMultiDatasetRouter:
+    """Pick one dataset tool for a query through the model's function calling."""
+
+    def invoke(
+            self,
+            query: str,
+            dataset_tools: list[PromptMessageTool],
+            model_config: ModelConfigWithCredentialsEntity,
+            model_instance: ModelInstance,
+    ) -> Union[str, None]:
+        """Return the name of the dataset tool that should handle ``query``.
+
+        :param query: user query, sent to the model as the user message
+        :param dataset_tools: candidate tools; none -> None, one -> its name
+        :param model_config: not used by this router
+        :param model_instance: model invoked (non-streaming) with the tools
+        :return: first tool call's name; None on no tool call or a (logged) error
+        """
+        if not dataset_tools:
+            return None
+        if len(dataset_tools) == 1:
+            return dataset_tools[0].name
+
+        try:
+            prompt_messages = [
+                SystemPromptMessage(content='You are a helpful AI assistant.'),
+                UserPromptMessage(content=query),
+            ]
+            result = model_instance.invoke_llm(
+                prompt_messages=prompt_messages,
+                tools=dataset_tools,
+                stream=False,
+                model_parameters={'temperature': 0.2, 'top_p': 0.3, 'max_tokens': 1500},
+            )
+            tool_calls = result.message.tool_calls
+            return tool_calls[0].function.name if tool_calls else None
+        except Exception:
+            import logging  # imported lazily; only needed on the failure path
+            logging.getLogger(__name__).exception('Dataset routing via function call failed')
+            return None
--- a/api/core/rag/retrieval/router/multi_dataset_react_route.py
+++ b/api/core/rag/retrieval/router/multi_dataset_react_route.py
@@ -0,0 +1,254 @@
+from collections.abc import Generator, Sequence
+from typing import Optional, Union
+
+from langchain import PromptTemplate
+from langchain.agents.structured_chat.base import HUMAN_MESSAGE_TEMPLATE
+from langchain.agents.structured_chat.prompt import PREFIX, SUFFIX
+from langchain.schema import AgentAction
+
+from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
+from core.model_manager import ModelInstance
+from core.model_runtime.entities.llm_entities import LLMUsage
+from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool
+from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
+from core.prompt.entities.advanced_prompt_entities import ChatModelMessage
+from core.rag.retrieval.output_parser.structured_chat import StructuredChatOutputParser
+from core.workflow.nodes.llm.llm_node import LLMNode
+
+# ReAct format instructions; filled via str.format(tool_names=...), so literal braces are doubled.
+FORMAT_INSTRUCTIONS = """Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
+The nouns in the format of "Thought", "Action", "Action Input", "Final Answer" must be expressed in English.
+Valid "action" values: "Final Answer" or {tool_names}
+
+Provide only ONE action per $JSON_BLOB, as shown:
+
+```
+{{
+  "action": $TOOL_NAME,
+  "action_input": $INPUT
+}}
+```
+
+Follow this format:
+
+Question: input question to answer
+Thought: consider previous and subsequent steps
+Action:
+```
+$JSON_BLOB
+```
+Observation: action result
+... (repeat Thought/Action/Observation N times)
+Thought: I know what to respond
+Action:
+```
+{{
+  "action": "Final Answer",
+  "action_input": "Final response to human"
+}}
+```"""
+
+class ReactMultiDatasetRouter:
+    """Pick one dataset tool for a query with a ReAct-style structured prompt."""
+
+    def invoke(
+            self,
+            query: str,
+            dataset_tools: list[PromptMessageTool],
+            model_config: ModelConfigWithCredentialsEntity,
+            model_instance: ModelInstance,
+            user_id: str,
+            tenant_id: str,
+    ) -> Union[str, None]:
+        """Return the name of the dataset tool that should handle ``query``.
+
+        :param query: user query to route
+        :param dataset_tools: candidate tools; none -> None, one -> its name
+        :param model_config: model configuration; its ``mode`` selects the prompt style
+        :param model_instance: model that makes the routing decision
+        :param user_id: forwarded to the model invocation as ``user``
+        :param tenant_id: tenant whose LLM quota is deducted
+        :return: chosen tool name; None if no tool is chosen or on a (logged) error
+        """
+        if not dataset_tools:
+            return None
+        if len(dataset_tools) == 1:
+            return dataset_tools[0].name
+
+        try:
+            return self._react_invoke(query=query, model_config=model_config,
+                                      model_instance=model_instance,
+                                      tools=dataset_tools, user_id=user_id, tenant_id=tenant_id)
+        except Exception:
+            import logging  # imported lazily; only needed on the failure path
+            logging.getLogger(__name__).exception('Dataset routing via ReAct prompt failed')
+            return None
+
+    def _react_invoke(
+            self,
+            query: str,
+            model_config: ModelConfigWithCredentialsEntity,
+            model_instance: ModelInstance,
+            tools: Sequence[PromptMessageTool],
+            user_id: str,
+            tenant_id: str,
+            prefix: str = PREFIX,
+            suffix: str = SUFFIX,
+            human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
+            format_instructions: str = FORMAT_INSTRUCTIONS,
+    ) -> Union[str, None]:
+        """Build the prompt, invoke the model and parse its decision into a tool name."""
+        # NOTE(review): the non-chat branch never puts ``query`` into the prompt
+        # (get_prompt is called with query='') - confirm this is intended.
+        if model_config.mode == "chat":
+            prompt = self.create_chat_prompt(
+                query=query,
+                tools=tools,
+                prefix=prefix,
+                suffix=suffix,
+                human_message_template=human_message_template,
+                format_instructions=format_instructions,
+            )
+        else:
+            prompt = self.create_completion_prompt(
+                tools=tools,
+                prefix=prefix,
+                format_instructions=format_instructions,
+                input_variables=None
+            )
+        stop = ['Observation:']
+        prompt_messages = AdvancedPromptTransform().get_prompt(
+            prompt_template=prompt,
+            inputs={},
+            query='',
+            files=[],
+            context='',
+            memory_config=None,
+            memory=None,
+            model_config=model_config
+        )
+        result_text, _ = self._invoke_llm(
+            completion_param=model_config.parameters,
+            model_instance=model_instance,
+            prompt_messages=prompt_messages,
+            stop=stop,
+            user_id=user_id,
+            tenant_id=tenant_id
+        )
+        agent_decision = StructuredChatOutputParser().parse(result_text)
+        # Only an AgentAction names a tool; any other parse result selects nothing.
+        if isinstance(agent_decision, AgentAction):
+            return agent_decision.tool
+        return None
+
+    def _invoke_llm(self, completion_param: dict,
+                    model_instance: ModelInstance,
+                    prompt_messages: list[PromptMessage],
+                    stop: list[str], user_id: str, tenant_id: str
+                    ) -> tuple[str, LLMUsage]:
+        """
+        Invoke the large language model (streaming) and deduct the quota used
+        :param completion_param: model parameters for the invocation
+        :param model_instance: model instance
+        :param prompt_messages: prompt messages
+        :param stop: stop
+        :param user_id: passed to the invocation as ``user``
+        :param tenant_id: tenant the quota is deducted from
+        :return: generated text and its usage
+        """
+        invoke_result = model_instance.invoke_llm(
+            prompt_messages=prompt_messages,
+            model_parameters=completion_param,
+            stop=stop,
+            stream=True,
+            user=user_id,
+        )
+
+        text, usage = self._handle_invoke_result(invoke_result=invoke_result)
+
+        # deduct quota
+        LLMNode.deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage)
+
+        return text, usage
+
+    def _handle_invoke_result(self, invoke_result: Generator) -> tuple[str, LLMUsage]:
+        """
+        Collect a streamed invoke result; chunks without text content are skipped
+        :param invoke_result: generator of result chunks
+        :return: concatenated chunk text, and the first usage reported (or an empty usage)
+        """
+        chunks = []
+        usage = None
+        for result in invoke_result:
+            chunks.append(result.delta.message.content or '')
+            if not usage and result.delta.usage:
+                usage = result.delta.usage
+
+        return ''.join(chunks), usage or LLMUsage.empty_usage()
+
+    def create_chat_prompt(
+            self,
+            query: str,
+            tools: Sequence[PromptMessageTool],
+            prefix: str = PREFIX,
+            suffix: str = SUFFIX,
+            human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
+            format_instructions: str = FORMAT_INSTRUCTIONS,
+    ) -> list[ChatModelMessage]:
+        """Build the chat prompt: a system message describing the tools, then the query.
+
+        :param query: user query, used as the text of the user message
+        :param tools: tools listed in the system message (name, description, ``query`` arg)
+        :param prefix: text placed before the tool list
+        :param suffix: text placed after the format instructions
+        :param human_message_template: not used by this method
+        :param format_instructions: template with a ``{tool_names}`` placeholder
+        :return: the system message followed by the user message
+        """
+        tool_strings = [
+            f"{tool.name}: {tool.description}, args: {{'query': {{'title': 'Query', 'description': 'Query for the dataset to be used to retrieve the dataset.', 'type': 'string'}}}}"
+            for tool in tools
+        ]
+        formatted_tools = "\n".join(tool_strings)
+        # dict.fromkeys de-duplicates but, unlike set(), keeps a stable tool order.
+        unique_tool_names = dict.fromkeys(tool.name for tool in tools)
+        tool_names = ", ".join('"' + name + '"' for name in unique_tool_names)
+        format_instructions = format_instructions.format(tool_names=tool_names)
+        template = "\n\n".join([prefix, formatted_tools, format_instructions, suffix])
+        return [
+            ChatModelMessage(role=PromptMessageRole.SYSTEM, text=template),
+            ChatModelMessage(role=PromptMessageRole.USER, text=query),
+        ]
+
+    def create_completion_prompt(
+            self,
+            tools: Sequence[PromptMessageTool],
+            prefix: str = PREFIX,
+            format_instructions: str = FORMAT_INSTRUCTIONS,
+            input_variables: Optional[list[str]] = None,
+    ) -> PromptTemplate:
+        """Create prompt in the style of the zero shot agent.
+
+        Args:
+            tools: List of tools the agent will have access to, used to format the
+                prompt.
+            prefix: String to put before the list of tools.
+            format_instructions: Template with a ``{tool_names}`` placeholder.
+            input_variables: List of input variables the final prompt will expect;
+                defaults to ``["input", "agent_scratchpad"]``.
+
+        Returns:
+            A PromptTemplate with the template assembled from the pieces here.
+        """
+        suffix = """Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
+Question: {input}
+Thought: {agent_scratchpad}
+"""
+
+        tool_strings = "\n".join([f"{tool.name}: {tool.description}" for tool in tools])
+        tool_names = ", ".join([tool.name for tool in tools])
+        format_instructions = format_instructions.format(tool_names=tool_names)
+        template = "\n\n".join([prefix, tool_strings, format_instructions, suffix])
+        if input_variables is None:
+            input_variables = ["input", "agent_scratchpad"]
+        return PromptTemplate(template=template, input_variables=input_variables)