mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-11 11:56:53 +08:00
chore(api/core): apply ruff reformatting (#7624)
This commit is contained in:
@@ -12,8 +12,8 @@ from core.tools.errors import ToolInvokeError, ToolParameterValidationError, Too
|
||||
from core.tools.tool.tool import Tool
|
||||
|
||||
API_TOOL_DEFAULT_TIMEOUT = (
|
||||
int(getenv('API_TOOL_DEFAULT_CONNECT_TIMEOUT', '10')),
|
||||
int(getenv('API_TOOL_DEFAULT_READ_TIMEOUT', '60'))
|
||||
int(getenv("API_TOOL_DEFAULT_CONNECT_TIMEOUT", "10")),
|
||||
int(getenv("API_TOOL_DEFAULT_READ_TIMEOUT", "60")),
|
||||
)
|
||||
|
||||
|
||||
@@ -24,31 +24,32 @@ class ApiTool(Tool):
|
||||
Api tool
|
||||
"""
|
||||
|
||||
def fork_tool_runtime(self, runtime: dict[str, Any]) -> 'Tool':
|
||||
def fork_tool_runtime(self, runtime: dict[str, Any]) -> "Tool":
|
||||
"""
|
||||
fork a new tool with meta data
|
||||
fork a new tool with meta data
|
||||
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
"""
|
||||
return self.__class__(
|
||||
identity=self.identity.model_copy() if self.identity else None,
|
||||
parameters=self.parameters.copy() if self.parameters else None,
|
||||
description=self.description.model_copy() if self.description else None,
|
||||
api_bundle=self.api_bundle.model_copy() if self.api_bundle else None,
|
||||
runtime=Tool.Runtime(**runtime)
|
||||
runtime=Tool.Runtime(**runtime),
|
||||
)
|
||||
|
||||
def validate_credentials(self, credentials: dict[str, Any], parameters: dict[str, Any],
|
||||
format_only: bool = False) -> str:
|
||||
def validate_credentials(
|
||||
self, credentials: dict[str, Any], parameters: dict[str, Any], format_only: bool = False
|
||||
) -> str:
|
||||
"""
|
||||
validate the credentials for Api tool
|
||||
validate the credentials for Api tool
|
||||
"""
|
||||
# assemble validate request and request parameters
|
||||
# assemble validate request and request parameters
|
||||
headers = self.assembling_request(parameters)
|
||||
|
||||
if format_only:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, parameters)
|
||||
# validate response
|
||||
@@ -61,30 +62,30 @@ class ApiTool(Tool):
|
||||
headers = {}
|
||||
credentials = self.runtime.credentials or {}
|
||||
|
||||
if 'auth_type' not in credentials:
|
||||
raise ToolProviderCredentialValidationError('Missing auth_type')
|
||||
if "auth_type" not in credentials:
|
||||
raise ToolProviderCredentialValidationError("Missing auth_type")
|
||||
|
||||
if credentials['auth_type'] == 'api_key':
|
||||
api_key_header = 'api_key'
|
||||
if credentials["auth_type"] == "api_key":
|
||||
api_key_header = "api_key"
|
||||
|
||||
if 'api_key_header' in credentials:
|
||||
api_key_header = credentials['api_key_header']
|
||||
if "api_key_header" in credentials:
|
||||
api_key_header = credentials["api_key_header"]
|
||||
|
||||
if 'api_key_value' not in credentials:
|
||||
raise ToolProviderCredentialValidationError('Missing api_key_value')
|
||||
elif not isinstance(credentials['api_key_value'], str):
|
||||
raise ToolProviderCredentialValidationError('api_key_value must be a string')
|
||||
if "api_key_value" not in credentials:
|
||||
raise ToolProviderCredentialValidationError("Missing api_key_value")
|
||||
elif not isinstance(credentials["api_key_value"], str):
|
||||
raise ToolProviderCredentialValidationError("api_key_value must be a string")
|
||||
|
||||
if 'api_key_header_prefix' in credentials:
|
||||
api_key_header_prefix = credentials['api_key_header_prefix']
|
||||
if api_key_header_prefix == 'basic' and credentials['api_key_value']:
|
||||
credentials['api_key_value'] = f'Basic {credentials["api_key_value"]}'
|
||||
elif api_key_header_prefix == 'bearer' and credentials['api_key_value']:
|
||||
credentials['api_key_value'] = f'Bearer {credentials["api_key_value"]}'
|
||||
elif api_key_header_prefix == 'custom':
|
||||
if "api_key_header_prefix" in credentials:
|
||||
api_key_header_prefix = credentials["api_key_header_prefix"]
|
||||
if api_key_header_prefix == "basic" and credentials["api_key_value"]:
|
||||
credentials["api_key_value"] = f'Basic {credentials["api_key_value"]}'
|
||||
elif api_key_header_prefix == "bearer" and credentials["api_key_value"]:
|
||||
credentials["api_key_value"] = f'Bearer {credentials["api_key_value"]}'
|
||||
elif api_key_header_prefix == "custom":
|
||||
pass
|
||||
|
||||
headers[api_key_header] = credentials['api_key_value']
|
||||
headers[api_key_header] = credentials["api_key_value"]
|
||||
|
||||
needed_parameters = [parameter for parameter in self.api_bundle.parameters if parameter.required]
|
||||
for parameter in needed_parameters:
|
||||
@@ -98,13 +99,13 @@ class ApiTool(Tool):
|
||||
|
||||
def validate_and_parse_response(self, response: httpx.Response) -> str:
|
||||
"""
|
||||
validate the response
|
||||
validate the response
|
||||
"""
|
||||
if isinstance(response, httpx.Response):
|
||||
if response.status_code >= 400:
|
||||
raise ToolInvokeError(f"Request failed with status code {response.status_code} and {response.text}")
|
||||
if not response.content:
|
||||
return 'Empty response from the tool, please check your parameters and try again.'
|
||||
return "Empty response from the tool, please check your parameters and try again."
|
||||
try:
|
||||
response = response.json()
|
||||
try:
|
||||
@@ -114,21 +115,22 @@ class ApiTool(Tool):
|
||||
except Exception as e:
|
||||
return response.text
|
||||
else:
|
||||
raise ValueError(f'Invalid response type {type(response)}')
|
||||
raise ValueError(f"Invalid response type {type(response)}")
|
||||
|
||||
@staticmethod
|
||||
def get_parameter_value(parameter, parameters):
|
||||
if parameter['name'] in parameters:
|
||||
return parameters[parameter['name']]
|
||||
elif parameter.get('required', False):
|
||||
if parameter["name"] in parameters:
|
||||
return parameters[parameter["name"]]
|
||||
elif parameter.get("required", False):
|
||||
raise ToolParameterValidationError(f"Missing required parameter {parameter['name']}")
|
||||
else:
|
||||
return (parameter.get('schema', {}) or {}).get('default', '')
|
||||
return (parameter.get("schema", {}) or {}).get("default", "")
|
||||
|
||||
def do_http_request(self, url: str, method: str, headers: dict[str, Any],
|
||||
parameters: dict[str, Any]) -> httpx.Response:
|
||||
def do_http_request(
|
||||
self, url: str, method: str, headers: dict[str, Any], parameters: dict[str, Any]
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
do http request depending on api bundle
|
||||
do http request depending on api bundle
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
@@ -138,29 +140,30 @@ class ApiTool(Tool):
|
||||
cookies = {}
|
||||
|
||||
# check parameters
|
||||
for parameter in self.api_bundle.openapi.get('parameters', []):
|
||||
for parameter in self.api_bundle.openapi.get("parameters", []):
|
||||
value = self.get_parameter_value(parameter, parameters)
|
||||
if parameter['in'] == 'path':
|
||||
path_params[parameter['name']] = value
|
||||
if parameter["in"] == "path":
|
||||
path_params[parameter["name"]] = value
|
||||
|
||||
elif parameter['in'] == 'query':
|
||||
if value !='': params[parameter['name']] = value
|
||||
elif parameter["in"] == "query":
|
||||
if value != "":
|
||||
params[parameter["name"]] = value
|
||||
|
||||
elif parameter['in'] == 'cookie':
|
||||
cookies[parameter['name']] = value
|
||||
elif parameter["in"] == "cookie":
|
||||
cookies[parameter["name"]] = value
|
||||
|
||||
elif parameter['in'] == 'header':
|
||||
headers[parameter['name']] = value
|
||||
elif parameter["in"] == "header":
|
||||
headers[parameter["name"]] = value
|
||||
|
||||
# check if there is a request body and handle it
|
||||
if 'requestBody' in self.api_bundle.openapi and self.api_bundle.openapi['requestBody'] is not None:
|
||||
if "requestBody" in self.api_bundle.openapi and self.api_bundle.openapi["requestBody"] is not None:
|
||||
# handle json request body
|
||||
if 'content' in self.api_bundle.openapi['requestBody']:
|
||||
for content_type in self.api_bundle.openapi['requestBody']['content']:
|
||||
headers['Content-Type'] = content_type
|
||||
body_schema = self.api_bundle.openapi['requestBody']['content'][content_type]['schema']
|
||||
required = body_schema.get('required', [])
|
||||
properties = body_schema.get('properties', {})
|
||||
if "content" in self.api_bundle.openapi["requestBody"]:
|
||||
for content_type in self.api_bundle.openapi["requestBody"]["content"]:
|
||||
headers["Content-Type"] = content_type
|
||||
body_schema = self.api_bundle.openapi["requestBody"]["content"][content_type]["schema"]
|
||||
required = body_schema.get("required", [])
|
||||
properties = body_schema.get("properties", {})
|
||||
for name, property in properties.items():
|
||||
if name in parameters:
|
||||
# convert type
|
||||
@@ -169,63 +172,71 @@ class ApiTool(Tool):
|
||||
raise ToolParameterValidationError(
|
||||
f"Missing required parameter {name} in operation {self.api_bundle.operation_id}"
|
||||
)
|
||||
elif 'default' in property:
|
||||
body[name] = property['default']
|
||||
elif "default" in property:
|
||||
body[name] = property["default"]
|
||||
else:
|
||||
body[name] = None
|
||||
break
|
||||
|
||||
# replace path parameters
|
||||
for name, value in path_params.items():
|
||||
url = url.replace(f'{{{name}}}', f'{value}')
|
||||
url = url.replace(f"{{{name}}}", f"{value}")
|
||||
|
||||
# parse http body data if needed, for GET/HEAD/OPTIONS/TRACE, the body is ignored
|
||||
if 'Content-Type' in headers:
|
||||
if headers['Content-Type'] == 'application/json':
|
||||
if "Content-Type" in headers:
|
||||
if headers["Content-Type"] == "application/json":
|
||||
body = json.dumps(body)
|
||||
elif headers['Content-Type'] == 'application/x-www-form-urlencoded':
|
||||
elif headers["Content-Type"] == "application/x-www-form-urlencoded":
|
||||
body = urlencode(body)
|
||||
else:
|
||||
body = body
|
||||
|
||||
if method in ('get', 'head', 'post', 'put', 'delete', 'patch'):
|
||||
response = getattr(ssrf_proxy, method)(url, params=params, headers=headers, cookies=cookies, data=body,
|
||||
timeout=API_TOOL_DEFAULT_TIMEOUT, follow_redirects=True)
|
||||
if method in ("get", "head", "post", "put", "delete", "patch"):
|
||||
response = getattr(ssrf_proxy, method)(
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
cookies=cookies,
|
||||
data=body,
|
||||
timeout=API_TOOL_DEFAULT_TIMEOUT,
|
||||
follow_redirects=True,
|
||||
)
|
||||
return response
|
||||
else:
|
||||
raise ValueError(f'Invalid http method {self.method}')
|
||||
raise ValueError(f"Invalid http method {self.method}")
|
||||
|
||||
def _convert_body_property_any_of(self, property: dict[str, Any], value: Any, any_of: list[dict[str, Any]],
|
||||
max_recursive=10) -> Any:
|
||||
def _convert_body_property_any_of(
|
||||
self, property: dict[str, Any], value: Any, any_of: list[dict[str, Any]], max_recursive=10
|
||||
) -> Any:
|
||||
if max_recursive <= 0:
|
||||
raise Exception("Max recursion depth reached")
|
||||
for option in any_of or []:
|
||||
try:
|
||||
if 'type' in option:
|
||||
if "type" in option:
|
||||
# Attempt to convert the value based on the type.
|
||||
if option['type'] == 'integer' or option['type'] == 'int':
|
||||
if option["type"] == "integer" or option["type"] == "int":
|
||||
return int(value)
|
||||
elif option['type'] == 'number':
|
||||
if '.' in str(value):
|
||||
elif option["type"] == "number":
|
||||
if "." in str(value):
|
||||
return float(value)
|
||||
else:
|
||||
return int(value)
|
||||
elif option['type'] == 'string':
|
||||
elif option["type"] == "string":
|
||||
return str(value)
|
||||
elif option['type'] == 'boolean':
|
||||
if str(value).lower() in ['true', '1']:
|
||||
elif option["type"] == "boolean":
|
||||
if str(value).lower() in ["true", "1"]:
|
||||
return True
|
||||
elif str(value).lower() in ['false', '0']:
|
||||
elif str(value).lower() in ["false", "0"]:
|
||||
return False
|
||||
else:
|
||||
continue # Not a boolean, try next option
|
||||
elif option['type'] == 'null' and not value:
|
||||
elif option["type"] == "null" and not value:
|
||||
return None
|
||||
else:
|
||||
continue # Unsupported type, try next option
|
||||
elif 'anyOf' in option and isinstance(option['anyOf'], list):
|
||||
elif "anyOf" in option and isinstance(option["anyOf"], list):
|
||||
# Recursive call to handle nested anyOf
|
||||
return self._convert_body_property_any_of(property, value, option['anyOf'], max_recursive - 1)
|
||||
return self._convert_body_property_any_of(property, value, option["anyOf"], max_recursive - 1)
|
||||
except ValueError:
|
||||
continue # Conversion failed, try next option
|
||||
# If no option succeeded, you might want to return the value as is or raise an error
|
||||
@@ -233,23 +244,23 @@ class ApiTool(Tool):
|
||||
|
||||
def _convert_body_property_type(self, property: dict[str, Any], value: Any) -> Any:
|
||||
try:
|
||||
if 'type' in property:
|
||||
if property['type'] == 'integer' or property['type'] == 'int':
|
||||
if "type" in property:
|
||||
if property["type"] == "integer" or property["type"] == "int":
|
||||
return int(value)
|
||||
elif property['type'] == 'number':
|
||||
elif property["type"] == "number":
|
||||
# check if it is a float
|
||||
if '.' in str(value):
|
||||
if "." in str(value):
|
||||
return float(value)
|
||||
else:
|
||||
return int(value)
|
||||
elif property['type'] == 'string':
|
||||
elif property["type"] == "string":
|
||||
return str(value)
|
||||
elif property['type'] == 'boolean':
|
||||
elif property["type"] == "boolean":
|
||||
return bool(value)
|
||||
elif property['type'] == 'null':
|
||||
elif property["type"] == "null":
|
||||
if value is None:
|
||||
return None
|
||||
elif property['type'] == 'object' or property['type'] == 'array':
|
||||
elif property["type"] == "object" or property["type"] == "array":
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
# an array str like '[1,2]' also can convert to list [1,2] through json.loads
|
||||
@@ -264,8 +275,8 @@ class ApiTool(Tool):
|
||||
return value
|
||||
else:
|
||||
raise ValueError(f"Invalid type {property['type']} for property {property}")
|
||||
elif 'anyOf' in property and isinstance(property['anyOf'], list):
|
||||
return self._convert_body_property_any_of(property, value, property['anyOf'])
|
||||
elif "anyOf" in property and isinstance(property["anyOf"], list):
|
||||
return self._convert_body_property_any_of(property, value, property["anyOf"])
|
||||
except ValueError as e:
|
||||
return value
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import PromptMessage, SystemPromptMessage, UserPromptMessage
|
||||
from core.tools.entities.tool_entities import ToolProviderType
|
||||
@@ -16,40 +15,38 @@ Please summarize the text you got.
|
||||
|
||||
class BuiltinTool(Tool):
|
||||
"""
|
||||
Builtin tool
|
||||
Builtin tool
|
||||
|
||||
:param meta: the meta data of a tool call processing
|
||||
:param meta: the meta data of a tool call processing
|
||||
"""
|
||||
|
||||
def invoke_model(
|
||||
self, user_id: str, prompt_messages: list[PromptMessage], stop: list[str]
|
||||
) -> LLMResult:
|
||||
def invoke_model(self, user_id: str, prompt_messages: list[PromptMessage], stop: list[str]) -> LLMResult:
|
||||
"""
|
||||
invoke model
|
||||
invoke model
|
||||
|
||||
:param model_config: the model config
|
||||
:param prompt_messages: the prompt messages
|
||||
:param stop: the stop words
|
||||
:return: the model result
|
||||
:param model_config: the model config
|
||||
:param prompt_messages: the prompt messages
|
||||
:param stop: the stop words
|
||||
:return: the model result
|
||||
"""
|
||||
# invoke model
|
||||
return ModelInvocationUtils.invoke(
|
||||
user_id=user_id,
|
||||
tenant_id=self.runtime.tenant_id,
|
||||
tool_type='builtin',
|
||||
tool_type="builtin",
|
||||
tool_name=self.identity.name,
|
||||
prompt_messages=prompt_messages,
|
||||
)
|
||||
|
||||
|
||||
def tool_provider_type(self) -> ToolProviderType:
|
||||
return ToolProviderType.BUILT_IN
|
||||
|
||||
|
||||
def get_max_tokens(self) -> int:
|
||||
"""
|
||||
get max tokens
|
||||
get max tokens
|
||||
|
||||
:param model_config: the model config
|
||||
:return: the max tokens
|
||||
:param model_config: the model config
|
||||
:return: the max tokens
|
||||
"""
|
||||
return ModelInvocationUtils.get_max_llm_context_tokens(
|
||||
tenant_id=self.runtime.tenant_id,
|
||||
@@ -57,39 +54,34 @@ class BuiltinTool(Tool):
|
||||
|
||||
def get_prompt_tokens(self, prompt_messages: list[PromptMessage]) -> int:
|
||||
"""
|
||||
get prompt tokens
|
||||
get prompt tokens
|
||||
|
||||
:param prompt_messages: the prompt messages
|
||||
:return: the tokens
|
||||
:param prompt_messages: the prompt messages
|
||||
:return: the tokens
|
||||
"""
|
||||
return ModelInvocationUtils.calculate_tokens(
|
||||
tenant_id=self.runtime.tenant_id,
|
||||
prompt_messages=prompt_messages
|
||||
)
|
||||
return ModelInvocationUtils.calculate_tokens(tenant_id=self.runtime.tenant_id, prompt_messages=prompt_messages)
|
||||
|
||||
def summary(self, user_id: str, content: str) -> str:
|
||||
max_tokens = self.get_max_tokens()
|
||||
|
||||
if self.get_prompt_tokens(prompt_messages=[
|
||||
UserPromptMessage(content=content)
|
||||
]) < max_tokens * 0.6:
|
||||
if self.get_prompt_tokens(prompt_messages=[UserPromptMessage(content=content)]) < max_tokens * 0.6:
|
||||
return content
|
||||
|
||||
|
||||
def get_prompt_tokens(content: str) -> int:
|
||||
return self.get_prompt_tokens(prompt_messages=[
|
||||
SystemPromptMessage(content=_SUMMARY_PROMPT),
|
||||
UserPromptMessage(content=content)
|
||||
])
|
||||
|
||||
return self.get_prompt_tokens(
|
||||
prompt_messages=[SystemPromptMessage(content=_SUMMARY_PROMPT), UserPromptMessage(content=content)]
|
||||
)
|
||||
|
||||
def summarize(content: str) -> str:
|
||||
summary = self.invoke_model(user_id=user_id, prompt_messages=[
|
||||
SystemPromptMessage(content=_SUMMARY_PROMPT),
|
||||
UserPromptMessage(content=content)
|
||||
], stop=[])
|
||||
summary = self.invoke_model(
|
||||
user_id=user_id,
|
||||
prompt_messages=[SystemPromptMessage(content=_SUMMARY_PROMPT), UserPromptMessage(content=content)],
|
||||
stop=[],
|
||||
)
|
||||
|
||||
return summary.message.content
|
||||
|
||||
lines = content.split('\n')
|
||||
lines = content.split("\n")
|
||||
new_lines = []
|
||||
# split long line into multiple lines
|
||||
for i in range(len(lines)):
|
||||
@@ -100,8 +92,8 @@ class BuiltinTool(Tool):
|
||||
new_lines.append(line)
|
||||
elif get_prompt_tokens(line) > max_tokens * 0.7:
|
||||
while get_prompt_tokens(line) > max_tokens * 0.7:
|
||||
new_lines.append(line[:int(max_tokens * 0.5)])
|
||||
line = line[int(max_tokens * 0.5):]
|
||||
new_lines.append(line[: int(max_tokens * 0.5)])
|
||||
line = line[int(max_tokens * 0.5) :]
|
||||
new_lines.append(line)
|
||||
else:
|
||||
new_lines.append(line)
|
||||
@@ -125,17 +117,15 @@ class BuiltinTool(Tool):
|
||||
summary = summarize(message)
|
||||
summaries.append(summary)
|
||||
|
||||
result = '\n'.join(summaries)
|
||||
result = "\n".join(summaries)
|
||||
|
||||
if self.get_prompt_tokens(prompt_messages=[
|
||||
UserPromptMessage(content=result)
|
||||
]) > max_tokens * 0.7:
|
||||
if self.get_prompt_tokens(prompt_messages=[UserPromptMessage(content=result)]) > max_tokens * 0.7:
|
||||
return self.summary(user_id=user_id, content=result)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_url(self, url: str, user_agent: str = None) -> str:
|
||||
"""
|
||||
get url
|
||||
get url
|
||||
"""
|
||||
return get_url(url, user_agent=user_agent)
|
||||
return get_url(url, user_agent=user_agent)
|
||||
|
||||
@@ -14,14 +14,11 @@ from extensions.ext_database import db
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
|
||||
default_retrieval_model = {
|
||||
'search_method': RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
'reranking_enable': False,
|
||||
'reranking_model': {
|
||||
'reranking_provider_name': '',
|
||||
'reranking_model_name': ''
|
||||
},
|
||||
'top_k': 2,
|
||||
'score_threshold_enabled': False
|
||||
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
"reranking_enable": False,
|
||||
"reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
|
||||
"top_k": 2,
|
||||
"score_threshold_enabled": False,
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +28,7 @@ class DatasetMultiRetrieverToolInput(BaseModel):
|
||||
|
||||
class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
|
||||
"""Tool for querying multi dataset."""
|
||||
|
||||
name: str = "dataset_"
|
||||
args_schema: type[BaseModel] = DatasetMultiRetrieverToolInput
|
||||
description: str = "dataset multi retriever and rerank. "
|
||||
@@ -38,27 +36,26 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
|
||||
reranking_provider_name: str
|
||||
reranking_model_name: str
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dataset(cls, dataset_ids: list[str], tenant_id: str, **kwargs):
|
||||
return cls(
|
||||
name=f"dataset_{tenant_id.replace('-', '_')}",
|
||||
tenant_id=tenant_id,
|
||||
dataset_ids=dataset_ids,
|
||||
**kwargs
|
||||
name=f"dataset_{tenant_id.replace('-', '_')}", tenant_id=tenant_id, dataset_ids=dataset_ids, **kwargs
|
||||
)
|
||||
|
||||
def _run(self, query: str) -> str:
|
||||
threads = []
|
||||
all_documents = []
|
||||
for dataset_id in self.dataset_ids:
|
||||
retrieval_thread = threading.Thread(target=self._retriever, kwargs={
|
||||
'flask_app': current_app._get_current_object(),
|
||||
'dataset_id': dataset_id,
|
||||
'query': query,
|
||||
'all_documents': all_documents,
|
||||
'hit_callbacks': self.hit_callbacks
|
||||
})
|
||||
retrieval_thread = threading.Thread(
|
||||
target=self._retriever,
|
||||
kwargs={
|
||||
"flask_app": current_app._get_current_object(),
|
||||
"dataset_id": dataset_id,
|
||||
"query": query,
|
||||
"all_documents": all_documents,
|
||||
"hit_callbacks": self.hit_callbacks,
|
||||
},
|
||||
)
|
||||
threads.append(retrieval_thread)
|
||||
retrieval_thread.start()
|
||||
for thread in threads:
|
||||
@@ -69,7 +66,7 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
|
||||
tenant_id=self.tenant_id,
|
||||
provider=self.reranking_provider_name,
|
||||
model_type=ModelType.RERANK,
|
||||
model=self.reranking_model_name
|
||||
model=self.reranking_model_name,
|
||||
)
|
||||
|
||||
rerank_runner = RerankModelRunner(rerank_model_instance)
|
||||
@@ -80,62 +77,61 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
|
||||
|
||||
document_score_list = {}
|
||||
for item in all_documents:
|
||||
if item.metadata.get('score'):
|
||||
document_score_list[item.metadata['doc_id']] = item.metadata['score']
|
||||
if item.metadata.get("score"):
|
||||
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
|
||||
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata['doc_id'] for document in all_documents]
|
||||
index_node_ids = [document.metadata["doc_id"] for document in all_documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id.in_(self.dataset_ids),
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == 'completed',
|
||||
DocumentSegment.status == "completed",
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids)
|
||||
DocumentSegment.index_node_id.in_(index_node_ids),
|
||||
).all()
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(segments,
|
||||
key=lambda segment: index_node_id_to_position.get(segment.index_node_id,
|
||||
float('inf')))
|
||||
sorted_segments = sorted(
|
||||
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
|
||||
)
|
||||
for segment in sorted_segments:
|
||||
if segment.answer:
|
||||
document_context_list.append(f'question:{segment.get_sign_content()} answer:{segment.answer}')
|
||||
document_context_list.append(f"question:{segment.get_sign_content()} answer:{segment.answer}")
|
||||
else:
|
||||
document_context_list.append(segment.get_sign_content())
|
||||
if self.return_resource:
|
||||
context_list = []
|
||||
resource_number = 1
|
||||
for segment in sorted_segments:
|
||||
dataset = Dataset.query.filter_by(
|
||||
id=segment.dataset_id
|
||||
dataset = Dataset.query.filter_by(id=segment.dataset_id).first()
|
||||
document = Document.query.filter(
|
||||
Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
document = Document.query.filter(Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
'position': resource_number,
|
||||
'dataset_id': dataset.id,
|
||||
'dataset_name': dataset.name,
|
||||
'document_id': document.id,
|
||||
'document_name': document.name,
|
||||
'data_source_type': document.data_source_type,
|
||||
'segment_id': segment.id,
|
||||
'retriever_from': self.retriever_from,
|
||||
'score': document_score_list.get(segment.index_node_id, None)
|
||||
"position": resource_number,
|
||||
"dataset_id": dataset.id,
|
||||
"dataset_name": dataset.name,
|
||||
"document_id": document.id,
|
||||
"document_name": document.name,
|
||||
"data_source_type": document.data_source_type,
|
||||
"segment_id": segment.id,
|
||||
"retriever_from": self.retriever_from,
|
||||
"score": document_score_list.get(segment.index_node_id, None),
|
||||
}
|
||||
|
||||
if self.retriever_from == 'dev':
|
||||
source['hit_count'] = segment.hit_count
|
||||
source['word_count'] = segment.word_count
|
||||
source['segment_position'] = segment.position
|
||||
source['index_node_hash'] = segment.index_node_hash
|
||||
if self.retriever_from == "dev":
|
||||
source["hit_count"] = segment.hit_count
|
||||
source["word_count"] = segment.word_count
|
||||
source["segment_position"] = segment.position
|
||||
source["index_node_hash"] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source['content'] = f'question:{segment.content} \nanswer:{segment.answer}'
|
||||
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
|
||||
else:
|
||||
source['content'] = segment.content
|
||||
source["content"] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
|
||||
@@ -144,13 +140,18 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
|
||||
|
||||
return str("\n".join(document_context_list))
|
||||
|
||||
def _retriever(self, flask_app: Flask, dataset_id: str, query: str, all_documents: list,
|
||||
hit_callbacks: list[DatasetIndexToolCallbackHandler]):
|
||||
def _retriever(
|
||||
self,
|
||||
flask_app: Flask,
|
||||
dataset_id: str,
|
||||
query: str,
|
||||
all_documents: list,
|
||||
hit_callbacks: list[DatasetIndexToolCallbackHandler],
|
||||
):
|
||||
with flask_app.app_context():
|
||||
dataset = db.session.query(Dataset).filter(
|
||||
Dataset.tenant_id == self.tenant_id,
|
||||
Dataset.id == dataset_id
|
||||
).first()
|
||||
dataset = (
|
||||
db.session.query(Dataset).filter(Dataset.tenant_id == self.tenant_id, Dataset.id == dataset_id).first()
|
||||
)
|
||||
|
||||
if not dataset:
|
||||
return []
|
||||
@@ -163,27 +164,29 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
|
||||
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
documents = RetrievalService.retrieve(retrieval_method='keyword_search',
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k
|
||||
)
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=self.top_k
|
||||
)
|
||||
if documents:
|
||||
all_documents.extend(documents)
|
||||
else:
|
||||
if self.top_k > 0:
|
||||
# retrieval source
|
||||
documents = RetrievalService.retrieve(retrieval_method=retrieval_model['search_method'],
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k,
|
||||
score_threshold=retrieval_model.get('score_threshold', .0)
|
||||
if retrieval_model['score_threshold_enabled'] else None,
|
||||
reranking_model=retrieval_model.get('reranking_model', None)
|
||||
if retrieval_model['reranking_enable'] else None,
|
||||
reranking_mode=retrieval_model.get('reranking_mode')
|
||||
if retrieval_model.get('reranking_mode') else 'reranking_model',
|
||||
weights=retrieval_model.get('weights', None),
|
||||
)
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_model["search_method"],
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k,
|
||||
score_threshold=retrieval_model.get("score_threshold", 0.0)
|
||||
if retrieval_model["score_threshold_enabled"]
|
||||
else None,
|
||||
reranking_model=retrieval_model.get("reranking_model", None)
|
||||
if retrieval_model["reranking_enable"]
|
||||
else None,
|
||||
reranking_mode=retrieval_model.get("reranking_mode")
|
||||
if retrieval_model.get("reranking_mode")
|
||||
else "reranking_model",
|
||||
weights=retrieval_model.get("weights", None),
|
||||
)
|
||||
|
||||
all_documents.extend(documents)
|
||||
all_documents.extend(documents)
|
||||
|
||||
@@ -9,6 +9,7 @@ from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCa
|
||||
|
||||
class DatasetRetrieverBaseTool(BaseModel, ABC):
|
||||
"""Tool for querying a Dataset."""
|
||||
|
||||
name: str = "dataset"
|
||||
description: str = "use this to retrieve a dataset. "
|
||||
tenant_id: str
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from core.rag.datasource.retrieval_service import RetrievalService
|
||||
@@ -8,15 +7,12 @@ from extensions.ext_database import db
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
|
||||
default_retrieval_model = {
|
||||
'search_method': RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
'reranking_enable': False,
|
||||
'reranking_model': {
|
||||
'reranking_provider_name': '',
|
||||
'reranking_model_name': ''
|
||||
},
|
||||
'reranking_mode': 'reranking_model',
|
||||
'top_k': 2,
|
||||
'score_threshold_enabled': False
|
||||
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
|
||||
"reranking_enable": False,
|
||||
"reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
|
||||
"reranking_mode": "reranking_model",
|
||||
"top_k": 2,
|
||||
"score_threshold_enabled": False,
|
||||
}
|
||||
|
||||
|
||||
@@ -26,35 +22,34 @@ class DatasetRetrieverToolInput(BaseModel):
|
||||
|
||||
class DatasetRetrieverTool(DatasetRetrieverBaseTool):
|
||||
"""Tool for querying a Dataset."""
|
||||
|
||||
name: str = "dataset"
|
||||
args_schema: type[BaseModel] = DatasetRetrieverToolInput
|
||||
description: str = "use this to retrieve a dataset. "
|
||||
dataset_id: str
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dataset(cls, dataset: Dataset, **kwargs):
|
||||
description = dataset.description
|
||||
if not description:
|
||||
description = 'useful for when you want to answer queries about the ' + dataset.name
|
||||
description = "useful for when you want to answer queries about the " + dataset.name
|
||||
|
||||
description = description.replace('\n', '').replace('\r', '')
|
||||
description = description.replace("\n", "").replace("\r", "")
|
||||
return cls(
|
||||
name=f"dataset_{dataset.id.replace('-', '_')}",
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
description=description,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def _run(self, query: str) -> str:
|
||||
dataset = db.session.query(Dataset).filter(
|
||||
Dataset.tenant_id == self.tenant_id,
|
||||
Dataset.id == self.dataset_id
|
||||
).first()
|
||||
dataset = (
|
||||
db.session.query(Dataset).filter(Dataset.tenant_id == self.tenant_id, Dataset.id == self.dataset_id).first()
|
||||
)
|
||||
|
||||
if not dataset:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_query(query, dataset.id)
|
||||
@@ -63,27 +58,29 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
|
||||
retrieval_model = dataset.retrieval_model if dataset.retrieval_model else default_retrieval_model
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
documents = RetrievalService.retrieve(retrieval_method='keyword_search',
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k
|
||||
)
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=self.top_k
|
||||
)
|
||||
return str("\n".join([document.page_content for document in documents]))
|
||||
else:
|
||||
if self.top_k > 0:
|
||||
# retrieval source
|
||||
documents = RetrievalService.retrieve(retrieval_method=retrieval_model.get('search_method', 'semantic_search'),
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k,
|
||||
score_threshold=retrieval_model.get('score_threshold', .0)
|
||||
if retrieval_model['score_threshold_enabled'] else None,
|
||||
reranking_model=retrieval_model.get('reranking_model', None)
|
||||
if retrieval_model['reranking_enable'] else None,
|
||||
reranking_mode=retrieval_model.get('reranking_mode')
|
||||
if retrieval_model.get('reranking_mode') else 'reranking_model',
|
||||
weights=retrieval_model.get('weights', None),
|
||||
)
|
||||
documents = RetrievalService.retrieve(
|
||||
retrieval_method=retrieval_model.get("search_method", "semantic_search"),
|
||||
dataset_id=dataset.id,
|
||||
query=query,
|
||||
top_k=self.top_k,
|
||||
score_threshold=retrieval_model.get("score_threshold", 0.0)
|
||||
if retrieval_model["score_threshold_enabled"]
|
||||
else None,
|
||||
reranking_model=retrieval_model.get("reranking_model", None)
|
||||
if retrieval_model["reranking_enable"]
|
||||
else None,
|
||||
reranking_mode=retrieval_model.get("reranking_mode")
|
||||
if retrieval_model.get("reranking_mode")
|
||||
else "reranking_model",
|
||||
weights=retrieval_model.get("weights", None),
|
||||
)
|
||||
else:
|
||||
documents = []
|
||||
|
||||
@@ -92,25 +89,26 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
|
||||
document_score_list = {}
|
||||
if dataset.indexing_technique != "economy":
|
||||
for item in documents:
|
||||
if item.metadata.get('score'):
|
||||
document_score_list[item.metadata['doc_id']] = item.metadata['score']
|
||||
if item.metadata.get("score"):
|
||||
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata['doc_id'] for document in documents]
|
||||
segments = DocumentSegment.query.filter(DocumentSegment.dataset_id == self.dataset_id,
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == 'completed',
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids)
|
||||
).all()
|
||||
index_node_ids = [document.metadata["doc_id"] for document in documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id == self.dataset_id,
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == "completed",
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids),
|
||||
).all()
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(segments,
|
||||
key=lambda segment: index_node_id_to_position.get(segment.index_node_id,
|
||||
float('inf')))
|
||||
sorted_segments = sorted(
|
||||
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
|
||||
)
|
||||
for segment in sorted_segments:
|
||||
if segment.answer:
|
||||
document_context_list.append(f'question:{segment.get_sign_content()} answer:{segment.answer}')
|
||||
document_context_list.append(f"question:{segment.get_sign_content()} answer:{segment.answer}")
|
||||
else:
|
||||
document_context_list.append(segment.get_sign_content())
|
||||
if self.return_resource:
|
||||
@@ -118,36 +116,36 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
|
||||
resource_number = 1
|
||||
for segment in sorted_segments:
|
||||
context = {}
|
||||
document = Document.query.filter(Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
document = Document.query.filter(
|
||||
Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
'position': resource_number,
|
||||
'dataset_id': dataset.id,
|
||||
'dataset_name': dataset.name,
|
||||
'document_id': document.id,
|
||||
'document_name': document.name,
|
||||
'data_source_type': document.data_source_type,
|
||||
'segment_id': segment.id,
|
||||
'retriever_from': self.retriever_from,
|
||||
'score': document_score_list.get(segment.index_node_id, None)
|
||||
|
||||
"position": resource_number,
|
||||
"dataset_id": dataset.id,
|
||||
"dataset_name": dataset.name,
|
||||
"document_id": document.id,
|
||||
"document_name": document.name,
|
||||
"data_source_type": document.data_source_type,
|
||||
"segment_id": segment.id,
|
||||
"retriever_from": self.retriever_from,
|
||||
"score": document_score_list.get(segment.index_node_id, None),
|
||||
}
|
||||
if self.retriever_from == 'dev':
|
||||
source['hit_count'] = segment.hit_count
|
||||
source['word_count'] = segment.word_count
|
||||
source['segment_position'] = segment.position
|
||||
source['index_node_hash'] = segment.index_node_hash
|
||||
if self.retriever_from == "dev":
|
||||
source["hit_count"] = segment.hit_count
|
||||
source["word_count"] = segment.word_count
|
||||
source["segment_position"] = segment.position
|
||||
source["index_node_hash"] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source['content'] = f'question:{segment.content} \nanswer:{segment.answer}'
|
||||
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
|
||||
else:
|
||||
source['content'] = segment.content
|
||||
source["content"] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
|
||||
return str("\n".join(document_context_list))
|
||||
return str("\n".join(document_context_list))
|
||||
|
||||
@@ -20,13 +20,14 @@ class DatasetRetrieverTool(Tool):
|
||||
retrieval_tool: DatasetRetrieverBaseTool
|
||||
|
||||
@staticmethod
|
||||
def get_dataset_tools(tenant_id: str,
|
||||
dataset_ids: list[str],
|
||||
retrieve_config: DatasetRetrieveConfigEntity,
|
||||
return_resource: bool,
|
||||
invoke_from: InvokeFrom,
|
||||
hit_callback: DatasetIndexToolCallbackHandler
|
||||
) -> list['DatasetRetrieverTool']:
|
||||
def get_dataset_tools(
|
||||
tenant_id: str,
|
||||
dataset_ids: list[str],
|
||||
retrieve_config: DatasetRetrieveConfigEntity,
|
||||
return_resource: bool,
|
||||
invoke_from: InvokeFrom,
|
||||
hit_callback: DatasetIndexToolCallbackHandler,
|
||||
) -> list["DatasetRetrieverTool"]:
|
||||
"""
|
||||
get dataset tool
|
||||
"""
|
||||
@@ -48,7 +49,7 @@ class DatasetRetrieverTool(Tool):
|
||||
retrieve_config=retrieve_config,
|
||||
return_resource=return_resource,
|
||||
invoke_from=invoke_from,
|
||||
hit_callback=hit_callback
|
||||
hit_callback=hit_callback,
|
||||
)
|
||||
# restore retrieve strategy
|
||||
retrieve_config.retrieve_strategy = original_retriever_mode
|
||||
@@ -58,13 +59,13 @@ class DatasetRetrieverTool(Tool):
|
||||
for retrieval_tool in retrieval_tools:
|
||||
tool = DatasetRetrieverTool(
|
||||
retrieval_tool=retrieval_tool,
|
||||
identity=ToolIdentity(provider='', author='', name=retrieval_tool.name, label=I18nObject(en_US='', zh_Hans='')),
|
||||
identity=ToolIdentity(
|
||||
provider="", author="", name=retrieval_tool.name, label=I18nObject(en_US="", zh_Hans="")
|
||||
),
|
||||
parameters=[],
|
||||
is_team_authorization=True,
|
||||
description=ToolDescription(
|
||||
human=I18nObject(en_US='', zh_Hans=''),
|
||||
llm=retrieval_tool.description),
|
||||
runtime=DatasetRetrieverTool.Runtime()
|
||||
description=ToolDescription(human=I18nObject(en_US="", zh_Hans=""), llm=retrieval_tool.description),
|
||||
runtime=DatasetRetrieverTool.Runtime(),
|
||||
)
|
||||
|
||||
tools.append(tool)
|
||||
@@ -73,16 +74,18 @@ class DatasetRetrieverTool(Tool):
|
||||
|
||||
def get_runtime_parameters(self) -> list[ToolParameter]:
|
||||
return [
|
||||
ToolParameter(name='query',
|
||||
label=I18nObject(en_US='', zh_Hans=''),
|
||||
human_description=I18nObject(en_US='', zh_Hans=''),
|
||||
type=ToolParameter.ToolParameterType.STRING,
|
||||
form=ToolParameter.ToolParameterForm.LLM,
|
||||
llm_description='Query for the dataset to be used to retrieve the dataset.',
|
||||
required=True,
|
||||
default=''),
|
||||
ToolParameter(
|
||||
name="query",
|
||||
label=I18nObject(en_US="", zh_Hans=""),
|
||||
human_description=I18nObject(en_US="", zh_Hans=""),
|
||||
type=ToolParameter.ToolParameterType.STRING,
|
||||
form=ToolParameter.ToolParameterForm.LLM,
|
||||
llm_description="Query for the dataset to be used to retrieve the dataset.",
|
||||
required=True,
|
||||
default="",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def tool_provider_type(self) -> ToolProviderType:
|
||||
return ToolProviderType.DATASET_RETRIEVAL
|
||||
|
||||
@@ -90,9 +93,9 @@ class DatasetRetrieverTool(Tool):
|
||||
"""
|
||||
invoke dataset retriever tool
|
||||
"""
|
||||
query = tool_parameters.get('query')
|
||||
query = tool_parameters.get("query")
|
||||
if not query:
|
||||
return self.create_text_message(text='please input query')
|
||||
return self.create_text_message(text="please input query")
|
||||
|
||||
# invoke dataset retriever tool
|
||||
result = self.retrieval_tool._run(query=query)
|
||||
|
||||
@@ -35,15 +35,16 @@ class Tool(BaseModel, ABC):
|
||||
# pydantic configs
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
@field_validator('parameters', mode='before')
|
||||
@field_validator("parameters", mode="before")
|
||||
@classmethod
|
||||
def set_parameters(cls, v, validation_info: ValidationInfo) -> list[ToolParameter]:
|
||||
return v or []
|
||||
|
||||
class Runtime(BaseModel):
|
||||
"""
|
||||
Meta data of a tool call processing
|
||||
Meta data of a tool call processing
|
||||
"""
|
||||
|
||||
def __init__(self, **data: Any):
|
||||
super().__init__(**data)
|
||||
if not self.runtime_parameters:
|
||||
@@ -63,14 +64,14 @@ class Tool(BaseModel, ABC):
|
||||
super().__init__(**data)
|
||||
|
||||
class VARIABLE_KEY(Enum):
|
||||
IMAGE = 'image'
|
||||
IMAGE = "image"
|
||||
|
||||
def fork_tool_runtime(self, runtime: dict[str, Any]) -> 'Tool':
|
||||
def fork_tool_runtime(self, runtime: dict[str, Any]) -> "Tool":
|
||||
"""
|
||||
fork a new tool with meta data
|
||||
fork a new tool with meta data
|
||||
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
"""
|
||||
return self.__class__(
|
||||
identity=self.identity.model_copy() if self.identity else None,
|
||||
@@ -82,22 +83,22 @@ class Tool(BaseModel, ABC):
|
||||
@abstractmethod
|
||||
def tool_provider_type(self) -> ToolProviderType:
|
||||
"""
|
||||
get the tool provider type
|
||||
get the tool provider type
|
||||
|
||||
:return: the tool provider type
|
||||
:return: the tool provider type
|
||||
"""
|
||||
|
||||
def load_variables(self, variables: ToolRuntimeVariablePool):
|
||||
"""
|
||||
load variables from database
|
||||
load variables from database
|
||||
|
||||
:param conversation_id: the conversation id
|
||||
:param conversation_id: the conversation id
|
||||
"""
|
||||
self.variables = variables
|
||||
|
||||
def set_image_variable(self, variable_name: str, image_key: str) -> None:
|
||||
"""
|
||||
set an image variable
|
||||
set an image variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return
|
||||
@@ -106,7 +107,7 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
def set_text_variable(self, variable_name: str, text: str) -> None:
|
||||
"""
|
||||
set a text variable
|
||||
set a text variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return
|
||||
@@ -115,10 +116,10 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
def get_variable(self, name: Union[str, Enum]) -> Optional[ToolRuntimeVariable]:
|
||||
"""
|
||||
get a variable
|
||||
get a variable
|
||||
|
||||
:param name: the name of the variable
|
||||
:return: the variable
|
||||
:param name: the name of the variable
|
||||
:return: the variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return None
|
||||
@@ -134,9 +135,9 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
def get_default_image_variable(self) -> Optional[ToolRuntimeVariable]:
|
||||
"""
|
||||
get the default image variable
|
||||
get the default image variable
|
||||
|
||||
:return: the image variable
|
||||
:return: the image variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return None
|
||||
@@ -145,10 +146,10 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
def get_variable_file(self, name: Union[str, Enum]) -> Optional[bytes]:
|
||||
"""
|
||||
get a variable file
|
||||
get a variable file
|
||||
|
||||
:param name: the name of the variable
|
||||
:return: the variable file
|
||||
:param name: the name of the variable
|
||||
:return: the variable file
|
||||
"""
|
||||
variable = self.get_variable(name)
|
||||
if not variable:
|
||||
@@ -167,9 +168,9 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
def list_variables(self) -> list[ToolRuntimeVariable]:
|
||||
"""
|
||||
list all variables
|
||||
list all variables
|
||||
|
||||
:return: the variables
|
||||
:return: the variables
|
||||
"""
|
||||
if not self.variables:
|
||||
return []
|
||||
@@ -178,9 +179,9 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
def list_default_image_variables(self) -> list[ToolRuntimeVariable]:
|
||||
"""
|
||||
list all image variables
|
||||
list all image variables
|
||||
|
||||
:return: the image variables
|
||||
:return: the image variables
|
||||
"""
|
||||
if not self.variables:
|
||||
return []
|
||||
@@ -220,38 +221,42 @@ class Tool(BaseModel, ABC):
|
||||
result = deepcopy(tool_parameters)
|
||||
for parameter in self.parameters or []:
|
||||
if parameter.name in tool_parameters:
|
||||
result[parameter.name] = ToolParameterConverter.cast_parameter_by_type(tool_parameters[parameter.name], parameter.type)
|
||||
result[parameter.name] = ToolParameterConverter.cast_parameter_by_type(
|
||||
tool_parameters[parameter.name], parameter.type
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
@abstractmethod
|
||||
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
def _invoke(
|
||||
self, user_id: str, tool_parameters: dict[str, Any]
|
||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
pass
|
||||
|
||||
def validate_credentials(self, credentials: dict[str, Any], parameters: dict[str, Any]) -> None:
|
||||
"""
|
||||
validate the credentials
|
||||
validate the credentials
|
||||
|
||||
:param credentials: the credentials
|
||||
:param parameters: the parameters
|
||||
:param credentials: the credentials
|
||||
:param parameters: the parameters
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_runtime_parameters(self) -> list[ToolParameter]:
|
||||
"""
|
||||
get the runtime parameters
|
||||
get the runtime parameters
|
||||
|
||||
interface for developer to dynamic change the parameters of a tool depends on the variables pool
|
||||
interface for developer to dynamic change the parameters of a tool depends on the variables pool
|
||||
|
||||
:return: the runtime parameters
|
||||
:return: the runtime parameters
|
||||
"""
|
||||
return self.parameters or []
|
||||
|
||||
def get_all_runtime_parameters(self) -> list[ToolParameter]:
|
||||
"""
|
||||
get all runtime parameters
|
||||
get all runtime parameters
|
||||
|
||||
:return: all runtime parameters
|
||||
:return: all runtime parameters
|
||||
"""
|
||||
parameters = self.parameters or []
|
||||
parameters = parameters.copy()
|
||||
@@ -281,67 +286,49 @@ class Tool(BaseModel, ABC):
|
||||
|
||||
return parameters
|
||||
|
||||
def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessage:
|
||||
def create_image_message(self, image: str, save_as: str = "") -> ToolInvokeMessage:
|
||||
"""
|
||||
create an image message
|
||||
create an image message
|
||||
|
||||
:param image: the url of the image
|
||||
:return: the image message
|
||||
:param image: the url of the image
|
||||
:return: the image message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
|
||||
message=image,
|
||||
save_as=save_as)
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE, message=image, save_as=save_as)
|
||||
|
||||
def create_file_var_message(self, file_var: "FileVar") -> ToolInvokeMessage:
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.FILE_VAR,
|
||||
message='',
|
||||
meta={
|
||||
'file_var': file_var
|
||||
},
|
||||
save_as='')
|
||||
|
||||
def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage:
|
||||
"""
|
||||
create a link message
|
||||
|
||||
:param link: the url of the link
|
||||
:return: the link message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
|
||||
message=link,
|
||||
save_as=save_as)
|
||||
|
||||
def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage:
|
||||
"""
|
||||
create a text message
|
||||
|
||||
:param text: the text
|
||||
:return: the text message
|
||||
"""
|
||||
return ToolInvokeMessage(
|
||||
type=ToolInvokeMessage.MessageType.TEXT,
|
||||
message=text,
|
||||
save_as=save_as
|
||||
type=ToolInvokeMessage.MessageType.FILE_VAR, message="", meta={"file_var": file_var}, save_as=""
|
||||
)
|
||||
|
||||
def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
|
||||
def create_link_message(self, link: str, save_as: str = "") -> ToolInvokeMessage:
|
||||
"""
|
||||
create a blob message
|
||||
create a link message
|
||||
|
||||
:param blob: the blob
|
||||
:return: the blob message
|
||||
:param link: the url of the link
|
||||
:return: the link message
|
||||
"""
|
||||
return ToolInvokeMessage(
|
||||
type=ToolInvokeMessage.MessageType.BLOB,
|
||||
message=blob, meta=meta,
|
||||
save_as=save_as
|
||||
)
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK, message=link, save_as=save_as)
|
||||
|
||||
def create_text_message(self, text: str, save_as: str = "") -> ToolInvokeMessage:
|
||||
"""
|
||||
create a text message
|
||||
|
||||
:param text: the text
|
||||
:return: the text message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT, message=text, save_as=save_as)
|
||||
|
||||
def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = "") -> ToolInvokeMessage:
|
||||
"""
|
||||
create a blob message
|
||||
|
||||
:param blob: the blob
|
||||
:return: the blob message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB, message=blob, meta=meta, save_as=save_as)
|
||||
|
||||
def create_json_message(self, object: dict) -> ToolInvokeMessage:
|
||||
"""
|
||||
create a json message
|
||||
create a json message
|
||||
"""
|
||||
return ToolInvokeMessage(
|
||||
type=ToolInvokeMessage.MessageType.JSON,
|
||||
message=object
|
||||
)
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.JSON, message=object)
|
||||
|
||||
@@ -13,6 +13,7 @@ from models.workflow import Workflow
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WorkflowTool(Tool):
|
||||
workflow_app_id: str
|
||||
version: str
|
||||
@@ -25,11 +26,12 @@ class WorkflowTool(Tool):
|
||||
"""
|
||||
Workflow tool.
|
||||
"""
|
||||
|
||||
def tool_provider_type(self) -> ToolProviderType:
|
||||
"""
|
||||
get the tool provider type
|
||||
get the tool provider type
|
||||
|
||||
:return: the tool provider type
|
||||
:return: the tool provider type
|
||||
"""
|
||||
return ToolProviderType.WORKFLOW
|
||||
|
||||
@@ -37,7 +39,7 @@ class WorkflowTool(Tool):
|
||||
self, user_id: str, tool_parameters: dict[str, Any]
|
||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
"""
|
||||
invoke the tool
|
||||
invoke the tool
|
||||
"""
|
||||
app = self._get_app(app_id=self.workflow_app_id)
|
||||
workflow = self._get_workflow(app_id=self.workflow_app_id, version=self.version)
|
||||
@@ -46,33 +48,31 @@ class WorkflowTool(Tool):
|
||||
tool_parameters, files = self._transform_args(tool_parameters)
|
||||
|
||||
from core.app.apps.workflow.app_generator import WorkflowAppGenerator
|
||||
|
||||
generator = WorkflowAppGenerator()
|
||||
result = generator.generate(
|
||||
app_model=app,
|
||||
workflow=workflow,
|
||||
user=self._get_user(user_id),
|
||||
args={
|
||||
'inputs': tool_parameters,
|
||||
'files': files
|
||||
},
|
||||
app_model=app,
|
||||
workflow=workflow,
|
||||
user=self._get_user(user_id),
|
||||
args={"inputs": tool_parameters, "files": files},
|
||||
invoke_from=self.runtime.invoke_from,
|
||||
stream=False,
|
||||
call_depth=self.workflow_call_depth + 1,
|
||||
workflow_thread_pool_id=self.thread_pool_id
|
||||
workflow_thread_pool_id=self.thread_pool_id,
|
||||
)
|
||||
|
||||
data = result.get('data', {})
|
||||
data = result.get("data", {})
|
||||
|
||||
if data.get("error"):
|
||||
raise Exception(data.get("error"))
|
||||
|
||||
if data.get('error'):
|
||||
raise Exception(data.get('error'))
|
||||
|
||||
result = []
|
||||
|
||||
outputs = data.get('outputs', {})
|
||||
outputs = data.get("outputs", {})
|
||||
outputs, files = self._extract_files(outputs)
|
||||
for file in files:
|
||||
result.append(self.create_file_var_message(file))
|
||||
|
||||
|
||||
result.append(self.create_text_message(json.dumps(outputs, ensure_ascii=False)))
|
||||
result.append(self.create_json_message(outputs))
|
||||
|
||||
@@ -80,7 +80,7 @@ class WorkflowTool(Tool):
|
||||
|
||||
def _get_user(self, user_id: str) -> Union[EndUser, Account]:
|
||||
"""
|
||||
get the user by user id
|
||||
get the user by user id
|
||||
"""
|
||||
|
||||
user = db.session.query(EndUser).filter(EndUser.id == user_id).first()
|
||||
@@ -88,16 +88,16 @@ class WorkflowTool(Tool):
|
||||
user = db.session.query(Account).filter(Account.id == user_id).first()
|
||||
|
||||
if not user:
|
||||
raise ValueError('user not found')
|
||||
raise ValueError("user not found")
|
||||
|
||||
return user
|
||||
|
||||
def fork_tool_runtime(self, runtime: dict[str, Any]) -> 'WorkflowTool':
|
||||
def fork_tool_runtime(self, runtime: dict[str, Any]) -> "WorkflowTool":
|
||||
"""
|
||||
fork a new tool with meta data
|
||||
fork a new tool with meta data
|
||||
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
"""
|
||||
return self.__class__(
|
||||
identity=deepcopy(self.identity),
|
||||
@@ -108,45 +108,44 @@ class WorkflowTool(Tool):
|
||||
workflow_entities=self.workflow_entities,
|
||||
workflow_call_depth=self.workflow_call_depth,
|
||||
version=self.version,
|
||||
label=self.label
|
||||
label=self.label,
|
||||
)
|
||||
|
||||
|
||||
def _get_workflow(self, app_id: str, version: str) -> Workflow:
|
||||
"""
|
||||
get the workflow by app id and version
|
||||
get the workflow by app id and version
|
||||
"""
|
||||
if not version:
|
||||
workflow = db.session.query(Workflow).filter(
|
||||
Workflow.app_id == app_id,
|
||||
Workflow.version != 'draft'
|
||||
).order_by(Workflow.created_at.desc()).first()
|
||||
workflow = (
|
||||
db.session.query(Workflow)
|
||||
.filter(Workflow.app_id == app_id, Workflow.version != "draft")
|
||||
.order_by(Workflow.created_at.desc())
|
||||
.first()
|
||||
)
|
||||
else:
|
||||
workflow = db.session.query(Workflow).filter(
|
||||
Workflow.app_id == app_id,
|
||||
Workflow.version == version
|
||||
).first()
|
||||
workflow = db.session.query(Workflow).filter(Workflow.app_id == app_id, Workflow.version == version).first()
|
||||
|
||||
if not workflow:
|
||||
raise ValueError('workflow not found or not published')
|
||||
raise ValueError("workflow not found or not published")
|
||||
|
||||
return workflow
|
||||
|
||||
|
||||
def _get_app(self, app_id: str) -> App:
|
||||
"""
|
||||
get the app by app id
|
||||
get the app by app id
|
||||
"""
|
||||
app = db.session.query(App).filter(App.id == app_id).first()
|
||||
if not app:
|
||||
raise ValueError('app not found')
|
||||
raise ValueError("app not found")
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def _transform_args(self, tool_parameters: dict) -> tuple[dict, list[dict]]:
|
||||
"""
|
||||
transform the tool parameters
|
||||
transform the tool parameters
|
||||
|
||||
:param tool_parameters: the tool parameters
|
||||
:return: tool_parameters, files
|
||||
:param tool_parameters: the tool parameters
|
||||
:return: tool_parameters, files
|
||||
"""
|
||||
parameter_rules = self.get_all_runtime_parameters()
|
||||
parameters_result = {}
|
||||
@@ -159,15 +158,15 @@ class WorkflowTool(Tool):
|
||||
file_var_list = [FileVar(**f) for f in file]
|
||||
for file_var in file_var_list:
|
||||
file_dict = {
|
||||
'transfer_method': file_var.transfer_method.value,
|
||||
'type': file_var.type.value,
|
||||
"transfer_method": file_var.transfer_method.value,
|
||||
"type": file_var.type.value,
|
||||
}
|
||||
if file_var.transfer_method == FileTransferMethod.TOOL_FILE:
|
||||
file_dict['tool_file_id'] = file_var.related_id
|
||||
file_dict["tool_file_id"] = file_var.related_id
|
||||
elif file_var.transfer_method == FileTransferMethod.LOCAL_FILE:
|
||||
file_dict['upload_file_id'] = file_var.related_id
|
||||
file_dict["upload_file_id"] = file_var.related_id
|
||||
elif file_var.transfer_method == FileTransferMethod.REMOTE_URL:
|
||||
file_dict['url'] = file_var.preview_url
|
||||
file_dict["url"] = file_var.preview_url
|
||||
|
||||
files.append(file_dict)
|
||||
except Exception as e:
|
||||
@@ -176,13 +175,13 @@ class WorkflowTool(Tool):
|
||||
parameters_result[parameter.name] = tool_parameters.get(parameter.name)
|
||||
|
||||
return parameters_result, files
|
||||
|
||||
|
||||
def _extract_files(self, outputs: dict) -> tuple[dict, list[FileVar]]:
|
||||
"""
|
||||
extract files from the result
|
||||
extract files from the result
|
||||
|
||||
:param result: the result
|
||||
:return: the result, files
|
||||
:param result: the result
|
||||
:return: the result, files
|
||||
"""
|
||||
files = []
|
||||
result = {}
|
||||
@@ -190,7 +189,7 @@ class WorkflowTool(Tool):
|
||||
if isinstance(value, list):
|
||||
has_file = False
|
||||
for item in value:
|
||||
if isinstance(item, dict) and item.get('__variant') == 'FileVar':
|
||||
if isinstance(item, dict) and item.get("__variant") == "FileVar":
|
||||
try:
|
||||
files.append(FileVar(**item))
|
||||
has_file = True
|
||||
@@ -201,4 +200,4 @@ class WorkflowTool(Tool):
|
||||
|
||||
result[key] = value
|
||||
|
||||
return result, files
|
||||
return result, files
|
||||
|
||||
Reference in New Issue
Block a user