feat(large_language_model): Adds plugin-based token counting configuration option (#17706)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: Yeuoly <admin@srmxy.cn>
Author: -LAN- <laipz8200@outlook.com>
Date: 2025-04-09 21:52:58 +09:00 (committed by GitHub)
Commit: d3157b46ee (parent: 8b3be4224d)
10 changed files with 32 additions and 60 deletions

@@ -53,20 +53,6 @@ class AgentChatAppRunner(AppRunner):
         query = application_generate_entity.query
         files = application_generate_entity.files

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=dict(inputs),
-            files=list(files),
-            query=query,
-        )
-
         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)

@@ -61,20 +61,6 @@ class ChatAppRunner(AppRunner):
         )
         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)

@@ -54,20 +54,6 @@ class CompletionAppRunner(AppRunner):
         )
         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
         # organize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         prompt_messages, stop = self.organize_prompt_messages(
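
All three app runners drop the same pre-flight token check. For orientation, here is a minimal sketch of what that check computed, with illustrative argument names rather than the real `get_pre_calculate_rest_tokens` signature:

```python
# Sketch only: the real helper derived these numbers from the model config
# and counted tokens over the organized prompt messages.
def pre_calculate_rest_tokens(prompt_tokens: int, context_size: int, max_tokens: int) -> int:
    # Remaining budget = context window - completion budget - prompt tokens.
    rest_tokens = context_size - max_tokens - prompt_tokens
    if rest_tokens < 0:
        # The runners surfaced this as an invoke error before any model call.
        raise ValueError("Prompt exceeds the model context size; shorten the prompt or lower max tokens.")
    return rest_tokens
```

With this gate removed, the runners no longer reject oversized prompts up front.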

@@ -192,7 +192,7 @@ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[Pr
 ```
-Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
+Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens and ensure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
 - Model Credentials Validation
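
A minimal sketch of that documented fallback, assuming plain-string message content (the typed `get_num_tokens` signature appears in the hunk header above; multimodal content handling is omitted):

```python
def get_num_tokens(self, model, credentials, prompt_messages, tools=None):
    # Approximate with the GPT-2 tokenizer from the AIModel base class;
    # an estimate only, not exact for every model family.
    return sum(
        self._get_num_tokens_by_gpt2(message.content)
        for message in prompt_messages
        if isinstance(message.content, str)
    )
```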

@@ -179,7 +179,7 @@ provider_credential_schema:
"""
```
有时候也许你不需要直接返回0所以你可以使用`self._get_num_tokens_by_gpt2(text: str)`来获取预计算的tokens这个方法位于`AIModel`基类中它会使用GPT2的Tokenizer进行计算但是只能作为替代方法并不完全准确。
有时候也许你不需要直接返回0所以你可以使用`self._get_num_tokens_by_gpt2(text: str)`来获取预计算的tokens并确保环境变量`PLUGIN_BASED_TOKEN_COUNTING_ENABLED`设置为`true`这个方法位于`AIModel`基类中它会使用GPT2的Tokenizer进行计算但是只能作为替代方法并不完全准确。
- 模型凭据校验

@@ -295,18 +295,20 @@ class LargeLanguageModel(AIModel):
         :param tools: tools for tool calling
         :return:
         """
-        plugin_model_manager = PluginModelManager()
-        return plugin_model_manager.get_llm_num_tokens(
-            tenant_id=self.tenant_id,
-            user_id="unknown",
-            plugin_id=self.plugin_id,
-            provider=self.provider_name,
-            model_type=self.model_type.value,
-            model=model,
-            credentials=credentials,
-            prompt_messages=prompt_messages,
-            tools=tools,
-        )
+        if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
+            plugin_model_manager = PluginModelManager()
+            return plugin_model_manager.get_llm_num_tokens(
+                tenant_id=self.tenant_id,
+                user_id="unknown",
+                plugin_id=self.plugin_id,
+                provider=self.provider_name,
+                model_type=self.model_type.value,
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                tools=tools,
+            )
+        return 0

     def _calc_response_usage(
         self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int
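
Net effect: plugin-based token counting becomes opt-in. With the flag off, `get_num_tokens` short-circuits to 0 instead of calling `PluginModelManager.get_llm_num_tokens`, which is consistent with dropping the pre-flight rest-token checks in the app runners above. The commit's config declaration is not shown in this excerpt; a sketch of how such a flag could be declared in a pydantic-settings config like `dify_config` (field name from this commit; the `False` default is an assumption):

```python
from pydantic import Field
from pydantic_settings import BaseSettings


class PluginTokenCountingConfig(BaseSettings):
    # Assumed declaration; read as dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED.
    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
        default=False,
        description="Enable plugin-based token counting for LLM token estimates.",
    )
```

Deployments that still want plugin-side counts set the environment variable to `true`, as the updated docs above describe.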