Mirror of http://112.124.100.131/huang.ze/ebiz-dify-ai.git (synced 2025-12-10 19:36:53 +08:00)
feat: backend model load balancing support (#4927)
@@ -73,10 +73,8 @@ class ModelInvocationUtils:
         if not model_instance:
             raise InvokeModelError('Model not found')
 
-        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
-
         # get tokens
-        tokens = llm_model.get_num_tokens(model_instance.model, model_instance.credentials, prompt_messages)
+        tokens = model_instance.get_llm_num_tokens(prompt_messages)
 
         return tokens
 
@@ -108,13 +106,8 @@ class ModelInvocationUtils:
             tenant_id=tenant_id, model_type=ModelType.LLM,
         )
 
-        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
-
-        # get model credentials
-        model_credentials = model_instance.credentials
-
         # get prompt tokens
-        prompt_tokens = llm_model.get_num_tokens(model_instance.model, model_credentials, prompt_messages)
+        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
 
         model_parameters = {
             'temperature': 0.8,
@@ -144,9 +137,7 @@ class ModelInvocationUtils:
         db.session.commit()
 
         try:
-            response: LLMResult = llm_model.invoke(
-                model=model_instance.model,
-                credentials=model_credentials,
+            response: LLMResult = model_instance.invoke_llm(
                 prompt_messages=prompt_messages,
                 model_parameters=model_parameters,
                 tools=[], stop=[], stream=False, user=user_id, callbacks=[]
@@ -176,4 +167,4 @@ class ModelInvocationUtils:
 
         db.session.commit()
 
-        return response
+        return response
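The change is the same across these hunks: ModelInvocationUtils stops casting model_instance.model_type_instance to LargeLanguageModel and passing model and credentials at every call site, and instead calls get_llm_num_tokens / invoke_llm on the model instance itself, which now owns credential selection. Below is a minimal, self-contained sketch of that call-site pattern, not the implementation added by this PR; SimpleModelInstance, the runtime argument, and the round-robin credential pool are illustrative assumptions.

# Minimal sketch only -- SimpleModelInstance, the `runtime` parameter, and the
# round-robin pool are assumptions for illustration, not Dify's actual
# ModelInstance or the load-balancing code added by this PR.
import itertools


class SimpleModelInstance:
    """Toy wrapper around a model runtime plus a pool of credential dicts.

    Call sites talk only to this wrapper, so the wrapper decides which
    credential serves each request -- that is what keeps load balancing
    invisible to callers such as ModelInvocationUtils.
    """

    def __init__(self, runtime, model: str, credential_pool: list[dict]):
        self._runtime = runtime
        self._model = model
        self._credentials = itertools.cycle(credential_pool)  # naive round-robin

    def get_llm_num_tokens(self, prompt_messages) -> int:
        # previously: llm_model.get_num_tokens(model, credentials, prompt_messages)
        return self._runtime.get_num_tokens(
            self._model, next(self._credentials), prompt_messages
        )

    def invoke_llm(self, prompt_messages, model_parameters, **kwargs):
        # previously callers passed model= and credentials= explicitly
        return self._runtime.invoke(
            model=self._model,
            credentials=next(self._credentials),
            prompt_messages=prompt_messages,
            model_parameters=model_parameters,
            **kwargs,
        )

With call sites shaped like this, credentials can be added to or removed from the pool (or picked by a smarter strategy than round-robin) without touching ModelInvocationUtils again.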