feat: backend model load balancing support (#4927)

This commit is contained in:
takatost
2024-06-05 00:13:04 +08:00
committed by GitHub
parent 52ec152dd3
commit d1dbbc1e33
47 changed files with 2191 additions and 256 deletions

View File

@@ -73,10 +73,8 @@ class ModelInvocationUtils:
if not model_instance:
raise InvokeModelError('Model not found')
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
# get tokens
tokens = llm_model.get_num_tokens(model_instance.model, model_instance.credentials, prompt_messages)
tokens = model_instance.get_llm_num_tokens(prompt_messages)
return tokens
@@ -108,13 +106,8 @@ class ModelInvocationUtils:
tenant_id=tenant_id, model_type=ModelType.LLM,
)
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
# get model credentials
model_credentials = model_instance.credentials
# get prompt tokens
prompt_tokens = llm_model.get_num_tokens(model_instance.model, model_credentials, prompt_messages)
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
model_parameters = {
'temperature': 0.8,
@@ -144,9 +137,7 @@ class ModelInvocationUtils:
db.session.commit()
try:
response: LLMResult = llm_model.invoke(
model=model_instance.model,
credentials=model_credentials,
response: LLMResult = model_instance.invoke_llm(
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=[], stop=[], stream=False, user=user_id, callbacks=[]
@@ -176,4 +167,4 @@ class ModelInvocationUtils:
db.session.commit()
return response
return response