feat: xinference rerank model support (#1615)

This commit is contained in:
takatost
2023-11-25 03:56:00 +08:00
committed by GitHub
parent ea35f1dce1
commit 0e627c920f
9 changed files with 215 additions and 6 deletions

View File

@@ -0,0 +1,58 @@
import logging
from typing import Optional, List
from langchain.schema import Document
from xinference_client.client.restful.restful_client import Client
from core.model_providers.error import LLMBadRequestError
from core.model_providers.models.reranking.base import BaseReranking
from core.model_providers.providers.base import BaseModelProvider
class XinferenceReranking(BaseReranking):
    """Reranking model that delegates scoring to an Xinference server.

    The server address and model uid are read from the provider credentials
    under the ``server_url`` and ``model_uid`` keys.
    """

    def __init__(self, model_provider: BaseModelProvider, name: str):
        """Load the credentials for ``name`` and build the Xinference client.

        :param model_provider: provider used to look up the model credentials.
        :param name: name of the reranking model.
        """
        # NOTE(review): ``self.type`` is read before ``super().__init__`` runs,
        # so it is presumably a class-level attribute of BaseReranking.
        self.credentials = model_provider.get_model_credentials(
            model_name=name,
            model_type=self.type
        )

        client = Client(self.credentials['server_url'])

        super().__init__(model_provider, client, name)

    def rerank(self, query: str, documents: List[Document], score_threshold: Optional[float],
               top_k: Optional[int]) -> Optional[List[Document]]:
        """Rerank ``documents`` against ``query`` using the Xinference model.

        Documents sharing a ``doc_id`` are de-duplicated (first occurrence
        wins) before being sent to the server.

        :param query: text the documents are scored against.
        :param documents: candidate documents; each must carry ``doc_id``,
            ``doc_hash``, ``document_id`` and ``dataset_id`` in its metadata.
        :param score_threshold: when not None, results whose relevance score
            is below this value are dropped.
        :param top_k: forwarded to the server as ``top_n``.
        :return: new Document objects in the order returned by the server,
            each with the relevance score stored under ``metadata['score']``.
        """
        # Keep the de-duplicated Document objects themselves: the indices the
        # server returns refer to positions in the list we send, not to
        # positions in the caller's original ``documents`` list.
        unique_documents = []
        seen_doc_ids = set()
        for document in documents:
            doc_id = document.metadata['doc_id']
            if doc_id not in seen_doc_ids:
                seen_doc_ids.add(doc_id)
                unique_documents.append(document)

        # Nothing to rank, so skip the remote call entirely.
        if not unique_documents:
            return []

        model = self.client.get_model(self.credentials['model_uid'])
        response = model.rerank(
            query=query,
            documents=[document.page_content for document in unique_documents],
            top_n=top_k
        )

        rerank_documents = []
        for result in response['results']:
            source = unique_documents[result['index']]
            score = result['relevance_score']

            # score threshold check (results are plain dicts, so use key access)
            if score_threshold is not None and score < score_threshold:
                continue

            # format document
            rerank_documents.append(Document(
                page_content=result['document'],
                metadata={
                    "doc_id": source.metadata['doc_id'],
                    "doc_hash": source.metadata['doc_hash'],
                    "document_id": source.metadata['document_id'],
                    "dataset_id": source.metadata['dataset_id'],
                    'score': score
                }
            ))

        return rerank_documents

    def handle_exceptions(self, ex: Exception) -> Exception:
        """Wrap any exception raised while reranking in an LLMBadRequestError."""
        return LLMBadRequestError(f"Xinference rerank: {str(ex)}")

View File

@@ -2,11 +2,13 @@ import json
from typing import Type
import requests
from xinference_client.client.restful.restful_client import Client
from core.helper import encrypter
from core.model_providers.models.embedding.xinference_embedding import XinferenceEmbedding
from core.model_providers.models.entity.model_params import KwargRule, ModelKwargsRules, ModelType, ModelMode
from core.model_providers.models.llm.xinference_model import XinferenceModel
from core.model_providers.models.reranking.xinference_reranking import XinferenceReranking
from core.model_providers.providers.base import BaseModelProvider, CredentialsValidateFailedError
from core.model_providers.models.base import BaseProviderModel
@@ -40,6 +42,8 @@ class XinferenceProvider(BaseModelProvider):
model_class = XinferenceModel
elif model_type == ModelType.EMBEDDINGS:
model_class = XinferenceEmbedding
elif model_type == ModelType.RERANKING:
model_class = XinferenceReranking
else:
raise NotImplementedError
@@ -113,6 +117,10 @@ class XinferenceProvider(BaseModelProvider):
)
embedding.embed_query("ping")
elif model_type == ModelType.RERANKING:
rerank_client = Client(credential_kwargs['server_url'])
model = rerank_client.get_model(credential_kwargs['model_uid'])
model.rerank(query="ping", documents=["ping", "pong"], top_n=2)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))

View File

@@ -6,6 +6,7 @@
"model_flexibility": "configurable",
"supported_model_types": [
"text-generation",
"embeddings"
"embeddings",
"reranking"
]
}