chore(api/services): apply ruff reformatting (#7599)

Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
Bowen Liang
2024-08-26 13:43:57 +08:00
committed by GitHub
parent 979422cdc6
commit 17fd773a30
49 changed files with 2630 additions and 2655 deletions

View File

@@ -9,14 +9,11 @@ from models.account import Account
from models.dataset import Dataset, DatasetQuery, DocumentSegment
default_retrieval_model = {
'search_method': RetrievalMethod.SEMANTIC_SEARCH.value,
'reranking_enable': False,
'reranking_model': {
'reranking_provider_name': '',
'reranking_model_name': ''
},
'top_k': 2,
'score_threshold_enabled': False
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
"reranking_enable": False,
"reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
"top_k": 2,
"score_threshold_enabled": False,
}
@@ -27,9 +24,9 @@ class HitTestingService:
return {
"query": {
"content": query,
"tsne_position": {'x': 0, 'y': 0},
"tsne_position": {"x": 0, "y": 0},
},
"records": []
"records": [],
}
start = time.perf_counter()
@@ -38,28 +35,28 @@ class HitTestingService:
if not retrieval_model:
retrieval_model = dataset.retrieval_model if dataset.retrieval_model else default_retrieval_model
all_documents = RetrievalService.retrieve(retrival_method=retrieval_model.get('search_method', 'semantic_search'),
dataset_id=dataset.id,
query=cls.escape_query_for_search(query),
top_k=retrieval_model.get('top_k', 2),
score_threshold=retrieval_model.get('score_threshold', .0)
if retrieval_model['score_threshold_enabled'] else None,
reranking_model=retrieval_model.get('reranking_model', None)
if retrieval_model['reranking_enable'] else None,
reranking_mode=retrieval_model.get('reranking_mode')
if retrieval_model.get('reranking_mode') else 'reranking_model',
weights=retrieval_model.get('weights', None),
)
all_documents = RetrievalService.retrieve(
retrival_method=retrieval_model.get("search_method", "semantic_search"),
dataset_id=dataset.id,
query=cls.escape_query_for_search(query),
top_k=retrieval_model.get("top_k", 2),
score_threshold=retrieval_model.get("score_threshold", 0.0)
if retrieval_model["score_threshold_enabled"]
else None,
reranking_model=retrieval_model.get("reranking_model", None)
if retrieval_model["reranking_enable"]
else None,
reranking_mode=retrieval_model.get("reranking_mode")
if retrieval_model.get("reranking_mode")
else "reranking_model",
weights=retrieval_model.get("weights", None),
)
end = time.perf_counter()
logging.debug(f"Hit testing retrieve in {end - start:0.4f} seconds")
dataset_query = DatasetQuery(
dataset_id=dataset.id,
content=query,
source='hit_testing',
created_by_role='account',
created_by=account.id
dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id
)
db.session.add(dataset_query)
@@ -72,14 +69,18 @@ class HitTestingService:
i = 0
records = []
for document in documents:
index_node_id = document.metadata['doc_id']
index_node_id = document.metadata["doc_id"]
segment = db.session.query(DocumentSegment).filter(
DocumentSegment.dataset_id == dataset.id,
DocumentSegment.enabled == True,
DocumentSegment.status == 'completed',
DocumentSegment.index_node_id == index_node_id
).first()
segment = (
db.session.query(DocumentSegment)
.filter(
DocumentSegment.dataset_id == dataset.id,
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.index_node_id == index_node_id,
)
.first()
)
if not segment:
i += 1
@@ -87,7 +88,7 @@ class HitTestingService:
record = {
"segment": segment,
"score": document.metadata.get('score', None),
"score": document.metadata.get("score", None),
}
records.append(record)
@@ -98,15 +99,15 @@ class HitTestingService:
"query": {
"content": query,
},
"records": records
"records": records,
}
@classmethod
def hit_testing_args_check(cls, args):
query = args['query']
query = args["query"]
if not query or len(query) > 250:
raise ValueError('Query is required and cannot exceed 250 characters')
raise ValueError("Query is required and cannot exceed 250 characters")
@staticmethod
def escape_query_for_search(query: str) -> str: