feat: mypy for all type check (#10921)

This commit is contained in:
yihong
2024-12-24 18:38:51 +08:00
committed by GitHub
parent c91e8b1737
commit 56e15d09a9
584 changed files with 3975 additions and 2826 deletions

View File

@@ -39,7 +39,7 @@ class WeightRerankRunner(BaseRerankRunner):
unique_documents = []
doc_ids = set()
for document in documents:
if document.metadata["doc_id"] not in doc_ids:
if document.metadata is not None and document.metadata["doc_id"] not in doc_ids:
doc_ids.add(document.metadata["doc_id"])
unique_documents.append(document)
@@ -56,10 +56,11 @@ class WeightRerankRunner(BaseRerankRunner):
)
if score_threshold and score < score_threshold:
continue
document.metadata["score"] = score
rerank_documents.append(document)
if document.metadata is not None:
document.metadata["score"] = score
rerank_documents.append(document)
rerank_documents.sort(key=lambda x: x.metadata["score"], reverse=True)
rerank_documents.sort(key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return rerank_documents[:top_n] if top_n else rerank_documents
def _calculate_keyword_score(self, query: str, documents: list[Document]) -> list[float]:
@@ -76,8 +77,9 @@ class WeightRerankRunner(BaseRerankRunner):
for document in documents:
# get the document keywords
document_keywords = keyword_table_handler.extract_keywords(document.page_content, None)
document.metadata["keywords"] = document_keywords
documents_keywords.append(document_keywords)
if document.metadata is not None:
document.metadata["keywords"] = document_keywords
documents_keywords.append(document_keywords)
# Count query keyword occurrences (term frequency, TF)
query_keyword_counts = Counter(query_keywords)
@@ -162,7 +164,7 @@ class WeightRerankRunner(BaseRerankRunner):
query_vector = cache_embedding.embed_query(query)
for document in documents:
# calculate cosine similarity
if "score" in document.metadata:
if document.metadata and "score" in document.metadata:
query_vector_scores.append(document.metadata["score"])
else:
# transform to NumPy