Feat/add retriever rerank (#1560)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong
2023-11-17 22:13:37 +08:00
committed by GitHub
parent a4f37220a0
commit 4588831bff
44 changed files with 1899 additions and 164 deletions

View File

@@ -49,14 +49,14 @@ class IndexingRunner:
if not dataset:
raise ValueError("no dataset found")
# load file
text_docs = self._load_data(dataset_document)
# get the process rule
processing_rule = db.session.query(DatasetProcessRule). \
filter(DatasetProcessRule.id == dataset_document.dataset_process_rule_id). \
first()
# load file
text_docs = self._load_data(dataset_document)
# get splitter
splitter = self._get_splitter(processing_rule)
@@ -380,7 +380,7 @@ class IndexingRunner:
"preview": preview_texts
}
def _load_data(self, dataset_document: DatasetDocument) -> List[Document]:
def _load_data(self, dataset_document: DatasetDocument, automatic: bool = False) -> List[Document]:
# load file
if dataset_document.data_source_type not in ["upload_file", "notion_import"]:
return []
@@ -396,7 +396,7 @@ class IndexingRunner:
one_or_none()
if file_detail:
text_docs = FileExtractor.load(file_detail)
text_docs = FileExtractor.load(file_detail, is_automatic=False)
elif dataset_document.data_source_type == 'notion_import':
loader = NotionLoader.from_document(dataset_document)
text_docs = loader.load()