Fix/ignore economy dataset (#1043)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong
2023-08-29 03:37:45 +08:00
committed by GitHub
parent f9bec1edf8
commit a55ba6e614
13 changed files with 320 additions and 205 deletions

View File

@@ -67,12 +67,13 @@ class DatesetDocumentStore:
if max_position is None:
max_position = 0
-embedding_model = ModelFactory.get_embedding_model(
-tenant_id=self._dataset.tenant_id,
-model_provider_name=self._dataset.embedding_model_provider,
-model_name=self._dataset.embedding_model
-)
+embedding_model = None
+if self._dataset.indexing_technique == 'high_quality':
+embedding_model = ModelFactory.get_embedding_model(
+tenant_id=self._dataset.tenant_id,
+model_provider_name=self._dataset.embedding_model_provider,
+model_name=self._dataset.embedding_model
+)
for doc in docs:
if not isinstance(doc, Document):
@@ -88,7 +89,7 @@ class DatesetDocumentStore:
)
# calc embedding use tokens
-tokens = embedding_model.get_num_tokens(doc.page_content)
+tokens = embedding_model.get_num_tokens(doc.page_content) if embedding_model else 0
if not segment_document:
max_position += 1