add keyword table s3 storage support (#3065)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong
2024-04-01 20:19:30 +08:00
committed by GitHub
parent 5e591fc1b7
commit a94d86da6d
6 changed files with 95 additions and 8 deletions

View File

@@ -1,4 +1,5 @@
import json
import logging
import pickle
from json import JSONDecodeError
@@ -6,6 +7,7 @@ from sqlalchemy import func
from sqlalchemy.dialects.postgresql import JSONB, UUID
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.account import Account
from models.model import App, UploadFile
@@ -441,6 +443,7 @@ class DatasetKeywordTable(db.Model):
id = db.Column(UUID, primary_key=True, server_default=db.text('uuid_generate_v4()'))
dataset_id = db.Column(UUID, nullable=False, unique=True)
keyword_table = db.Column(db.Text, nullable=False)
data_source_type = db.Column(db.String(255), nullable=False, server_default=db.text("'database'::character varying"))
@property
def keyword_table_dict(self):
@@ -454,8 +457,24 @@ class DatasetKeywordTable(db.Model):
if isinstance(node_idxs, list):
dct[keyword] = set(node_idxs)
return dct
return json.loads(self.keyword_table, cls=SetDecoder) if self.keyword_table else None
# get dataset
dataset = Dataset.query.filter_by(
id=self.dataset_id
).first()
if not dataset:
return None
if self.data_source_type == 'database':
return json.loads(self.keyword_table, cls=SetDecoder) if self.keyword_table else None
else:
file_key = 'keyword_files/' + dataset.tenant_id + '/' + self.dataset_id + '.txt'
try:
keyword_table_text = storage.load_once(file_key)
if keyword_table_text:
return json.loads(keyword_table_text.decode('utf-8'), cls=SetDecoder)
return None
except Exception as e:
logging.exception(str(e))
return None
class Embedding(db.Model):