mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-10 03:16:51 +08:00
chore: refurbish python code by applying Pylint linter rules (#8322)
This commit is contained in:
@@ -51,7 +51,7 @@ class ElasticSearchVector(BaseVector):
|
||||
def _init_client(self, config: ElasticSearchConfig) -> Elasticsearch:
|
||||
try:
|
||||
parsed_url = urlparse(config.host)
|
||||
if parsed_url.scheme in ["http", "https"]:
|
||||
if parsed_url.scheme in {"http", "https"}:
|
||||
hosts = f"{config.host}:{config.port}"
|
||||
else:
|
||||
hosts = f"http://{config.host}:{config.port}"
|
||||
@@ -94,7 +94,7 @@ class ElasticSearchVector(BaseVector):
|
||||
return uuids
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
return self._client.exists(index=self._collection_name, id=id).__bool__()
|
||||
return bool(self._client.exists(index=self._collection_name, id=id))
|
||||
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
for id in ids:
|
||||
|
||||
@@ -35,7 +35,7 @@ class MyScaleVector(BaseVector):
|
||||
super().__init__(collection_name)
|
||||
self._config = config
|
||||
self._metric = metric
|
||||
self._vec_order = SortOrder.ASC if metric.upper() in ["COSINE", "L2"] else SortOrder.DESC
|
||||
self._vec_order = SortOrder.ASC if metric.upper() in {"COSINE", "L2"} else SortOrder.DESC
|
||||
self._client = get_client(
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
@@ -92,7 +92,7 @@ class MyScaleVector(BaseVector):
|
||||
|
||||
@staticmethod
|
||||
def escape_str(value: Any) -> str:
|
||||
return "".join(" " if c in ("\\", "'") else c for c in str(value))
|
||||
return "".join(" " if c in {"\\", "'"} else c for c in str(value))
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
results = self._client.query(f"SELECT id FROM {self._config.database}.{self._collection_name} WHERE id='{id}'")
|
||||
|
||||
@@ -223,15 +223,7 @@ class OracleVector(BaseVector):
|
||||
words = pseg.cut(query)
|
||||
current_entity = ""
|
||||
for word, pos in words:
|
||||
if (
|
||||
pos == "nr"
|
||||
or pos == "Ng"
|
||||
or pos == "eng"
|
||||
or pos == "nz"
|
||||
or pos == "n"
|
||||
or pos == "ORG"
|
||||
or pos == "v"
|
||||
): # nr: 人名, ns: 地名, nt: 机构名
|
||||
if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}: # nr: 人名, ns: 地名, nt: 机构名
|
||||
current_entity += word
|
||||
else:
|
||||
if current_entity:
|
||||
|
||||
@@ -98,17 +98,17 @@ class ExtractProcessor:
|
||||
unstructured_api_url = dify_config.UNSTRUCTURED_API_URL
|
||||
unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY
|
||||
if etl_type == "Unstructured":
|
||||
if file_extension == ".xlsx" or file_extension == ".xls":
|
||||
if file_extension in {".xlsx", ".xls"}:
|
||||
extractor = ExcelExtractor(file_path)
|
||||
elif file_extension == ".pdf":
|
||||
extractor = PdfExtractor(file_path)
|
||||
elif file_extension in [".md", ".markdown"]:
|
||||
elif file_extension in {".md", ".markdown"}:
|
||||
extractor = (
|
||||
UnstructuredMarkdownExtractor(file_path, unstructured_api_url)
|
||||
if is_automatic
|
||||
else MarkdownExtractor(file_path, autodetect_encoding=True)
|
||||
)
|
||||
elif file_extension in [".htm", ".html"]:
|
||||
elif file_extension in {".htm", ".html"}:
|
||||
extractor = HtmlExtractor(file_path)
|
||||
elif file_extension == ".docx":
|
||||
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
|
||||
@@ -134,13 +134,13 @@ class ExtractProcessor:
|
||||
else TextExtractor(file_path, autodetect_encoding=True)
|
||||
)
|
||||
else:
|
||||
if file_extension == ".xlsx" or file_extension == ".xls":
|
||||
if file_extension in {".xlsx", ".xls"}:
|
||||
extractor = ExcelExtractor(file_path)
|
||||
elif file_extension == ".pdf":
|
||||
extractor = PdfExtractor(file_path)
|
||||
elif file_extension in [".md", ".markdown"]:
|
||||
elif file_extension in {".md", ".markdown"}:
|
||||
extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
|
||||
elif file_extension in [".htm", ".html"]:
|
||||
elif file_extension in {".htm", ".html"}:
|
||||
extractor = HtmlExtractor(file_path)
|
||||
elif file_extension == ".docx":
|
||||
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
|
||||
|
||||
@@ -32,7 +32,7 @@ class FirecrawlApp:
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
||||
|
||||
elif response.status_code in [402, 409, 500]:
|
||||
elif response.status_code in {402, 409, 500}:
|
||||
error_message = response.json().get("error", "Unknown error occurred")
|
||||
raise Exception(f"Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}")
|
||||
else:
|
||||
|
||||
@@ -103,12 +103,12 @@ class NotionExtractor(BaseExtractor):
|
||||
multi_select_list = property_value[type]
|
||||
for multi_select in multi_select_list:
|
||||
value.append(multi_select["name"])
|
||||
elif type == "rich_text" or type == "title":
|
||||
elif type in {"rich_text", "title"}:
|
||||
if len(property_value[type]) > 0:
|
||||
value = property_value[type][0]["plain_text"]
|
||||
else:
|
||||
value = ""
|
||||
elif type == "select" or type == "status":
|
||||
elif type in {"select", "status"}:
|
||||
if property_value[type]:
|
||||
value = property_value[type]["name"]
|
||||
else:
|
||||
|
||||
@@ -115,7 +115,7 @@ class DatasetRetrieval:
|
||||
|
||||
available_datasets.append(dataset)
|
||||
all_documents = []
|
||||
user_from = "account" if invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER] else "end_user"
|
||||
user_from = "account" if invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end_user"
|
||||
if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE:
|
||||
all_documents = self.single_retrieve(
|
||||
app_id,
|
||||
|
||||
@@ -35,7 +35,7 @@ def _split_text_with_regex(text: str, separator: str, keep_separator: bool) -> l
|
||||
splits = re.split(separator, text)
|
||||
else:
|
||||
splits = list(text)
|
||||
return [s for s in splits if (s != "" and s != "\n")]
|
||||
return [s for s in splits if (s not in {"", "\n"})]
|
||||
|
||||
|
||||
class TextSplitter(BaseDocumentTransformer, ABC):
|
||||
|
||||
Reference in New Issue
Block a user