chore: refurbish python code by applying Pylint linter rules (#8322)

2025-12-10 03:16:51 +08:00 · 2024-09-13 22:42:08 +08:00
parent 1ab81b4972
commit a1104ab97e
126 changed files with 253 additions and 272 deletions
--- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
+++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
@@ -51,7 +51,7 @@ class ElasticSearchVector(BaseVector):
    def _init_client(self, config: ElasticSearchConfig) -> Elasticsearch:
        try:
            parsed_url = urlparse(config.host)
-            if parsed_url.scheme in ["http", "https"]:
+            if parsed_url.scheme in {"http", "https"}:
                hosts = f"{config.host}:{config.port}"
            else:
                hosts = f"http://{config.host}:{config.port}"
@@ -94,7 +94,7 @@ class ElasticSearchVector(BaseVector):
        return uuids

    def text_exists(self, id: str) -> bool:
-        return self._client.exists(index=self._collection_name, id=id).__bool__()
+        return bool(self._client.exists(index=self._collection_name, id=id))

    def delete_by_ids(self, ids: list[str]) -> None:
        for id in ids:
--- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py
+++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py
@@ -35,7 +35,7 @@ class MyScaleVector(BaseVector):
        super().__init__(collection_name)
        self._config = config
        self._metric = metric
-        self._vec_order = SortOrder.ASC if metric.upper() in ["COSINE", "L2"] else SortOrder.DESC
+        self._vec_order = SortOrder.ASC if metric.upper() in {"COSINE", "L2"} else SortOrder.DESC
        self._client = get_client(
            host=config.host,
            port=config.port,
@@ -92,7 +92,7 @@ class MyScaleVector(BaseVector):

    @staticmethod
    def escape_str(value: Any) -> str:
-        return "".join(" " if c in ("\\", "'") else c for c in str(value))
+        return "".join(" " if c in {"\\", "'"} else c for c in str(value))

    def text_exists(self, id: str) -> bool:
        results = self._client.query(f"SELECT id FROM {self._config.database}.{self._collection_name} WHERE id='{id}'")
--- a/api/core/rag/datasource/vdb/oracle/oraclevector.py
+++ b/api/core/rag/datasource/vdb/oracle/oraclevector.py
@@ -223,15 +223,7 @@ class OracleVector(BaseVector):
                words = pseg.cut(query)
                current_entity = ""
                for word, pos in words:
-                    if (
-                        pos == "nr"
-                        or pos == "Ng"
-                        or pos == "eng"
-                        or pos == "nz"
-                        or pos == "n"
-                        or pos == "ORG"
-                        or pos == "v"
-                    ):  # nr: 人名, ns: 地名, nt: 机构名
+                    if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}:  # nr: 人名, ns: 地名, nt: 机构名
                        current_entity += word
                    else:
                        if current_entity:
--- a/api/core/rag/extractor/extract_processor.py
+++ b/api/core/rag/extractor/extract_processor.py
@@ -98,17 +98,17 @@ class ExtractProcessor:
                unstructured_api_url = dify_config.UNSTRUCTURED_API_URL
                unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY
                if etl_type == "Unstructured":
-                    if file_extension == ".xlsx" or file_extension == ".xls":
+                    if file_extension in {".xlsx", ".xls"}:
                        extractor = ExcelExtractor(file_path)
                    elif file_extension == ".pdf":
                        extractor = PdfExtractor(file_path)
-                    elif file_extension in [".md", ".markdown"]:
+                    elif file_extension in {".md", ".markdown"}:
                        extractor = (
                            UnstructuredMarkdownExtractor(file_path, unstructured_api_url)
                            if is_automatic
                            else MarkdownExtractor(file_path, autodetect_encoding=True)
                        )
-                    elif file_extension in [".htm", ".html"]:
+                    elif file_extension in {".htm", ".html"}:
                        extractor = HtmlExtractor(file_path)
                    elif file_extension == ".docx":
                        extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
@@ -134,13 +134,13 @@ class ExtractProcessor:
                            else TextExtractor(file_path, autodetect_encoding=True)
                        )
                else:
-                    if file_extension == ".xlsx" or file_extension == ".xls":
+                    if file_extension in {".xlsx", ".xls"}:
                        extractor = ExcelExtractor(file_path)
                    elif file_extension == ".pdf":
                        extractor = PdfExtractor(file_path)
-                    elif file_extension in [".md", ".markdown"]:
+                    elif file_extension in {".md", ".markdown"}:
                        extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
-                    elif file_extension in [".htm", ".html"]:
+                    elif file_extension in {".htm", ".html"}:
                        extractor = HtmlExtractor(file_path)
                    elif file_extension == ".docx":
                        extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
--- a/api/core/rag/extractor/firecrawl/firecrawl_app.py
+++ b/api/core/rag/extractor/firecrawl/firecrawl_app.py
@@ -32,7 +32,7 @@ class FirecrawlApp:
            else:
                raise Exception(f'Failed to scrape URL. Error: {response["error"]}')

-        elif response.status_code in [402, 409, 500]:
+        elif response.status_code in {402, 409, 500}:
            error_message = response.json().get("error", "Unknown error occurred")
            raise Exception(f"Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}")
        else:
--- a/api/core/rag/extractor/notion_extractor.py
+++ b/api/core/rag/extractor/notion_extractor.py
@@ -103,12 +103,12 @@ class NotionExtractor(BaseExtractor):
                    multi_select_list = property_value[type]
                    for multi_select in multi_select_list:
                        value.append(multi_select["name"])
-                elif type == "rich_text" or type == "title":
+                elif type in {"rich_text", "title"}:
                    if len(property_value[type]) > 0:
                        value = property_value[type][0]["plain_text"]
                    else:
                        value = ""
-                elif type == "select" or type == "status":
+                elif type in {"select", "status"}:
                    if property_value[type]:
                        value = property_value[type]["name"]
                    else:
--- a/api/core/rag/retrieval/dataset_retrieval.py
+++ b/api/core/rag/retrieval/dataset_retrieval.py
@@ -115,7 +115,7 @@ class DatasetRetrieval:

            available_datasets.append(dataset)
        all_documents = []
-        user_from = "account" if invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER] else "end_user"
+        user_from = "account" if invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end_user"
        if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE:
            all_documents = self.single_retrieve(
                app_id,
--- a/api/core/rag/splitter/text_splitter.py
+++ b/api/core/rag/splitter/text_splitter.py
@@ -35,7 +35,7 @@ def _split_text_with_regex(text: str, separator: str, keep_separator: bool) -> l
            splits = re.split(separator, text)
    else:
        splits = list(text)
-    return [s for s in splits if (s != "" and s != "\n")]
+    return [s for s in splits if (s not in {"", "\n"})]


 class TextSplitter(BaseDocumentTransformer, ABC):