chore: refurbish Python code by applying Pylint linter rules (#8322)

2025-12-08 02:16:51 +08:00 · 2024-09-13 22:42:08 +08:00
parent 1ab81b4972
commit a1104ab97e
126 changed files with 253 additions and 272 deletions
--- a/api/core/rag/extractor/extract_processor.py
+++ b/api/core/rag/extractor/extract_processor.py
@@ -98,17 +98,17 @@ class ExtractProcessor:
                unstructured_api_url = dify_config.UNSTRUCTURED_API_URL
                unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY
                if etl_type == "Unstructured":
-                    if file_extension == ".xlsx" or file_extension == ".xls":
+                    if file_extension in {".xlsx", ".xls"}:
                        extractor = ExcelExtractor(file_path)
                    elif file_extension == ".pdf":
                        extractor = PdfExtractor(file_path)
-                    elif file_extension in [".md", ".markdown"]:
+                    elif file_extension in {".md", ".markdown"}:
                        extractor = (
                            UnstructuredMarkdownExtractor(file_path, unstructured_api_url)
                            if is_automatic
                            else MarkdownExtractor(file_path, autodetect_encoding=True)
                        )
-                    elif file_extension in [".htm", ".html"]:
+                    elif file_extension in {".htm", ".html"}:
                        extractor = HtmlExtractor(file_path)
                    elif file_extension == ".docx":
                        extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
@@ -134,13 +134,13 @@ class ExtractProcessor:
                            else TextExtractor(file_path, autodetect_encoding=True)
                        )
                else:
-                    if file_extension == ".xlsx" or file_extension == ".xls":
+                    if file_extension in {".xlsx", ".xls"}:
                        extractor = ExcelExtractor(file_path)
                    elif file_extension == ".pdf":
                        extractor = PdfExtractor(file_path)
-                    elif file_extension in [".md", ".markdown"]:
+                    elif file_extension in {".md", ".markdown"}:
                        extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
-                    elif file_extension in [".htm", ".html"]:
+                    elif file_extension in {".htm", ".html"}:
                        extractor = HtmlExtractor(file_path)
                    elif file_extension == ".docx":
                        extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
--- a/api/core/rag/extractor/firecrawl/firecrawl_app.py
+++ b/api/core/rag/extractor/firecrawl/firecrawl_app.py
@@ -32,7 +32,7 @@ class FirecrawlApp:
            else:
                raise Exception(f'Failed to scrape URL. Error: {response["error"]}')

-        elif response.status_code in [402, 409, 500]:
+        elif response.status_code in {402, 409, 500}:
            error_message = response.json().get("error", "Unknown error occurred")
            raise Exception(f"Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}")
        else:
--- a/api/core/rag/extractor/notion_extractor.py
+++ b/api/core/rag/extractor/notion_extractor.py
@@ -103,12 +103,12 @@ class NotionExtractor(BaseExtractor):
                    multi_select_list = property_value[type]
                    for multi_select in multi_select_list:
                        value.append(multi_select["name"])
-                elif type == "rich_text" or type == "title":
+                elif type in {"rich_text", "title"}:
                    if len(property_value[type]) > 0:
                        value = property_value[type][0]["plain_text"]
                    else:
                        value = ""
-                elif type == "select" or type == "status":
+                elif type in {"select", "status"}:
                    if property_value[type]:
                        value = property_value[type]["name"]
                    else: