chore: refurbish python code by applying Pylint linter rules (#8322)

This commit is contained in:
Bowen Liang
2024-09-13 22:42:08 +08:00
committed by GitHub
parent 1ab81b4972
commit a1104ab97e
126 changed files with 253 additions and 272 deletions

View File

@@ -51,7 +51,7 @@ class ElasticSearchVector(BaseVector):
def _init_client(self, config: ElasticSearchConfig) -> Elasticsearch:
try:
parsed_url = urlparse(config.host)
if parsed_url.scheme in ["http", "https"]:
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
else:
hosts = f"http://{config.host}:{config.port}"
@@ -94,7 +94,7 @@ class ElasticSearchVector(BaseVector):
return uuids
def text_exists(self, id: str) -> bool:
return self._client.exists(index=self._collection_name, id=id).__bool__()
return bool(self._client.exists(index=self._collection_name, id=id))
def delete_by_ids(self, ids: list[str]) -> None:
for id in ids:

View File

@@ -35,7 +35,7 @@ class MyScaleVector(BaseVector):
super().__init__(collection_name)
self._config = config
self._metric = metric
self._vec_order = SortOrder.ASC if metric.upper() in ["COSINE", "L2"] else SortOrder.DESC
self._vec_order = SortOrder.ASC if metric.upper() in {"COSINE", "L2"} else SortOrder.DESC
self._client = get_client(
host=config.host,
port=config.port,
@@ -92,7 +92,7 @@ class MyScaleVector(BaseVector):
@staticmethod
def escape_str(value: Any) -> str:
return "".join(" " if c in ("\\", "'") else c for c in str(value))
return "".join(" " if c in {"\\", "'"} else c for c in str(value))
def text_exists(self, id: str) -> bool:
results = self._client.query(f"SELECT id FROM {self._config.database}.{self._collection_name} WHERE id='{id}'")

View File

@@ -223,15 +223,7 @@ class OracleVector(BaseVector):
words = pseg.cut(query)
current_entity = ""
for word, pos in words:
if (
pos == "nr"
or pos == "Ng"
or pos == "eng"
or pos == "nz"
or pos == "n"
or pos == "ORG"
or pos == "v"
): # nr: 人名, ns: 地名, nt: 机构名
if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}: # nr: 人名, ns: 地名, nt: 机构名
current_entity += word
else:
if current_entity:

View File

@@ -98,17 +98,17 @@ class ExtractProcessor:
unstructured_api_url = dify_config.UNSTRUCTURED_API_URL
unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY
if etl_type == "Unstructured":
if file_extension == ".xlsx" or file_extension == ".xls":
if file_extension in {".xlsx", ".xls"}:
extractor = ExcelExtractor(file_path)
elif file_extension == ".pdf":
extractor = PdfExtractor(file_path)
elif file_extension in [".md", ".markdown"]:
elif file_extension in {".md", ".markdown"}:
extractor = (
UnstructuredMarkdownExtractor(file_path, unstructured_api_url)
if is_automatic
else MarkdownExtractor(file_path, autodetect_encoding=True)
)
elif file_extension in [".htm", ".html"]:
elif file_extension in {".htm", ".html"}:
extractor = HtmlExtractor(file_path)
elif file_extension == ".docx":
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
@@ -134,13 +134,13 @@ class ExtractProcessor:
else TextExtractor(file_path, autodetect_encoding=True)
)
else:
if file_extension == ".xlsx" or file_extension == ".xls":
if file_extension in {".xlsx", ".xls"}:
extractor = ExcelExtractor(file_path)
elif file_extension == ".pdf":
extractor = PdfExtractor(file_path)
elif file_extension in [".md", ".markdown"]:
elif file_extension in {".md", ".markdown"}:
extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
elif file_extension in [".htm", ".html"]:
elif file_extension in {".htm", ".html"}:
extractor = HtmlExtractor(file_path)
elif file_extension == ".docx":
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)

View File

@@ -32,7 +32,7 @@ class FirecrawlApp:
else:
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
elif response.status_code in [402, 409, 500]:
elif response.status_code in {402, 409, 500}:
error_message = response.json().get("error", "Unknown error occurred")
raise Exception(f"Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}")
else:

View File

@@ -103,12 +103,12 @@ class NotionExtractor(BaseExtractor):
multi_select_list = property_value[type]
for multi_select in multi_select_list:
value.append(multi_select["name"])
elif type == "rich_text" or type == "title":
elif type in {"rich_text", "title"}:
if len(property_value[type]) > 0:
value = property_value[type][0]["plain_text"]
else:
value = ""
elif type == "select" or type == "status":
elif type in {"select", "status"}:
if property_value[type]:
value = property_value[type]["name"]
else:

View File

@@ -115,7 +115,7 @@ class DatasetRetrieval:
available_datasets.append(dataset)
all_documents = []
user_from = "account" if invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER] else "end_user"
user_from = "account" if invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end_user"
if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE:
all_documents = self.single_retrieve(
app_id,

View File

@@ -35,7 +35,7 @@ def _split_text_with_regex(text: str, separator: str, keep_separator: bool) -> l
splits = re.split(separator, text)
else:
splits = list(text)
return [s for s in splits if (s != "" and s != "\n")]
return [s for s in splits if (s not in {"", "\n"})]
class TextSplitter(BaseDocumentTransformer, ABC):