chore(api/core): apply ruff reformatting (#7624)

Bowen Liang authored on 2024-09-10 17:00:20 +08:00; committed by GitHub
parent 178730266d
commit 2cf1187b32
724 changed files with 21180 additions and 21123 deletions
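
The changes below are mechanical: ruff's formatter rewrites single-quoted strings as double-quoted ones and joins calls that were wrapped across several lines whenever they fit within the configured line length. The exact ruff settings are not part of this diff; a limit of around 120 characters is an assumption inferred from the joined lines. A minimal, self-contained sketch of the two transformations, using invented code:

# Before formatting: single quotes, and a call wrapped across lines.
def before(credentials: dict) -> str:
    key = credentials.get('spider_api_key', '')
    return '{}/{}'.format(
        'https://api.spider.cloud/v1', key
    )

# After formatting: double quotes, and the call joined onto one line
# because it fits within the (assumed) 120-character limit.
def after(credentials: dict) -> str:
    key = credentials.get("spider_api_key", "")
    return "{}/{}".format("https://api.spider.cloud/v1", key)

assert before({"spider_api_key": "k"}) == after({"spider_api_key": "k"})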

View File

@@ -8,13 +8,13 @@ from core.tools.provider.builtin_tool_provider import BuiltinToolProviderControl
 class SpiderProvider(BuiltinToolProviderController):
     def _validate_credentials(self, credentials: dict[str, Any]) -> None:
         try:
-            app = Spider(api_key=credentials['spider_api_key'])
-            app.scrape_url(url='https://spider.cloud')
+            app = Spider(api_key=credentials["spider_api_key"])
+            app.scrape_url(url="https://spider.cloud")
         except AttributeError as e:
             # Handle cases where NoneType is not iterable, which might indicate API issues
-            if 'NoneType' in str(e) and 'not iterable' in str(e):
-                raise ToolProviderCredentialValidationError('API is currently down, try again in 15 minutes', str(e))
+            if "NoneType" in str(e) and "not iterable" in str(e):
+                raise ToolProviderCredentialValidationError("API is currently down, try again in 15 minutes", str(e))
             else:
-                raise ToolProviderCredentialValidationError('An unexpected error occurred.', str(e))
+                raise ToolProviderCredentialValidationError("An unexpected error occurred.", str(e))
         except Exception as e:
-            raise ToolProviderCredentialValidationError('An unexpected error occurred.', str(e))
+            raise ToolProviderCredentialValidationError("An unexpected error occurred.", str(e))

View File

@@ -65,9 +65,7 @@ class Spider:
         :return: The JSON response or the raw response stream if stream is True.
         """
         headers = self._prepare_headers(content_type)
-        response = self._post_request(
-            f"https://api.spider.cloud/v1/{endpoint}", data, headers, stream
-        )
+        response = self._post_request(f"https://api.spider.cloud/v1/{endpoint}", data, headers, stream)
         if stream:
             return response

@@ -76,9 +74,7 @@ class Spider:
         else:
             self._handle_error(response, f"post to {endpoint}")

-    def api_get(
-        self, endpoint: str, stream: bool, content_type: str = "application/json"
-    ):
+    def api_get(self, endpoint: str, stream: bool, content_type: str = "application/json"):
         """
         Send a GET request to the specified endpoint.

@@ -86,9 +82,7 @@ class Spider:
         :return: The JSON decoded response.
         """
         headers = self._prepare_headers(content_type)
-        response = self._get_request(
-            f"https://api.spider.cloud/v1/{endpoint}", headers, stream
-        )
+        response = self._get_request(f"https://api.spider.cloud/v1/{endpoint}", headers, stream)
         if response.status_code == 200:
             return response.json()
         else:
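
api_post and api_get above delegate to _post_request and _get_request, which this diff does not show. A plausible sketch, assuming they are thin wrappers over the requests library (the real helpers may differ):

import requests

def _post_request(url: str, data: dict, headers: dict, stream: bool = False):
    # Send the payload as JSON; stream is passed through so callers can
    # consume the raw response body chunk by chunk.
    return requests.post(url, headers=headers, json=data, stream=stream)

def _get_request(url: str, headers: dict, stream: bool = False):
    return requests.get(url, headers=headers, stream=stream)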
@@ -120,14 +114,12 @@ class Spider:
         # Add { "return_format": "markdown" } to the params if not already present
         if "return_format" not in params:
-            params["return_format"] = "markdown"
+            params["return_format"] = "markdown"

         # Set limit to 1
         params["limit"] = 1

-        return self.api_post(
-            "crawl", {"url": url, **(params or {})}, stream, content_type
-        )
+        return self.api_post("crawl", {"url": url, **(params or {})}, stream, content_type)

     def crawl_url(
         self,

@@ -150,9 +142,7 @@ class Spider:
         if "return_format" not in params:
             params["return_format"] = "markdown"

-        return self.api_post(
-            "crawl", {"url": url, **(params or {})}, stream, content_type
-        )
+        return self.api_post("crawl", {"url": url, **(params or {})}, stream, content_type)

     def links(
         self,

@@ -168,9 +158,7 @@ class Spider:
         :param params: Optional parameters for the link retrieval request.
         :return: JSON response containing the links.
         """
-        return self.api_post(
-            "links", {"url": url, **(params or {})}, stream, content_type
-        )
+        return self.api_post("links", {"url": url, **(params or {})}, stream, content_type)

     def extract_contacts(
         self,

@@ -207,9 +195,7 @@ class Spider:
         :param params: Optional parameters to guide the labeling process.
         :return: JSON response with labeled data.
         """
-        return self.api_post(
-            "pipeline/label", {"url": url, **(params or {})}, stream, content_type
-        )
+        return self.api_post("pipeline/label", {"url": url, **(params or {})}, stream, content_type)

     def _prepare_headers(self, content_type: str = "application/json"):
         return {

@@ -230,10 +216,6 @@ class Spider:
     def _handle_error(self, response, action):
         if response.status_code in [402, 409, 500]:
             error_message = response.json().get("error", "Unknown error occurred")
-            raise Exception(
-                f"Failed to {action}. Status code: {response.status_code}. Error: {error_message}"
-            )
+            raise Exception(f"Failed to {action}. Status code: {response.status_code}. Error: {error_message}")
         else:
-            raise Exception(
-                f"Unexpected error occurred while trying to {action}. Status code: {response.status_code}"
-            )
+            raise Exception(f"Unexpected error occurred while trying to {action}. Status code: {response.status_code}")

View File

@@ -6,41 +6,43 @@ from core.tools.tool.builtin_tool import BuiltinTool
 class ScrapeTool(BuiltinTool):
-    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+    def _invoke(
+        self, user_id: str, tool_parameters: dict[str, Any]
+    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
         # initialize the app object with the api key
-        app = Spider(api_key=self.runtime.credentials['spider_api_key'])
+        app = Spider(api_key=self.runtime.credentials["spider_api_key"])

-        url = tool_parameters['url']
-        mode = tool_parameters['mode']
+        url = tool_parameters["url"]
+        mode = tool_parameters["mode"]

         options = {
-            'limit': tool_parameters.get('limit', 0),
-            'depth': tool_parameters.get('depth', 0),
-            'blacklist': tool_parameters.get('blacklist', '').split(',') if tool_parameters.get('blacklist') else [],
-            'whitelist': tool_parameters.get('whitelist', '').split(',') if tool_parameters.get('whitelist') else [],
-            'readability': tool_parameters.get('readability', False),
+            "limit": tool_parameters.get("limit", 0),
+            "depth": tool_parameters.get("depth", 0),
+            "blacklist": tool_parameters.get("blacklist", "").split(",") if tool_parameters.get("blacklist") else [],
+            "whitelist": tool_parameters.get("whitelist", "").split(",") if tool_parameters.get("whitelist") else [],
+            "readability": tool_parameters.get("readability", False),
         }

         result = ""

         try:
-            if mode == 'scrape':
+            if mode == "scrape":
                 scrape_result = app.scrape_url(
-                    url=url,
+                    url=url,
                     params=options,
                 )

                 for i in scrape_result:
-                    result += "URL: " + i.get('url', '') + "\n"
-                    result += "CONTENT: " + i.get('content', '') + "\n\n"
-            elif mode == 'crawl':
+                    result += "URL: " + i.get("url", "") + "\n"
+                    result += "CONTENT: " + i.get("content", "") + "\n\n"
+            elif mode == "crawl":
                 crawl_result = app.crawl_url(
-                    url=tool_parameters['url'],
+                    url=tool_parameters["url"],
                     params=options,
                 )

                 for i in crawl_result:
-                    result += "URL: " + i.get('url', '') + "\n"
-                    result += "CONTENT: " + i.get('content', '') + "\n\n"
+                    result += "URL: " + i.get("url", "") + "\n"
+                    result += "CONTENT: " + i.get("content", "") + "\n\n"
         except Exception as e:
             return self.create_text_message("An error occurred", str(e))
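
The options dict above is the one non-trivial mapping in this tool: flat tool parameters become the Spider client's crawl options, with comma-separated blacklist/whitelist strings split into lists. A standalone sketch of just that mapping (the helper name is invented):

from typing import Any

def build_options(tool_parameters: dict[str, Any]) -> dict[str, Any]:
    return {
        "limit": tool_parameters.get("limit", 0),
        "depth": tool_parameters.get("depth", 0),
        # Empty or missing strings become empty lists rather than [""].
        "blacklist": tool_parameters.get("blacklist", "").split(",") if tool_parameters.get("blacklist") else [],
        "whitelist": tool_parameters.get("whitelist", "").split(",") if tool_parameters.get("whitelist") else [],
        "readability": tool_parameters.get("readability", False),
    }

print(build_options({"limit": 5, "blacklist": "/admin,/login"}))
# {'limit': 5, 'depth': 0, 'blacklist': ['/admin', '/login'], 'whitelist': [], 'readability': False}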