Optimize webscraper (#4392)

Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
Charlie.Wei
2024-05-15 15:23:16 +08:00
committed by GitHub
parent c0fe414e0a
commit 97b65f9b4b
4 changed files with 61 additions and 11 deletions

View File

@@ -7,9 +7,9 @@ from core.tools.tool.builtin_tool import BuiltinTool
class WebscraperTool(BuiltinTool):
def _invoke(self,
user_id: str,
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
user_id: str,
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
"""
invoke tools
"""
@@ -18,12 +18,15 @@ class WebscraperTool(BuiltinTool):
user_agent = tool_parameters.get('user_agent', '')
if not url:
return self.create_text_message('Please input url')
# get webpage
result = self.get_url(url, user_agent=user_agent)
# summarize and return
return self.create_text_message(self.summary(user_id=user_id, content=result))
if tool_parameters.get('generate_summary'):
# summarize and return
return self.create_text_message(self.summary(user_id=user_id, content=result))
else:
# return full webpage
return self.create_text_message(result)
except Exception as e:
raise ToolInvokeError(str(e))

View File

@@ -38,3 +38,23 @@ parameters:
pt_BR: used for identifying the browser.
form: form
default: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.1000.0 Safari/537.36
- name: generate_summary
type: boolean
required: false
label:
en_US: Whether to generate summary
zh_Hans: 是否生成摘要
human_description:
en_US: If true, the crawler will only return the page summary content.
zh_Hans: 如果启用,爬虫将仅返回页面摘要内容。
form: form
options:
- value: true
label:
en_US: Yes
zh_Hans: 是
- value: false
label:
en_US: No
zh_Hans: 否
default: false