Feat/firecrawl data source (#5232)

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
Co-authored-by: chenhe <guchenhe@gmail.com>
Co-authored-by: takatost <takatost@gmail.com>
This commit is contained in:
Jyong
2024-06-15 02:46:02 +08:00
committed by GitHub
parent 918ebe1620
commit ba5f8afaa8
36 changed files with 1174 additions and 64 deletions

View File

@@ -0,0 +1,33 @@
import os
from unittest import mock
from core.rag.extractor.firecrawl.firecrawl_app import FirecrawlApp
from core.rag.extractor.firecrawl.firecrawl_web_extractor import FirecrawlWebExtractor
from core.rag.models.document import Document
from tests.unit_tests.core.rag.extractor.test_notion_extractor import _mock_response
def test_firecrawl_web_extractor_crawl_mode(mocker):
url = "https://firecrawl.dev"
api_key = os.getenv('FIRECRAWL_API_KEY') or 'fc-'
base_url = 'https://api.firecrawl.dev'
firecrawl_app = FirecrawlApp(api_key=api_key,
base_url=base_url)
params = {
'crawlerOptions': {
"includes": [],
"excludes": [],
"generateImgAltText": True,
"maxDepth": 1,
"limit": 1,
'returnOnlyUrls': False,
}
}
mocked_firecrawl = {
"jobId": "test",
}
mocker.patch("requests.post", return_value=_mock_response(mocked_firecrawl))
job_id = firecrawl_app.crawl_url(url, params)
print(job_id)
assert isinstance(job_id, str)

View File