mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-25 10:43:01 +08:00
feat(website-crawl): add jina reader as additional alternative for website crawling (#8761)
This commit is contained in:
@@ -1,8 +1,12 @@
|
||||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import React, { useCallback, useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import s from './index.module.css'
|
||||
import NoData from './no-data'
|
||||
import Firecrawl from './firecrawl'
|
||||
import JinaReader from './jina-reader'
|
||||
import cn from '@/utils/classnames'
|
||||
import { useModalContext } from '@/context/modal-context'
|
||||
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
|
||||
import { fetchDataSources } from '@/service/datasets'
|
||||
@@ -12,6 +16,7 @@ type Props = {
|
||||
onPreview: (payload: CrawlResultItem) => void
|
||||
checkedCrawlResult: CrawlResultItem[]
|
||||
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
|
||||
onCrawlProviderChange: (provider: DataSourceProvider) => void
|
||||
onJobIdChange: (jobId: string) => void
|
||||
crawlOptions: CrawlOptions
|
||||
onCrawlOptionsChange: (payload: CrawlOptions) => void
|
||||
@@ -21,17 +26,32 @@ const Website: FC<Props> = ({
|
||||
onPreview,
|
||||
checkedCrawlResult,
|
||||
onCheckedCrawlResultChange,
|
||||
onCrawlProviderChange,
|
||||
onJobIdChange,
|
||||
crawlOptions,
|
||||
onCrawlOptionsChange,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
const { setShowAccountSettingModal } = useModalContext()
|
||||
const [isLoaded, setIsLoaded] = useState(false)
|
||||
const [isSetFirecrawlApiKey, setIsSetFirecrawlApiKey] = useState(false)
|
||||
const [selectedProvider, setSelectedProvider] = useState<DataSourceProvider>(DataSourceProvider.jinaReader)
|
||||
const [sources, setSources] = useState<DataSourceItem[]>([])
|
||||
|
||||
useEffect(() => {
|
||||
onCrawlProviderChange(selectedProvider)
|
||||
}, [selectedProvider, onCrawlProviderChange])
|
||||
|
||||
const checkSetApiKey = useCallback(async () => {
|
||||
const res = await fetchDataSources() as any
|
||||
const isFirecrawlSet = res.sources.some((item: DataSourceItem) => item.provider === DataSourceProvider.fireCrawl)
|
||||
setIsSetFirecrawlApiKey(isFirecrawlSet)
|
||||
setSources(res.sources)
|
||||
|
||||
// If users have configured one of the providers, select it.
|
||||
const availableProviders = res.sources.filter((item: DataSourceItem) =>
|
||||
[DataSourceProvider.jinaReader, DataSourceProvider.fireCrawl].includes(item.provider),
|
||||
)
|
||||
|
||||
if (availableProviders.length > 0)
|
||||
setSelectedProvider(availableProviders[0].provider)
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
@@ -52,20 +72,66 @@ const Website: FC<Props> = ({
|
||||
|
||||
return (
|
||||
<div>
|
||||
{isSetFirecrawlApiKey
|
||||
? (
|
||||
<Firecrawl
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)
|
||||
: (
|
||||
<NoData onConfig={handleOnConfig} />
|
||||
)}
|
||||
<div className="mb-4">
|
||||
<div className="font-medium text-gray-700 mb-2 h-6">
|
||||
{t('datasetCreation.stepOne.website.chooseProvider')}
|
||||
</div>
|
||||
<div className="flex space-x-2">
|
||||
<button
|
||||
className={`px-4 py-2 text-sm font-medium rounded-md flex items-center justify-center ${
|
||||
selectedProvider === DataSourceProvider.jinaReader
|
||||
? 'bg-primary-50 text-primary-600'
|
||||
: 'bg-gray-100 text-gray-600 hover:bg-gray-200'
|
||||
}`}
|
||||
onClick={() => setSelectedProvider(DataSourceProvider.jinaReader)}
|
||||
>
|
||||
<span className={cn(s.jinaLogo, 'mr-2')} />
|
||||
<span>Jina Reader</span>
|
||||
</button>
|
||||
<button
|
||||
className={`px-4 py-2 text-sm font-medium rounded-md ${
|
||||
selectedProvider === DataSourceProvider.fireCrawl
|
||||
? 'bg-primary-50 text-primary-600'
|
||||
: 'bg-gray-100 text-gray-600 hover:bg-gray-200'
|
||||
}`}
|
||||
onClick={() => setSelectedProvider(DataSourceProvider.fireCrawl)}
|
||||
>
|
||||
🔥 Firecrawl
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{
|
||||
selectedProvider === DataSourceProvider.fireCrawl
|
||||
? sources.find(source => source.provider === DataSourceProvider.fireCrawl)
|
||||
? (
|
||||
<Firecrawl
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)
|
||||
: (
|
||||
<NoData onConfig={handleOnConfig} provider={selectedProvider} />
|
||||
)
|
||||
: sources.find(source => source.provider === DataSourceProvider.jinaReader)
|
||||
? (
|
||||
<JinaReader
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)
|
||||
: (
|
||||
<NoData onConfig={handleOnConfig} provider={selectedProvider} />
|
||||
)
|
||||
}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user