mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-09 10:56:52 +08:00
feat: Integrate WaterCrawl.dev as a new knowledge base provider (#16396)
Co-authored-by: crazywoola <427733928@qq.com>
This commit is contained in:
committed by
GitHub
parent
0afad94378
commit
f54905e685
20
web/app/components/datasets/create/assets/watercrawl.svg
Normal file
20
web/app/components/datasets/create/assets/watercrawl.svg
Normal file
@@ -0,0 +1,20 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 500 500">
|
||||
<path style="fill: rgb(0, 23, 87); stroke: rgb(13, 14, 52);" d="M 247.794 213.903 L 246.81 76.976 L 254.345 76.963 L 254.592 213.989 L 247.794 213.903 Z"/>
|
||||
<ellipse style="fill: rgb(0, 23, 87); stroke: rgb(0, 23, 87);" cx="250.025" cy="43.859" rx="33.966" ry="33.906"/>
|
||||
<path style="fill: rgb(30, 141, 166); stroke: rgb(30, 141, 166);" d="M 282.472 260.389 L 414.181 330.253 L 410.563 336.234 L 279.38 265.739 L 282.472 260.389 Z"/>
|
||||
<path style="fill: rgb(15, 17, 57); stroke: rgb(13, 14, 52);" d="M 255.105 281.394 L 254.485 417.656 L 246.156 417.691 L 246.688 280.51 L 255.105 281.394 Z"/>
|
||||
<path style="paint-order: fill; fill: rgb(30, 141, 166); stroke: rgb(30, 141, 166);" d="M 279.486 229.517 L 410.351 160.07 L 413.923 167.04 L 283.727 235.998 L 279.486 229.517 Z"/>
|
||||
<path style="fill: rgb(15, 164, 161); stroke: rgb(15, 164, 161);" d="M 88.545 164.884 L 219.797 236.07 L 222.867 229.568 L 90.887 159.47 L 88.545 164.884 Z"/>
|
||||
<path style="fill: rgb(15, 164, 161); stroke: rgb(15, 164, 161);" d="M 224.76 266.9 L 95.55 334.829 L 92.878 328.37 L 219.955 261.275 L 224.76 266.9 Z"/>
|
||||
<ellipse style="paint-order: fill; fill: rgb(2, 181, 225); stroke: rgb(2, 181, 225);" cx="251.242" cy="247.466" rx="33.966" ry="33.906"/>
|
||||
<path style="fill: rgb(13, 14, 52); stroke: rgb(13, 14, 52);" d="M 279.502 433.617 L 408.666 359.443 C 408.666 359.443 412.398 366.965 412.398 366.916 C 412.398 366.867 281.544 440.217 281.544 440.217 L 279.502 433.617 Z"/>
|
||||
<path style="fill: rgb(13, 14, 52); stroke: rgb(13, 14, 52);" d="M 223.119 431.408 L 96.643 361.068 L 93.265 368.047 L 218.895 438.099 L 223.119 431.408 Z"/>
|
||||
<ellipse style="fill: rgb(0, 23, 87); stroke: rgb(0, 23, 87);" cx="250.504" cy="451.168" rx="33.966" ry="33.906"/>
|
||||
<path style="fill: rgb(90, 191, 187); stroke: rgb(90, 191, 187);" d="M 435.665 180.895 L 435.859 316.869 L 443.103 315.579 L 442.56 180.697 L 435.665 180.895 Z"/>
|
||||
<ellipse style="fill: rgb(0, 23, 87); stroke: rgb(0, 23, 87);" cx="441.06" cy="349.665" rx="33.966" ry="33.906"/>
|
||||
<ellipse style="fill: rgb(2, 181, 225); stroke: rgb(2, 181, 225);" cx="441.512" cy="147.767" rx="33.966" ry="33.906"/>
|
||||
<path style="fill: rgb(84, 187, 181); stroke: rgb(84, 187, 181);" d="M 64.755 314.523 L 57.928 315.006 L 58.307 182.961 L 65.169 182.865 L 64.755 314.523 Z"/>
|
||||
<ellipse style="fill: rgb(0, 23, 87); stroke: rgb(0, 23, 87);" cx="58.177" cy="149.757" rx="33.966" ry="33.906"/>
|
||||
<ellipse style="fill: rgb(61, 224, 203); stroke: rgb(61, 224, 203);" cx="65.909" cy="348.17" rx="33.966" ry="33.906"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.6 KiB |
@@ -4,3 +4,10 @@
|
||||
background-image: url(../assets/jina.png);
|
||||
background-size: 16px;
|
||||
}
|
||||
|
||||
.watercrawlLogo {
|
||||
@apply w-5 h-5 bg-center bg-no-repeat inline-block;
|
||||
/*background-color: #F5FAFF;*/
|
||||
background-image: url(../assets/watercrawl.svg);
|
||||
background-size: 16px;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import { useTranslation } from 'react-i18next'
|
||||
import s from './index.module.css'
|
||||
import NoData from './no-data'
|
||||
import Firecrawl from './firecrawl'
|
||||
import Watercrawl from './watercrawl'
|
||||
import JinaReader from './jina-reader'
|
||||
import cn from '@/utils/classnames'
|
||||
import { useModalContext } from '@/context/modal-context'
|
||||
@@ -47,7 +48,11 @@ const Website: FC<Props> = ({
|
||||
|
||||
// If users have configured one of the providers, select it.
|
||||
const availableProviders = res.sources.filter((item: DataSourceItem) =>
|
||||
[DataSourceProvider.jinaReader, DataSourceProvider.fireCrawl].includes(item.provider),
|
||||
[
|
||||
DataSourceProvider.jinaReader,
|
||||
DataSourceProvider.fireCrawl,
|
||||
DataSourceProvider.waterCrawl,
|
||||
].includes(item.provider),
|
||||
)
|
||||
|
||||
if (availableProviders.length > 0)
|
||||
@@ -70,6 +75,8 @@ const Website: FC<Props> = ({
|
||||
if (!isLoaded)
|
||||
return null
|
||||
|
||||
const source = sources.find(source => source.provider === selectedProvider)
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div className="mb-4">
|
||||
@@ -86,7 +93,7 @@ const Website: FC<Props> = ({
|
||||
)}
|
||||
onClick={() => setSelectedProvider(DataSourceProvider.jinaReader)}
|
||||
>
|
||||
<span className={cn(s.jinaLogo, 'mr-2')} />
|
||||
<span className={cn(s.jinaLogo, 'mr-2')}/>
|
||||
<span>Jina Reader</span>
|
||||
</button>
|
||||
<button
|
||||
@@ -100,40 +107,53 @@ const Website: FC<Props> = ({
|
||||
>
|
||||
🔥 Firecrawl
|
||||
</button>
|
||||
<button
|
||||
className={cn('flex items-center justify-center rounded-lg px-4 py-2',
|
||||
selectedProvider === DataSourceProvider.waterCrawl
|
||||
? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary'
|
||||
: `system-sm-regular border border-components-option-card-option-border bg-components-option-card-option-bg text-text-secondary
|
||||
hover:border-components-option-card-option-border-hover hover:bg-components-option-card-option-bg-hover hover:shadow-xs hover:shadow-shadow-shadow-3`,
|
||||
)}
|
||||
onClick={() => setSelectedProvider(DataSourceProvider.waterCrawl)}
|
||||
>
|
||||
<span className={cn(s.watercrawlLogo, 'mr-2')}/>
|
||||
<span>WaterCrawl</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{
|
||||
selectedProvider === DataSourceProvider.fireCrawl
|
||||
? sources.find(source => source.provider === DataSourceProvider.fireCrawl)
|
||||
? (
|
||||
<Firecrawl
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)
|
||||
: (
|
||||
<NoData onConfig={handleOnConfig} provider={selectedProvider} />
|
||||
)
|
||||
: sources.find(source => source.provider === DataSourceProvider.jinaReader)
|
||||
? (
|
||||
<JinaReader
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)
|
||||
: (
|
||||
<NoData onConfig={handleOnConfig} provider={selectedProvider} />
|
||||
)
|
||||
}
|
||||
{source && selectedProvider === DataSourceProvider.fireCrawl && (
|
||||
<Firecrawl
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)}
|
||||
{source && selectedProvider === DataSourceProvider.waterCrawl && (
|
||||
<Watercrawl
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)}
|
||||
{source && selectedProvider === DataSourceProvider.jinaReader && (
|
||||
<JinaReader
|
||||
onPreview={onPreview}
|
||||
checkedCrawlResult={checkedCrawlResult}
|
||||
onCheckedCrawlResultChange={onCheckedCrawlResultChange}
|
||||
onJobIdChange={onJobIdChange}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={onCrawlOptionsChange}
|
||||
/>
|
||||
)}
|
||||
{!source && (
|
||||
<NoData onConfig={handleOnConfig} provider={selectedProvider}/>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -31,6 +31,11 @@ const NoData: FC<Props> = ({
|
||||
title: t(`${I18N_PREFIX}.fireCrawlNotConfigured`),
|
||||
description: t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`),
|
||||
},
|
||||
[DataSourceProvider.waterCrawl]: {
|
||||
emoji: <span className={s.watercrawlLogo} />,
|
||||
title: t(`${I18N_PREFIX}.waterCrawlNotConfigured`),
|
||||
description: t(`${I18N_PREFIX}.waterCrawlNotConfiguredDescription`),
|
||||
},
|
||||
}
|
||||
|
||||
const currentProvider = providerConfig[provider]
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import React from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { RiBookOpenLine, RiEqualizer2Line } from '@remixicon/react'
|
||||
import Button from '@/app/components/base/button'
|
||||
|
||||
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
||||
|
||||
type Props = {
|
||||
onSetting: () => void
|
||||
}
|
||||
|
||||
const Header: FC<Props> = ({
|
||||
onSetting,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
return (
|
||||
<div className='flex h-6 items-center justify-between'>
|
||||
<div className='flex items-center'>
|
||||
<div className='text-base font-medium text-text-secondary'>{t(`${I18N_PREFIX}.watercrawlTitle`)}</div>
|
||||
<div className='ml-2 mr-2 w-px h-3.5 bg-divider-regular' />
|
||||
<Button className='flex items-center gap-x-[1px] h-6 px-1.5' onClick={onSetting}>
|
||||
<RiEqualizer2Line className='w-3.5 h-3.5 text-components-button-secondary-text' />
|
||||
<span className='text-components-button-secondary-text text-xs font-medium px-[3px]'>
|
||||
{t(`${I18N_PREFIX}.configureWatercrawl`)}
|
||||
</span>
|
||||
</Button>
|
||||
</div>
|
||||
<a
|
||||
href='https://docs.watercrawl.dev/'
|
||||
target='_blank'
|
||||
rel='noopener noreferrer'
|
||||
className='inline-flex items-center gap-x-1 text-xs font-medium text-text-accent'
|
||||
>
|
||||
<RiBookOpenLine className='w-3.5 h-3.5 text-text-accent' />
|
||||
<span>{t(`${I18N_PREFIX}.watercrawlDoc`)}</span>
|
||||
</a>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
export default React.memo(Header)
|
||||
217
web/app/components/datasets/create/website/watercrawl/index.tsx
Normal file
217
web/app/components/datasets/create/website/watercrawl/index.tsx
Normal file
@@ -0,0 +1,217 @@
|
||||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import React, { useCallback, useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import UrlInput from '../base/url-input'
|
||||
import OptionsWrap from '../base/options-wrap'
|
||||
import CrawledResult from '../base/crawled-result'
|
||||
import Crawling from '../base/crawling'
|
||||
import ErrorMessage from '../base/error-message'
|
||||
import Header from './header'
|
||||
import Options from './options'
|
||||
import { useModalContext } from '@/context/modal-context'
|
||||
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import { checkWatercrawlTaskStatus, createWatercrawlTask } from '@/service/datasets'
|
||||
import { sleep } from '@/utils'
|
||||
|
||||
const ERROR_I18N_PREFIX = 'common.errorMsg'
|
||||
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
||||
|
||||
type Props = {
|
||||
onPreview: (payload: CrawlResultItem) => void
|
||||
checkedCrawlResult: CrawlResultItem[]
|
||||
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
|
||||
onJobIdChange: (jobId: string) => void
|
||||
crawlOptions: CrawlOptions
|
||||
onCrawlOptionsChange: (payload: CrawlOptions) => void
|
||||
}
|
||||
|
||||
enum Step {
|
||||
init = 'init',
|
||||
running = 'running',
|
||||
finished = 'finished',
|
||||
}
|
||||
|
||||
const WaterCrawl: FC<Props> = ({
|
||||
onPreview,
|
||||
checkedCrawlResult,
|
||||
onCheckedCrawlResultChange,
|
||||
onJobIdChange,
|
||||
crawlOptions,
|
||||
onCrawlOptionsChange,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
const [step, setStep] = useState<Step>(Step.init)
|
||||
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
|
||||
useEffect(() => {
|
||||
if (step !== Step.init)
|
||||
setControlFoldOptions(Date.now())
|
||||
}, [step])
|
||||
const { setShowAccountSettingModal } = useModalContext()
|
||||
const handleSetting = useCallback(() => {
|
||||
setShowAccountSettingModal({
|
||||
payload: 'data-source',
|
||||
})
|
||||
}, [setShowAccountSettingModal])
|
||||
|
||||
const checkValid = useCallback((url: string) => {
|
||||
let errorMsg = ''
|
||||
if (!url) {
|
||||
errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
|
||||
field: 'url',
|
||||
})
|
||||
}
|
||||
|
||||
if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
|
||||
errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)
|
||||
|
||||
if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
|
||||
errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
|
||||
field: t(`${I18N_PREFIX}.limit`),
|
||||
})
|
||||
}
|
||||
|
||||
return {
|
||||
isValid: !errorMsg,
|
||||
errorMsg,
|
||||
}
|
||||
}, [crawlOptions, t])
|
||||
|
||||
const isInit = step === Step.init
|
||||
const isCrawlFinished = step === Step.finished
|
||||
const isRunning = step === Step.running
|
||||
const [crawlResult, setCrawlResult] = useState<{
|
||||
current: number
|
||||
total: number
|
||||
data: CrawlResultItem[]
|
||||
time_consuming: number | string
|
||||
} | undefined>(undefined)
|
||||
const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
|
||||
const showError = isCrawlFinished && crawlErrorMessage
|
||||
|
||||
const waitForCrawlFinished = useCallback(async (jobId: string): Promise<any> => {
|
||||
try {
|
||||
const res = await checkWatercrawlTaskStatus(jobId) as any
|
||||
if (res.status === 'completed') {
|
||||
return {
|
||||
isError: false,
|
||||
data: {
|
||||
...res,
|
||||
total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
|
||||
},
|
||||
}
|
||||
}
|
||||
if (res.status === 'error' || !res.status) {
|
||||
// can't get the error message from the watercrawl api
|
||||
return {
|
||||
isError: true,
|
||||
errorMessage: res.message,
|
||||
data: {
|
||||
data: [],
|
||||
},
|
||||
}
|
||||
}
|
||||
// update the progress
|
||||
setCrawlResult({
|
||||
...res,
|
||||
total: Math.min(res.total, Number.parseFloat(crawlOptions.limit as string)),
|
||||
})
|
||||
onCheckedCrawlResultChange(res.data || []) // default select the crawl result
|
||||
await sleep(2500)
|
||||
return await waitForCrawlFinished(jobId)
|
||||
}
|
||||
catch (e: any) {
|
||||
const errorBody = await e.json()
|
||||
return {
|
||||
isError: true,
|
||||
errorMessage: errorBody.message,
|
||||
data: {
|
||||
data: [],
|
||||
},
|
||||
}
|
||||
}
|
||||
}, [crawlOptions.limit])
|
||||
|
||||
const handleRun = useCallback(async (url: string) => {
|
||||
const { isValid, errorMsg } = checkValid(url)
|
||||
if (!isValid) {
|
||||
Toast.notify({
|
||||
message: errorMsg!,
|
||||
type: 'error',
|
||||
})
|
||||
return
|
||||
}
|
||||
setStep(Step.running)
|
||||
try {
|
||||
const passToServerCrawlOptions: any = {
|
||||
...crawlOptions,
|
||||
}
|
||||
if (crawlOptions.max_depth === '')
|
||||
delete passToServerCrawlOptions.max_depth
|
||||
|
||||
const res = await createWatercrawlTask({
|
||||
url,
|
||||
options: passToServerCrawlOptions,
|
||||
}) as any
|
||||
const jobId = res.job_id
|
||||
onJobIdChange(jobId)
|
||||
const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
|
||||
if (isError) {
|
||||
setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
|
||||
}
|
||||
else {
|
||||
setCrawlResult(data)
|
||||
onCheckedCrawlResultChange(data.data || []) // default select the crawl result
|
||||
setCrawlErrorMessage('')
|
||||
}
|
||||
}
|
||||
catch (e) {
|
||||
setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
|
||||
console.log(e)
|
||||
}
|
||||
finally {
|
||||
setStep(Step.finished)
|
||||
}
|
||||
}, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished])
|
||||
|
||||
return (
|
||||
<div>
|
||||
<Header onSetting={handleSetting} />
|
||||
<div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle p-4 pb-0'>
|
||||
<UrlInput onRun={handleRun} isRunning={isRunning} />
|
||||
<OptionsWrap
|
||||
className='mt-4'
|
||||
controlFoldOptions={controlFoldOptions}
|
||||
>
|
||||
<Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
|
||||
</OptionsWrap>
|
||||
|
||||
{!isInit && (
|
||||
<div className='relative left-[-16px] mt-3 w-[calc(100%_+_32px)] rounded-b-xl'>
|
||||
{isRunning
|
||||
&& <Crawling
|
||||
className='mt-2'
|
||||
crawledNum={crawlResult?.current || 0}
|
||||
totalNum={crawlResult?.total || Number.parseFloat(crawlOptions.limit as string) || 0}
|
||||
/>}
|
||||
{showError && (
|
||||
<ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
|
||||
)}
|
||||
{isCrawlFinished && !showError
|
||||
&& <CrawledResult
|
||||
className='mb-2'
|
||||
list={crawlResult?.data || []}
|
||||
checkedList={checkedCrawlResult}
|
||||
onSelectedChange={onCheckedCrawlResultChange}
|
||||
onPreview={onPreview}
|
||||
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
|
||||
/>
|
||||
}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
export default React.memo(WaterCrawl)
|
||||
@@ -0,0 +1,85 @@
|
||||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import React, { useCallback } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import CheckboxWithLabel from '../base/checkbox-with-label'
|
||||
import Field from '../base/field'
|
||||
import cn from '@/utils/classnames'
|
||||
import type { CrawlOptions } from '@/models/datasets'
|
||||
|
||||
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
||||
|
||||
type Props = {
|
||||
className?: string
|
||||
payload: CrawlOptions
|
||||
onChange: (payload: CrawlOptions) => void
|
||||
}
|
||||
|
||||
const Options: FC<Props> = ({
|
||||
className = '',
|
||||
payload,
|
||||
onChange,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const handleChange = useCallback((key: keyof CrawlOptions) => {
|
||||
return (value: any) => {
|
||||
onChange({
|
||||
...payload,
|
||||
[key]: value,
|
||||
})
|
||||
}
|
||||
}, [payload, onChange])
|
||||
return (
|
||||
<div className={cn(className, ' space-y-2')}>
|
||||
<CheckboxWithLabel
|
||||
label={t(`${I18N_PREFIX}.crawlSubPage`)}
|
||||
isChecked={payload.crawl_sub_pages}
|
||||
onChange={handleChange('crawl_sub_pages')}
|
||||
labelClassName='text-[13px] leading-[16px] font-medium text-text-secondary'
|
||||
/>
|
||||
<div className='flex justify-between space-x-4'>
|
||||
<Field
|
||||
className='shrink-0 grow'
|
||||
label={t(`${I18N_PREFIX}.limit`)}
|
||||
value={payload.limit}
|
||||
onChange={handleChange('limit')}
|
||||
isNumber
|
||||
isRequired
|
||||
/>
|
||||
<Field
|
||||
className='shrink-0 grow'
|
||||
label={t(`${I18N_PREFIX}.maxDepth`)}
|
||||
value={payload.max_depth}
|
||||
onChange={handleChange('max_depth')}
|
||||
isNumber
|
||||
tooltip={t(`${I18N_PREFIX}.maxDepthTooltip`)!}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className='flex justify-between space-x-4'>
|
||||
<Field
|
||||
className='shrink-0 grow'
|
||||
label={t(`${I18N_PREFIX}.excludePaths`)}
|
||||
value={payload.excludes}
|
||||
onChange={handleChange('excludes')}
|
||||
placeholder='blog/*, /about/*'
|
||||
/>
|
||||
<Field
|
||||
className='shrink-0 grow'
|
||||
label={t(`${I18N_PREFIX}.includeOnlyPaths`)}
|
||||
value={payload.includes}
|
||||
onChange={handleChange('includes')}
|
||||
placeholder='articles/*'
|
||||
/>
|
||||
</div>
|
||||
<CheckboxWithLabel
|
||||
label={t(`${I18N_PREFIX}.extractOnlyMainContent`)}
|
||||
isChecked={payload.only_main_content}
|
||||
onChange={handleChange('only_main_content')}
|
||||
labelClassName='text-[13px] leading-[16px] font-medium text-text-secondary'
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
export default React.memo(Options)
|
||||
@@ -0,0 +1,161 @@
|
||||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import React, { useCallback, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import {
|
||||
PortalToFollowElem,
|
||||
PortalToFollowElemContent,
|
||||
} from '@/app/components/base/portal-to-follow-elem'
|
||||
import { Lock01 } from '@/app/components/base/icons/src/vender/solid/security'
|
||||
import Button from '@/app/components/base/button'
|
||||
import type { WatercrawlConfig } from '@/models/common'
|
||||
import Field from '@/app/components/datasets/create/website/base/field'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import { createDataSourceApiKeyBinding } from '@/service/datasets'
|
||||
import { LinkExternal02 } from '@/app/components/base/icons/src/vender/line/general'
|
||||
type Props = {
|
||||
onCancel: () => void
|
||||
onSaved: () => void
|
||||
}
|
||||
|
||||
const I18N_PREFIX = 'datasetCreation.watercrawl'
|
||||
|
||||
const DEFAULT_BASE_URL = 'https://app.watercrawl.dev'
|
||||
|
||||
const ConfigWatercrawlModal: FC<Props> = ({
|
||||
onCancel,
|
||||
onSaved,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
const [isSaving, setIsSaving] = useState(false)
|
||||
const [config, setConfig] = useState<WatercrawlConfig>({
|
||||
api_key: '',
|
||||
base_url: '',
|
||||
})
|
||||
|
||||
const handleConfigChange = useCallback((key: string) => {
|
||||
return (value: string | number) => {
|
||||
setConfig(prev => ({ ...prev, [key]: value as string }))
|
||||
}
|
||||
}, [])
|
||||
|
||||
const handleSave = useCallback(async () => {
|
||||
if (isSaving)
|
||||
return
|
||||
let errorMsg = ''
|
||||
if (config.base_url && !((config.base_url.startsWith('http://') || config.base_url.startsWith('https://'))))
|
||||
errorMsg = t('common.errorMsg.urlError')
|
||||
if (!errorMsg) {
|
||||
if (!config.api_key) {
|
||||
errorMsg = t('common.errorMsg.fieldRequired', {
|
||||
field: 'API Key',
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (errorMsg) {
|
||||
Toast.notify({
|
||||
type: 'error',
|
||||
message: errorMsg,
|
||||
})
|
||||
return
|
||||
}
|
||||
const postData = {
|
||||
category: 'website',
|
||||
provider: 'watercrawl',
|
||||
credentials: {
|
||||
auth_type: 'x-api-key',
|
||||
config: {
|
||||
api_key: config.api_key,
|
||||
base_url: config.base_url || DEFAULT_BASE_URL,
|
||||
},
|
||||
},
|
||||
}
|
||||
try {
|
||||
setIsSaving(true)
|
||||
await createDataSourceApiKeyBinding(postData)
|
||||
Toast.notify({
|
||||
type: 'success',
|
||||
message: t('common.api.success'),
|
||||
})
|
||||
}
|
||||
finally {
|
||||
setIsSaving(false)
|
||||
}
|
||||
|
||||
onSaved()
|
||||
}, [config.api_key, config.base_url, onSaved, t, isSaving])
|
||||
|
||||
return (
|
||||
<PortalToFollowElem open>
|
||||
<PortalToFollowElemContent className='w-full h-full z-[60]'>
|
||||
<div className='fixed inset-0 flex items-center justify-center bg-background-overlay'>
|
||||
<div className='mx-2 w-[640px] max-h-[calc(100vh-120px)] bg-components-panel-bg shadow-xl rounded-2xl overflow-y-auto'>
|
||||
<div className='px-8 pt-8'>
|
||||
<div className='flex justify-between items-center mb-4'>
|
||||
<div className='system-xl-semibold text-text-primary'>{t(`${I18N_PREFIX}.configWatercrawl`)}</div>
|
||||
</div>
|
||||
|
||||
<div className='space-y-4'>
|
||||
<Field
|
||||
label='API Key'
|
||||
labelClassName='!text-sm'
|
||||
isRequired
|
||||
value={config.api_key}
|
||||
onChange={handleConfigChange('api_key')}
|
||||
placeholder={t(`${I18N_PREFIX}.apiKeyPlaceholder`)!}
|
||||
/>
|
||||
<Field
|
||||
label='Base URL'
|
||||
labelClassName='!text-sm'
|
||||
value={config.base_url}
|
||||
onChange={handleConfigChange('base_url')}
|
||||
placeholder={DEFAULT_BASE_URL}
|
||||
/>
|
||||
</div>
|
||||
<div className='my-8 flex justify-between items-center h-8'>
|
||||
<a className='flex items-center space-x-1 leading-[18px] text-xs font-normal text-text-accent' target='_blank' href='https://app.watercrawl.dev/'>
|
||||
<span>{t(`${I18N_PREFIX}.getApiKeyLinkText`)}</span>
|
||||
<LinkExternal02 className='w-3 h-3' />
|
||||
</a>
|
||||
<div className='flex'>
|
||||
<Button
|
||||
size='large'
|
||||
className='mr-2'
|
||||
onClick={onCancel}
|
||||
>
|
||||
{t('common.operation.cancel')}
|
||||
</Button>
|
||||
<Button
|
||||
variant='primary'
|
||||
size='large'
|
||||
onClick={handleSave}
|
||||
loading={isSaving}
|
||||
>
|
||||
{t('common.operation.save')}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div className='border-t-[0.5px] border-t-divider-regular'>
|
||||
<div className='flex justify-center items-center py-3 bg-background-section-burn text-xs text-text-tertiary'>
|
||||
<Lock01 className='mr-1 w-3 h-3 text-text-tertiary' />
|
||||
{t('common.modelProvider.encrypted.front')}
|
||||
<a
|
||||
className='text-text-accent mx-1'
|
||||
target='_blank' rel='noopener noreferrer'
|
||||
href='https://pycryptodome.readthedocs.io/en/latest/src/cipher/oaep.html'
|
||||
>
|
||||
PKCS1_OAEP
|
||||
</a>
|
||||
{t('common.modelProvider.encrypted.back')}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</PortalToFollowElemContent>
|
||||
</PortalToFollowElem>
|
||||
)
|
||||
}
|
||||
export default React.memo(ConfigWatercrawlModal)
|
||||
@@ -5,19 +5,15 @@ import { useTranslation } from 'react-i18next'
|
||||
import Panel from '../panel'
|
||||
import { DataSourceType } from '../panel/types'
|
||||
import ConfigFirecrawlModal from './config-firecrawl-modal'
|
||||
import ConfigWatercrawlModal from './config-watercrawl-modal'
|
||||
import ConfigJinaReaderModal from './config-jina-reader-modal'
|
||||
import cn from '@/utils/classnames'
|
||||
import s from '@/app/components/datasets/create/website/index.module.css'
|
||||
import { fetchDataSources, removeDataSourceApiKeyBinding } from '@/service/datasets'
|
||||
|
||||
import type {
|
||||
DataSourceItem,
|
||||
} from '@/models/common'
|
||||
import type { DataSourceItem } from '@/models/common'
|
||||
import { DataSourceProvider } from '@/models/common'
|
||||
import { useAppContext } from '@/context/app-context'
|
||||
|
||||
import {
|
||||
DataSourceProvider,
|
||||
} from '@/models/common'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
|
||||
type Props = {
|
||||
@@ -58,6 +54,16 @@ const DataSourceWebsite: FC<Props> = ({ provider }) => {
|
||||
return source?.id
|
||||
}
|
||||
|
||||
const getProviderName = (provider: DataSourceProvider): string => {
|
||||
if (provider === DataSourceProvider.fireCrawl)
|
||||
return 'Firecrawl'
|
||||
|
||||
if (provider === DataSourceProvider.waterCrawl)
|
||||
return 'WaterCrawl'
|
||||
|
||||
return 'Jina Reader'
|
||||
}
|
||||
|
||||
const handleRemove = useCallback((provider: DataSourceProvider) => {
|
||||
return async () => {
|
||||
const dataSourceId = getIdByProvider(provider)
|
||||
@@ -82,27 +88,42 @@ const DataSourceWebsite: FC<Props> = ({ provider }) => {
|
||||
readOnly={!isCurrentWorkspaceManager}
|
||||
configuredList={sources.filter(item => item.provider === provider).map(item => ({
|
||||
id: item.id,
|
||||
logo: ({ className }: { className: string }) => (
|
||||
item.provider === DataSourceProvider.fireCrawl
|
||||
? (
|
||||
<div className={cn(className, 'ml-3 flex h-5 w-5 items-center justify-center rounded border border-divider-subtle !bg-background-default text-xs font-medium text-text-tertiary')}>🔥</div>
|
||||
logo: ({ className }: { className: string }) => {
|
||||
if (item.provider === DataSourceProvider.fireCrawl) {
|
||||
return (
|
||||
<div
|
||||
className={cn(className, 'ml-3 flex h-5 w-5 items-center justify-center rounded border border-divider-subtle !bg-background-default text-xs font-medium text-text-tertiary')}>🔥</div>
|
||||
)
|
||||
: (
|
||||
<div className={cn(className, 'ml-3 flex h-5 w-5 items-center justify-center rounded border border-divider-subtle !bg-background-default text-xs font-medium text-text-tertiary')}>
|
||||
<span className={s.jinaLogo} />
|
||||
}
|
||||
|
||||
if (item.provider === DataSourceProvider.waterCrawl) {
|
||||
return (
|
||||
<div
|
||||
className={cn(className, 'ml-3 flex h-5 w-5 items-center justify-center rounded border border-divider-subtle !bg-background-default text-xs font-medium text-text-tertiary')}>
|
||||
<span className={s.watercrawlLogo}/>
|
||||
</div>
|
||||
)
|
||||
),
|
||||
name: item.provider === DataSourceProvider.fireCrawl ? 'Firecrawl' : 'Jina Reader',
|
||||
}
|
||||
return (
|
||||
<div
|
||||
className={cn(className, 'ml-3 flex h-5 w-5 items-center justify-center rounded border border-divider-subtle !bg-background-default text-xs font-medium text-text-tertiary')}>
|
||||
<span className={s.jinaLogo}/>
|
||||
</div>
|
||||
)
|
||||
},
|
||||
name: getProviderName(item.provider),
|
||||
isActive: true,
|
||||
}))}
|
||||
onRemove={handleRemove(provider)}
|
||||
/>
|
||||
{configTarget === DataSourceProvider.fireCrawl && (
|
||||
<ConfigFirecrawlModal onSaved={handleAdded} onCancel={hideConfig} />
|
||||
<ConfigFirecrawlModal onSaved={handleAdded} onCancel={hideConfig}/>
|
||||
)}
|
||||
{configTarget === DataSourceProvider.waterCrawl && (
|
||||
<ConfigWatercrawlModal onSaved={handleAdded} onCancel={hideConfig}/>
|
||||
)}
|
||||
{configTarget === DataSourceProvider.jinaReader && (
|
||||
<ConfigJinaReaderModal onSaved={handleAdded} onCancel={hideConfig} />
|
||||
<ConfigJinaReaderModal onSaved={handleAdded} onCancel={hideConfig}/>
|
||||
)}
|
||||
</>
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ export default function DataSourcePage() {
|
||||
<DataSourceNotion workspaces={notionWorkspaces} />
|
||||
<DataSourceWebsite provider={DataSourceProvider.jinaReader} />
|
||||
<DataSourceWebsite provider={DataSourceProvider.fireCrawl} />
|
||||
<DataSourceWebsite provider={DataSourceProvider.waterCrawl} />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -41,6 +41,12 @@ const Panel: FC<Props> = ({
|
||||
const isNotion = type === DataSourceType.notion
|
||||
const isWebsite = type === DataSourceType.website
|
||||
|
||||
const getProviderName = (): string => {
|
||||
if (provider === DataSourceProvider.fireCrawl) return '🔥 Firecrawl'
|
||||
if (provider === DataSourceProvider.waterCrawl) return 'WaterCrawl'
|
||||
return 'Jina Reader'
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='mb-2 rounded-xl bg-background-section-burn'>
|
||||
<div className='flex items-center px-3 py-[9px]'>
|
||||
@@ -50,7 +56,7 @@ const Panel: FC<Props> = ({
|
||||
<div className='text-sm font-medium text-text-primary'>{t(`common.dataSource.${type}.title`)}</div>
|
||||
{isWebsite && (
|
||||
<div className='ml-1 rounded-md bg-components-badge-white-to-dark px-1.5 text-xs font-medium leading-[18px] text-text-secondary'>
|
||||
<span className='text-text-tertiary'>{t('common.dataSource.website.with')}</span> { provider === DataSourceProvider.fireCrawl ? '🔥 Firecrawl' : 'Jina Reader'}
|
||||
<span className='text-text-tertiary'>{t('common.dataSource.website.with')}</span> {getProviderName()}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user