mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-10 03:16:51 +08:00
Signed-off-by: yihong0618 <zouzou0208@gmail.com> Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: AkaraChen <akarachen@outlook.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Warren Chen <warren.chen830@gmail.com> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: Yi Xiao <54782454+YIXIAO0@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: KVOJJJin <jzongcode@gmail.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com> Co-authored-by: Charlie.Wei <luowei@cvte.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: huayaoyue6 <huayaoyue@163.com> Co-authored-by: kurokobo <kuro664@gmail.com> Co-authored-by: Matsuda <yiyth.fcb6@gmail.com> Co-authored-by: shirochan <s.yusuke0711@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: Huỳnh Gia Bôi <boihuynh147@gmail.com> Co-authored-by: Julian Huynh <julian.huynh@immersio.io> Co-authored-by: Hash Brown <hi@xzd.me> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Kazuki Takamatsu <kazuki.takamatsu@chowagiken.co.jp> Co-authored-by: Trey Dong <1346650911@qq.com> Co-authored-by: VoidIsVoid <343750470@qq.com> Co-authored-by: Gimling <huangjl@ruyi.ai> Co-authored-by: xiandan-erizo <xiandan.erizo@gmail.com> Co-authored-by: Muneyuki Noguchi <nogu.dev@gmail.com> Co-authored-by: zhaobingshuang <1475195565@qq.com> Co-authored-by: zhaobs <zhaobs@cailian.net> Co-authored-by: suzuki.sh <s2terminal@users.noreply.github.com> Co-authored-by: Yingchun Lai <laiyingchun@apache.org> Co-authored-by: huanshare <huanshare@live.com> Co-authored-by: huanshare <liuhuan101@longfor.com> Co-authored-by: orangeclk <orangeclk@users.noreply.github.com> Co-authored-by: 문정현 
<120004247+JungHyunMoon@users.noreply.github.com> Co-authored-by: barabicu <kztk533@gmail.com> Co-authored-by: Wei Mingzhi <whistler_wmz@users.sf.net> Co-authored-by: Paul van Oorschot <20116814+pvoo@users.noreply.github.com> Co-authored-by: zkyTech <zhangkunyuan@hotmail.com> Co-authored-by: zhangkunyuan <zhangkunyuan@cmhi.chinamobile.com> Co-authored-by: Tommy <34446820+Asterovim@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Novice <857526207@qq.com> Co-authored-by: Novice Lee <novicelee@NovicedeMacBook-Pro.local> Co-authored-by: Novice Lee <novicelee@NoviPro.local> Co-authored-by: zxhlyh <16177003+zxhlyh@users.noreply.github.com> Co-authored-by: liuzhenghua <1090179900@qq.com> Co-authored-by: Jiang <65766008+AlwaysBluer@users.noreply.github.com> Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com> Co-authored-by: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Co-authored-by: Alok Shrivastwa <alok.shrivastwa@gmail.com> Co-authored-by: Alok Shrivastwa <Alok.Shrivastwa@microland.com> Co-authored-by: JasonVV <jasonwangiii@outlook.com> Co-authored-by: Hiroshi Fujita <fujita-h@users.noreply.github.com> Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com> Co-authored-by: NFish <douxc512@gmail.com> Co-authored-by: Junyan Qin <1010553892@qq.com> Co-authored-by: IWAI, Masaharu <iwaim.sub@gmail.com> Co-authored-by: IWAI, Masaharu <iwai_masaharu@funkit.co.jp> Co-authored-by: Bowen Liang <liangbowen@gf.com.cn> Co-authored-by: luckylhb90 <luckylhb90@gmail.com> Co-authored-by: hobo.l <hobo.l@binance.com> Co-authored-by: douxc <7553076+douxc@users.noreply.github.com>
232 lines
7.0 KiB
TypeScript
232 lines
7.0 KiB
TypeScript
'use client'
|
|
import type { FC } from 'react'
|
|
import React, { useCallback, useEffect, useState } from 'react'
|
|
import { useTranslation } from 'react-i18next'
|
|
import UrlInput from '../base/url-input'
|
|
import OptionsWrap from '../base/options-wrap'
|
|
import CrawledResult from '../base/crawled-result'
|
|
import Crawling from '../base/crawling'
|
|
import ErrorMessage from '../base/error-message'
|
|
import Header from './header'
|
|
import Options from './options'
|
|
import cn from '@/utils/classnames'
|
|
import { useModalContext } from '@/context/modal-context'
|
|
import Toast from '@/app/components/base/toast'
|
|
import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datasets'
|
|
import { sleep } from '@/utils'
|
|
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
|
|
|
|
/** i18n key prefix for the shared validation/error messages used by `checkValid`. */
const ERROR_I18N_PREFIX = 'common.errorMsg'
/** i18n key prefix for the website-crawl strings rendered by this component. */
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
|
|
|
/** Props accepted by the `JinaReader` crawler panel. */
type Props = {
  /** Forwarded to `CrawledResult` as its `onPreview` handler. */
  onPreview: (payload: CrawlResultItem) => void
  /** Currently checked crawl results; forwarded to `CrawledResult` as `checkedList`. */
  checkedCrawlResult: CrawlResultItem[]
  /**
   * Called with the crawled items whenever new results arrive (they are selected by default),
   * and forwarded to `CrawledResult` as `onSelectedChange`.
   */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Called with the `job_id` returned by `createJinaReaderTask` when the crawl runs as a job. */
  onJobIdChange: (jobId: string) => void
  /** Crawl options sent with the task request and shown in the `Options` form. */
  crawlOptions: CrawlOptions
  /** Forwarded to `Options` as its `onChange` handler. */
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}
|
|
|
|
/** Lifecycle of a crawl as tracked by the component's `step` state. */
enum Step {
  /** Initial state, before any run has been started. */
  init = 'init',
  /** Set right after validation passes, while the crawl request/polling is in flight. */
  running = 'running',
  /** Set in the `finally` of a run, whether it succeeded or failed. */
  finished = 'finished',
}
|
|
|
|
/**
 * Website crawler panel backed by the Jina Reader service.
 *
 * Renders a URL input plus crawl options. On run it validates the input, creates a
 * Jina Reader task and then either shows the single page returned inline or polls the
 * created job until it completes or fails. Results are selected by default through
 * `onCheckedCrawlResultChange`.
 */
const JinaReader: FC<Props> = ({
  onPreview,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)

  // Bump the control value handed to OptionsWrap whenever the step is no longer `init`.
  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const { setShowAccountSettingModal } = useModalContext()
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  /**
   * Validates the URL and the crawl limit.
   * Returns `{ isValid, errorMsg }`; `errorMsg` is the first failing rule's message or ''.
   */
  const checkValid = useCallback((url: string) => {
    let errorMsg = ''
    if (!url) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      })
    }

    if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
      errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)

    if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      })
    }

    return {
      isValid: !errorMsg,
      errorMsg,
    }
  }, [crawlOptions, t])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running
  const [crawlResult, setCrawlResult] = useState<{
    current: number
    total: number
    data: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  // Coerce to a boolean so an empty message is never used as a JSX child.
  const showError = isCrawlFinished && !!crawlErrorMessage

  /**
   * Polls the status of a crawl job every 2.5s until it is `completed` or `failed`
   * (a missing status is treated as a failure).
   *
   * While the job is still running, the intermediate result is pushed into local state
   * and reported through `onCheckedCrawlResultChange` so progress is visible.
   *
   * @returns `{ isError, data, errorMessage? }`; never rejects for request failures.
   */
  const waitForCrawlFinished = useCallback(async (jobId: string) => {
    const limit = parseFloat(crawlOptions.limit as string)
    // Never report more pages than the configured limit; ignore an unparsable limit.
    const capTotal = (total: number) => Number.isNaN(limit) ? total : Math.min(total, limit)

    try {
      // Iterative polling: equivalent to re-invoking this function after each sleep,
      // without building a chain of nested pending promises.
      for (;;) {
        const res = await checkJinaReaderTaskStatus(jobId) as any
        if (res.status === 'completed') {
          return {
            isError: false,
            data: {
              ...res,
              total: capTotal(res.total),
            },
          }
        }
        if (res.status === 'failed' || !res.status) {
          return {
            isError: true,
            errorMessage: res.message,
            data: {
              data: [],
            },
          }
        }
        // update the progress
        setCrawlResult({
          ...res,
          total: capTotal(res.total),
        })
        onCheckedCrawlResultChange(res.data || []) // default select the crawl result
        await sleep(2500)
      }
    }
    catch (e: any) {
      // The rejection is read as a Response-like object when it exposes `json()`;
      // anything else (e.g. a network TypeError) yields no message and the caller
      // falls back to the generic "unknown error" text.
      let errorMessage: string | undefined
      if (e && typeof e.json === 'function') {
        try {
          const errorBody = await e.json()
          errorMessage = errorBody?.message
        }
        catch (parseError) {
          console.error(parseError)
        }
      }
      else {
        console.error(e)
      }
      return {
        isError: true,
        errorMessage,
        data: {
          data: [],
        },
      }
    }
  }, [crawlOptions.limit, onCheckedCrawlResultChange])

  /**
   * Starts a crawl for `url`: validates, creates the task, then handles either the
   * inline single-page response (`res.data`) or the job response (`res.job_id`).
   * The step always ends up as `finished`, even when the request throws.
   */
  const handleRun = useCallback(async (url: string) => {
    const { isValid, errorMsg } = checkValid(url)
    if (!isValid) {
      Toast.notify({
        message: errorMsg!,
        type: 'error',
      })
      return
    }
    setStep(Step.running)
    try {
      const startTime = Date.now()
      const res = await createJinaReaderTask({
        url,
        options: crawlOptions,
      }) as any

      if (res.data) {
        // Single page returned directly: wrap it in the same shape as a job result.
        const data = {
          current: 1,
          total: 1,
          data: [{
            title: res.data.title,
            markdown: res.data.content,
            description: res.data.description,
            source_url: res.data.url,
          }],
          time_consuming: (Date.now() - startTime) / 1000,
        }
        setCrawlResult(data)
        onCheckedCrawlResultChange(data.data || [])
        setCrawlErrorMessage('')
      }
      else if (res.job_id) {
        const jobId = res.job_id
        onJobIdChange(jobId)
        const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
        if (isError) {
          setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
        }
        else {
          setCrawlResult(data)
          onCheckedCrawlResultChange(data.data || []) // default select the crawl result
          setCrawlErrorMessage('')
        }
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.error(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

  return (
    <div>
      <Header onSetting={handleSetting} />
      <div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}>
        <UrlInput onRun={handleRun} isRunning={isRunning} />
        <OptionsWrap
          className={cn('mt-4')}
          controlFoldOptions={controlFoldOptions}
        >
          <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
        </OptionsWrap>

        {!isInit && (
          <div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'>
            {isRunning
              && <Crawling
                className='mt-2'
                crawledNum={crawlResult?.current || 0}
                totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0}
              />}
            {showError && (
              <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
            )}
            {isCrawlFinished && !showError
              && <CrawledResult
                className='mb-2'
                list={crawlResult?.data || []}
                checkedList={checkedCrawlResult}
                onSelectedChange={onCheckedCrawlResultChange}
                onPreview={onPreview}
                usedTime={parseFloat(crawlResult?.time_consuming as string) || 0}
              />
            }
          </div>
        )}
      </div>
    </div>
  )
}
export default React.memo(JinaReader)
|