feat: support firecrawl frontend code (#5226)

This commit is contained in:
Joel
2024-06-14 22:02:41 +08:00
committed by GitHub
parent 8d1386df0f
commit 28554350de
51 changed files with 1979 additions and 145 deletions

View File

@@ -8,7 +8,7 @@ import StepOne from './step-one'
import StepTwo from './step-two'
import StepThree from './step-three'
import { DataSourceType } from '@/models/datasets'
import type { DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
import { fetchDataSource } from '@/service/common'
import { fetchDatasetDetail } from '@/service/datasets'
import type { NotionPage } from '@/models/common'
@@ -19,6 +19,15 @@ type DatasetUpdateFormProps = {
datasetId?: string
}
// Initial crawl settings; used as the starting value of the `crawlOptions` state in DatasetUpdateForm.
const DEFAULT_CRAWL_OPTIONS: CrawlOptions = {
crawl_sub_pages: true,
only_main_content: true,
includes: '',
excludes: '',
limit: 10,
// Empty string means "not set" for this field.
max_depth: '',
}
const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
const { t } = useTranslation()
const { setShowAccountSettingModal } = useModalContext()
@@ -36,9 +45,13 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
setNotionPages(value)
}
const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
const [crawlOptions, setCrawlOptions] = useState<CrawlOptions>(DEFAULT_CRAWL_OPTIONS)
const updateFileList = (preparedFiles: FileItem[]) => {
setFiles(preparedFiles)
}
const [fireCrawlJobId, setFireCrawlJobId] = useState('')
const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
@@ -108,20 +121,27 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
<StepsNavBar step={step} datasetId={datasetId} />
</div>
<div className="grow bg-white">
{step === 1 && <StepOne
hasConnection={hasConnection}
onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
datasetId={datasetId}
dataSourceType={dataSourceType}
dataSourceTypeDisable={!!detail?.data_source_type}
changeType={setDataSourceType}
files={fileList}
updateFile={updateFile}
updateFileList={updateFileList}
notionPages={notionPages}
updateNotionPages={updateNotionPages}
onStepChange={nextStep}
/>}
<div className={step === 1 ? 'block h-full' : 'hidden'}>
<StepOne
hasConnection={hasConnection}
onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
datasetId={datasetId}
dataSourceType={dataSourceType}
dataSourceTypeDisable={!!detail?.data_source_type}
changeType={setDataSourceType}
files={fileList}
updateFile={updateFile}
updateFileList={updateFileList}
notionPages={notionPages}
updateNotionPages={updateNotionPages}
onStepChange={nextStep}
websitePages={websitePages}
updateWebsitePages={setWebsitePages}
onFireCrawlJobIdChange={setFireCrawlJobId}
crawlOptions={crawlOptions}
onCrawlOptionsChange={setCrawlOptions}
/>
</div>
{(step === 2 && (!datasetId || (datasetId && !!detail))) && <StepTwo
isAPIKeySet={!!embeddingsDefaultModel}
onSetting={() => setShowAccountSettingModal({ payload: 'provider' })}
@@ -130,9 +150,12 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
dataSourceType={dataSourceType}
files={fileList.map(file => file.file)}
notionPages={notionPages}
websitePages={websitePages}
fireCrawlJobId={fireCrawlJobId}
onStepChange={changeStep}
updateIndexingTypeCache={updateIndexingTypeCache}
updateResultCache={updateResultCache}
crawlOptions={crawlOptions}
/>}
{step === 3 && <StepThree
datasetId={datasetId}

View File

@@ -6,8 +6,10 @@ import FilePreview from '../file-preview'
import FileUploader from '../file-uploader'
import NotionPagePreview from '../notion-page-preview'
import EmptyDatasetCreationModal from '../empty-dataset-creation-modal'
import Website from '../website'
import WebsitePreview from '../website/preview'
import s from './index.module.css'
import type { FileItem } from '@/models/datasets'
import type { CrawlOptions, CrawlResultItem, FileItem } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
import { DataSourceType } from '@/models/datasets'
import Button from '@/app/components/base/button'
@@ -29,6 +31,11 @@ type IStepOneProps = {
updateNotionPages: (value: NotionPage[]) => void
onStepChange: () => void
changeType: (type: DataSourceType) => void
websitePages?: CrawlResultItem[]
updateWebsitePages: (value: CrawlResultItem[]) => void
onFireCrawlJobIdChange: (jobId: string) => void
crawlOptions: CrawlOptions
onCrawlOptionsChange: (payload: CrawlOptions) => void
}
type NotionConnectorProps = {
@@ -49,7 +56,7 @@ export const NotionConnector = ({ onSetting }: NotionConnectorProps) => {
const StepOne = ({
datasetId,
dataSourceType,
dataSourceType: inCreatePageDataSourceType,
dataSourceTypeDisable,
changeType,
hasConnection,
@@ -60,11 +67,17 @@ const StepOne = ({
updateFile,
notionPages = [],
updateNotionPages,
websitePages = [],
updateWebsitePages,
onFireCrawlJobIdChange,
crawlOptions,
onCrawlOptionsChange,
}: IStepOneProps) => {
const { dataset } = useDatasetDetailContext()
const [showModal, setShowModal] = useState(false)
const [currentFile, setCurrentFile] = useState<File | undefined>()
const [currentNotionPage, setCurrentNotionPage] = useState<NotionPage | undefined>()
const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>()
const { t } = useTranslation()
const modalShowHandle = () => setShowModal(true)
@@ -85,8 +98,13 @@ const StepOne = ({
setCurrentNotionPage(undefined)
}
const shouldShowDataSourceTypeList = !datasetId || (datasetId && !dataset?.data_source_type)
// Clears the currently previewed crawl result; WebsitePreview is only rendered while `currentWebsite` is set.
const hideWebsitePreview = () => {
setCurrentWebsite(undefined)
}
const shouldShowDataSourceTypeList = !datasetId || (datasetId && !dataset?.data_source_type)
const isInCreatePage = shouldShowDataSourceTypeList
const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : dataset?.data_source_type
const { plan, enableBilling } = useProviderContext()
const allFileLoaded = (files.length > 0 && files.every(file => file.file.id))
const hasNotin = notionPages.length > 0
@@ -150,10 +168,13 @@ const StepOne = ({
{t('datasetCreation.stepOne.dataSourceType.notion')}
</div>
<div
className={cn(s.dataSourceItem, s.disabled, dataSourceType === DataSourceType.WEB && s.active)}
// onClick={() => changeType(DataSourceType.WEB)}
className={cn(
s.dataSourceItem,
dataSourceType === DataSourceType.WEB && s.active,
dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled,
)}
onClick={() => changeType(DataSourceType.WEB)}
>
<span className={s.comingTag}>Coming soon</span>
<span className={cn(s.datasetIcon, s.web)} />
{t('datasetCreation.stepOne.dataSourceType.web')}
</div>
@@ -201,6 +222,26 @@ const StepOne = ({
)}
</>
)}
{dataSourceType === DataSourceType.WEB && (
<>
<div className={cn('mb-8 w-[640px]', !shouldShowDataSourceTypeList && 'mt-12')}>
<Website
onPreview={setCurrentWebsite}
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={updateWebsitePages}
onJobIdChange={onFireCrawlJobIdChange}
crawlOptions={crawlOptions}
onCrawlOptionsChange={onCrawlOptionsChange}
/>
</div>
{isShowVectorSpaceFull && (
<div className='max-w-[640px] mb-4'>
<VectorSpaceFull />
</div>
)}
<Button disabled={isShowVectorSpaceFull || !websitePages.length} className={s.submitButton} type='primary' onClick={onStepChange}>{t('datasetCreation.stepOne.button')}</Button>
</>
)}
{!datasetId && (
<>
<div className={s.dividerLine} />
@@ -212,6 +253,7 @@ const StepOne = ({
</div>
{currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
{currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
{currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
</div>
)
}

View File

@@ -323,6 +323,7 @@
}
.sourceContent {
width: 0;
flex: 1 1 auto;
}

View File

@@ -12,7 +12,7 @@ import RetrievalMethodInfo from '../../common/retrieval-method-info'
import PreviewItem, { PreviewType } from './preview-item'
import LanguageSelect from './language-select'
import s from './index.module.css'
import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
createDocument,
createFirstDocument,
@@ -44,6 +44,7 @@ import TooltipPlus from '@/app/components/base/tooltip-plus'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { LanguagesSupported } from '@/i18n/language'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
type ValueOf<T> = T[keyof T]
type StepTwoProps = {
@@ -56,6 +57,9 @@ type StepTwoProps = {
dataSourceType: DataSourceType
files: CustomFile[]
notionPages?: NotionPage[]
websitePages?: CrawlResultItem[]
crawlOptions?: CrawlOptions
fireCrawlJobId?: string
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateResultCache?: (res: createDocumentResponse) => void
@@ -79,9 +83,12 @@ const StepTwo = ({
onSetting,
datasetId,
indexingType,
dataSourceType,
dataSourceType: inCreatePageDataSourceType,
files,
notionPages = [],
websitePages = [],
crawlOptions,
fireCrawlJobId = '',
onStepChange,
updateIndexingTypeCache,
updateResultCache,
@@ -94,6 +101,8 @@ const StepTwo = ({
const isMobile = media === MediaType.mobile
const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext()
const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
const scrollRef = useRef<HTMLDivElement>(null)
const [scrolled, setScrolled] = useState(false)
const previewScrollRef = useRef<HTMLDivElement>(null)
@@ -242,6 +251,15 @@ const StepTwo = ({
}) as NotionInfo[]
}
/**
 * Builds the `website_info_list` payload for website data sources from the
 * crawl job id, the source URLs of the selected pages and the
 * "only main content" crawl option.
 */
const getWebsiteInfo = () => {
  const selectedUrls = websitePages.map(({ source_url }) => source_url)
  const onlyMainContent = crawlOptions?.only_main_content
  return {
    provider: 'firecrawl',
    job_id: fireCrawlJobId,
    urls: selectedUrls,
    only_main_content: onlyMainContent,
  }
}
const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => {
if (dataSourceType === DataSourceType.FILE) {
return {
@@ -271,6 +289,19 @@ const StepTwo = ({
dataset_id: datasetId as string,
}
}
if (dataSourceType === DataSourceType.WEB) {
return {
info_list: {
data_source_type: dataSourceType,
website_info_list: getWebsiteInfo(),
},
indexing_technique: getIndexing_technique() as string,
process_rule: getProcessRule(),
doc_form: docForm,
doc_language: docLanguage,
dataset_id: datasetId as string,
}
}
}
const {
modelList: rerankModelList,
@@ -335,6 +366,9 @@ const StepTwo = ({
}
if (dataSourceType === DataSourceType.NOTION)
params.data_source.info_list.notion_info_list = getNotionInfo()
if (dataSourceType === DataSourceType.WEB)
params.data_source.info_list.website_info_list = getWebsiteInfo()
}
return params
}
@@ -819,6 +853,22 @@ const StepTwo = ({
</div>
</>
)}
{dataSourceType === DataSourceType.WEB && (
<>
<div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.websiteSource')}</div>
<div className='flex items-center text-sm leading-6 font-medium text-gray-800'>
<Globe01 className='shrink-0 mr-1' />
<span className='grow w-0 truncate'>{websitePages[0].source_url}</span>
{websitePages.length > 1 && (
<span className={s.sourceCount}>
<span>{t('datasetCreation.stepTwo.other')}</span>
<span>{websitePages.length - 1}</span>
<span>{t('datasetCreation.stepTwo.webpageUnit')}</span>
</span>
)}
</div>
</>
)}
</div>
<div className={s.divider} />
<div className={s.segmentCount}>

View File

@@ -0,0 +1,29 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import cn from 'classnames'
import Checkbox from '@/app/components/base/checkbox'
/** Props accepted by CheckboxWithLabel. */
type Props = {
  /** Extra classes applied to the wrapping label element. */
  className?: string
  /** Current checked state (controlled). */
  isChecked: boolean
  /** Called with the negated state when the checkbox is toggled. */
  onChange: (isChecked: boolean) => void
  /** Text shown next to the checkbox. */
  label: string
  /** Extra classes applied to the label text. */
  labelClassName?: string
}

/**
 * Controlled checkbox with a text label rendered to its right.
 */
const CheckboxWithLabel: FC<Props> = (props) => {
  const { className = '', isChecked, onChange, label, labelClassName } = props
  const toggle = () => onChange(!isChecked)
  const wrapperCls = cn(className, 'flex items-center h-7 space-x-2')
  const textCls = cn(labelClassName, 'text-sm font-normal text-gray-800')

  return (
    <label className={wrapperCls}>
      <Checkbox checked={isChecked} onCheck={toggle} />
      <div className={textCls}>{label}</div>
    </label>
  )
}
export default React.memo(CheckboxWithLabel)

View File

@@ -0,0 +1,30 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import cn from 'classnames'
import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
/** Props accepted by ErrorMessage. */
type Props = {
  /** Extra classes applied to the banner container. */
  className?: string
  /** Headline shown next to the warning icon. */
  title: string
  /** Optional detail text shown below the headline. */
  errorMsg?: string
}

/**
 * Warning banner: an alert icon with a title and, when provided, a detail line.
 */
const ErrorMessage: FC<Props> = (props) => {
  const { className, title, errorMsg } = props
  // Same short-circuit as an inline `errorMsg && …`: nothing visible unless a message is given.
  const detail = errorMsg && (
    <div className='mt-1 pl-6 leading-[18px] text-xs font-normal text-gray-700'>{errorMsg}</div>
  )

  return (
    <div className={cn(className, 'py-2 px-4 border-t border-gray-200 bg-[#FFFAEB]')}>
      <div className='flex items-center h-5'>
        <AlertTriangle className='mr-2 w-4 h-4 text-[#F79009]' />
        <div className='text-sm font-medium text-[#DC6803]'>{title}</div>
      </div>
      {detail}
    </div>
  )
}
export default React.memo(ErrorMessage)

View File

@@ -0,0 +1,54 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import cn from 'classnames'
import Input from './input'
import TooltipPlus from '@/app/components/base/tooltip-plus'
import { HelpCircle } from '@/app/components/base/icons/src/vender/line/general'
/** Props accepted by Field. */
type Props = {
  /** Extra classes applied to the field container. */
  className?: string
  /** Caption shown above the input. */
  label: string
  /** Extra classes applied to the caption. */
  labelClassName?: string
  /** Current input value (controlled). */
  value: string | number
  /** Called with the new value on every change. */
  onChange: (value: string | number) => void
  /** Shows a red asterisk after the caption when true. */
  isRequired?: boolean
  placeholder?: string
  /** Forwards to Input's `isNumber` flag. */
  isNumber?: boolean
  /** Optional help text shown in a tooltip next to the caption. */
  tooltip?: string
}

/**
 * Labelled form field: a caption row (optional required mark and help
 * tooltip) above an Input.
 */
const Field: FC<Props> = (props) => {
  const {
    className,
    label,
    labelClassName,
    value,
    onChange,
    isRequired = false,
    placeholder = '',
    isNumber = false,
    tooltip,
  } = props

  const requiredMark = isRequired && <span className='ml-0.5 text-xs font-semibold text-[#D92D20]'>*</span>
  const helpTip = tooltip && (
    <TooltipPlus popupContent={
      <div className='w-[200px]'>{tooltip}</div>
    }>
      <HelpCircle className='relative top-[3px] w-3 h-3 ml-1 text-gray-500' />
    </TooltipPlus>
  )

  return (
    <div className={cn(className)}>
      <div className='flex py-[7px]'>
        <div className={cn(labelClassName, 'flex items-center h-[18px] text-[13px] font-medium text-gray-900')}>{label} </div>
        {requiredMark}
        {helpTip}
      </div>
      <Input
        value={value}
        onChange={onChange}
        placeholder={placeholder}
        isNumber={isNumber}
      />
    </div>
  )
}
export default React.memo(Field)

View File

@@ -0,0 +1,58 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
/** Props accepted by Input. */
type Props = {
  /** Current value (controlled). */
  value: string | number
  /** Receives the raw text, or in number mode an integer / empty string. */
  onChange: (value: string | number) => void
  placeholder?: string
  /** When true the input is numeric, integer-only and clamped to MIN_VALUE. */
  isNumber?: boolean
}

/** Smallest value accepted in number mode. */
const MIN_VALUE = 1

/**
 * Text or integer input.
 *
 * In number mode the typed text is parsed as a base-10 integer: unparsable
 * text is reported as `''`, and values below MIN_VALUE are raised to it.
 * In text mode the raw string is passed through unchanged.
 */
const Input: FC<Props> = (props) => {
  const { value, onChange, placeholder = '', isNumber = false } = props

  const handleChange = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const raw = e.target.value
    if (!isNumber) {
      onChange(raw)
      return
    }
    const parsed = parseInt(raw, 10) // integer only
    if (isNaN(parsed)) {
      onChange('')
      return
    }
    onChange(Math.max(parsed, MIN_VALUE))
  }, [isNumber, onChange])

  // Only numeric inputs carry a `min` attribute.
  const numberAttrs = isNumber ? { min: MIN_VALUE } : {}

  return (
    <input
      type={isNumber ? 'number' : 'text'}
      {...numberAttrs}
      value={value}
      onChange={handleChange}
      className='flex h-9 w-full py-1 px-2 rounded-lg text-xs leading-normal bg-gray-100 caret-primary-600 hover:bg-gray-100 focus:ring-1 focus:ring-inset focus:ring-gray-200 focus-visible:outline-none focus:bg-gray-50 placeholder:text-gray-400'
      placeholder={placeholder}
    />
  )
}
export default React.memo(Input)

View File

@@ -0,0 +1,55 @@
'use client'
import { useBoolean } from 'ahooks'
import type { FC } from 'react'
import React, { useEffect } from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import { Settings04 } from '@/app/components/base/icons/src/vender/line/general'
import { ChevronRight } from '@/app/components/base/icons/src/vender/line/arrows'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by OptionsWrap. */
type Props = {
  /** Extra classes applied to the outer container. */
  className?: string
  /** Content shown while the section is expanded. */
  children: React.ReactNode
  /** Change signal: whenever this changes to a truthy value the section is folded. */
  controlFoldOptions?: number
}

/**
 * Collapsible "options" section. It starts expanded, toggles when its header
 * is clicked, and is folded from outside through `controlFoldOptions`.
 */
const OptionsWrap: FC<Props> = (props) => {
  const { className = '', children, controlFoldOptions } = props
  const { t } = useTranslation()
  const [fold, foldActions] = useBoolean(false)
  const { toggle: foldToggle, setTrue: foldHide } = foldActions
  const isExpanded = !fold

  useEffect(() => {
    if (controlFoldOptions)
      foldHide()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [controlFoldOptions])

  return (
    <div className={cn(className, isExpanded ? 'mb-0' : 'mb-3')}>
      <div
        className='flex justify-between items-center h-[26px] py-1 cursor-pointer select-none'
        onClick={foldToggle}
      >
        <div className='flex items-center text-gray-700'>
          <Settings04 className='mr-1 w-4 h-4' />
          <div className='text-[13px] font-semibold text-gray-800 uppercase'>{t(`${I18N_PREFIX}.options`)}</div>
        </div>
        <ChevronRight className={cn(isExpanded && 'rotate-90', 'w-4 h-4 text-gray-500')} />
      </div>
      {isExpanded && (
        <div className='mb-4'>
          {children}
        </div>
      )}
    </div>
  )
}
export default React.memo(OptionsWrap)

View File

@@ -0,0 +1,48 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Input from './input'
import Button from '@/app/components/base/button'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by UrlInput. */
type Props = {
  /** True while a crawl is in progress; the run button shows its loading state. */
  isRunning: boolean
  /** Called with the typed URL when the run button is clicked and no crawl is running. */
  onRun: (url: string) => void
}

/**
 * URL text box with a "run" button. The URL is kept in local state and handed
 * to `onRun` on click; clicks are ignored while `isRunning` is true.
 */
const UrlInput: FC<Props> = (props) => {
  const { isRunning, onRun } = props
  const { t } = useTranslation()
  const [url, setUrl] = useState('')

  const handleUrlChange = useCallback((nextUrl: string | number) => {
    setUrl(nextUrl as string)
  }, [])

  const handleOnRun = useCallback(() => {
    if (!isRunning)
      onRun(url)
  }, [isRunning, onRun, url])

  const buttonText = isRunning ? '' : t(`${I18N_PREFIX}.run`)

  return (
    <div className='flex items-center justify-between'>
      <Input
        value={url}
        onChange={handleUrlChange}
        placeholder='https://docs.dify.ai'
      />
      <Button
        type='primary'
        onClick={handleOnRun}
        className='ml-2 !h-8 text-[13px] font-medium'
        loading={isRunning}
      >
        {buttonText}
      </Button>
    </div>
  )
}
export default React.memo(UrlInput)

View File

@@ -0,0 +1,40 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import type { CrawlResultItem as CrawlResultItemType } from '@/models/datasets'
import Checkbox from '@/app/components/base/checkbox'
/** Props accepted by CrawledResultItem. */
type Props = {
  /** The crawled page this row represents. */
  payload: CrawlResultItemType
  /** Whether the row's checkbox is checked. */
  isChecked: boolean
  /** Whether this row is the one currently being previewed (highlighted). */
  isPreview: boolean
  /** Called with the negated checked state when the checkbox is toggled. */
  onCheckChange: (checked: boolean) => void
  /** Called when the row's "preview" action is clicked. */
  onPreview: () => void
}

/**
 * One row of the crawl result list: checkbox, page title, a hover-only
 * preview action, and the page's source URL underneath.
 */
const CrawledResultItem: FC<Props> = (props) => {
  const { isPreview, payload, isChecked, onCheckChange, onPreview } = props
  const { t } = useTranslation()
  const { title, source_url: sourceUrl } = payload

  const handleCheckChange = useCallback(() => {
    onCheckChange(!isChecked)
  }, [isChecked, onCheckChange])

  // The previewed row is highlighted; other rows get the hover treatment instead.
  const stateCls = isPreview
    ? 'border-[#D1E0FF] bg-primary-50 shadow-xs'
    : 'group hover:bg-gray-100'

  return (
    <div className={cn(stateCls, 'rounded-md px-2 py-[5px] cursor-pointer border border-transparent')}>
      <div className='flex items-center h-5'>
        <Checkbox className='group-hover:border-2 group-hover:border-primary-600 mr-2 shrink-0' checked={isChecked} onCheck={handleCheckChange} />
        <div className='grow w-0 truncate text-sm font-medium text-gray-700' title={title}>{title}</div>
        <div onClick={onPreview} className='hidden group-hover:flex items-center h-6 px-2 text-xs rounded-md font-medium text-gray-500 uppercase hover:bg-gray-50'>{t('datasetCreation.stepOne.website.preview')}</div>
      </div>
      <div className='mt-0.5 truncate pl-6 leading-[18px] text-xs font-normal text-gray-500' title={sourceUrl}>{sourceUrl}</div>
    </div>
  )
}
export default React.memo(CrawledResultItem)

View File

@@ -0,0 +1,87 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import CheckboxWithLabel from './base/checkbox-with-label'
import CrawledResultItem from './crawled-result-item'
import type { CrawlResultItem } from '@/models/datasets'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by CrawledResult. */
type Props = {
  /** Extra classes applied to the outer container. */
  className?: string
  /** All pages returned by the crawl. */
  list: CrawlResultItem[]
  /** Pages currently selected by the user (controlled). */
  checkedList: CrawlResultItem[]
  /** Called with the complete new selection whenever it changes. */
  onSelectedChange: (selected: CrawlResultItem[]) => void
  /** Called with the page whose preview was requested. */
  onPreview: (payload: CrawlResultItem) => void
  /** Crawl duration; rendered with one decimal place in the header. */
  usedTime: number
}

/**
 * List of crawled pages with per-row selection, a select-all / reset-all
 * toggle and a summary of how many pages were scraped and how long it took.
 *
 * Pages are identified by `source_url` throughout.
 */
const CrawledResult: FC<Props> = ({
  className = '',
  list,
  checkedList,
  onSelectedChange,
  onPreview,
  usedTime,
}) => {
  const { t } = useTranslation()

  // Index the selection by URL once per change so each row lookup is O(1).
  const checkedUrls = React.useMemo(
    () => new Set(checkedList.map(checkedItem => checkedItem.source_url)),
    [checkedList],
  )

  // "All checked" means every listed page is selected. Comparing lengths alone
  // would report an empty list as fully selected, and would also be fooled by
  // a selection of the same size that refers to different pages.
  const isCheckAll = list.length > 0 && list.every(item => checkedUrls.has(item.source_url))

  const handleCheckedAll = useCallback(() => {
    if (!isCheckAll)
      onSelectedChange(list)
    else
      onSelectedChange([])
  }, [isCheckAll, list, onSelectedChange])

  const handleItemCheckChange = useCallback((item: CrawlResultItem) => {
    return (checked: boolean) => {
      if (checked)
        onSelectedChange([...checkedList, item])
      else
        onSelectedChange(checkedList.filter(checkedItem => checkedItem.source_url !== item.source_url))
    }
  }, [checkedList, onSelectedChange])

  // Index of the row currently highlighted as "being previewed"; -1 means none.
  const [previewIndex, setPreviewIndex] = React.useState<number>(-1)
  const handlePreview = useCallback((index: number) => {
    return () => {
      setPreviewIndex(index)
      onPreview(list[index])
    }
  }, [list, onPreview])

  return (
    <div className={cn(className, 'border-t border-gray-200')}>
      <div className='flex items-center justify-between h-[34px] px-4 bg-gray-50 shadow-xs border-b-[0.5px] border-black/8 text-xs font-normal text-gray-700'>
        <CheckboxWithLabel
          isChecked={isCheckAll}
          onChange={handleCheckedAll} label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
          labelClassName='!font-medium'
        />
        <div>{t(`${I18N_PREFIX}.scrapTimeInfo`, {
          total: list.length,
          time: usedTime.toFixed(1),
        })}</div>
      </div>
      <div className='p-2'>
        {list.map((item, index) => (
          <CrawledResultItem
            key={item.source_url}
            isPreview={index === previewIndex}
            onPreview={handlePreview(index)}
            payload={item}
            isChecked={checkedUrls.has(item.source_url)}
            onCheckChange={handleItemCheckChange(item)}
          />
        ))}
      </div>
    </div>
  )
}
export default React.memo(CrawledResult)

View File

@@ -0,0 +1,37 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import cn from 'classnames'
import { useTranslation } from 'react-i18next'
import { RowStruct } from '@/app/components/base/icons/src/public/other'
/** Props accepted by Crawling. */
type Props = {
  /** Extra classes applied to the outer container. */
  className?: string
  /** Number of pages scraped so far. */
  crawledNum: number
  /** Total number of pages shown after the slash in the progress text. */
  totalNum: number
}

/**
 * In-progress view of a crawl: a "scraped x/y" header above four placeholder
 * rows.
 */
const Crawling: FC<Props> = (props) => {
  const { className = '', crawledNum, totalNum } = props
  const { t } = useTranslation()

  // Four skeleton rows, keyed by position.
  const placeholderRows = Array.from({ length: 4 }, (_, index) => (
    <div className='py-[5px]' key={index}>
      <RowStruct />
    </div>
  ))

  return (
    <div className={cn(className, 'border-t border-gray-200')}>
      <div className='flex items-center h-[34px] px-4 bg-gray-50 shadow-xs border-b-[0.5px] border-black/8 text-xs font-normal text-gray-700'>
        {t('datasetCreation.stepOne.website.totalPageScraped')} {crawledNum}/{totalNum}
      </div>
      <div className='p-2'>
        {placeholderRows}
      </div>
    </div>
  )
}
export default React.memo(Crawling)

View File

@@ -0,0 +1,42 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
import { BookOpen01 } from '@/app/components/base/icons/src/vender/line/education'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by Header. */
type Props = {
  /** Called when the settings (gear) icon is clicked. */
  onSetting: () => void
}

/**
 * Title bar of the Firecrawl panel: the title with a settings shortcut on the
 * left and a link to the Firecrawl documentation on the right.
 */
const Header: FC<Props> = (props) => {
  const { onSetting } = props
  const { t } = useTranslation()

  const titleGroup = (
    <div className='flex items-center'>
      <div className='text-base font-medium text-gray-700'>{t(`${I18N_PREFIX}.firecrawlTitle`)}</div>
      <div className='ml-2 mr-1 w-px h-3.5 bg-gray-200'></div>
      <div
        className='p-1 rounded-md hover:bg-black/5 cursor-pointer'
        onClick={onSetting}
      >
        <Settings01 className='w-3.5 h-3.5 text-gray-500' />
      </div>
    </div>
  )

  // Opens in a new tab.
  const docLink = (
    <a
      href='https://docs.firecrawl.dev/introduction'
      target='_blank' rel='noopener noreferrer'
      className='flex items-center text-xs text-primary-600'
    >
      <BookOpen01 className='mr-1 w-3.5 h-3.5 text-primary-600' />
      {t(`${I18N_PREFIX}.firecrawlDoc`)}
    </a>
  )

  return (
    <div className='flex h-6 items-center justify-between'>
      {titleGroup}
      {docLink}
    </div>
  )
}
export default React.memo(Header)

View File

@@ -0,0 +1,216 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import Header from './header'
import UrlInput from './base/url-input'
import OptionsWrap from './base/options-wrap'
import Options from './options'
import CrawledResult from './crawled-result'
import Crawling from './crawling'
import ErrorMessage from './base/error-message'
import { useModalContext } from '@/context/modal-context'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import Toast from '@/app/components/base/toast'
import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets'
import { sleep } from '@/utils'
const ERROR_I18N_PREFIX = 'common.errorMsg'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by FireCrawl. */
type Props = {
  /** Called with the crawled page whose preview was requested. */
  onPreview: (payload: CrawlResultItem) => void
  /** Pages currently selected by the user (controlled by the parent). */
  checkedCrawlResult: CrawlResultItem[]
  /** Called with the complete new selection whenever it changes. */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Called with the job id of every newly created crawl task. */
  onJobIdChange: (jobId: string) => void
  /** Current crawl options (controlled by the parent). */
  crawlOptions: CrawlOptions
  /** Called with the updated options when the user edits them. */
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}

/** Lifecycle of the panel: nothing run yet, crawl in progress, crawl ended (success or error). */
enum Step {
  init = 'init',
  running = 'running',
  finished = 'finished',
}

/**
 * Firecrawl panel: URL input, crawl options, live progress while the crawl is
 * running, and afterwards either the selectable result list or an error banner.
 *
 * A run validates the input, creates a crawl task, reports its job id through
 * `onJobIdChange`, and then polls the task status every 2.5 s until it is
 * completed or failed.
 */
const FireCrawl: FC<Props> = ({
  onPreview,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)

  // Timestamp used as a change signal for OptionsWrap: it is bumped on every
  // step change after init, which folds the options section.
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const { setShowAccountSettingModal } = useModalContext()
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  /**
   * Validates the run input. Checks, in order: URL present, URL starts with
   * http:// or https://, and the `limit` option is filled in. Only the first
   * failure is reported.
   */
  const checkValid = useCallback((url: string) => {
    let errorMsg = ''
    if (!url) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      })
    }
    if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
      errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)
    if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      })
    }
    return {
      isValid: !errorMsg,
      errorMsg,
    }
  }, [crawlOptions, t])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running

  // Latest status payload of the current run: progress while running, final data when done.
  const [crawlResult, setCrawlResult] = useState<{
    current: number
    total: number
    data: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && crawlErrorMessage

  /**
   * Polls the status of a crawl task until it ends.
   *
   * Resolves with `{ isError: false, data }` once the task is completed, or
   * with `{ isError: true, errorMessage, data: { data: [] } }` when the task
   * reports an error, returns no status, or the status request is rejected.
   * While the task is still running the progress state is updated and the
   * status is requested again after 2.5 s. The reported total is capped at
   * the configured page limit.
   */
  const waitForCrawlFinished = useCallback(async (jobId: string) => {
    try {
      const res = await checkFirecrawlTaskStatus(jobId) as any
      if (res.status === 'completed') {
        return {
          isError: false,
          data: {
            ...res,
            total: Math.min(res.total, parseFloat(crawlOptions.limit as string)),
          },
        }
      }
      if (res.status === 'error' || !res.status) {
        // can't get the error message from the firecrawl api
        return {
          isError: true,
          errorMessage: res.message,
          data: {
            data: [],
          },
        }
      }
      // update the progress
      setCrawlResult({
        ...res,
        total: Math.min(res.total, parseFloat(crawlOptions.limit as string)),
      })
      await sleep(2500)
      return await waitForCrawlFinished(jobId)
    }
    catch (e: any) {
      // NOTE(review): assumes the rejection value exposes json() (Response-like) — confirm against the service layer.
      // If it does not, the failure propagates to the caller, which reports an unknown error.
      const errorBody = await e.json()
      return {
        isError: true,
        errorMessage: errorBody.message,
        data: {
          data: [],
        },
      }
    }
  }, [crawlOptions.limit])

  /**
   * Starts a crawl for `url`: validate, create the task, publish its job id,
   * wait for it to end, then store either the result or the error message.
   * The panel always ends in the `finished` step once a run was started.
   */
  const handleRun = useCallback(async (url: string) => {
    const { isValid, errorMsg } = checkValid(url)
    if (!isValid) {
      Toast.notify({
        message: errorMsg!,
        type: 'error',
      })
      return
    }
    // Start from a clean slate. Without this the progress header shows the
    // previous run's numbers until the first poll, and pages selected from
    // the previous run stay selected and get paired with the new job id.
    setCrawlResult(undefined)
    onCheckedCrawlResultChange([])
    setStep(Step.running)
    try {
      const passToServerCrawlOptions: any = {
        ...crawlOptions,
      }
      // An empty max depth means "not set": leave the field out of the request.
      if (crawlOptions.max_depth === '')
        delete passToServerCrawlOptions.max_depth

      const res = await createFirecrawlTask({
        url,
        options: passToServerCrawlOptions,
      }) as any
      const jobId = res.job_id
      onJobIdChange(jobId)
      const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
      if (isError) {
        setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
      }
      else {
        setCrawlResult(data)
        setCrawlErrorMessage('')
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.error(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

  return (
    <div>
      <Header onSetting={handleSetting} />
      <div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}>
        <UrlInput onRun={handleRun} isRunning={isRunning} />
        <OptionsWrap
          className={cn('mt-4')}
          controlFoldOptions={controlFoldOptions}
        >
          <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
        </OptionsWrap>
        {!isInit && (
          <div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'>
            {isRunning
              && <Crawling
                className='mt-2'
                crawledNum={crawlResult?.current || 0}
                totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0}
              />}
            {showError && (
              <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
            )}
            {isCrawlFinished && !showError
              && <CrawledResult
                className='mb-2'
                list={crawlResult?.data || []}
                checkedList={checkedCrawlResult}
                onSelectedChange={onCheckedCrawlResultChange}
                onPreview={onPreview}
                usedTime={parseFloat(crawlResult?.time_consuming as string) || 0}
              />
            }
          </div>
        )}
      </div>
    </div>
  )
}
export default React.memo(FireCrawl)

View File

@@ -0,0 +1,24 @@
import type { CrawlResultItem } from '@/models/datasets'
// Static sample of three crawl result items with placeholder markdown and descriptions.
// NOTE(review): presumably mock data for development; nothing in the visible code imports it — confirm before relying on it.
const result: CrawlResultItem[] = [
{
title: 'Start the frontend Docker container separately',
markdown: 'Markdown 1',
description: 'Description 1',
source_url: 'https://example.com/1',
},
{
title: 'Advanced Tool Integration',
markdown: 'Markdown 2',
description: 'Description 2',
source_url: 'https://example.com/2',
},
{
title: 'Local Source Code Start | English | Dify',
markdown: 'Markdown 3',
description: 'Description 3',
source_url: 'https://example.com/3',
},
]
export default result

View File

@@ -0,0 +1,83 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
import cn from 'classnames'
import { useTranslation } from 'react-i18next'
import CheckboxWithLabel from './base/checkbox-with-label'
import Field from './base/field'
import type { CrawlOptions } from '@/models/datasets'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by Options. */
type Props = {
  /** Extra classes applied to the form container. */
  className?: string
  /** Current crawl options (controlled). */
  payload: CrawlOptions
  /** Called with a full copy of the options in which one field has been replaced. */
  onChange: (payload: CrawlOptions) => void
}

/**
 * Form for the crawl options: sub-page crawling, page limit, max depth,
 * exclude / include path patterns and main-content-only extraction.
 */
const Options: FC<Props> = (props) => {
  const { className = '', payload, onChange } = props
  const { t } = useTranslation()

  // Returns a change handler bound to one option key.
  const handleChange = useCallback((key: keyof CrawlOptions) => {
    return (value: any) => {
      const nextPayload = { ...payload, [key]: value }
      onChange(nextPayload)
    }
  }, [payload, onChange])

  const fieldCls = 'grow shrink-0'
  const rowCls = 'flex justify-between space-x-4'

  return (
    <div className={cn(className, ' space-y-2')}>
      <CheckboxWithLabel
        label={t(`${I18N_PREFIX}.crawlSubPage`)}
        isChecked={payload.crawl_sub_pages}
        onChange={handleChange('crawl_sub_pages')}
      />
      <div className={rowCls}>
        <Field
          className={fieldCls}
          label={t(`${I18N_PREFIX}.limit`)}
          value={payload.limit}
          onChange={handleChange('limit')}
          isNumber
          isRequired
        />
        <Field
          className={fieldCls}
          label={t(`${I18N_PREFIX}.maxDepth`)}
          value={payload.max_depth}
          onChange={handleChange('max_depth')}
          isNumber
          tooltip={t(`${I18N_PREFIX}.maxDepthTooltip`)!}
        />
      </div>
      <div className={rowCls}>
        <Field
          className={fieldCls}
          label={t(`${I18N_PREFIX}.excludePaths`)}
          value={payload.excludes}
          onChange={handleChange('excludes')}
          placeholder='blog/*, /about/*'
        />
        <Field
          className={fieldCls}
          label={t(`${I18N_PREFIX}.includeOnlyPaths`)}
          value={payload.includes}
          onChange={handleChange('includes')}
          placeholder='articles/*'
        />
      </div>
      <CheckboxWithLabel
        label={t(`${I18N_PREFIX}.extractOnlyMainContent`)}
        isChecked={payload.only_main_content}
        onChange={handleChange('only_main_content')}
      />
    </div>
  )
}
export default React.memo(Options)

View File

@@ -0,0 +1,72 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import NoData from './no-data'
import Firecrawl from './firecrawl'
import { useModalContext } from '@/context/modal-context'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import { fetchFirecrawlApiKey } from '@/service/datasets'
import { type DataSourceWebsiteItem, WebsiteProvider } from '@/models/common'
/** Props for the website data source panel; all of them are forwarded to `Firecrawl`. */
type Props = {
  /** Called with the crawl result item the user wants to preview. */
  onPreview: (payload: CrawlResultItem) => void
  /** Crawl result items currently checked by the user. */
  checkedCrawlResult: CrawlResultItem[]
  /** Called with the new list of checked crawl result items. */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Called with the crawl job id. */
  onJobIdChange: (jobId: string) => void
  /** Current crawl option values. */
  crawlOptions: CrawlOptions
  /** Called with the updated crawl options. */
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}

/**
 * Website data source panel.
 *
 * On mount it asks the backend whether an enabled Firecrawl provider is
 * configured. While that check is pending nothing is rendered; afterwards it
 * shows the `Firecrawl` crawler when a provider is available, or the `NoData`
 * placeholder (which opens the data-source settings) when it is not.
 */
const Website: FC<Props> = ({
  onPreview,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}) => {
  const { setShowAccountSettingModal } = useModalContext()
  const [isLoaded, setIsLoaded] = useState(false)
  const [isSetFirecrawlApiKey, setIsSetFirecrawlApiKey] = useState(false)

  // Fetches the website provider settings and records whether at least one
  // enabled Firecrawl entry exists.
  const checkSetApiKey = useCallback(async () => {
    const res = await fetchFirecrawlApiKey() as any
    const hasEnabledFirecrawl = res.settings.some(
      (item: DataSourceWebsiteItem) => item.provider === WebsiteProvider.fireCrawl && !item.disabled,
    )
    setIsSetFirecrawlApiKey(hasEnabledFirecrawl)
  }, [])

  useEffect(() => {
    // Use `finally` rather than `then`: if the settings request fails the
    // panel must still leave its blank loading state and fall back to the
    // "not configured" placeholder instead of rendering nothing forever.
    // The rejection itself is not swallowed and still surfaces as before.
    checkSetApiKey().finally(() => {
      setIsLoaded(true)
    })
    // `checkSetApiKey` is memoized with no dependencies, so this runs once on mount.
  }, [checkSetApiKey])

  // Opens the data-source settings and re-checks the configuration when the
  // modal is dismissed, so a freshly added key is picked up.
  const handleOnConfig = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
      onCancelCallback: checkSetApiKey,
    })
  }, [checkSetApiKey, setShowAccountSettingModal])

  if (!isLoaded)
    return null

  return (
    <div>
      {isSetFirecrawlApiKey
        ? (
          <Firecrawl
            onPreview={onPreview}
            checkedCrawlResult={checkedCrawlResult}
            onCheckedCrawlResultChange={onCheckedCrawlResultChange}
            onJobIdChange={onJobIdChange}
            crawlOptions={crawlOptions}
            onCrawlOptionsChange={onCrawlOptionsChange}
          />
        )
        : (
          <NoData onConfig={handleOnConfig} />
        )}
    </div>
  )
}
export default React.memo(Website)

View File

@@ -0,0 +1,36 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others'
import Button from '@/app/components/base/button'
// Common i18n key prefix for the texts rendered by this placeholder.
const I18N_PREFIX = 'datasetCreation.stepOne.website'

/** Props for the "Firecrawl not configured" placeholder. */
type Props = {
  /** Invoked when the user clicks the configure button. */
  onConfig: () => void
}

/**
 * Placeholder card rendered in place of the crawler: an icon, a title with a
 * short description, and a primary button that triggers `onConfig`.
 */
const NoData: FC<Props> = ({ onConfig }) => {
  const { t } = useTranslation()

  const heading = t(`${I18N_PREFIX}.fireCrawlNotConfigured`)
  const description = t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`)
  const buttonText = t(`${I18N_PREFIX}.configure`)

  return (
    <div className='max-w-[640px] p-6 rounded-2xl bg-gray-50'>
      <div className='flex w-11 h-11 items-center justify-center bg-gray-50 rounded-xl border-[0.5px] border-gray-100 shadow-lg'>🔥</div>
      <div className='my-2'>
        <span className='text-gray-700 font-semibold'>{heading}<Icon3Dots className='inline relative -top-3 -left-1.5' /></span>
        <div className='mt-1 pb-3 text-gray-500 text-[13px] font-normal'>{description}</div>
      </div>
      <Button type='primary' onClick={onConfig} className='!h-8 text-[13px] font-medium ' >{buttonText}</Button>
    </div>
  )
}
export default React.memo(NoData)

View File

@@ -0,0 +1,41 @@
'use client'
import React from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import { XMarkIcon } from '@heroicons/react/20/solid'
import s from '../file-preview/index.module.css'
import type { CrawlResultItem } from '@/models/datasets'
/** Props for the crawled page preview panel. */
type IProps = {
  /** Crawl result whose title, source URL and markdown are displayed. */
  payload: CrawlResultItem
  /** Invoked when the close icon in the header is clicked. */
  hidePreview: () => void
}

/**
 * Preview panel for a single crawled page. The header shows a translated
 * caption with a close icon, the page title and its source URL; the body shows
 * the page's markdown as plain text. Styling comes from the file-preview CSS module.
 */
const WebsitePreview = ({ payload, hidePreview }: IProps) => {
  const { t } = useTranslation()
  const { title, source_url: sourceUrl, markdown } = payload
  const caption = t('datasetCreation.stepOne.pagePreview')

  return (
    <div className={cn(s.filePreview)}>
      <div className={cn(s.previewHeader)}>
        <div className={cn(s.title)}>
          <span>{caption}</span>
          <div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
            <XMarkIcon className='h-4 w-4' />
          </div>
        </div>
        <div className='leading-5 text-sm font-medium text-gray-900 break-words'>{title}</div>
        {/* The full URL is also exposed via `title` because the text is truncated. */}
        <div className='truncate leading-[18px] text-xs font-normal text-gray-500' title={sourceUrl}>{sourceUrl}</div>
      </div>
      <div className={cn(s.previewContent)}>
        <div className={cn(s.fileContent)}>{markdown}</div>
      </div>
    </div>
  )
}
export default WebsitePreview