diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx index d9ab9d969..1aebec0b4 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx @@ -109,6 +109,9 @@ const AppDetailLayout: FC = (props) => { setAppDetail(res) setNavigation(getNavigations(appId, isCurrentWorkspaceManager, isCurrentWorkspaceEditor, res.mode)) } + }).catch((e: any) => { + if (e.status === 404) + router.replace('/apps') }) }, [appId, isCurrentWorkspaceManager, isCurrentWorkspaceEditor]) diff --git a/web/app/(commonLayout)/apps/Apps.tsx b/web/app/(commonLayout)/apps/Apps.tsx index ff79075fa..bf91d42fc 100644 --- a/web/app/(commonLayout)/apps/Apps.tsx +++ b/web/app/(commonLayout)/apps/Apps.tsx @@ -73,10 +73,10 @@ const Apps = () => { const anchorRef = useRef(null) const options = [ - { value: 'all', text: t('app.types.all'), icon: }, - { value: 'chat', text: t('app.types.chatbot'), icon: }, - { value: 'agent-chat', text: t('app.types.agent'), icon: }, - { value: 'workflow', text: t('app.types.workflow'), icon: }, + { value: 'all', text: t('app.types.all'), icon: }, + { value: 'chat', text: t('app.types.chatbot'), icon: }, + { value: 'agent-chat', text: t('app.types.agent'), icon: }, + { value: 'workflow', text: t('app.types.workflow'), icon: }, ] useEffect(() => { diff --git a/web/app/components/base/icons/assets/public/other/row-struct.svg b/web/app/components/base/icons/assets/public/other/row-struct.svg new file mode 100644 index 000000000..ba275ffee --- /dev/null +++ b/web/app/components/base/icons/assets/public/other/row-struct.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/app/components/base/icons/assets/vender/line/others/icon-3-dots.svg b/web/app/components/base/icons/assets/vender/line/others/icon-3-dots.svg new file mode 100644 index 000000000..bba42851f --- /dev/null +++ b/web/app/components/base/icons/assets/vender/line/others/icon-3-dots.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/app/components/base/icons/src/public/other/RowStruct.json b/web/app/components/base/icons/src/public/other/RowStruct.json new file mode 100644 index 000000000..0d1ef43f4 --- /dev/null +++ b/web/app/components/base/icons/src/public/other/RowStruct.json @@ -0,0 +1,56 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "width": "624", + "height": "48", + "viewBox": "0 0 624 48", + "fill": "none", + "xmlns": "http://www.w3.org/2000/svg" + }, + "children": [ + { + "type": "element", + "name": "rect", + "attributes": { + "x": "8", + "y": "7", + "width": "16", + "height": "16", + "rx": "5", + "fill": "#F2F4F7" + }, + "children": [] + }, + { + "type": "element", + "name": "rect", + "attributes": { + "x": "32", + "y": "10", + "width": "233", + "height": "10", + "rx": "3", + "fill": "#EAECF0" + }, + "children": [] + }, + { + "type": "element", + "name": "rect", + "attributes": { + "x": "32", + "y": "31", + "width": "345", + "height": "6", + "rx": "3", + "fill": "#F2F4F7" + }, + "children": [] + } + ] + }, + "name": "RowStruct" +} \ No newline at end of file diff --git a/web/app/components/base/icons/src/public/other/RowStruct.tsx b/web/app/components/base/icons/src/public/other/RowStruct.tsx new file mode 100644 index 000000000..ef5ab8c62 --- /dev/null +++ b/web/app/components/base/icons/src/public/other/RowStruct.tsx @@ -0,0 +1,16 @@ +// GENERATE BY script +// DON NOT EDIT IT MANUALLY + +import * as React from 'react' +import data from './RowStruct.json' +import IconBase from '@/app/components/base/icons/IconBase' +import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase' + +const Icon = React.forwardRef, Omit>(( + props, + ref, +) => ) + +Icon.displayName = 'RowStruct' + +export default Icon diff --git a/web/app/components/base/icons/src/public/other/index.ts b/web/app/components/base/icons/src/public/other/index.ts index adf723edb..257ba59b0 100644 --- a/web/app/components/base/icons/src/public/other/index.ts +++ b/web/app/components/base/icons/src/public/other/index.ts @@ -1,2 +1,3 @@ export { default as Icon3Dots } from './Icon3Dots' export { default as DefaultToolIcon } from './DefaultToolIcon' +export { default as RowStruct } from './RowStruct' diff --git a/web/app/components/base/icons/src/vender/line/others/Icon3Dots.json b/web/app/components/base/icons/src/vender/line/others/Icon3Dots.json new file mode 100644 index 000000000..0942222f3 --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/others/Icon3Dots.json @@ -0,0 +1,39 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "width": "16", + "height": "16", + "viewBox": "0 0 16 16", + "fill": "none", + "xmlns": "http://www.w3.org/2000/svg" + }, + "children": [ + { + "type": "element", + "name": "g", + "attributes": { + "id": "Icon-3-dots" + }, + "children": [ + { + "type": "element", + "name": "path", + "attributes": { + "id": "Icon", + "d": "M5 6.5V5M8.93934 7.56066L10 6.5M10.0103 11.5H11.5103", + "stroke": "currentColor", + "stroke-width": "2", + "stroke-linecap": "round", + "stroke-linejoin": "round" + }, + "children": [] + } + ] + } + ] + }, + "name": "Icon3Dots" +} \ No newline at end of file diff --git a/web/app/components/base/icons/src/vender/line/others/Icon3Dots.tsx b/web/app/components/base/icons/src/vender/line/others/Icon3Dots.tsx new file mode 100644 index 000000000..1f9eb767a --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/others/Icon3Dots.tsx @@ -0,0 +1,16 @@ +// GENERATE BY script +// DON NOT EDIT IT MANUALLY + +import * as React from 'react' +import data from './Icon3Dots.json' +import IconBase from '@/app/components/base/icons/IconBase' +import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase' + +const Icon = React.forwardRef, Omit>(( + props, + ref, +) => ) + +Icon.displayName = 'Icon3Dots' + +export default Icon diff --git a/web/app/components/base/icons/src/vender/line/others/index.ts b/web/app/components/base/icons/src/vender/line/others/index.ts index 648792f22..554f14b55 100644 --- a/web/app/components/base/icons/src/vender/line/others/index.ts +++ b/web/app/components/base/icons/src/vender/line/others/index.ts @@ -3,4 +3,5 @@ export { default as Colors } from './Colors' export { default as DragHandle } from './DragHandle' export { default as Exchange02 } from './Exchange02' export { default as FileCode } from './FileCode' +export { default as Icon3Dots } from './Icon3Dots' export { default as Tools } from './Tools' diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx index da5de3eb2..12c6284d8 100644 --- a/web/app/components/datasets/create/index.tsx +++ b/web/app/components/datasets/create/index.tsx @@ -8,7 +8,7 @@ import StepOne from './step-one' import StepTwo from './step-two' import StepThree from './step-three' import { DataSourceType } from '@/models/datasets' -import type { DataSet, FileItem, createDocumentResponse } from '@/models/datasets' +import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets' import { fetchDataSource } from '@/service/common' import { fetchDatasetDetail } from '@/service/datasets' import type { NotionPage } from '@/models/common' @@ -19,6 +19,15 @@ type DatasetUpdateFormProps = { datasetId?: string } +const DEFAULT_CRAWL_OPTIONS: CrawlOptions = { + crawl_sub_pages: true, + only_main_content: true, + includes: '', + excludes: '', + limit: 10, + max_depth: '', +} + const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const { t } = useTranslation() const { setShowAccountSettingModal } = useModalContext() @@ -36,9 +45,13 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { setNotionPages(value) } + const [websitePages, setWebsitePages] = useState([]) + const [crawlOptions, setCrawlOptions] = useState(DEFAULT_CRAWL_OPTIONS) + const updateFileList = (preparedFiles: FileItem[]) => { setFiles(preparedFiles) } + const [fireCrawlJobId, setFireCrawlJobId] = useState('') const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => { const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID) @@ -108,20 +121,27 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
- {step === 1 && setShowAccountSettingModal({ payload: 'data-source' })} - datasetId={datasetId} - dataSourceType={dataSourceType} - dataSourceTypeDisable={!!detail?.data_source_type} - changeType={setDataSourceType} - files={fileList} - updateFile={updateFile} - updateFileList={updateFileList} - notionPages={notionPages} - updateNotionPages={updateNotionPages} - onStepChange={nextStep} - />} +
+ setShowAccountSettingModal({ payload: 'data-source' })} + datasetId={datasetId} + dataSourceType={dataSourceType} + dataSourceTypeDisable={!!detail?.data_source_type} + changeType={setDataSourceType} + files={fileList} + updateFile={updateFile} + updateFileList={updateFileList} + notionPages={notionPages} + updateNotionPages={updateNotionPages} + onStepChange={nextStep} + websitePages={websitePages} + updateWebsitePages={setWebsitePages} + onFireCrawlJobIdChange={setFireCrawlJobId} + crawlOptions={crawlOptions} + onCrawlOptionsChange={setCrawlOptions} + /> +
{(step === 2 && (!datasetId || (datasetId && !!detail))) && setShowAccountSettingModal({ payload: 'provider' })} @@ -130,9 +150,12 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { dataSourceType={dataSourceType} files={fileList.map(file => file.file)} notionPages={notionPages} + websitePages={websitePages} + fireCrawlJobId={fireCrawlJobId} onStepChange={changeStep} updateIndexingTypeCache={updateIndexingTypeCache} updateResultCache={updateResultCache} + crawlOptions={crawlOptions} />} {step === 3 && void onStepChange: () => void changeType: (type: DataSourceType) => void + websitePages?: CrawlResultItem[] + updateWebsitePages: (value: CrawlResultItem[]) => void + onFireCrawlJobIdChange: (jobId: string) => void + crawlOptions: CrawlOptions + onCrawlOptionsChange: (payload: CrawlOptions) => void } type NotionConnectorProps = { @@ -49,7 +56,7 @@ export const NotionConnector = ({ onSetting }: NotionConnectorProps) => { const StepOne = ({ datasetId, - dataSourceType, + dataSourceType: inCreatePageDataSourceType, dataSourceTypeDisable, changeType, hasConnection, @@ -60,11 +67,17 @@ const StepOne = ({ updateFile, notionPages = [], updateNotionPages, + websitePages = [], + updateWebsitePages, + onFireCrawlJobIdChange, + crawlOptions, + onCrawlOptionsChange, }: IStepOneProps) => { const { dataset } = useDatasetDetailContext() const [showModal, setShowModal] = useState(false) const [currentFile, setCurrentFile] = useState() const [currentNotionPage, setCurrentNotionPage] = useState() + const [currentWebsite, setCurrentWebsite] = useState() const { t } = useTranslation() const modalShowHandle = () => setShowModal(true) @@ -85,8 +98,13 @@ const StepOne = ({ setCurrentNotionPage(undefined) } - const shouldShowDataSourceTypeList = !datasetId || (datasetId && !dataset?.data_source_type) + const hideWebsitePreview = () => { + setCurrentWebsite(undefined) + } + const shouldShowDataSourceTypeList = !datasetId || (datasetId && !dataset?.data_source_type) + const isInCreatePage = shouldShowDataSourceTypeList + const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : dataset?.data_source_type const { plan, enableBilling } = useProviderContext() const allFileLoaded = (files.length > 0 && files.every(file => file.file.id)) const hasNotin = notionPages.length > 0 @@ -150,10 +168,13 @@ const StepOne = ({ {t('datasetCreation.stepOne.dataSourceType.notion')}
changeType(DataSourceType.WEB)} + className={cn( + s.dataSourceItem, + dataSourceType === DataSourceType.WEB && s.active, + dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled, + )} + onClick={() => changeType(DataSourceType.WEB)} > - Coming soon {t('datasetCreation.stepOne.dataSourceType.web')}
@@ -201,6 +222,26 @@ const StepOne = ({ )} )} + {dataSourceType === DataSourceType.WEB && ( + <> +
+ +
+ {isShowVectorSpaceFull && ( +
+ +
+ )} + + + )} {!datasetId && ( <>
@@ -212,6 +253,7 @@ const StepOne = ({
{currentFile && } {currentNotionPage && } + {currentWebsite && } ) } diff --git a/web/app/components/datasets/create/step-two/index.module.css b/web/app/components/datasets/create/step-two/index.module.css index b5089b5fa..24a62c8e3 100644 --- a/web/app/components/datasets/create/step-two/index.module.css +++ b/web/app/components/datasets/create/step-two/index.module.css @@ -323,6 +323,7 @@ } .sourceContent { + width: 0; flex: 1 1 auto; } diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 988b4798a..ad7bab978 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -12,7 +12,7 @@ import RetrievalMethodInfo from '../../common/retrieval-method-info' import PreviewItem, { PreviewType } from './preview-item' import LanguageSelect from './language-select' import s from './index.module.css' -import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import { createDocument, createFirstDocument, @@ -44,6 +44,7 @@ import TooltipPlus from '@/app/components/base/tooltip-plus' import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { LanguagesSupported } from '@/i18n/language' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' +import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel' type ValueOf = T[keyof T] type StepTwoProps = { @@ -56,6 +57,9 @@ type StepTwoProps = { dataSourceType: DataSourceType files: CustomFile[] notionPages?: NotionPage[] + websitePages?: CrawlResultItem[] + crawlOptions?: CrawlOptions + fireCrawlJobId?: string onStepChange?: (delta: number) => void updateIndexingTypeCache?: (type: string) => void updateResultCache?: (res: createDocumentResponse) => void @@ -79,9 +83,12 @@ const StepTwo = ({ onSetting, datasetId, indexingType, - dataSourceType, + dataSourceType: inCreatePageDataSourceType, files, notionPages = [], + websitePages = [], + crawlOptions, + fireCrawlJobId = '', onStepChange, updateIndexingTypeCache, updateResultCache, @@ -94,6 +101,8 @@ const StepTwo = ({ const isMobile = media === MediaType.mobile const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() + const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) + const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type const scrollRef = useRef(null) const [scrolled, setScrolled] = useState(false) const previewScrollRef = useRef(null) @@ -242,6 +251,15 @@ const StepTwo = ({ }) as NotionInfo[] } + const getWebsiteInfo = () => { + return { + provider: 'firecrawl', + job_id: fireCrawlJobId, + urls: websitePages.map(page => page.source_url), + only_main_content: crawlOptions?.only_main_content, + } + } + const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => { if (dataSourceType === DataSourceType.FILE) { return { @@ -271,6 +289,19 @@ const StepTwo = ({ dataset_id: datasetId as string, } } + if (dataSourceType === DataSourceType.WEB) { + return { + info_list: { + data_source_type: dataSourceType, + website_info_list: getWebsiteInfo(), + }, + indexing_technique: getIndexing_technique() as string, + process_rule: getProcessRule(), + doc_form: docForm, + doc_language: docLanguage, + dataset_id: datasetId as string, + } + } } const { modelList: rerankModelList, @@ -335,6 +366,9 @@ const StepTwo = ({ } if (dataSourceType === DataSourceType.NOTION) params.data_source.info_list.notion_info_list = getNotionInfo() + + if (dataSourceType === DataSourceType.WEB) + params.data_source.info_list.website_info_list = getWebsiteInfo() } return params } @@ -819,6 +853,22 @@ const StepTwo = ({ )} + {dataSourceType === DataSourceType.WEB && ( + <> +
{t('datasetCreation.stepTwo.websiteSource')}
+
+ + {websitePages[0].source_url} + {websitePages.length > 1 && ( + + {t('datasetCreation.stepTwo.other')} + {websitePages.length - 1} + {t('datasetCreation.stepTwo.webpageUnit')} + + )} +
+ + )}
diff --git a/web/app/components/datasets/create/website/firecrawl/base/checkbox-with-label.tsx b/web/app/components/datasets/create/website/firecrawl/base/checkbox-with-label.tsx new file mode 100644 index 000000000..ed5d2efd5 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/base/checkbox-with-label.tsx @@ -0,0 +1,29 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import cn from 'classnames' +import Checkbox from '@/app/components/base/checkbox' + +type Props = { + className?: string + isChecked: boolean + onChange: (isChecked: boolean) => void + label: string + labelClassName?: string +} + +const CheckboxWithLabel: FC = ({ + className = '', + isChecked, + onChange, + label, + labelClassName, +}) => { + return ( + + ) +} +export default React.memo(CheckboxWithLabel) diff --git a/web/app/components/datasets/create/website/firecrawl/base/error-message.tsx b/web/app/components/datasets/create/website/firecrawl/base/error-message.tsx new file mode 100644 index 000000000..3af234e09 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/base/error-message.tsx @@ -0,0 +1,30 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import cn from 'classnames' +import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback' + +type Props = { + className?: string + title: string + errorMsg?: string +} + +const ErrorMessage: FC = ({ + className, + title, + errorMsg, +}) => { + return ( +
+
+ +
{title}
+
+ {errorMsg && ( +
{errorMsg}
+ )} +
+ ) +} +export default React.memo(ErrorMessage) diff --git a/web/app/components/datasets/create/website/firecrawl/base/field.tsx b/web/app/components/datasets/create/website/firecrawl/base/field.tsx new file mode 100644 index 000000000..6d31b2a4d --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/base/field.tsx @@ -0,0 +1,54 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import cn from 'classnames' +import Input from './input' +import TooltipPlus from '@/app/components/base/tooltip-plus' +import { HelpCircle } from '@/app/components/base/icons/src/vender/line/general' + +type Props = { + className?: string + label: string + labelClassName?: string + value: string | number + onChange: (value: string | number) => void + isRequired?: boolean + placeholder?: string + isNumber?: boolean + tooltip?: string +} + +const Field: FC = ({ + className, + label, + labelClassName, + value, + onChange, + isRequired = false, + placeholder = '', + isNumber = false, + tooltip, +}) => { + return ( +
+
+
{label}
+ {isRequired && *} + {tooltip && ( + {tooltip}
+ }> + + + )} +
+ +
+ ) +} +export default React.memo(Field) diff --git a/web/app/components/datasets/create/website/firecrawl/base/input.tsx b/web/app/components/datasets/create/website/firecrawl/base/input.tsx new file mode 100644 index 000000000..06249f57e --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/base/input.tsx @@ -0,0 +1,58 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' + +type Props = { + value: string | number + onChange: (value: string | number) => void + placeholder?: string + isNumber?: boolean +} + +const MIN_VALUE = 1 + +const Input: FC = ({ + value, + onChange, + placeholder = '', + isNumber = false, +}) => { + const handleChange = useCallback((e: React.ChangeEvent) => { + const value = e.target.value + if (isNumber) { + let numberValue = parseInt(value, 10) // integer only + if (isNaN(numberValue)) { + onChange('') + return + } + if (numberValue < MIN_VALUE) + numberValue = MIN_VALUE + + onChange(numberValue) + return + } + onChange(value) + }, [isNumber, onChange]) + + const otherOption = (() => { + if (isNumber) { + return { + min: MIN_VALUE, + } + } + return { + + } + })() + return ( + + ) +} +export default React.memo(Input) diff --git a/web/app/components/datasets/create/website/firecrawl/base/options-wrap.tsx b/web/app/components/datasets/create/website/firecrawl/base/options-wrap.tsx new file mode 100644 index 000000000..ca58fe6ca --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/base/options-wrap.tsx @@ -0,0 +1,55 @@ +'use client' +import { useBoolean } from 'ahooks' +import type { FC } from 'react' +import React, { useEffect } from 'react' +import { useTranslation } from 'react-i18next' +import cn from 'classnames' +import { Settings04 } from '@/app/components/base/icons/src/vender/line/general' +import { ChevronRight } from '@/app/components/base/icons/src/vender/line/arrows' +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + className?: string + children: React.ReactNode + controlFoldOptions?: number +} + +const OptionsWrap: FC = ({ + className = '', + children, + controlFoldOptions, +}) => { + const { t } = useTranslation() + + const [fold, { + toggle: foldToggle, + setTrue: foldHide, + }] = useBoolean(false) + + useEffect(() => { + if (controlFoldOptions) + foldHide() + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [controlFoldOptions]) + return ( +
+
+
+ +
{t(`${I18N_PREFIX}.options`)}
+
+ +
+ {!fold && ( +
+ {children} +
+ )} + +
+ ) +} +export default React.memo(OptionsWrap) diff --git a/web/app/components/datasets/create/website/firecrawl/base/url-input.tsx b/web/app/components/datasets/create/website/firecrawl/base/url-input.tsx new file mode 100644 index 000000000..2240a378d --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/base/url-input.tsx @@ -0,0 +1,48 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useState } from 'react' +import { useTranslation } from 'react-i18next' +import Input from './input' +import Button from '@/app/components/base/button' + +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + isRunning: boolean + onRun: (url: string) => void +} + +const UrlInput: FC = ({ + isRunning, + onRun, +}) => { + const { t } = useTranslation() + const [url, setUrl] = useState('') + const handleUrlChange = useCallback((url: string | number) => { + setUrl(url as string) + }, []) + const handleOnRun = useCallback(() => { + if (isRunning) + return + onRun(url) + }, [isRunning, onRun, url]) + + return ( +
+ + +
+ ) +} +export default React.memo(UrlInput) diff --git a/web/app/components/datasets/create/website/firecrawl/crawled-result-item.tsx b/web/app/components/datasets/create/website/firecrawl/crawled-result-item.tsx new file mode 100644 index 000000000..1730314b4 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/crawled-result-item.tsx @@ -0,0 +1,40 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { useTranslation } from 'react-i18next' +import cn from 'classnames' +import type { CrawlResultItem as CrawlResultItemType } from '@/models/datasets' +import Checkbox from '@/app/components/base/checkbox' + +type Props = { + payload: CrawlResultItemType + isChecked: boolean + isPreview: boolean + onCheckChange: (checked: boolean) => void + onPreview: () => void +} + +const CrawledResultItem: FC = ({ + isPreview, + payload, + isChecked, + onCheckChange, + onPreview, +}) => { + const { t } = useTranslation() + + const handleCheckChange = useCallback(() => { + onCheckChange(!isChecked) + }, [isChecked, onCheckChange]) + return ( +
+
+ +
{payload.title}
+
{t('datasetCreation.stepOne.website.preview')}
+
+
{payload.source_url}
+
+ ) +} +export default React.memo(CrawledResultItem) diff --git a/web/app/components/datasets/create/website/firecrawl/crawled-result.tsx b/web/app/components/datasets/create/website/firecrawl/crawled-result.tsx new file mode 100644 index 000000000..ebda7952d --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/crawled-result.tsx @@ -0,0 +1,87 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { useTranslation } from 'react-i18next' +import cn from 'classnames' +import CheckboxWithLabel from './base/checkbox-with-label' +import CrawledResultItem from './crawled-result-item' +import type { CrawlResultItem } from '@/models/datasets' + +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + className?: string + list: CrawlResultItem[] + checkedList: CrawlResultItem[] + onSelectedChange: (selected: CrawlResultItem[]) => void + onPreview: (payload: CrawlResultItem) => void + usedTime: number +} + +const CrawledResult: FC = ({ + className = '', + list, + checkedList, + onSelectedChange, + onPreview, + usedTime, +}) => { + const { t } = useTranslation() + + const isCheckAll = checkedList.length === list.length + + const handleCheckedAll = useCallback(() => { + if (!isCheckAll) + onSelectedChange(list) + + else + onSelectedChange([]) + }, [isCheckAll, list, onSelectedChange]) + + const handleItemCheckChange = useCallback((item: CrawlResultItem) => { + return (checked: boolean) => { + if (checked) + onSelectedChange([...checkedList, item]) + + else + onSelectedChange(checkedList.filter(checkedItem => checkedItem.source_url !== item.source_url)) + } + }, [checkedList, onSelectedChange]) + + const [previewIndex, setPreviewIndex] = React.useState(-1) + const handlePreview = useCallback((index: number) => { + return () => { + setPreviewIndex(index) + onPreview(list[index]) + } + }, [list, onPreview]) + + return ( +
+
+ +
{t(`${I18N_PREFIX}.scrapTimeInfo`, { + total: list.length, + time: usedTime.toFixed(1), + })}
+
+
+ {list.map((item, index) => ( + checkedItem.source_url === item.source_url)} + onCheckChange={handleItemCheckChange(item)} + /> + ))} +
+
+ ) +} +export default React.memo(CrawledResult) diff --git a/web/app/components/datasets/create/website/firecrawl/crawling.tsx b/web/app/components/datasets/create/website/firecrawl/crawling.tsx new file mode 100644 index 000000000..97b2b01d2 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/crawling.tsx @@ -0,0 +1,37 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import cn from 'classnames' +import { useTranslation } from 'react-i18next' +import { RowStruct } from '@/app/components/base/icons/src/public/other' + +type Props = { + className?: string + crawledNum: number + totalNum: number +} + +const Crawling: FC = ({ + className = '', + crawledNum, + totalNum, +}) => { + const { t } = useTranslation() + + return ( +
+
+ {t('datasetCreation.stepOne.website.totalPageScraped')} {crawledNum}/{totalNum} +
+ +
+ {['', '', '', ''].map((item, index) => ( +
+ +
+ ))} +
+
+ ) +} +export default React.memo(Crawling) diff --git a/web/app/components/datasets/create/website/firecrawl/header.tsx b/web/app/components/datasets/create/website/firecrawl/header.tsx new file mode 100644 index 000000000..c06b81612 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/header.tsx @@ -0,0 +1,42 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import { Settings01 } from '@/app/components/base/icons/src/vender/line/general' +import { BookOpen01 } from '@/app/components/base/icons/src/vender/line/education' + +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + onSetting: () => void +} + +const Header: FC = ({ + onSetting, +}) => { + const { t } = useTranslation() + + return ( +
+
+
{t(`${I18N_PREFIX}.firecrawlTitle`)}
+
+
+ +
+
+ + + {t(`${I18N_PREFIX}.firecrawlDoc`)} + +
+ ) +} +export default React.memo(Header) diff --git a/web/app/components/datasets/create/website/firecrawl/index.tsx b/web/app/components/datasets/create/website/firecrawl/index.tsx new file mode 100644 index 000000000..bdd99b6dc --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/index.tsx @@ -0,0 +1,216 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useEffect, useState } from 'react' +import { useTranslation } from 'react-i18next' +import cn from 'classnames' +import Header from './header' +import UrlInput from './base/url-input' +import OptionsWrap from './base/options-wrap' +import Options from './options' +import CrawledResult from './crawled-result' +import Crawling from './crawling' +import ErrorMessage from './base/error-message' +import { useModalContext } from '@/context/modal-context' +import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' +import Toast from '@/app/components/base/toast' +import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets' +import { sleep } from '@/utils' + +const ERROR_I18N_PREFIX = 'common.errorMsg' +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + onPreview: (payload: CrawlResultItem) => void + checkedCrawlResult: CrawlResultItem[] + onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void + onJobIdChange: (jobId: string) => void + crawlOptions: CrawlOptions + onCrawlOptionsChange: (payload: CrawlOptions) => void +} + +enum Step { + init = 'init', + running = 'running', + finished = 'finished', +} + +const FireCrawl: FC = ({ + onPreview, + checkedCrawlResult, + onCheckedCrawlResultChange, + onJobIdChange, + crawlOptions, + onCrawlOptionsChange, +}) => { + const { t } = useTranslation() + const [step, setStep] = useState(Step.init) + const [controlFoldOptions, setControlFoldOptions] = useState(0) + useEffect(() => { + if (step !== Step.init) + setControlFoldOptions(Date.now()) + }, [step]) + const { setShowAccountSettingModal } = useModalContext() + const handleSetting = useCallback(() => { + setShowAccountSettingModal({ + payload: 'data-source', + }) + }, [setShowAccountSettingModal]) + + const checkValid = useCallback((url: string) => { + let errorMsg = '' + if (!url) { + errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, { + field: 'url', + }) + } + + if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://')))) + errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`) + + if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) { + errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, { + field: t(`${I18N_PREFIX}.limit`), + }) + } + + return { + isValid: !errorMsg, + errorMsg, + } + }, [crawlOptions, t]) + + const isInit = step === Step.init + const isCrawlFinished = step === Step.finished + const isRunning = step === Step.running + const [crawlResult, setCrawlResult] = useState<{ + current: number + total: number + data: CrawlResultItem[] + time_consuming: number | string + } | undefined>(undefined) + const [crawlErrorMessage, setCrawlErrorMessage] = useState('') + const showError = isCrawlFinished && crawlErrorMessage + + const waitForCrawlFinished = useCallback(async (jobId: string) => { + try { + const res = await checkFirecrawlTaskStatus(jobId) as any + if (res.status === 'completed') { + return { + isError: false, + data: { + ...res, + total: Math.min(res.total, parseFloat(crawlOptions.limit as string)), + }, + } + } + if (res.status === 'error' || !res.status) { + // can't get the error message from the firecrawl api + return { + isError: true, + errorMessage: res.message, + data: { + data: [], + }, + } + } + // update the progress + setCrawlResult({ + ...res, + total: Math.min(res.total, parseFloat(crawlOptions.limit as string)), + }) + await sleep(2500) + return await waitForCrawlFinished(jobId) + } + catch (e: any) { + const errorBody = await e.json() + return { + isError: true, + errorMessage: errorBody.message, + data: { + data: [], + }, + } + } + }, [crawlOptions.limit]) + + const handleRun = useCallback(async (url: string) => { + const { isValid, errorMsg } = checkValid(url) + if (!isValid) { + Toast.notify({ + message: errorMsg!, + type: 'error', + }) + return + } + setStep(Step.running) + try { + const passToServerCrawlOptions: any = { + ...crawlOptions, + } + if (crawlOptions.max_depth === '') + delete passToServerCrawlOptions.max_depth + + const res = await createFirecrawlTask({ + url, + options: passToServerCrawlOptions, + }) as any + const jobId = res.job_id + onJobIdChange(jobId) + const { isError, data, errorMessage } = await waitForCrawlFinished(jobId) + if (isError) { + setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`)) + } + else { + setCrawlResult(data) + setCrawlErrorMessage('') + } + } + catch (e) { + setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!) + console.log(e) + } + finally { + setStep(Step.finished) + } + }, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished]) + + return ( +
+
+
+ + + + + + {!isInit && ( +
+ {isRunning + && } + {showError && ( + + )} + {isCrawlFinished && !showError + && + } +
+ )} +
+
+ ) +} +export default React.memo(FireCrawl) diff --git a/web/app/components/datasets/create/website/firecrawl/mock-crawl-result.ts b/web/app/components/datasets/create/website/firecrawl/mock-crawl-result.ts new file mode 100644 index 000000000..8fd5e6636 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/mock-crawl-result.ts @@ -0,0 +1,24 @@ +import type { CrawlResultItem } from '@/models/datasets' + +const result: CrawlResultItem[] = [ + { + title: 'Start the frontend Docker container separately', + markdown: 'Markdown 1', + description: 'Description 1', + source_url: 'https://example.com/1', + }, + { + title: 'Advanced Tool Integration', + markdown: 'Markdown 2', + description: 'Description 2', + source_url: 'https://example.com/2', + }, + { + title: 'Local Source Code Start | English | Dify', + markdown: 'Markdown 3', + description: 'Description 3', + source_url: 'https://example.com/3', + }, +] + +export default result diff --git a/web/app/components/datasets/create/website/firecrawl/options.tsx b/web/app/components/datasets/create/website/firecrawl/options.tsx new file mode 100644 index 000000000..a06671105 --- /dev/null +++ b/web/app/components/datasets/create/website/firecrawl/options.tsx @@ -0,0 +1,83 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import cn from 'classnames' +import { useTranslation } from 'react-i18next' +import CheckboxWithLabel from './base/checkbox-with-label' +import Field from './base/field' +import type { CrawlOptions } from '@/models/datasets' + +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + className?: string + payload: CrawlOptions + onChange: (payload: CrawlOptions) => void +} + +const Options: FC = ({ + className = '', + payload, + onChange, +}) => { + const { t } = useTranslation() + + const handleChange = useCallback((key: keyof CrawlOptions) => { + return (value: any) => { + onChange({ + ...payload, + [key]: value, + }) + } + }, [payload, onChange]) + return ( +
+ +
+ + +
+ +
+ + +
+ +
+ ) +} +export default React.memo(Options) diff --git a/web/app/components/datasets/create/website/index.tsx b/web/app/components/datasets/create/website/index.tsx new file mode 100644 index 000000000..14ac40163 --- /dev/null +++ b/web/app/components/datasets/create/website/index.tsx @@ -0,0 +1,72 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useEffect, useState } from 'react' +import NoData from './no-data' +import Firecrawl from './firecrawl' +import { useModalContext } from '@/context/modal-context' +import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' +import { fetchFirecrawlApiKey } from '@/service/datasets' +import { type DataSourceWebsiteItem, WebsiteProvider } from '@/models/common' + +type Props = { + onPreview: (payload: CrawlResultItem) => void + checkedCrawlResult: CrawlResultItem[] + onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void + onJobIdChange: (jobId: string) => void + crawlOptions: CrawlOptions + onCrawlOptionsChange: (payload: CrawlOptions) => void +} + +const Website: FC = ({ + onPreview, + checkedCrawlResult, + onCheckedCrawlResultChange, + onJobIdChange, + crawlOptions, + onCrawlOptionsChange, +}) => { + const { setShowAccountSettingModal } = useModalContext() + const [isLoaded, setIsLoaded] = useState(false) + const [isSetFirecrawlApiKey, setIsSetFirecrawlApiKey] = useState(false) + const checkSetApiKey = useCallback(async () => { + const res = await fetchFirecrawlApiKey() as any + const list = res.settings.filter((item: DataSourceWebsiteItem) => item.provider === WebsiteProvider.fireCrawl && !item.disabled) + setIsSetFirecrawlApiKey(list.length > 0) + }, []) + + useEffect(() => { + checkSetApiKey().then(() => { + setIsLoaded(true) + }) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) + const handleOnConfig = useCallback(() => { + setShowAccountSettingModal({ + payload: 'data-source', + onCancelCallback: checkSetApiKey, + }) + }, [checkSetApiKey, setShowAccountSettingModal]) + + if (!isLoaded) + return null + + return ( +
+ {isSetFirecrawlApiKey + ? ( + + ) + : ( + + )} +
+ ) +} +export default React.memo(Website) diff --git a/web/app/components/datasets/create/website/no-data.tsx b/web/app/components/datasets/create/website/no-data.tsx new file mode 100644 index 000000000..35eb35f70 --- /dev/null +++ b/web/app/components/datasets/create/website/no-data.tsx @@ -0,0 +1,36 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' +import Button from '@/app/components/base/button' + +const I18N_PREFIX = 'datasetCreation.stepOne.website' + +type Props = { + onConfig: () => void +} + +const NoData: FC = ({ + onConfig, +}) => { + const { t } = useTranslation() + + return ( +
+
+ 🔥 +
+
+ {t(`${I18N_PREFIX}.fireCrawlNotConfigured`)} +
+ {t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`)} +
+
+ +
+ ) +} +export default React.memo(NoData) diff --git a/web/app/components/datasets/create/website/preview.tsx b/web/app/components/datasets/create/website/preview.tsx new file mode 100644 index 000000000..322ce43b1 --- /dev/null +++ b/web/app/components/datasets/create/website/preview.tsx @@ -0,0 +1,41 @@ +'use client' +import React from 'react' +import { useTranslation } from 'react-i18next' +import cn from 'classnames' +import { XMarkIcon } from '@heroicons/react/20/solid' +import s from '../file-preview/index.module.css' +import type { CrawlResultItem } from '@/models/datasets' + +type IProps = { + payload: CrawlResultItem + hidePreview: () => void +} + +const WebsitePreview = ({ + payload, + hidePreview, +}: IProps) => { + const { t } = useTranslation() + + return ( +
+
+
+ {t('datasetCreation.stepOne.pagePreview')} +
+ +
+
+
+ {payload.title} +
+
{payload.source_url}
+
+
+
{payload.markdown}
+
+
+ ) +} + +export default WebsitePreview diff --git a/web/app/components/datasets/documents/detail/settings/index.tsx b/web/app/components/datasets/documents/detail/settings/index.tsx index cab0c5d40..def00ad37 100644 --- a/web/app/components/datasets/documents/detail/settings/index.tsx +++ b/web/app/components/datasets/documents/detail/settings/index.tsx @@ -73,6 +73,16 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { datasetId={datasetId} dataSourceType={documentDetail.data_source_type} notionPages={[currentPage]} + websitePages={[ + { + title: documentDetail.name, + source_url: documentDetail.data_source_info?.url, + markdown: '', + description: '', + }, + ]} + fireCrawlJobId={documentDetail.data_source_info?.job_id} + crawlOptions={documentDetail.data_source_info} indexingType={indexingTechnique || ''} isSetting documentDetail={documentDetail} diff --git a/web/app/components/datasets/documents/index.tsx b/web/app/components/datasets/documents/index.tsx index cf62d8852..192c81df7 100644 --- a/web/app/components/datasets/documents/index.tsx +++ b/web/app/components/datasets/documents/index.tsx @@ -83,6 +83,8 @@ const Documents: FC = ({ datasetId }) => { const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false) const [timerCanRun, setTimerCanRun] = useState(true) const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION + const isDataSourceWeb = dataset?.data_source_type === DataSourceType.WEB + const isDataSourceFile = dataset?.data_source_type === DataSourceType.FILE const embeddingAvailable = !!dataset?.embedding_available const query = useMemo(() => { @@ -211,7 +213,8 @@ const Documents: FC = ({ datasetId }) => { )}
diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 4566287fb..732f2a53d 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -13,6 +13,7 @@ import cn from 'classnames' import dayjs from 'dayjs' import { Edit03 } from '../../base/icons/src/vender/solid/general' import TooltipPlus from '../../base/tooltip-plus' +import { Globe01 } from '../../base/icons/src/vender/line/mapsAndTravel' import s from './style.module.css' import RenameModal from './rename-modal' import Switch from '@/app/components/base/switch' @@ -26,7 +27,7 @@ import type { IndicatorProps } from '@/app/components/header/indicator' import Indicator from '@/app/components/header/indicator' import { asyncRunSafe } from '@/utils' import { formatNumber } from '@/utils/format' -import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, unArchiveDocument } from '@/service/datasets' +import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets' import NotionIcon from '@/app/components/base/notion-icon' import ProgressBar from '@/app/components/base/progress-bar' import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' @@ -146,7 +147,12 @@ export const OperationAction: FC<{ opApi = disableDocument break case 'sync': - opApi = syncDocument + if (data_source_type === 'notion_import') + opApi = syncDocument + + else + opApi = syncWebsite + break default: opApi = deleteDocument @@ -249,7 +255,7 @@ export const OperationAction: FC<{ {t('datasetDocuments.list.action.settings')} - {data_source_type === 'notion_import' && ( + {['notion_import', DataSourceType.WEB].includes(data_source_type) && (
onOperate('sync')}> {t('datasetDocuments.list.action.sync')} @@ -282,7 +288,7 @@ export const OperationAction: FC<{
} btnClassName={open => cn(isListScene ? s.actionIconWrapperList : s.actionIconWrapperDetail, open ? '!bg-gray-100 !shadow-none' : '!bg-transparent')} - className={`!w-[200px] h-fit !z-20 ${className}`} + className={`flex justify-end !w-[200px] h-fit !z-20 ${className}`} /> )} {showModal && setShowModal(false)} className={s.delModal} closable> @@ -418,10 +424,10 @@ const DocumentList: FC = ({ embeddingAvailable, documents =
- { - doc?.data_source_type === DataSourceType.NOTION - ? - :
+ {doc?.data_source_type === DataSourceType.NOTION && + } + {doc?.data_source_type === DataSourceType.FILE &&
} + {doc?.data_source_type === DataSourceType.WEB && } { doc.name diff --git a/web/app/components/header/account-setting/data-source-page/data-source-notion/index.tsx b/web/app/components/header/account-setting/data-source-page/data-source-notion/index.tsx index 8f7242145..f5541999a 100644 --- a/web/app/components/header/account-setting/data-source-page/data-source-notion/index.tsx +++ b/web/app/components/header/account-setting/data-source-page/data-source-notion/index.tsx @@ -1,23 +1,34 @@ -import { useEffect, useState } from 'react' +'use client' +import type { FC } from 'react' +import React, { useEffect, useState } from 'react' import useSWR from 'swr' -import { useTranslation } from 'react-i18next' -import { PlusIcon } from '@heroicons/react/24/solid' -import cn from 'classnames' -import Indicator from '../../../indicator' -import Operate from './operate' -import s from './style.module.css' -import NotionIcon from '@/app/components/base/notion-icon' +import Panel from '../panel' +import { DataSourceType } from '../panel/types' import type { DataSourceNotion as TDataSourceNotion } from '@/models/common' import { useAppContext } from '@/context/app-context' import { fetchNotionConnection } from '@/service/common' +import NotionIcon from '@/app/components/base/notion-icon' -type DataSourceNotionProps = { +const Icon: FC<{ + src: string + name: string + className: string +}> = ({ src, name, className }) => { + return ( + + ) +} +type Props = { workspaces: TDataSourceNotion[] } -const DataSourceNotion = ({ + +const DataSourceNotion: FC = ({ workspaces, -}: DataSourceNotionProps) => { - const { t } = useTranslation() +}) => { const { isCurrentWorkspaceManager } = useAppContext() const [canConnectNotion, setCanConnectNotion] = useState(false) const { data } = useSWR(canConnectNotion ? '/oauth/data-source/notion' : null, fetchNotionConnection) @@ -42,95 +53,32 @@ const DataSourceNotion = ({ if (data?.data) window.location.href = data.data }, [data]) - return ( -
-
-
-
-
- {t('common.dataSource.notion.title')} -
- { - !connected && ( -
- {t('common.dataSource.notion.description')} -
- ) - } -
- { - connected - ? ( -
- {t('common.dataSource.connect')} -
- ) - : ( -
- - {t('common.dataSource.notion.addWorkspace')} -
- ) - } -
- { - connected && ( -
-
- {t('common.dataSource.notion.connectedWorkspace')} -
-
-
- ) - } - { - connected && ( -
- { - workspaces.map(workspace => ( -
- -
{workspace.source_info.workspace_name}
- { - workspace.is_bound - ? - : - } -
- { - workspace.is_bound - ? t('common.dataSource.notion.connected') - : t('common.dataSource.notion.disconnected') - } -
-
- -
- )) - } -
- ) - } -
+ ({ + id: workspace.id, + logo: ({ className }: { className: string }) => ( + ), + name: workspace.source_info.workspace_name, + isActive: workspace.is_bound, + notionConfig: { + total: workspace.source_info.total || 0, + }, + }))} + onRemove={() => { }} // handled in operation/index.tsx + notionActions={{ + onChangeAuthorizedPage: handleAuthAgain, + }} + /> ) } - -export default DataSourceNotion +export default React.memo(DataSourceNotion) diff --git a/web/app/components/header/account-setting/data-source-page/data-source-notion/operate/index.tsx b/web/app/components/header/account-setting/data-source-page/data-source-notion/operate/index.tsx index e115034ff..7b20e5e0a 100644 --- a/web/app/components/header/account-setting/data-source-page/data-source-notion/operate/index.tsx +++ b/web/app/components/header/account-setting/data-source-page/data-source-notion/operate/index.tsx @@ -6,17 +6,19 @@ import { EllipsisHorizontalIcon } from '@heroicons/react/24/solid' import { Menu, Transition } from '@headlessui/react' import { syncDataSourceNotion, updateDataSourceNotionAction } from '@/service/common' import Toast from '@/app/components/base/toast' -import type { DataSourceNotion } from '@/models/common' import { FilePlus02 } from '@/app/components/base/icons/src/vender/line/files' import { RefreshCw05 } from '@/app/components/base/icons/src/vender/line/arrows' import { Trash03 } from '@/app/components/base/icons/src/vender/line/general' type OperateProps = { - workspace: DataSourceNotion + payload: { + id: string + total: number + } onAuthAgain: () => void } export default function Operate({ - workspace, + payload, onAuthAgain, }: OperateProps) { const itemClassName = ` @@ -37,11 +39,11 @@ export default function Operate({ mutate({ url: 'data-source/integrates' }) } const handleSync = async () => { - await syncDataSourceNotion({ url: `/oauth/data-source/notion/${workspace.id}/sync` }) + await syncDataSourceNotion({ url: `/oauth/data-source/notion/${payload.id}/sync` }) updateIntegrates() } const handleRemove = async () => { - await updateDataSourceNotionAction({ url: `/data-source/integrates/${workspace.id}/disable` }) + await updateDataSourceNotionAction({ url: `/data-source/integrates/${payload.id}/disable` }) updateIntegrates() } @@ -79,7 +81,7 @@ export default function Operate({
{t('common.dataSource.notion.changeAuthorizedPages')}
- {workspace.source_info.total} {t('common.dataSource.notion.pagesAuthorized')} + {payload.total} {t('common.dataSource.notion.pagesAuthorized')}
diff --git a/web/app/components/header/account-setting/data-source-page/data-source-website/config-firecrawl-modal.tsx b/web/app/components/header/account-setting/data-source-page/data-source-website/config-firecrawl-modal.tsx new file mode 100644 index 000000000..21277c8ec --- /dev/null +++ b/web/app/components/header/account-setting/data-source-page/data-source-website/config-firecrawl-modal.tsx @@ -0,0 +1,163 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useState } from 'react' +import { useTranslation } from 'react-i18next' +import { + PortalToFollowElem, + PortalToFollowElemContent, +} from '@/app/components/base/portal-to-follow-elem' +import { Lock01 } from '@/app/components/base/icons/src/vender/solid/security' +import Button from '@/app/components/base/button' +import type { FirecrawlConfig } from '@/models/common' +import Field from '@/app/components/datasets/create/website/firecrawl/base/field' +import Toast from '@/app/components/base/toast' +import { createFirecrawlApiKey } from '@/service/datasets' +import { LinkExternal02 } from '@/app/components/base/icons/src/vender/line/general' +type Props = { + onCancel: () => void + onSaved: () => void +} + +const I18N_PREFIX = 'datasetCreation.firecrawl' + +const DEFAULT_BASE_URL = 'https://api.firecrawl.dev' + +const ConfigFirecrawlModal: FC = ({ + onCancel, + onSaved, +}) => { + const { t } = useTranslation() + const [isSaving, setIsSaving] = useState(false) + const [config, setConfig] = useState({ + api_key: '', + base_url: '', + }) + + const handleConfigChange = useCallback((key: string) => { + return (value: string | number) => { + setConfig(prev => ({ ...prev, [key]: value as string })) + } + }, []) + + const handleSave = useCallback(async () => { + if (isSaving) + return + let errorMsg = '' + if (config.base_url && !((config.base_url.startsWith('http://') || config.base_url.startsWith('https://')))) + errorMsg = t('common.errorMsg.urlError') + if (!errorMsg) { + if (!config.api_key) { + errorMsg = t('common.errorMsg.fieldRequired', { + field: 'API Key', + }) + } + else if (!config.api_key.startsWith('fc-')) { + errorMsg = t(`${I18N_PREFIX}.apiKeyFormatError`) + } + } + + if (errorMsg) { + Toast.notify({ + type: 'error', + message: errorMsg, + }) + return + } + const postData = { + category: 'website', + provider: 'firecrawl', + credentials: { + auth_type: 'bearer', + config: { + api_key: config.api_key, + base_url: config.base_url || DEFAULT_BASE_URL, + }, + }, + } + try { + setIsSaving(true) + await createFirecrawlApiKey(postData) + Toast.notify({ + type: 'success', + message: t('common.api.success'), + }) + } + finally { + setIsSaving(false) + } + + onSaved() + }, [config.api_key, config.base_url, onSaved, t, isSaving]) + + return ( + + +
+
+
+
+
{t(`${I18N_PREFIX}.configFirecrawl`)}
+
+ +
+ + +
+
+ + {t(`${I18N_PREFIX}.getApiKeyLinkText`)} + + +
+ + +
+ +
+
+
+
+ + {t('common.modelProvider.encrypted.front')} + + PKCS1_OAEP + + {t('common.modelProvider.encrypted.back')} +
+
+
+
+
+
+ ) +} +export default React.memo(ConfigFirecrawlModal) diff --git a/web/app/components/header/account-setting/data-source-page/data-source-website/index.tsx b/web/app/components/header/account-setting/data-source-page/data-source-website/index.tsx new file mode 100644 index 000000000..b6ac22436 --- /dev/null +++ b/web/app/components/header/account-setting/data-source-page/data-source-website/index.tsx @@ -0,0 +1,82 @@ +'use client' +import type { FC } from 'react' +import React, { useCallback, useEffect, useState } from 'react' +import { useTranslation } from 'react-i18next' +import { useBoolean } from 'ahooks' +import cn from 'classnames' +import Panel from '../panel' +import { DataSourceType } from '../panel/types' +import ConfigFirecrawlModal from './config-firecrawl-modal' +import { fetchFirecrawlApiKey, removeFirecrawlApiKey } from '@/service/datasets' + +import type { + DataSourceWebsiteItem, +} from '@/models/common' +import { useAppContext } from '@/context/app-context' + +import { + WebsiteProvider, +} from '@/models/common' +import Toast from '@/app/components/base/toast' + +type Props = {} + +const DataSourceWebsite: FC = () => { + const { t } = useTranslation() + const { isCurrentWorkspaceManager } = useAppContext() + const [list, setList] = useState([]) + const checkSetApiKey = useCallback(async () => { + const res = await fetchFirecrawlApiKey() as any + const list = res.settings.filter((item: DataSourceWebsiteItem) => item.provider === WebsiteProvider.fireCrawl && !item.disabled) + setList(list) + }, []) + + useEffect(() => { + checkSetApiKey() + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) + + const [isShowConfig, { + setTrue: showConfig, + setFalse: hideConfig, + }] = useBoolean(false) + + const handleAdded = useCallback(() => { + checkSetApiKey() + hideConfig() + }, [checkSetApiKey, hideConfig]) + + const handleRemove = useCallback(async () => { + await removeFirecrawlApiKey(list[0].id) + setList([]) + Toast.notify({ + type: 'success', + message: t('common.api.remove'), + }) + }, [list, t]) + + return ( + <> + 0} + onConfigure={showConfig} + readonly={!isCurrentWorkspaceManager} + configuredList={list.map(item => ({ + id: item.id, + logo: ({ className }: { className: string }) => ( +
🔥
+ ), + name: 'FireCrawl', + isActive: true, + }))} + onRemove={handleRemove} + /> + {isShowConfig && ( + + )} + + + ) +} +export default React.memo(DataSourceWebsite) diff --git a/web/app/components/header/account-setting/data-source-page/index.tsx b/web/app/components/header/account-setting/data-source-page/index.tsx index 761d9cbfe..ede83152b 100644 --- a/web/app/components/header/account-setting/data-source-page/index.tsx +++ b/web/app/components/header/account-setting/data-source-page/index.tsx @@ -1,6 +1,7 @@ import useSWR from 'swr' import { useTranslation } from 'react-i18next' import DataSourceNotion from './data-source-notion' +import DataSourceWebsite from './data-source-website' import { fetchDataSource } from '@/service/common' export default function DataSourcePage() { @@ -12,6 +13,7 @@ export default function DataSourcePage() {
{t('common.dataSource.add')}
+
) } diff --git a/web/app/components/header/account-setting/data-source-page/panel/config-item.tsx b/web/app/components/header/account-setting/data-source-page/panel/config-item.tsx new file mode 100644 index 000000000..376c4aea7 --- /dev/null +++ b/web/app/components/header/account-setting/data-source-page/panel/config-item.tsx @@ -0,0 +1,78 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import cn from 'classnames' +import Indicator from '../../../indicator' +import Operate from '../data-source-notion/operate' +import { DataSourceType } from './types' +import s from './style.module.css' +import { Trash03 } from '@/app/components/base/icons/src/vender/line/general' + +export type ConfigItemType = { + id: string + logo: any + name: string + isActive: boolean + notionConfig?: { + total: number + } +} + +type Props = { + type: DataSourceType + payload: ConfigItemType + onRemove: () => void + notionActions?: { + onChangeAuthorizedPage: () => void + } +} + +const ConfigItem: FC = ({ + type, + payload, + onRemove, + notionActions, +}) => { + const { t } = useTranslation() + const isNotion = type === DataSourceType.notion + const isWebsite = type === DataSourceType.website + const onChangeAuthorizedPage = notionActions?.onChangeAuthorizedPage || function () { } + + return ( +
+ +
{payload.name}
+ { + payload.isActive + ? + : + } +
+ { + payload.isActive + ? t(isNotion ? 'common.dataSource.notion.connected' : 'common.dataSource.website.active') + : t(isNotion ? 'common.dataSource.notion.disconnected' : 'common.dataSource.website.inactive') + } +
+
+ {isNotion && ( + + )} + + { + isWebsite && ( +
+ +
+ ) + } + +
+ ) +} +export default React.memo(ConfigItem) diff --git a/web/app/components/header/account-setting/data-source-page/panel/index.tsx b/web/app/components/header/account-setting/data-source-page/panel/index.tsx new file mode 100644 index 000000000..b0f6f4ad1 --- /dev/null +++ b/web/app/components/header/account-setting/data-source-page/panel/index.tsx @@ -0,0 +1,138 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import { PlusIcon } from '@heroicons/react/24/solid' +import cn from 'classnames' +import type { ConfigItemType } from './config-item' +import ConfigItem from './config-item' + +import s from './style.module.css' +import { DataSourceType } from './types' + +type Props = { + type: DataSourceType + isConfigured: boolean + onConfigure: () => void + readonly: boolean + isSupportList?: boolean + configuredList: ConfigItemType[] + onRemove: () => void + notionActions?: { + onChangeAuthorizedPage: () => void + } +} + +const Panel: FC = ({ + type, + isConfigured, + onConfigure, + readonly, + configuredList, + isSupportList, + onRemove, + notionActions, +}) => { + const { t } = useTranslation() + const isNotion = type === DataSourceType.notion + const isWebsite = type === DataSourceType.website + + return ( +
+
+
+
+
+
{t(`common.dataSource.${type}.title`)}
+ {isWebsite && ( +
+ {t('common.dataSource.website.with')} 🔥 FireCrawl +
+ )} +
+ { + !isConfigured && ( +
+ {t(`common.dataSource.${type}.description`)} +
+ ) + } +
+ {isNotion && ( + <> + { + isConfigured + ? ( +
+ {t('common.dataSource.configure')} +
+ ) + : ( + <> + {isSupportList &&
+ + {t('common.dataSource.notion.addWorkspace')} +
} + + ) + } + + )} + + {isWebsite && !isConfigured && ( +
+ {t('common.dataSource.configure')} +
+ )} + +
+ { + isConfigured && ( +
+
+ {isNotion ? t('common.dataSource.notion.connectedWorkspace') : t('common.dataSource.website.configuredCrawlers')} +
+
+
+ ) + } + { + isConfigured && ( +
+ { + configuredList.map(item => ( + + )) + } +
+ ) + } +
+ ) +} +export default React.memo(Panel) diff --git a/web/app/components/header/account-setting/data-source-page/data-source-notion/style.module.css b/web/app/components/header/account-setting/data-source-page/panel/style.module.css similarity index 63% rename from web/app/components/header/account-setting/data-source-page/data-source-notion/style.module.css rename to web/app/components/header/account-setting/data-source-page/panel/style.module.css index ede323072..a11d4758f 100644 --- a/web/app/components/header/account-setting/data-source-page/data-source-notion/style.module.css +++ b/web/app/components/header/account-setting/data-source-page/panel/style.module.css @@ -3,6 +3,11 @@ background-size: 20px 20px; } +.website-icon { + background: #ffffff url(../../../../datasets/create/assets/web.svg) center center no-repeat; + background-size: 20px 20px; +} + .workspace-item { box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); } diff --git a/web/app/components/header/account-setting/data-source-page/panel/types.ts b/web/app/components/header/account-setting/data-source-page/panel/types.ts new file mode 100644 index 000000000..345bc10f8 --- /dev/null +++ b/web/app/components/header/account-setting/data-source-page/panel/types.ts @@ -0,0 +1,4 @@ +export enum DataSourceType { + notion = 'notion', + website = 'website', +} diff --git a/web/i18n/en-US/common.ts b/web/i18n/en-US/common.ts index d978329eb..299edc480 100644 --- a/web/i18n/en-US/common.ts +++ b/web/i18n/en-US/common.ts @@ -37,6 +37,10 @@ const translation = { duplicate: 'Duplicate', rename: 'Rename', }, + errorMsg: { + fieldRequired: '{{field}} is required', + urlError: 'url should start with http:// or https://', + }, placeholder: { input: 'Please enter', select: 'Please select', @@ -360,6 +364,7 @@ const translation = { dataSource: { add: 'Add a data source', connect: 'Connect', + configure: 'Configure', notion: { title: 'Notion', description: 'Using Notion as a data source for the Knowledge.', @@ -379,6 +384,14 @@ const translation = { preview: 'PREVIEW', }, }, + website: { + title: 'Website', + description: 'Import content from websites using web crawler.', + with: 'With', + configuredCrawlers: 'Configured crawlers', + active: 'Active', + inactive: 'Inactive', + }, }, plugin: { serpapi: { diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts index c315b4c4a..ff884f1c1 100644 --- a/web/i18n/en-US/dataset-creation.ts +++ b/web/i18n/en-US/dataset-creation.ts @@ -11,6 +11,12 @@ const translation = { error: { unavailable: 'This Knowledge is not available', }, + firecrawl: { + configFirecrawl: 'Configure 🔥Firecrawl', + apiKeyPlaceholder: 'API key from firecrawl.dev, starting with "fc-"', + apiKeyFormatError: 'API key should start with "fc-"', + getApiKeyLinkText: 'Get your API key from firecrawl.dev', + }, stepOne: { filePreview: 'File Preview', pagePreview: 'Page Preview', @@ -50,6 +56,30 @@ const translation = { confirmButton: 'Create', failed: 'Creation failed', }, + website: { + fireCrawlNotConfigured: 'Firecrawl is not configured', + fireCrawlNotConfiguredDescription: 'Configure Firecrawl with API key to use it.', + configure: 'Configure', + run: 'Run', + firecrawlTitle: 'Extract web content with 🔥Firecrawl', + firecrawlDoc: 'Firecrawl docs', + firecrawlDocLink: 'https://docs.dify.ai/guides/knowledge-base/sync_from_website', + options: 'Options', + crawlSubPage: 'Crawl sub-pages', + limit: 'Limit', + maxDepth: 'Max depth', + excludePaths: 'Exclude paths', + includeOnlyPaths: 'Include only paths', + extractOnlyMainContent: 'Extract only main content (no headers, navs, footers, etc.)', + exceptionErrorTitle: 'An exception occurred while running Firecrawl job:', + unknownError: 'Unknown error', + totalPageScraped: 'Total pages scraped:', + selectAll: 'Select All', + resetAll: 'Reset All', + scrapTimeInfo: 'Scraped {{total}} pages in total within {{time}}s', + preview: 'Preview', + maxDepthTooltip: 'Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on.', + }, }, stepTwo: { segmentation: 'Chunk settings', @@ -86,9 +116,11 @@ const translation = { calculating: 'Calculating...', fileSource: 'Preprocess documents', notionSource: 'Preprocess pages', + websiteSource: 'Preprocess website', other: 'and other ', fileUnit: ' files', notionUnit: ' pages', + webpageUnit: ' pages', previousStep: 'Previous step', nextStep: 'Save & Process', save: 'Save & Process', diff --git a/web/i18n/en-US/dataset-documents.ts b/web/i18n/en-US/dataset-documents.ts index b43196532..8988b9dc1 100644 --- a/web/i18n/en-US/dataset-documents.ts +++ b/web/i18n/en-US/dataset-documents.ts @@ -2,8 +2,9 @@ const translation = { list: { title: 'Documents', desc: 'All files of the Knowledge are shown here, and the entire Knowledge can be linked to Dify citations or indexed via the Chat plugin.', - addFile: 'add file', + addFile: 'Add file', addPages: 'Add Pages', + addUrl: 'Add URL', table: { header: { fileName: 'FILE NAME', diff --git a/web/i18n/zh-Hans/common.ts b/web/i18n/zh-Hans/common.ts index a819f9eeb..03eb111db 100644 --- a/web/i18n/zh-Hans/common.ts +++ b/web/i18n/zh-Hans/common.ts @@ -37,6 +37,10 @@ const translation = { duplicate: '复制', rename: '重命名', }, + errorMsg: { + fieldRequired: '{{field}} 为必填项', + urlError: 'url 应该以 http:// 或 https:// 开头', + }, placeholder: { input: '请输入', select: '请选择', @@ -356,6 +360,7 @@ const translation = { dataSource: { add: '添加数据源', connect: '绑定', + configure: '配置', notion: { title: 'Notion', description: '使用 Notion 作为知识库的数据源。', @@ -375,6 +380,14 @@ const translation = { preview: '预览', }, }, + website: { + title: '网站', + description: '使用网络爬虫从网站导入内容。', + with: '使用', + configuredCrawlers: '已配置的爬虫', + active: '可用', + inactive: '不可用', + }, }, plugin: { serpapi: { diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts index d36850dc3..27a070535 100644 --- a/web/i18n/zh-Hans/dataset-creation.ts +++ b/web/i18n/zh-Hans/dataset-creation.ts @@ -11,6 +11,12 @@ const translation = { error: { unavailable: '该知识库不可用', }, + firecrawl: { + configFirecrawl: '配置 🔥Firecrawl', + apiKeyPlaceholder: '从 firecrawl.dev 获取 API Key,以 "fc-" 开头', + apiKeyFormatError: 'API Key 应以 "fc-" 开头', + getApiKeyLinkText: '从 firecrawl.dev 获取您的 API Key', + }, stepOne: { filePreview: '文件预览', pagePreview: '页面预览', @@ -50,6 +56,30 @@ const translation = { confirmButton: '创建', failed: '创建失败', }, + website: { + fireCrawlNotConfigured: 'Firecrawl 未配置', + fireCrawlNotConfiguredDescription: '请配置 Firecrawl 的 API 密钥以使用它。', + configure: '配置', + run: '运行', + firecrawlTitle: '使用 🔥Firecrawl 提取网页内容', + firecrawlDoc: 'Firecrawl 文档', + firecrawlDocLink: 'https://docs.dify.ai/v/zh-hans/guides/knowledge-base/sync_from_website', + options: '选项', + crawlSubPage: '爬取子页面', + limit: '限制数量', + maxDepth: '最大深度', + excludePaths: '排除路径', + includeOnlyPaths: '仅包含路径', + extractOnlyMainContent: '仅提取主要内容(无标题、导航、页脚等)', + exceptionErrorTitle: '运行 Firecrawl 时发生异常:', + unknownError: '未知错误', + totalPageScraped: '抓取页面总数:', + selectAll: '全选', + resetAll: '重置全部', + scrapTimeInfo: '总共在 {{time}}秒 内抓取了 {{total}} 个页面', + preview: '预览', + maxDepthTooltip: '最大抓取深度。深度 1 表示 Base URL,深度 2 表示 Base URL及其直接子页面,依此类推。', + }, }, stepTwo: { segmentation: '分段设置', @@ -86,9 +116,11 @@ const translation = { calculating: '计算中...', fileSource: '预处理文档', notionSource: '预处理页面', + websiteSource: '预处理页面', other: '和其他 ', fileUnit: ' 个文件', notionUnit: ' 个页面', + webpageUnit: ' 个页面', previousStep: '上一步', nextStep: '保存并处理', save: '保存并处理', diff --git a/web/i18n/zh-Hans/dataset-documents.ts b/web/i18n/zh-Hans/dataset-documents.ts index 9ea5e7aa0..2f68f04d1 100644 --- a/web/i18n/zh-Hans/dataset-documents.ts +++ b/web/i18n/zh-Hans/dataset-documents.ts @@ -4,6 +4,7 @@ const translation = { desc: '知识库的所有文件都在这里显示,整个知识库都可以链接到 Dify 引用或通过 Chat 插件进行索引。', addFile: '添加文件', addPages: '添加页面', + addUrl: '添加 URL', table: { header: { fileName: '文件名', diff --git a/web/models/common.ts b/web/models/common.ts index f4fe99bf9..730cfee05 100644 --- a/web/models/common.ts +++ b/web/models/common.ts @@ -172,6 +172,39 @@ export type DataSourceNotion = { source_info: DataSourceNotionWorkspace } +export enum DataSourceCategory { + website = 'website', +} +export enum WebsiteProvider { + fireCrawl = 'firecrawl', +} + +export type WebsiteCredentials = { + auth_type: 'bearer' + config: { + base_url: string + api_key: string + } +} + +export type FirecrawlConfig = { + api_key: string + base_url: string +} + +export type DataSourceWebsiteItem = { + id: string + category: DataSourceCategory.website + provider: WebsiteProvider + credentials: WebsiteCredentials + disabled: boolean + created_at: number + updated_at: number +} +export type DataSourceWebsite = { + settings: DataSourceWebsiteItem[] +} + export type GithubRepo = { stargazers_count: number } diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 1f02a4318..a28798ba6 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -5,7 +5,7 @@ import type { Tag } from '@/app/components/base/tag-management/constant' export enum DataSourceType { FILE = 'upload_file', NOTION = 'notion_import', - WEB = 'web_import', + WEB = 'website_crawl', } export type DataSet = { @@ -39,6 +39,22 @@ export type CustomFile = File & { created_at?: number } +export type CrawlOptions = { + crawl_sub_pages: boolean + only_main_content: boolean + includes: string + excludes: string + limit: number | string + max_depth: number | string +} + +export type CrawlResultItem = { + title: string + markdown: string + description: string + source_url: string +} + export type FileItem = { fileID: string file: CustomFile @@ -149,6 +165,8 @@ export type DataSourceInfo = { extension: string } notion_page_icon?: string + job_id: string + url: string } export type InitialDocumentDetail = { @@ -219,6 +237,11 @@ export type DataSource = { file_info_list?: { file_ids: string[] } + website_info_list?: { + provider: string + job_id: string + urls: string[] + } } } diff --git a/web/service/datasets.ts b/web/service/datasets.ts index 302b16f6f..a382ee8ec 100644 --- a/web/service/datasets.ts +++ b/web/service/datasets.ts @@ -152,6 +152,10 @@ export const syncDocument: Fetcher = ({ datasetId, return get(`/datasets/${datasetId}/documents/${documentId}/notion/sync`) } +export const syncWebsite: Fetcher = ({ datasetId, documentId }) => { + return get(`/datasets/${datasetId}/documents/${documentId}/website-sync`) +} + export const preImportNotionPages: Fetcher<{ notion_info: DataSourceNotionWorkspace[] }, { url: string; datasetId?: string }> = ({ url, datasetId }) => { return get<{ notion_info: DataSourceNotionWorkspace[] }>(url, { params: { dataset_id: datasetId } }) } @@ -227,6 +231,37 @@ export const fetchDatasetApiBaseUrl: Fetcher<{ api_base_url: string }, string> = return get<{ api_base_url: string }>(url) } +export const fetchFirecrawlApiKey = () => { + return get('api-key-auth/data-source') +} + +export const createFirecrawlApiKey: Fetcher> = (body) => { + return post('api-key-auth/data-source/binding', { body }) +} + +export const removeFirecrawlApiKey: Fetcher = (id: string) => { + return del(`api-key-auth/data-source/${id}`) +} + +export const createFirecrawlTask: Fetcher> = (body) => { + return post('website/crawl', { + body: { + ...body, + provider: 'firecrawl', + }, + }) +} + +export const checkFirecrawlTaskStatus: Fetcher = (jobId: string) => { + return get(`website/crawl/status/${jobId}`, { + params: { + provider: 'firecrawl', + }, + }, { + silent: true, + }) +} + type FileTypesRes = { allowed_extensions: string[] }