Feat/parent child retrieval (#12086)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: AkaraChen <akarachen@outlook.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Warren Chen <warren.chen830@gmail.com>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: Yi Xiao <54782454+YIXIAO0@users.noreply.github.com>
Co-authored-by: yihong <zouzou0208@gmail.com>
Co-authored-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: KVOJJJin <jzongcode@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com>
Co-authored-by: Charlie.Wei <luowei@cvte.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: huayaoyue6 <huayaoyue@163.com>
Co-authored-by: kurokobo <kuro664@gmail.com>
Co-authored-by: Matsuda <yiyth.fcb6@gmail.com>
Co-authored-by: shirochan <s.yusuke0711@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: Huỳnh Gia Bôi <boihuynh147@gmail.com>
Co-authored-by: Julian Huynh <julian.huynh@immersio.io>
Co-authored-by: Hash Brown <hi@xzd.me>
Co-authored-by: 非法操作 <hjlarry@163.com>
Co-authored-by: Kazuki Takamatsu <kazuki.takamatsu@chowagiken.co.jp>
Co-authored-by: Trey Dong <1346650911@qq.com>
Co-authored-by: VoidIsVoid <343750470@qq.com>
Co-authored-by: Gimling <huangjl@ruyi.ai>
Co-authored-by: xiandan-erizo <xiandan.erizo@gmail.com>
Co-authored-by: Muneyuki Noguchi <nogu.dev@gmail.com>
Co-authored-by: zhaobingshuang <1475195565@qq.com>
Co-authored-by: zhaobs <zhaobs@cailian.net>
Co-authored-by: suzuki.sh <s2terminal@users.noreply.github.com>
Co-authored-by: Yingchun Lai <laiyingchun@apache.org>
Co-authored-by: huanshare <huanshare@live.com>
Co-authored-by: huanshare <liuhuan101@longfor.com>
Co-authored-by: orangeclk <orangeclk@users.noreply.github.com>
Co-authored-by: 문정현 <120004247+JungHyunMoon@users.noreply.github.com>
Co-authored-by: barabicu <kztk533@gmail.com>
Co-authored-by: Wei Mingzhi <whistler_wmz@users.sf.net>
Co-authored-by: Paul van Oorschot <20116814+pvoo@users.noreply.github.com>
Co-authored-by: zkyTech <zhangkunyuan@hotmail.com>
Co-authored-by: zhangkunyuan <zhangkunyuan@cmhi.chinamobile.com>
Co-authored-by: Tommy <34446820+Asterovim@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Novice <857526207@qq.com>
Co-authored-by: Novice Lee <novicelee@NovicedeMacBook-Pro.local>
Co-authored-by: Novice Lee <novicelee@NoviPro.local>
Co-authored-by: zxhlyh <16177003+zxhlyh@users.noreply.github.com>
Co-authored-by: liuzhenghua <1090179900@qq.com>
Co-authored-by: Jiang <65766008+AlwaysBluer@users.noreply.github.com>
Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com>
Co-authored-by: Joe <79627742+ZhouhaoJiang@users.noreply.github.com>
Co-authored-by: Alok Shrivastwa <alok.shrivastwa@gmail.com>
Co-authored-by: Alok Shrivastwa <Alok.Shrivastwa@microland.com>
Co-authored-by: JasonVV <jasonwangiii@outlook.com>
Co-authored-by: Hiroshi Fujita <fujita-h@users.noreply.github.com>
Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com>
Co-authored-by: NFish <douxc512@gmail.com>
Co-authored-by: Junyan Qin <1010553892@qq.com>
Co-authored-by: IWAI, Masaharu <iwaim.sub@gmail.com>
Co-authored-by: IWAI, Masaharu <iwai_masaharu@funkit.co.jp>
Co-authored-by: Bowen Liang <liangbowen@gf.com.cn>
Co-authored-by: luckylhb90 <luckylhb90@gmail.com>
Co-authored-by: hobo.l <hobo.l@binance.com>
Co-authored-by: douxc <7553076+douxc@users.noreply.github.com>
This commit is contained in:
Wu Tianwei
2024-12-25 18:17:15 +08:00
committed by GitHub
parent 39ace9bdee
commit 2b2263a349
216 changed files with 9066 additions and 3116 deletions

View File

@@ -14,24 +14,7 @@
border-radius: 6px;
overflow: hidden;
}
.sourceItem.error {
background: #FEE4E2;
}
.sourceItem.success {
background: #D1FADF;
}
.progressbar {
position: absolute;
top: 0;
left: 0;
height: 100%;
background-color: #B2CCFF;
}
.sourceItem .info {
display: flex;
align-items: center;
z-index: 1;
}
.sourceItem .info .name {
font-weight: 500;
font-size: 12px;
@@ -55,13 +38,6 @@
color: #05603A;
}
.cost {
@apply flex justify-between items-center text-xs text-gray-700;
}
.embeddingStatus {
@apply flex items-center justify-between text-gray-900 font-medium text-sm mr-2;
}
.commonIcon {
@apply w-3 h-3 mr-1 inline-block align-middle;
}
@@ -81,35 +57,33 @@
@apply text-xs font-medium;
}
.fileIcon {
@apply w-4 h-4 mr-1 bg-center bg-no-repeat;
.unknownFileIcon {
background-image: url(../assets/unknown.svg);
background-size: 16px;
}
.fileIcon.csv {
.csv {
background-image: url(../assets/csv.svg);
}
.fileIcon.docx {
.docx {
background-image: url(../assets/docx.svg);
}
.fileIcon.xlsx,
.fileIcon.xls {
.xlsx,
.xls {
background-image: url(../assets/xlsx.svg);
}
.fileIcon.pdf {
.pdf {
background-image: url(../assets/pdf.svg);
}
.fileIcon.html,
.fileIcon.htm {
.html,
.htm {
background-image: url(../assets/html.svg);
}
.fileIcon.md,
.fileIcon.markdown {
.md,
.markdown {
background-image: url(../assets/md.svg);
}
.fileIcon.txt {
.txt {
background-image: url(../assets/txt.svg);
}
.fileIcon.json {
.json {
background-image: url(../assets/json.svg);
}

View File

@@ -6,32 +6,44 @@ import { useTranslation } from 'react-i18next'
import { omit } from 'lodash-es'
import { ArrowRightIcon } from '@heroicons/react/24/solid'
import {
RiCheckboxCircleFill,
RiErrorWarningFill,
RiLoader2Fill,
RiTerminalBoxLine,
} from '@remixicon/react'
import s from './index.module.css'
import Image from 'next/image'
import { indexMethodIcon, retrievalIcon } from '../icons'
import { IndexingType } from '../step-two'
import DocumentFileIcon from '../../common/document-file-icon'
import cn from '@/utils/classnames'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import Button from '@/app/components/base/button'
import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets'
import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets'
import { DataSourceType } from '@/models/datasets'
import { DataSourceType, ProcessMode } from '@/models/datasets'
import NotionIcon from '@/app/components/base/notion-icon'
import PriorityLabel from '@/app/components/billing/priority-label'
import { Plan } from '@/app/components/billing/type'
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
import { useProviderContext } from '@/context/provider-context'
import Tooltip from '@/app/components/base/tooltip'
import { sleep } from '@/utils'
import { RETRIEVE_METHOD } from '@/types/app'
import Tooltip from '@/app/components/base/tooltip'
type Props = {
datasetId: string
batchId: string
documents?: FullDocumentDetail[]
indexingType?: string
retrievalMethod?: string
}
const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => {
const RuleDetail: FC<{
sourceData?: ProcessRuleResponse
indexingType?: string
retrievalMethod?: string
}> = ({ sourceData, indexingType, retrievalMethod }) => {
const { t } = useTranslation()
const segmentationRuleMap = {
@@ -51,29 +63,47 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) =>
return t('datasetCreation.stepTwo.removeStopwords')
}
// Runtime check used to decide whether a rule value can be shown as-is:
// true only when `typeof value` is 'number'.
const isNumber = (value: unknown) => typeof value === 'number'
/**
 * Resolve the text displayed for one row of the rule summary.
 *
 * @param field - Row key. `'mode'` and `'segmentLength'` are handled explicitly; any other
 *   key yields the names of the enabled pre-processing rules.
 * @returns The display value, or `'-'` when `sourceData.mode` is not available or the
 *   requested value is missing.
 */
const getValue = useCallback((field: string) => {
  // Shown whenever the process rule is not loaded yet or a value is absent.
  const placeholder = '-'

  // Without a mode there is nothing meaningful to show for any row.
  if (!sourceData?.mode)
    return placeholder

  const parentLimit = sourceData.rules?.segmentation?.max_tokens
  const childLimit = sourceData.rules?.subchunk_segmentation?.max_tokens
  const maxTokens = isNumber(parentLimit) ? parentLimit : placeholder
  const childMaxTokens = isNumber(childLimit) ? childLimit : placeholder
  const isGeneralMode = sourceData.mode === ProcessMode.general

  switch (field) {
    case 'mode': {
      if (isGeneralMode)
        return t('datasetDocuments.embedding.custom') as string
      const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
        ? t('dataset.parentMode.paragraph')
        : t('dataset.parentMode.fullDoc')
      return `${t('datasetDocuments.embedding.hierarchical')} · ${parentModeLabel}`
    }
    case 'segmentLength':
      return isGeneralMode
        ? maxTokens
        : `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}`
    default:
      // `pre_processing_rules` may be absent; fall back to the placeholder so the caller's
      // `String(...)` never renders the literal text "undefined".
      return sourceData.rules?.pre_processing_rules
        ?.filter(rule => rule.enabled)
        .map(rule => getRuleName(rule.id))
        .join(',') ?? placeholder
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sourceData])
return <div className='flex flex-col pt-8 pb-10 first:mt-0'>
return <div className='flex flex-col gap-1'>
{Object.keys(segmentationRuleMap).map((field) => {
return <FieldInfo
key={field}
@@ -81,10 +111,43 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) =>
displayedValue={String(getValue(field))}
/>
})}
<FieldInfo
label={t('datasetCreation.stepTwo.indexMode')}
displayedValue={t(`datasetCreation.stepTwo.${indexingType === IndexingType.ECONOMICAL ? 'economical' : 'qualified'}`) as string}
valueIcon={
<Image
className='size-4'
src={
indexingType === IndexingType.ECONOMICAL
? indexMethodIcon.economical
: indexMethodIcon.high_quality
}
alt=''
/>
}
/>
<FieldInfo
label={t('datasetSettings.form.retrievalSetting.title')}
// displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
displayedValue={t(`dataset.retrieval.${indexingType === IndexingType.ECONOMICAL ? 'invertedIndex' : retrievalMethod}.title`) as string}
valueIcon={
<Image
className='size-4'
src={
retrievalMethod === RETRIEVE_METHOD.fullText
? retrievalIcon.fullText
: retrievalMethod === RETRIEVE_METHOD.hybrid
? retrievalIcon.hybrid
: retrievalIcon.vector
}
alt=''
/>
}
/>
</div>
}
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType }) => {
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
const { t } = useTranslation()
const { enableBilling, plan } = useProviderContext()
@@ -127,6 +190,7 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
}
useEffect(() => {
setIsStopQuery(false)
startQueryStatus()
return () => {
stopQueryStatus()
@@ -146,6 +210,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
// Send the user to the document list of the dataset identified by `datasetId`.
const navToDocumentList = () => {
  const documentListPath = `/datasets/${datasetId}/documents`
  router.push(documentListPath)
}
// Send the user to the datasets page with the `api` category selected via query string.
const navToApiDocs = () => {
  const apiCategoryPath = '/datasets?category=api'
  router.push(apiCategoryPath)
}
const isEmbedding = useMemo(() => {
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
@@ -177,13 +244,17 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
return doc?.data_source_info.notion_page_icon
}
// A source counts as "embedding" while its indexing status is one of the in-flight states
// listed below; a missing status is treated as the empty string and therefore never matches.
// Declared once: a second `const` with the same name in this scope is a redeclaration error.
const isSourceEmbedding = (detail: IndexingStatusResponse) =>
  ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
return (
<>
<div className='h-5 flex items-center mb-5'>
<div className={s.embeddingStatus}>
{isEmbedding && t('datasetDocuments.embedding.processing')}
<div className="h-5 flex items-center mb-3">
<div className="flex items-center justify-between text-gray-900 font-medium text-sm mr-2">
{isEmbedding && <div className='flex items-center'>
<RiLoader2Fill className='size-4 mr-1 animate-spin' />
{t('datasetDocuments.embedding.processing')}
</div>}
{isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
</div>
</div>
@@ -200,69 +271,80 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
</div>
)
}
<div className={s.progressContainer}>
<div className="flex flex-col gap-0.5 pb-2">
{indexingStatusBatchDetail.map(indexingStatusDetail => (
<div key={indexingStatusDetail.id} className={cn(
s.sourceItem,
indexingStatusDetail.indexing_status === 'error' && s.error,
indexingStatusDetail.indexing_status === 'completed' && s.success,
'relative h-[26px] bg-components-progress-bar-bg rounded-md overflow-hidden',
indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
// indexingStatusDetail.indexing_status === 'completed' && 's.success',
)}>
{isSourceEmbedding(indexingStatusDetail) && (
<div className={s.progressbar} style={{ width: `${getSourcePercent(indexingStatusDetail)}%` }} />
<div className="absolute top-0 left-0 h-full min-w-0.5 bg-components-progress-bar-progress border-r-[2px] border-r-components-progress-bar-progress-highlight" style={{ width: `${getSourcePercent(indexingStatusDetail)}%` }} />
)}
<div className={`${s.info} grow`}>
<div className="flex gap-1 pl-[6px] pr-2 h-full items-center z-[1]">
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && (
<div className={cn(s.fileIcon, s[getFileType(getSourceName(indexingStatusDetail.id))])} />
// <div className={cn(
// 'shrink-0 marker:size-4 bg-center bg-no-repeat bg-contain',
// s[getFileType(getSourceName(indexingStatusDetail.id))] || s.unknownFileIcon,
// )} />
<DocumentFileIcon
className="shrink-0 size-4"
name={getSourceName(indexingStatusDetail.id)}
extension={getFileType(getSourceName(indexingStatusDetail.id))}
/>
)}
{getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && (
<NotionIcon
className='shrink-0 mr-1'
className='shrink-0'
type='page'
src={getIcon(indexingStatusDetail.id)}
/>
)}
<div className={`${s.name} truncate`} title={getSourceName(indexingStatusDetail.id)}>{getSourceName(indexingStatusDetail.id)}</div>
{
enableBilling && (
<PriorityLabel />
)
}
</div>
<div className='shrink-0'>
<div className="grow flex items-center gap-1 w-0" title={getSourceName(indexingStatusDetail.id)}>
<div className="text-xs truncate">
{getSourceName(indexingStatusDetail.id)}
</div>
{
enableBilling && (
<PriorityLabel className='ml-0' />
)
}
</div>
{isSourceEmbedding(indexingStatusDetail) && (
<div className={s.percent}>{`${getSourcePercent(indexingStatusDetail)}%`}</div>
<div className="shrink-0 text-xs">{`${getSourcePercent(indexingStatusDetail)}%`}</div>
)}
{indexingStatusDetail.indexing_status === 'error' && indexingStatusDetail.error && (
{indexingStatusDetail.indexing_status === 'error' && (
<Tooltip
popupContent={(
<div className='max-w-[400px]'>
{indexingStatusDetail.error}
</div>
)}
popupClassName='px-4 py-[14px] max-w-60 text-sm leading-4 text-text-secondary border-[0.5px] border-components-panel-border rounded-xl'
offset={4}
popupContent={indexingStatusDetail.error}
>
<div className={cn(s.percent, s.error, 'flex items-center')}>
Error
<RiErrorWarningFill className='ml-1 w-4 h-4' />
</div>
<span>
<RiErrorWarningFill className='shrink-0 size-4 text-text-destructive' />
</span>
</Tooltip>
)}
{indexingStatusDetail.indexing_status === 'error' && !indexingStatusDetail.error && (
<div className={cn(s.percent, s.error, 'flex items-center')}>
Error
</div>
)}
{indexingStatusDetail.indexing_status === 'completed' && (
<div className={cn(s.percent, s.success)}>100%</div>
<RiCheckboxCircleFill className='shrink-0 size-4 text-text-success' />
)}
</div>
</div>
))}
</div>
<RuleDetail sourceData={ruleDetail} />
<div className='flex items-center gap-2 mt-10'>
<hr className="my-3 h-[1px] bg-divider-subtle border-0" />
<RuleDetail
sourceData={ruleDetail}
indexingType={indexingType}
retrievalMethod={retrievalMethod}
/>
<div className='flex items-center gap-2 my-10'>
<Button className='w-fit' onClick={navToApiDocs}>
<RiTerminalBoxLine className='size-4 mr-2' />
<span>Access the API</span>
</Button>
<Button className='w-fit' variant='primary' onClick={navToDocumentList}>
<span>{t('datasetCreation.stepThree.navTo')}</span>
<ArrowRightIcon className='h-4 w-4 ml-2 stroke-current stroke-1' />
<ArrowRightIcon className='size-4 ml-2 stroke-current stroke-1' />
</Button>
</div>
</>