mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-07 18:06:52 +08:00
Feat/parent child retrieval (#12086)
Signed-off-by: yihong0618 <zouzou0208@gmail.com> Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: AkaraChen <akarachen@outlook.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Warren Chen <warren.chen830@gmail.com> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: Yi Xiao <54782454+YIXIAO0@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: KVOJJJin <jzongcode@gmail.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com> Co-authored-by: Charlie.Wei <luowei@cvte.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: huayaoyue6 <huayaoyue@163.com> Co-authored-by: kurokobo <kuro664@gmail.com> Co-authored-by: Matsuda <yiyth.fcb6@gmail.com> Co-authored-by: shirochan <s.yusuke0711@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: Huỳnh Gia Bôi <boihuynh147@gmail.com> Co-authored-by: Julian Huynh <julian.huynh@immersio.io> Co-authored-by: Hash Brown <hi@xzd.me> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Kazuki Takamatsu <kazuki.takamatsu@chowagiken.co.jp> Co-authored-by: Trey Dong <1346650911@qq.com> Co-authored-by: VoidIsVoid <343750470@qq.com> Co-authored-by: Gimling <huangjl@ruyi.ai> Co-authored-by: xiandan-erizo <xiandan.erizo@gmail.com> Co-authored-by: Muneyuki Noguchi <nogu.dev@gmail.com> Co-authored-by: zhaobingshuang <1475195565@qq.com> Co-authored-by: zhaobs <zhaobs@cailian.net> Co-authored-by: suzuki.sh <s2terminal@users.noreply.github.com> Co-authored-by: Yingchun Lai <laiyingchun@apache.org> Co-authored-by: huanshare <huanshare@live.com> Co-authored-by: huanshare <liuhuan101@longfor.com> Co-authored-by: orangeclk <orangeclk@users.noreply.github.com> Co-authored-by: 문정현 
<120004247+JungHyunMoon@users.noreply.github.com> Co-authored-by: barabicu <kztk533@gmail.com> Co-authored-by: Wei Mingzhi <whistler_wmz@users.sf.net> Co-authored-by: Paul van Oorschot <20116814+pvoo@users.noreply.github.com> Co-authored-by: zkyTech <zhangkunyuan@hotmail.com> Co-authored-by: zhangkunyuan <zhangkunyuan@cmhi.chinamobile.com> Co-authored-by: Tommy <34446820+Asterovim@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Novice <857526207@qq.com> Co-authored-by: Novice Lee <novicelee@NovicedeMacBook-Pro.local> Co-authored-by: Novice Lee <novicelee@NoviPro.local> Co-authored-by: zxhlyh <16177003+zxhlyh@users.noreply.github.com> Co-authored-by: liuzhenghua <1090179900@qq.com> Co-authored-by: Jiang <65766008+AlwaysBluer@users.noreply.github.com> Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com> Co-authored-by: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Co-authored-by: Alok Shrivastwa <alok.shrivastwa@gmail.com> Co-authored-by: Alok Shrivastwa <Alok.Shrivastwa@microland.com> Co-authored-by: JasonVV <jasonwangiii@outlook.com> Co-authored-by: Hiroshi Fujita <fujita-h@users.noreply.github.com> Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com> Co-authored-by: NFish <douxc512@gmail.com> Co-authored-by: Junyan Qin <1010553892@qq.com> Co-authored-by: IWAI, Masaharu <iwaim.sub@gmail.com> Co-authored-by: IWAI, Masaharu <iwai_masaharu@funkit.co.jp> Co-authored-by: Bowen Liang <liangbowen@gf.com.cn> Co-authored-by: luckylhb90 <luckylhb90@gmail.com> Co-authored-by: hobo.l <hobo.l@binance.com> Co-authored-by: douxc <7553076+douxc@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import type { DataSourceNotionPage, DataSourceProvider } from './common'
|
||||
import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
|
||||
import type { Tag } from '@/app/components/base/tag-management/constant'
|
||||
import type { IndexingType } from '@/app/components/datasets/create/step-two'
|
||||
|
||||
export enum DataSourceType {
|
||||
FILE = 'upload_file',
|
||||
@@ -10,6 +11,12 @@ export enum DataSourceType {
|
||||
|
||||
/** Allowed values for a dataset's `permission` field. */
export type DatasetPermission =
  | 'only_me'
  | 'all_team_members'
  | 'partial_members'
|
||||
|
||||
/**
 * Chunking mode of a dataset or document; this is the type of the
 * `doc_form` fields in this file.
 */
export enum ChunkingMode {
  text = 'text_model', // General text
  qa = 'qa_model', // General QA
  parentChild = 'hierarchical_model', // Parent-Child
}
|
||||
|
||||
export type DataSet = {
|
||||
id: string
|
||||
name: string
|
||||
@@ -18,11 +25,12 @@ export type DataSet = {
|
||||
description: string
|
||||
permission: DatasetPermission
|
||||
data_source_type: DataSourceType
|
||||
indexing_technique: 'high_quality' | 'economy'
|
||||
indexing_technique: IndexingType
|
||||
created_by: string
|
||||
updated_by: string
|
||||
updated_at: number
|
||||
app_count: number
|
||||
doc_form: ChunkingMode
|
||||
document_count: number
|
||||
word_count: number
|
||||
provider: string
|
||||
@@ -95,6 +103,12 @@ export type CustomFile = File & {
|
||||
created_at?: number
|
||||
}
|
||||
|
||||
/** Minimal descriptor of a document: its id, display name and extension. */
export type DocumentItem = {
  id: string
  name: string
  extension: string
}
|
||||
|
||||
export type CrawlOptions = {
|
||||
crawl_sub_pages: boolean
|
||||
only_main_content: boolean
|
||||
@@ -144,7 +158,7 @@ export type IndexingEstimateResponse = {
|
||||
total_price: number
|
||||
currency: string
|
||||
total_segments: number
|
||||
preview: string[]
|
||||
preview: Array<{ content: string; child_chunks: string[] }>
|
||||
qa_preview?: QA[]
|
||||
}
|
||||
|
||||
@@ -170,7 +184,12 @@ export type IndexingStatusBatchResponse = {
|
||||
data: IndexingStatusResponse[]
|
||||
}
|
||||
|
||||
/**
 * Value of the `mode` field on process rules.
 *
 * Declared once, as an enum: a type alias and an enum cannot share the
 * same identifier.
 */
export enum ProcessMode {
  // The member is called `general`, but its string value is 'custom'.
  general = 'custom',
  parentChild = 'hierarchical',
}
|
||||
|
||||
/** Allowed values for `Rules.parent_mode`. */
export type ParentMode =
  | 'full-doc'
  | 'paragraph'
|
||||
|
||||
export type ProcessRuleResponse = {
|
||||
mode: ProcessMode
|
||||
@@ -181,6 +200,8 @@ export type ProcessRuleResponse = {
|
||||
/**
 * Rule set carried by a process rule.
 *
 * `parent_mode` and `subchunk_segmentation` are declared as required here,
 * alongside the top-level `segmentation`.
 */
export type Rules = {
  pre_processing_rules: PreProcessingRule[]
  segmentation: Segmentation
  parent_mode: ParentMode
  // Same shape as `segmentation`, applied to sub-chunks.
  subchunk_segmentation: Segmentation
}
|
||||
|
||||
export type Limits = {
|
||||
@@ -195,7 +216,7 @@ export type PreProcessingRule = {
|
||||
/**
 * Segmentation settings: a separator string, a token limit, and an
 * optional overlap.
 */
export type Segmentation = {
  separator: string
  max_tokens: number
  // Declared once, as optional; a second, required declaration of the same
  // property would be a duplicate-identifier error.
  chunk_overlap?: number
}
|
||||
|
||||
export const DocumentIndexingStatusList = [
|
||||
@@ -258,13 +279,14 @@ export type InitialDocumentDetail = {
|
||||
display_status: DocumentDisplayStatus
|
||||
completed_segments?: number
|
||||
total_segments?: number
|
||||
doc_form: 'text_model' | 'qa_model'
|
||||
doc_form: ChunkingMode
|
||||
doc_language: string
|
||||
}
|
||||
|
||||
export type SimpleDocumentDetail = InitialDocumentDetail & {
|
||||
enabled: boolean
|
||||
word_count: number
|
||||
is_qa: boolean // TODO waiting for backend to add this field
|
||||
error?: string | null
|
||||
archived: boolean
|
||||
updated_at: number
|
||||
@@ -289,7 +311,7 @@ export type DocumentListResponse = {
|
||||
/** Request payload describing how a document should be created or re-indexed. */
export type DocumentReq = {
  original_document_id?: string
  indexing_technique?: string
  // Typed with the shared enum; declared exactly once.
  doc_form: ChunkingMode
  doc_language: string
  process_rule: ProcessRule
}
|
||||
@@ -331,7 +353,7 @@ export type NotionPage = {
|
||||
}
|
||||
|
||||
/** A processing mode together with the rule set that goes with it. */
export type ProcessRule = {
  // Declared once, with the enum type rather than a bare string.
  mode: ProcessMode
  rules: Rules
}
|
||||
|
||||
@@ -341,6 +363,11 @@ export type createDocumentResponse = {
|
||||
documents: InitialDocumentDetail[]
|
||||
}
|
||||
|
||||
/**
 * A processing mode plus its rule set.
 *
 * NOTE(review): the name looks like a misspelling of `ProcessRule`, which
 * has the same shape. It is kept unchanged because it is exported and
 * `FullDocumentDetail.dataset_process_rule` is typed with it.
 */
export type PrecessRule = {
  mode: ProcessMode
  rules: Rules
}
|
||||
|
||||
export type FullDocumentDetail = SimpleDocumentDetail & {
|
||||
batch: string
|
||||
created_api_request_id: string
|
||||
@@ -363,6 +390,8 @@ export type FullDocumentDetail = SimpleDocumentDetail & {
|
||||
doc_type?: DocType | null | 'others'
|
||||
doc_metadata?: DocMetadata | null
|
||||
segment_count: number
|
||||
dataset_process_rule: PrecessRule
|
||||
document_process_rule: ProcessRule
|
||||
[key: string]: any
|
||||
}
|
||||
|
||||
@@ -399,12 +428,12 @@ export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing']
|
||||
/** Element type of `SEGMENT_STATUS_LIST`. */
export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
|
||||
|
||||
/**
 * Query parameters for listing segments.
 *
 * Only `limit` is required; every other field is an optional filter or
 * paging parameter.
 */
export type SegmentsQuery = {
  // NOTE(review): typed as string here, while `SegmentsResponse.page` is a
  // number — confirm which one the API expects.
  page?: string
  limit: number
  // status?: SegmentStatus
  hit_count_gte?: number
  keyword?: string
  // Declared once; 'all' is accepted in addition to true/false.
  enabled?: boolean | 'all'
}
|
||||
|
||||
export type SegmentDetailModel = {
|
||||
@@ -429,6 +458,8 @@ export type SegmentDetailModel = {
|
||||
error: string | null
|
||||
stopped_at: number
|
||||
answer?: string
|
||||
child_chunks?: ChildChunkDetail[]
|
||||
updated_at: number
|
||||
}
|
||||
|
||||
export type SegmentsResponse = {
|
||||
@@ -436,6 +467,8 @@ export type SegmentsResponse = {
|
||||
has_more: boolean
|
||||
limit: number
|
||||
total: number
|
||||
total_pages: number
|
||||
page: number
|
||||
}
|
||||
|
||||
export type HitTestingRecord = {
|
||||
@@ -448,10 +481,18 @@ export type HitTestingRecord = {
|
||||
created_at: number
|
||||
}
|
||||
|
||||
/** One child chunk returned with a hit-testing result, including its score. */
export type HitTestingChildChunk = {
  id: string
  content: string
  position: number
  score: number
}
|
||||
/**
 * A single hit-testing result.
 *
 * NOTE(review): `segment` and `content` are both typed `Segment`; how the
 * two differ is not visible from this declaration — confirm against the API.
 */
export type HitTesting = {
  segment: Segment
  content: Segment
  score: number
  tsne_position: TsnePosition
  // May be absent or explicitly null.
  child_chunks?: HitTestingChildChunk[] | null
}
|
||||
|
||||
export type ExternalKnowledgeBaseHitTesting = {
|
||||
@@ -530,11 +571,7 @@ export type SegmentUpdater = {
|
||||
content: string
|
||||
answer?: string
|
||||
keywords?: string[]
|
||||
}
|
||||
|
||||
export enum DocForm {
|
||||
TEXT = 'text_model',
|
||||
QA = 'qa_model',
|
||||
regenerate_child_chunks?: boolean
|
||||
}
|
||||
|
||||
export type ErrorDocsResponse = {
|
||||
@@ -579,3 +616,49 @@ export const DEFAULT_WEIGHTED_SCORE = {
|
||||
keyword: 0.3,
|
||||
},
|
||||
}
|
||||
|
||||
/** Allowed values for `ChildChunkDetail.type`. */
export type ChildChunkType =
  | 'automatic'
  | 'customized'
|
||||
|
||||
/** Full record of one child chunk; `segment_id` refers to the segment it belongs to. */
export type ChildChunkDetail = {
  id: string
  position: number
  segment_id: string
  content: string
  word_count: number
  created_at: number
  updated_at: number
  type: ChildChunkType
}
|
||||
|
||||
/** Paged list of child chunks, with totals and the current page/limit. */
export type ChildSegmentsResponse = {
  data: ChildChunkDetail[]
  total: number
  total_pages: number
  page: number
  limit: number
}
|
||||
|
||||
/** Identifies one document within one dataset. */
export type UpdateDocumentParams = {
  datasetId: string
  documentId: string
}
|
||||
|
||||
/**
 * Actions that can be applied to a document.
 * The string values are used in the API URL.
 */
export enum DocumentActionType {
  enable = 'enable',
  disable = 'disable',
  archive = 'archive',
  // The value is snake_case while the member name is camelCase.
  unArchive = 'un_archive',
  delete = 'delete',
}
|
||||
|
||||
/**
 * Parameters for a document update that may target several documents.
 *
 * Both `documentId` and `documentIds` are optional; `documentIds` accepts
 * either a single id or an array of ids.
 */
export type UpdateDocumentBatchParams = {
  datasetId: string
  documentId?: string
  documentIds?: string[] | string
}
|
||||
|
||||
/** Response to a batch import request: the job's id and its status string. */
export type BatchImportResponse = {
  job_id: string
  job_status: string
}
|
||||
|
||||
Reference in New Issue
Block a user