mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-09 02:46:52 +08:00
Fix/create document by api with metadata (#16307)
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
This commit is contained in:
@@ -47,44 +47,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
<Property name='text' type='string' key='text'>
|
||||
Document content
|
||||
</Property>
|
||||
<Property name='doc_type' type='string' key='doc_type'>
|
||||
Type of document (optional):
|
||||
- <code>book</code> Book
|
||||
- <code>web_page</code> Web page
|
||||
- <code>paper</code> Academic paper/article
|
||||
- <code>social_media_post</code> Social media post
|
||||
- <code>wikipedia_entry</code> Wikipedia entry
|
||||
- <code>personal_document</code> Personal document
|
||||
- <code>business_document</code> Business document
|
||||
- <code>im_chat_log</code> Chat log
|
||||
- <code>synced_from_notion</code> Notion document
|
||||
- <code>synced_from_github</code> GitHub document
|
||||
- <code>others</code> Other document types
|
||||
</Property>
|
||||
<Property name='doc_metadata' type='object' key='doc_metadata'>
|
||||
Document metadata (required if doc_type is provided). Fields vary by doc_type:
|
||||
For <code>book</code>:
|
||||
- <code>title</code> Book title
|
||||
- <code>language</code> Book language
|
||||
- <code>author</code> Book author
|
||||
- <code>publisher</code> Publisher name
|
||||
- <code>publication_date</code> Publication date
|
||||
- <code>isbn</code> ISBN number
|
||||
- <code>category</code> Book category
|
||||
|
||||
For <code>web_page</code>:
|
||||
- <code>title</code> Page title
|
||||
- <code>url</code> Page URL
|
||||
- <code>language</code> Page language
|
||||
- <code>publish_date</code> Publish date
|
||||
- <code>author/publisher</code> Author or publisher
|
||||
- <code>topic/keywords</code> Topic or keywords
|
||||
- <code>description</code> Page description
|
||||
|
||||
Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
|
||||
|
||||
For doc_type "others", any valid JSON object is accepted
|
||||
</Property>
|
||||
<Property name='indexing_technique' type='string' key='indexing_technique'>
|
||||
Index mode
|
||||
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
|
||||
@@ -233,68 +195,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
- <code>hierarchical_model</code> Parent-child mode
|
||||
- <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
|
||||
|
||||
- <code>doc_type</code> Type of document (optional)
|
||||
- <code>book</code> Book
|
||||
Document records a book or publication
|
||||
- <code>web_page</code> Web page
|
||||
Document records web page content
|
||||
- <code>paper</code> Academic paper/article
|
||||
Document records academic paper or research article
|
||||
- <code>social_media_post</code> Social media post
|
||||
Content from social media posts
|
||||
- <code>wikipedia_entry</code> Wikipedia entry
|
||||
Content from Wikipedia entries
|
||||
- <code>personal_document</code> Personal document
|
||||
Documents related to personal content
|
||||
- <code>business_document</code> Business document
|
||||
Documents related to business content
|
||||
- <code>im_chat_log</code> Chat log
|
||||
Records of instant messaging chats
|
||||
- <code>synced_from_notion</code> Notion document
|
||||
Documents synchronized from Notion
|
||||
- <code>synced_from_github</code> GitHub document
|
||||
Documents synchronized from GitHub
|
||||
- <code>others</code> Other document types
|
||||
Other document types not listed above
|
||||
|
||||
- <code>doc_metadata</code> Document metadata (required if doc_type is provided)
|
||||
Fields vary by doc_type:
|
||||
|
||||
For <code>book</code>:
|
||||
- <code>title</code> Book title
|
||||
Title of the book
|
||||
- <code>language</code> Book language
|
||||
Language of the book
|
||||
- <code>author</code> Book author
|
||||
Author of the book
|
||||
- <code>publisher</code> Publisher name
|
||||
Name of the publishing house
|
||||
- <code>publication_date</code> Publication date
|
||||
Date when the book was published
|
||||
- <code>isbn</code> ISBN number
|
||||
International Standard Book Number
|
||||
- <code>category</code> Book category
|
||||
Category or genre of the book
|
||||
|
||||
For <code>web_page</code>:
|
||||
- <code>title</code> Page title
|
||||
Title of the web page
|
||||
- <code>url</code> Page URL
|
||||
URL address of the web page
|
||||
- <code>language</code> Page language
|
||||
Language of the web page
|
||||
- <code>publish_date</code> Publish date
|
||||
Date when the web page was published
|
||||
- <code>author/publisher</code> Author or publisher
|
||||
Author or publisher of the web page
|
||||
- <code>topic/keywords</code> Topic or keywords
|
||||
Topics or keywords of the web page
|
||||
- <code>description</code> Page description
|
||||
Description of the web page content
|
||||
|
||||
Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
|
||||
For doc_type "others", any valid JSON object is accepted
|
||||
|
||||
- <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
|
||||
|
||||
- <code>process_rule</code> Processing rules
|
||||
@@ -407,44 +307,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
<Property name='description' type='string' key='description'>
|
||||
Knowledge description (optional)
|
||||
</Property>
|
||||
<Property name='doc_type' type='string' key='doc_type'>
|
||||
Type of document (optional):
|
||||
- <code>book</code> Book
|
||||
- <code>web_page</code> Web page
|
||||
- <code>paper</code> Academic paper/article
|
||||
- <code>social_media_post</code> Social media post
|
||||
- <code>wikipedia_entry</code> Wikipedia entry
|
||||
- <code>personal_document</code> Personal document
|
||||
- <code>business_document</code> Business document
|
||||
- <code>im_chat_log</code> Chat log
|
||||
- <code>synced_from_notion</code> Notion document
|
||||
- <code>synced_from_github</code> GitHub document
|
||||
- <code>others</code> Other document types
|
||||
</Property>
|
||||
<Property name='doc_metadata' type='object' key='doc_metadata'>
|
||||
Document metadata (required if doc_type is provided). Fields vary by doc_type:
|
||||
For <code>book</code>:
|
||||
- <code>title</code> Book title
|
||||
- <code>language</code> Book language
|
||||
- <code>author</code> Book author
|
||||
- <code>publisher</code> Publisher name
|
||||
- <code>publication_date</code> Publication date
|
||||
- <code>isbn</code> ISBN number
|
||||
- <code>category</code> Book category
|
||||
|
||||
For <code>web_page</code>:
|
||||
- <code>title</code> Page title
|
||||
- <code>url</code> Page URL
|
||||
- <code>language</code> Page language
|
||||
- <code>publish_date</code> Publish date
|
||||
- <code>author/publisher</code> Author or publisher
|
||||
- <code>topic/keywords</code> Topic or keywords
|
||||
- <code>description</code> Page description
|
||||
|
||||
Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
|
||||
|
||||
For doc_type "others", any valid JSON object is accepted
|
||||
</Property>
|
||||
<Property name='indexing_technique' type='string' key='indexing_technique'>
|
||||
Index technique (optional)
|
||||
- <code>high_quality</code> High quality
|
||||
@@ -762,67 +624,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
- <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
|
||||
- <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
|
||||
- <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
|
||||
- <code>doc_type</code> Type of document (optional)
|
||||
- <code>book</code> Book
|
||||
Document records a book or publication
|
||||
- <code>web_page</code> Web page
|
||||
Document records web page content
|
||||
- <code>paper</code> Academic paper/article
|
||||
Document records academic paper or research article
|
||||
- <code>social_media_post</code> Social media post
|
||||
Content from social media posts
|
||||
- <code>wikipedia_entry</code> Wikipedia entry
|
||||
Content from Wikipedia entries
|
||||
- <code>personal_document</code> Personal document
|
||||
Documents related to personal content
|
||||
- <code>business_document</code> Business document
|
||||
Documents related to business content
|
||||
- <code>im_chat_log</code> Chat log
|
||||
Records of instant messaging chats
|
||||
- <code>synced_from_notion</code> Notion document
|
||||
Documents synchronized from Notion
|
||||
- <code>synced_from_github</code> GitHub document
|
||||
Documents synchronized from GitHub
|
||||
- <code>others</code> Other document types
|
||||
Other document types not listed above
|
||||
|
||||
- <code>doc_metadata</code> Document metadata (required if doc_type is provided)
|
||||
Fields vary by doc_type:
|
||||
|
||||
For <code>book</code>:
|
||||
- <code>title</code> Book title
|
||||
Title of the book
|
||||
- <code>language</code> Book language
|
||||
Language of the book
|
||||
- <code>author</code> Book author
|
||||
Author of the book
|
||||
- <code>publisher</code> Publisher name
|
||||
Name of the publishing house
|
||||
- <code>publication_date</code> Publication date
|
||||
Date when the book was published
|
||||
- <code>isbn</code> ISBN number
|
||||
International Standard Book Number
|
||||
- <code>category</code> Book category
|
||||
Category or genre of the book
|
||||
|
||||
For <code>web_page</code>:
|
||||
- <code>title</code> Page title
|
||||
Title of the web page
|
||||
- <code>url</code> Page URL
|
||||
URL address of the web page
|
||||
- <code>language</code> Page language
|
||||
Language of the web page
|
||||
- <code>publish_date</code> Publish date
|
||||
Date when the web page was published
|
||||
- <code>author/publisher</code> Author or publisher
|
||||
Author or publisher of the web page
|
||||
- <code>topic/keywords</code> Topic or keywords
|
||||
Topics or keywords of the web page
|
||||
- <code>description</code> Page description
|
||||
Description of the web page content
|
||||
|
||||
Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
|
||||
For doc_type "others", any valid JSON object is accepted
|
||||
</Property>
|
||||
</Properties>
|
||||
</Col>
|
||||
@@ -1528,7 +1329,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
"id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
|
||||
"data_source_type": "upload_file",
|
||||
"name": "readme.txt",
|
||||
"doc_type": null
|
||||
}
|
||||
},
|
||||
"score": 3.730463140527718e-05,
|
||||
|
||||
@@ -47,46 +47,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
<Property name='text' type='string' key='text'>
|
||||
文档内容
|
||||
</Property>
|
||||
<Property name='doc_type' type='string' key='doc_type'>
|
||||
文档类型(选填)
|
||||
- <code>book</code> 图书 Book
|
||||
- <code>web_page</code> 网页 Web page
|
||||
- <code>paper</code> 学术论文/文章 Academic paper/article
|
||||
- <code>social_media_post</code> 社交媒体帖子 Social media post
|
||||
- <code>wikipedia_entry</code> 维基百科条目 Wikipedia entry
|
||||
- <code>personal_document</code> 个人文档 Personal document
|
||||
- <code>business_document</code> 商业文档 Business document
|
||||
- <code>im_chat_log</code> 即时通讯记录 Chat log
|
||||
- <code>synced_from_notion</code> Notion同步文档 Notion document
|
||||
- <code>synced_from_github</code> GitHub同步文档 GitHub document
|
||||
- <code>others</code> 其他文档类型 Other document types
|
||||
</Property>
|
||||
<Property name='doc_metadata' type='object' key='doc_metadata'>
|
||||
|
||||
文档元数据(如提供文档类型则必填)。字段因文档类型而异:
|
||||
|
||||
针对图书 For <code>book</code>:
|
||||
- <code>title</code> 书名 Book title
|
||||
- <code>language</code> 图书语言 Book language
|
||||
- <code>author</code> 作者 Book author
|
||||
- <code>publisher</code> 出版社 Publisher name
|
||||
- <code>publication_date</code> 出版日期 Publication date
|
||||
- <code>isbn</code> ISBN号码 ISBN number
|
||||
- <code>category</code> 图书分类 Book category
|
||||
|
||||
针对网页 For <code>web_page</code>:
|
||||
- <code>title</code> 页面标题 Page title
|
||||
- <code>url</code> 页面网址 Page URL
|
||||
- <code>language</code> 页面语言 Page language
|
||||
- <code>publish_date</code> 发布日期 Publish date
|
||||
- <code>author/publisher</code> 作者/发布者 Author or publisher
|
||||
- <code>topic/keywords</code> 主题/关键词 Topic or keywords
|
||||
- <code>description</code> 页面描述 Page description
|
||||
|
||||
请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
|
||||
|
||||
针对"其他"类型文档,接受任何有效的JSON对象
|
||||
</Property>
|
||||
<Property name='indexing_technique' type='string' key='indexing_technique'>
|
||||
索引方式
|
||||
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
|
||||
@@ -234,68 +194,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
- <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
|
||||
- <code>hierarchical_model</code> parent-child 模式
|
||||
- <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
|
||||
- <code>doc_type</code> 文档类型(选填)Type of document (optional)
|
||||
- <code>book</code> 图书
|
||||
文档记录一本书籍或出版物
|
||||
- <code>web_page</code> 网页
|
||||
网页内容的文档记录
|
||||
- <code>paper</code> 学术论文/文章
|
||||
学术论文或研究文章的记录
|
||||
- <code>social_media_post</code> 社交媒体帖子
|
||||
社交媒体上的帖子内容
|
||||
- <code>wikipedia_entry</code> 维基百科条目
|
||||
维基百科的词条内容
|
||||
- <code>personal_document</code> 个人文档
|
||||
个人相关的文档记录
|
||||
- <code>business_document</code> 商业文档
|
||||
商业相关的文档记录
|
||||
- <code>im_chat_log</code> 即时通讯记录
|
||||
即时通讯的聊天记录
|
||||
- <code>synced_from_notion</code> Notion同步文档
|
||||
从Notion同步的文档内容
|
||||
- <code>synced_from_github</code> GitHub同步文档
|
||||
从GitHub同步的文档内容
|
||||
- <code>others</code> 其他文档类型
|
||||
其他未列出的文档类型
|
||||
|
||||
- <code>doc_metadata</code> 文档元数据(如提供文档类型则必填
|
||||
字段因文档类型而异
|
||||
|
||||
针对图书类型 For <code>book</code>:
|
||||
- <code>title</code> 书名
|
||||
书籍的标题
|
||||
- <code>language</code> 图书语言
|
||||
书籍的语言
|
||||
- <code>author</code> 作者
|
||||
书籍的作者
|
||||
- <code>publisher</code> 出版社
|
||||
出版社的名称
|
||||
- <code>publication_date</code> 出版日期
|
||||
书籍的出版日期
|
||||
- <code>isbn</code> ISBN号码
|
||||
书籍的ISBN编号
|
||||
- <code>category</code> 图书分类
|
||||
书籍的分类类别
|
||||
|
||||
针对网页类型 For <code>web_page</code>:
|
||||
- <code>title</code> 页面标题
|
||||
网页的标题
|
||||
- <code>url</code> 页面网址
|
||||
网页的URL地址
|
||||
- <code>language</code> 页面语言
|
||||
网页的语言
|
||||
- <code>publish_date</code> 发布日期
|
||||
网页的发布日期
|
||||
- <code>author/publisher</code> 作者/发布者
|
||||
网页的作者或发布者
|
||||
- <code>topic/keywords</code> 主题/关键词
|
||||
网页的主题或关键词
|
||||
- <code>description</code> 页面描述
|
||||
网页的描述信息
|
||||
|
||||
请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
|
||||
|
||||
针对"其他"类型文档,接受任何有效的JSON对象
|
||||
|
||||
- <code>doc_language</code> 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
|
||||
|
||||
@@ -606,46 +504,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
<Property name='text' type='string' key='text'>
|
||||
文档内容(选填)
|
||||
</Property>
|
||||
<Property name='doc_type' type='string' key='doc_type'>
|
||||
文档类型(选填)
|
||||
- <code>book</code> 图书 Book
|
||||
- <code>web_page</code> 网页 Web page
|
||||
- <code>paper</code> 学术论文/文章 Academic paper/article
|
||||
- <code>social_media_post</code> 社交媒体帖子 Social media post
|
||||
- <code>wikipedia_entry</code> 维基百科条目 Wikipedia entry
|
||||
- <code>personal_document</code> 个人文档 Personal document
|
||||
- <code>business_document</code> 商业文档 Business document
|
||||
- <code>im_chat_log</code> 即时通讯记录 Chat log
|
||||
- <code>synced_from_notion</code> Notion同步文档 Notion document
|
||||
- <code>synced_from_github</code> GitHub同步文档 GitHub document
|
||||
- <code>others</code> 其他文档类型 Other document types
|
||||
</Property>
|
||||
<Property name='doc_metadata' type='object' key='doc_metadata'>
|
||||
|
||||
文档元数据(如提供文档类型则必填)。字段因文档类型而异:
|
||||
|
||||
针对图书 For <code>book</code>:
|
||||
- <code>title</code> 书名 Book title
|
||||
- <code>language</code> 图书语言 Book language
|
||||
- <code>author</code> 作者 Book author
|
||||
- <code>publisher</code> 出版社 Publisher name
|
||||
- <code>publication_date</code> 出版日期 Publication date
|
||||
- <code>isbn</code> ISBN号码 ISBN number
|
||||
- <code>category</code> 图书分类 Book category
|
||||
|
||||
针对网页 For <code>web_page</code>:
|
||||
- <code>title</code> 页面标题 Page title
|
||||
- <code>url</code> 页面网址 Page URL
|
||||
- <code>language</code> 页面语言 Page language
|
||||
- <code>publish_date</code> 发布日期 Publish date
|
||||
- <code>author/publisher</code> 作者/发布者 Author or publisher
|
||||
- <code>topic/keywords</code> 主题/关键词 Topic or keywords
|
||||
- <code>description</code> 页面描述 Page description
|
||||
|
||||
请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
|
||||
|
||||
针对"其他"类型文档,接受任何有效的JSON对象
|
||||
</Property>
|
||||
<Property name='process_rule' type='object' key='process_rule'>
|
||||
处理规则(选填)
|
||||
- <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
|
||||
@@ -766,68 +624,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
- <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
|
||||
- <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
|
||||
- <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
|
||||
- <code>doc_type</code> 文档类型(选填)Type of document (optional)
|
||||
- <code>book</code> 图书
|
||||
文档记录一本书籍或出版物
|
||||
- <code>web_page</code> 网页
|
||||
网页内容的文档记录
|
||||
- <code>paper</code> 学术论文/文章
|
||||
学术论文或研究文章的记录
|
||||
- <code>social_media_post</code> 社交媒体帖子
|
||||
社交媒体上的帖子内容
|
||||
- <code>wikipedia_entry</code> 维基百科条目
|
||||
维基百科的词条内容
|
||||
- <code>personal_document</code> 个人文档
|
||||
个人相关的文档记录
|
||||
- <code>business_document</code> 商业文档
|
||||
商业相关的文档记录
|
||||
- <code>im_chat_log</code> 即时通讯记录
|
||||
即时通讯的聊天记录
|
||||
- <code>synced_from_notion</code> Notion同步文档
|
||||
从Notion同步的文档内容
|
||||
- <code>synced_from_github</code> GitHub同步文档
|
||||
从GitHub同步的文档内容
|
||||
- <code>others</code> 其他文档类型
|
||||
其他未列出的文档类型
|
||||
|
||||
- <code>doc_metadata</code> 文档元数据(如提供文档类型则必填
|
||||
字段因文档类型而异
|
||||
|
||||
针对图书类型 For <code>book</code>:
|
||||
- <code>title</code> 书名
|
||||
书籍的标题
|
||||
- <code>language</code> 图书语言
|
||||
书籍的语言
|
||||
- <code>author</code> 作者
|
||||
书籍的作者
|
||||
- <code>publisher</code> 出版社
|
||||
出版社的名称
|
||||
- <code>publication_date</code> 出版日期
|
||||
书籍的出版日期
|
||||
- <code>isbn</code> ISBN号码
|
||||
书籍的ISBN编号
|
||||
- <code>category</code> 图书分类
|
||||
书籍的分类类别
|
||||
|
||||
针对网页类型 For <code>web_page</code>:
|
||||
- <code>title</code> 页面标题
|
||||
网页的标题
|
||||
- <code>url</code> 页面网址
|
||||
网页的URL地址
|
||||
- <code>language</code> 页面语言
|
||||
网页的语言
|
||||
- <code>publish_date</code> 发布日期
|
||||
网页的发布日期
|
||||
- <code>author/publisher</code> 作者/发布者
|
||||
网页的作者或发布者
|
||||
- <code>topic/keywords</code> 主题/关键词
|
||||
网页的主题或关键词
|
||||
- <code>description</code> 页面描述
|
||||
网页的描述信息
|
||||
|
||||
请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
|
||||
|
||||
针对"其他"类型文档,接受任何有效的JSON对象
|
||||
</Property>
|
||||
</Properties>
|
||||
</Col>
|
||||
@@ -1534,7 +1330,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
"id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
|
||||
"data_source_type": "upload_file",
|
||||
"name": "readme.txt",
|
||||
"doc_type": null
|
||||
}
|
||||
},
|
||||
"score": 3.730463140527718e-05,
|
||||
|
||||
Reference in New Issue
Block a user