feat(knowledge): 添加 MinerU 组件以支持 PDF 文件处理

- 新增 RenderMinerU 组件，用于 PDF 文件的预处理和 Markdown 编辑
- 在 create.vue 中集成 RenderMinerU 组件，实现预处理结果预览功能
- 更新公共样式以支持 Markdown 渲染
- 在 main.js 中注册 RMinerU 组件
- 修改 package.json，添加 markdown-it 和 markdown-it-katex 依赖
- 更新 vue.config.js，添加对 mjs 文件的处理规则
This commit is contained in:
陈昱达
2025-04-11 16:26:27 +08:00
parent e9ee7fe0a2
commit d108379f41
8 changed files with 379 additions and 42 deletions

View File

@@ -25,6 +25,8 @@
"exif-js": "^2.3.0", "exif-js": "^2.3.0",
"file-saver": "^2.0.5", "file-saver": "^2.0.5",
"js-cookie": "2.2.0", "js-cookie": "2.2.0",
"markdown-it": "^14.1.0",
"markdown-it-katex": "^2.0.3",
"mavon-editor": "^2.9.1", "mavon-editor": "^2.9.1",
"node-gyp": "^8.0.0", "node-gyp": "^8.0.0",
"normalize.css": "7.0.0", "normalize.css": "7.0.0",

View File

@@ -40,3 +40,21 @@ export function datasetDelete(data) {
method: 'delete', method: 'delete',
}) })
} }
/**
 * MinerU: fetch the bounding-box (bbox) JSON of a document.
 *
 * @param {Object} params - Sent as the query parameters of the GET request.
 * @returns {*} Whatever the shared `request` helper returns for this call.
 */
export function minerUBbox(params) {
  // NOTE(review): the endpoint is pinned to a fixed host; the getUrl()-based
  // form below was left disabled by the author — confirm which one is intended.
  // url: getUrl('/document/mineru/bbox_json'),
  const options = {
    url: 'http://192.168.8.165:7196/document/mineru/bbox_json',
    method: 'get',
    params
  }
  return request(options)
}
/**
 * MinerU: build the URL from which the Markdown of a document is downloaded.
 *
 * Unlike the other API helpers this does not perform the request itself; it
 * only returns the URL string for the caller to fetch.
 *
 * @param {Object} params
 * @param {string|number} params.documentId - Identifier of the document.
 * @returns {string} URL carrying `documentId` as an encoded query parameter.
 */
export function minerUMarkDown(params) {
  // Percent-encode the id so reserved characters (&, =, #, spaces, ...) cannot
  // truncate the value or smuggle additional query parameters into the URL.
  const documentId = encodeURIComponent(params.documentId)
  // NOTE(review): the host is hard-coded; the author left a disabled
  // getUrl('/document/mineru/md') alternative — confirm which one is intended.
  return `http://192.168.8.165:7196/document/mineru/md?documentId=${documentId}`
}

View File

@@ -142,6 +142,9 @@
.lineH35 { .lineH35 {
line-height: 35px; line-height: 35px;
} }
.lineH25 {
line-height: 25px;
}
.lineH40 { .lineH40 {
line-height: 40px; line-height: 40px;
} }
@@ -258,3 +261,90 @@ body .el-collapse-item__wrap {
.cursor-pointer { .cursor-pointer {
cursor: pointer; cursor: pointer;
} }
// Typography for rendered Markdown content placed inside a .view-body container.
.view-body {
text-align: left;
font-size: 14px;
div{
outline: unset;
}
/* Basic styles that make the rendered Markdown content look nicer */
p {
font-size: 16px;
line-height: 1.6;
margin-bottom: 16px;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 24px;
margin-bottom: 12px;
border-bottom: 1px dashed #cecece;
padding:5px;
}
ul, ol {
margin: 16px 0;
padding-left: 32px;
}
li {
margin-bottom: 8px;
}
a {
color: #007BFF;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
code {
background-color: #f1f1f1;
padding: 4px 8px;
border-radius: 4px;
font-family: 'Courier New', Courier, monospace;
}
img {
max-width: 100%;
}
pre {
background-color: #f1f1f1;
padding: 16px;
border-radius: 4px;
overflow-x: auto;
font-family: 'Courier New', Courier, monospace;
}
pre code {
background-color: transparent;
padding: 0;
}
table {
border: 1px solid #f9f9f9;
width: 100%;
border-collapse: collapse;
background: linear-gradient(to bottom, #ffffff, #f9f9f9);
}
th, td {
border: 1px solid #ccc;
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
// Zebra striping for table rows
tr:nth-child(even) {
background-color: #f9fafc;
}
}

View File

@@ -0,0 +1,173 @@
<template>
<div style="height: 100%;">
<div class="flex" style="height:calc(100% - 20px);flex:1">
<iframe
id="iframe"
:src="`${iframeSrc}/pdfjs-dist/web/viewer.html?file=${encodeURIComponent(prdUrl)}`"
class="miner-u el-card is-always-shadow ml20"
></iframe>
<div style="flex:1;max-width: 800px;" class="mh20 miner-u-md">
<el-tabs type="border-card" style="height: 100%;overflow: hidden" @tab-click="changeTab">
<el-tab-pane label="预览" style="overflow:scroll;">
<div v-html="markdownHtml" class="view-body"></div>
</el-tab-pane>
<el-tab-pane label="编辑">
<div class="lineH25" contenteditable id="md-editor" @blur="emitMarkDown">{{ markdown }}</div>
</el-tab-pane>
</el-tabs>
</div>
</div>
</div>
</template>
<script>
import { minerUBbox, minerUMarkDown } from '@/api/generatedApi/index'
import { DEFAULT_COLOR_SECTION, PDF_COLOR_PICKER } from './pdf-color'
import MarkdownIt from 'markdown-it'
import markdownItKatex from 'markdown-it-katex'
// Module-level markdown-it renderer with the KaTeX plugin applied; used by the component below.
// NOTE(review): `html: true` lets raw HTML in the Markdown source pass through to the
// rendered output, and the template inserts that output with v-html — confirm the
// Markdown source is trusted, or sanitize the rendered HTML before display.
const md = new MarkdownIt({
html: true
}).use(markdownItKatex)
/**
 * MinerU preview component.
 *
 * Shows a PDF viewer iframe next to a tabbed Markdown panel (rendered preview
 * and a contenteditable editor). On creation it loads the bbox data and the
 * Markdown for `documentId`, and it exchanges postMessage events with the
 * iframe to hand over the bbox layer data and relay page changes.
 *
 * Emits `getMarkDownIt` with `{ innerText }` when the editor loses focus.
 */
export default {
  name: 'index',
  data() {
    return {
      prdUrl: ``,
      iframeSrc: window.location.href,
      bboxList: [],
      markdown: '',
      md,
      markdownHtml: '',
      // Page number last reported through a `pageNumDetail` message. The message
      // handler writes `page.value`, so the object has to exist up front.
      page: { value: 1 }
    }
  },
  props: {
    // Document whose bbox data and Markdown are loaded. Defaults to 1, the value
    // that used to be hard-coded in the loaders.
    documentId: {
      type: [Number, String],
      default: 1
    }
  },
  watch: {},
  components: {},
  filters: {},
  methods: {
    /**
     * Emit the current editor text to the parent as `getMarkDownIt`.
     */
    emitMarkDown() {
      const editor = document.getElementById('md-editor')
      // The editor element may not be in the DOM (e.g. during teardown).
      if (!editor) return
      this.$emit('getMarkDownIt', { innerText: editor.innerText })
    },
    /**
     * Attach a `bboxes` array to every entry of `data`.
     *
     * Each bbox is `{ type, bbox, color }`, collected first from the entry's
     * `preproc_blocks` and then from its `discarded_blocks`; the colour comes
     * from PDF_COLOR_PICKER by block type, falling back to DEFAULT_COLOR_SECTION.
     *
     * @param {Array<Object>} data - Entries as returned by the bbox endpoint.
     * @returns {Array<Object>} Shallow copies of the entries with `bboxes` added.
     */
    formatJson(data) {
      // Shared mapping for both block lists; anything that is not an array yields no boxes.
      const toBboxes = blocks =>
        Array.isArray(blocks)
          ? blocks.map(block => ({
              type: block.type,
              bbox: block.bbox,
              color: PDF_COLOR_PICKER[block.type] || DEFAULT_COLOR_SECTION
            }))
          : []
      return data.map(item => ({
        ...item,
        bboxes: [...toBboxes(item.preproc_blocks), ...toBboxes(item.discarded_blocks)]
      }))
    },
    /**
     * Load the bbox data and store it, with colours attached, in `bboxList`.
     */
    getPDFDetailBbox() {
      minerUBbox({ documentId: this.documentId })
        .then(res => {
          const pages = res && res.content && res.content.content
          // A response without the expected list leaves the overlay empty instead of throwing.
          this.bboxList = Array.isArray(pages) ? this.formatJson(JSON.parse(JSON.stringify(pages))) : []
        })
        .catch(err => {
          console.error('Failed to load MinerU bbox data', err)
        })
    },
    /**
     * Re-render the preview from the current editor text; bound to the tabs' tab-click event.
     */
    changeTab() {
      const editor = document.getElementById('md-editor')
      if (!editor) return
      this.markdownHtml = md.render(editor.innerText)
    },
    /**
     * Download the Markdown and render it into `markdownHtml`.
     */
    async getPDFDetailMarkDown() {
      try {
        const response = await fetch(minerUMarkDown({ documentId: this.documentId }))
        // Without this check the body of an HTTP error page would be rendered as Markdown.
        if (!response.ok) {
          throw new Error(`Failed to load MinerU markdown: HTTP ${response.status}`)
        }
        this.markdown = await response.text()
        this.markdownHtml = this.md.render(this.markdown)
      } catch (err) {
        console.error(err)
      }
    },
    /**
     * Post `{ type, data: message }` to the viewer iframe, if it exists.
     *
     * @param {string} type - Message type understood by the iframe.
     * @param {*} message - Payload sent as `data`.
     */
    sendMessageToIframe(type, message) {
      const iframe = document.getElementById('iframe')
      // Only post when the iframe exists and exposes a window.
      if (iframe && iframe.contentWindow) {
        iframe.contentWindow.postMessage(
          {
            type,
            data: message
          },
          '*'
        )
      }
    },
    /**
     * Handle postMessage events arriving on `window`.
     *
     * @param {MessageEvent} event
     */
    handleIframeMessage(event) {
      const payload = event.data
      // Other scripts may post null or primitive payloads; those carry nothing for us.
      if (!payload || typeof payload !== 'object') return
      // Check that the message comes from the expected origin.
      // NOTE(review): this compares the origin with process.env.BASE_URL — confirm
      // BASE_URL really holds a full origin, otherwise this branch never runs.
      if (event.origin + '/' === process.env.BASE_URL) {
        switch (payload.status) {
          case 'loaded':
            // Viewer is ready: hand over the bbox layer data and the title.
            this.sendMessageToIframe('initExtractLayerData', this.bboxList)
            this.sendMessageToIframe('title', '')
            break
        }
      }
      if (payload.pageNum) {
        this.sendMessageToIframe('pageChange', payload.pageNum)
      }
      if (payload.pageNumDetail) {
        this.page.value = payload.pageNumDetail
        this.sendMessageToIframe('pageNumDetail', payload.pageNumDetail)
      }
    }
  },
  created() {
    this.getPDFDetailBbox()
    this.getPDFDetailMarkDown()
  },
  mounted() {
    // Listen for postMessage events. Vue binds methods to the instance, so the
    // same function reference can be removed again in beforeDestroy.
    window.addEventListener('message', this.handleIframeMessage)
  },
  beforeDestroy() {
    // Without this every mount would leave another listener on window.
    window.removeEventListener('message', this.handleIframeMessage)
  },
  computed: {}
}
</script>
<style scoped lang="scss">
.miner-u {
outline: none;
border: none;
flex: 1;
border-radius: unset;
//width:500px;
//height:100%
}
#md-editor {
// Wrap long lines automatically once they exceed the available width
white-space: pre-wrap;
word-wrap: break-word;
overflow: auto;
outline: none;
border: none;
}
.miner-u-md {
/deep/ .el-tabs--border-card > .el-tabs__content {
height: calc(100% - 30px);
overflow: auto;
}
.tabs__content {
}
}
</style>

View File

@@ -0,0 +1,37 @@
/**
 * Build a colour pair from an "r, g, b" triple: an opaque `line` colour and a
 * 40%-alpha `fill` colour.
 */
const toSwatch = (rgb) => ({
  line: `rgba(${rgb}, 1)`,
  fill: `rgba(${rgb}, 0.4)`
});

/** Colour pairs keyed by block type. */
export const PDF_COLOR_PICKER = {
  title: toSwatch('121, 124, 255'),
  text: toSwatch('230, 122, 171'),
  interline_equation: toSwatch('240, 240, 124'),
  // Spelled out literally: unlike the other entries these strings contain no spaces.
  discarded: {
    line: 'rgba(164,164,164,1)',
    fill: 'rgba(164,164,164,0.4)'
  },
  image: toSwatch('149, 226, 115'),
  table: toSwatch('230, 113, 230'),
  inline_equation: toSwatch('150, 232, 172')
};

/** Colour pair for block types that have no entry in PDF_COLOR_PICKER. */
export const DEFAULT_COLOR_SECTION = toSwatch('166, 113, 230');

export const PDF_TEMPLATE_URL_KEY = 't';

View File

@@ -11,6 +11,7 @@ import RenderTable from './components/RenderTable'
import RenderSwiper from './components/RenderSwiper' import RenderSwiper from './components/RenderSwiper'
import VueEditor from './components/VueEditor' import VueEditor from './components/VueEditor'
import MavonEditor from './components/MavonEditor' import MavonEditor from './components/MavonEditor'
import RenderMinerU from '@/components/RenderMinerU/index.vue'
import utils from '@/assets/js/common' import utils from '@/assets/js/common'
// 生成的数据交互api // 生成的数据交互api
import generatedApi from '@/api/generatedApi' import generatedApi from '@/api/generatedApi'
@@ -32,6 +33,7 @@ Vue.use(ElementUI, { locale })
//二次封装的el-table //二次封装的el-table
Vue.component('RTable', RenderTable) Vue.component('RTable', RenderTable)
Vue.component('RSwiper', RenderSwiper) Vue.component('RSwiper', RenderSwiper)
Vue.component('RMinerU', RenderMinerU)
// 富文本编辑器 // 富文本编辑器
Vue.component('VEditor', VueEditor) Vue.component('VEditor', VueEditor)
// 富文本编辑器 可视化代码 // 富文本编辑器 可视化代码

View File

@@ -1,83 +1,89 @@
<template> <template>
<div class='container create-container'> <div class="container create-container">
<el-card shadow="hover"> <el-card shadow="hover">
<div slot="header" class="clearfix"> <div slot="header" class="clearfix">
<h3>创建知识库</h3> <h3>创建知识库</h3>
</div> </div>
<div class='card-body'> <div class="card-body">
<el-steps :active="active" simple finish-status="success"> <el-steps :active="active" simple finish-status="success">
<el-step title="文件上传/预处理"></el-step> <el-step title="文件上传/预处理"></el-step>
<el-step title="拆分配置"></el-step> <el-step title="拆分配置"></el-step>
<el-step title="题词配置"></el-step> <el-step title="题词配置"></el-step>
</el-steps> </el-steps>
<div class='components'> <div class="components">
<step-preprocessing v-if='active===0' @getForm='getForm'></step-preprocessing> <step-preprocessing v-if="active === 0" @getForm="getForm"></step-preprocessing>
<split-config v-if='active===1'></split-config> <split-config v-if="active === 1"></split-config>
<words v-if='active===2'></words> </div>
</div> </div>
</div> <div class="card-bottom">
<el-button type="primary" size="medium" @click="fetchApi">一键处理</el-button>
<div class='card-bottom'> <el-button type="primary" size="medium" @click="active++" v-if="active < 2">下一步</el-button>
<el-button type="primary" size='medium' @click='fetchApi'>一键处理</el-button> <el-button type="primary" size="medium" v-if="active === 2">确定</el-button>
<el-button type="primary" size='medium' @click='active++' v-if='active<2'>下一步</el-button> <el-button type="primary" size="medium" @click="active--" v-if="active >= 1">上一步</el-button>
<el-button type="primary" size='medium' v-if='active === 2'>确定</el-button> <el-button type="primary" size="medium">取消</el-button>
<el-button type="primary" size='medium' @click='active--' v-if='active>=1'>上一步</el-button>
<el-button type="primary" size='medium' >取消</el-button>
</div> </div>
</el-card> </el-card>
<el-drawer :visible.sync="visible" size="80%" title="预处理结果预览">
<div class="mv10 mh20 text-right">
<el-button type="primary" size="medium">保存并处理</el-button>
<el-button size="medium">取消</el-button>
</div>
<div style="height:calc(100% - 55px);">
<r-miner-u></r-miner-u>
</div>
</el-drawer>
</div> </div>
</template> </template>
<script> <script>
import StepPreprocessing from './components/preprocessing.vue' import StepPreprocessing from './components/preprocessing.vue'
import SplitConfig from '@/views/knowledge/detail/components/split/SplitConfig.vue' import SplitConfig from '@/views/knowledge/detail/components/split/SplitConfig.vue'
import Words from '@/views/knowledge/detail/components/words/Index.vue'
// import StepC
export default { export default {
name: 'create', name: 'create',
data() { data() {
return { return {
visible: true,
active: 0 active: 0
} }
}, },
props: {}, props: {},
watch: {}, watch: {},
components: { components: {
Words,
SplitConfig, SplitConfig,
StepPreprocessing StepPreprocessing
}, },
filters: {}, filters: {},
methods: { methods: {
getForm() {}, getForm() {},
fetchApi(){ fetchApi() {}
},
},
created() {
},
mounted() {
}, },
created() {},
mounted() {},
computed: {} computed: {}
} }
</script> </script>
<style scoped lang='scss'> <style scoped lang="scss">
/deep/.el-drawer__header {
margin-bottom: unset;
border-bottom: 1px solid #eee;
padding-bottom: 20px;
}
/deep/ .card-bottom { /deep/ .card-bottom {
position: relative; position: relative;
width: 100%; width: 100%;
padding-top: 20px; padding-top: 10px;
&::after { &::after {
content: ''; content: '';
border-top: 1px solid #ebeef5; border-top: 1px solid #ebeef5;
position: absolute; position: absolute;
top: 0; top: 0;
left: 0; left: 0;
width:100% width: 100%;
} }
& .el-button { & .el-button {
} }
} }
</style> </style>

View File

@@ -47,6 +47,15 @@ module.exports = {
//在webpack的名称字段中提供应用程序的标题以便 //在webpack的名称字段中提供应用程序的标题以便
//可以在index.html中对其进行访问以注入正确的标题。 //可以在index.html中对其进行访问以注入正确的标题。
name: name, name: name,
module: {
rules: [
{
test: /\.mjs$/,
include: /node_modules/,
type: 'javascript/auto',
},
],
},
resolve: { resolve: {
alias: { alias: {
'@': resolve('src') '@': resolve('src')