mirror of
http://112.124.100.131/ebiz-ai/ebiz-ai-knowledge-manage.git
synced 2025-12-12 12:26:52 +08:00
feat(knowledge): 添加 MinerU 组件以支持 PDF 文件处理
- 新增 RenderMinerU 组件,用于 PDF 文件的预处理和 Markdown 编辑
- 在 create.vue 中集成 RenderMinerU 组件,实现预处理结果预览功能
- 更新公共样式以支持 Markdown 渲染
- 在 main.js 中注册 RMinerU 组件
- 修改 package.json,添加 markdown-it 和 markdown-it-katex 依赖
- 更新 vue.config.js,添加对 mjs 文件的处理规则
This commit is contained in:
@@ -25,6 +25,8 @@
|
||||
"exif-js": "^2.3.0",
|
||||
"file-saver": "^2.0.5",
|
||||
"js-cookie": "2.2.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
"markdown-it-katex": "^2.0.3",
|
||||
"mavon-editor": "^2.9.1",
|
||||
"node-gyp": "^8.0.0",
|
||||
"normalize.css": "7.0.0",
|
||||
|
||||
@@ -40,3 +40,21 @@ export function datasetDelete(data) {
|
||||
method: 'delete',
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * MinerU: fetch the bbox JSON for a document.
 *
 * Issues a GET through the shared `request` helper and forwards `params`
 * unchanged as the query parameters.
 *
 * NOTE(review): the endpoint is a hard-coded LAN address. The variant that
 * was commented out here, `getUrl('/document/mineru/bbox_json')`, looks like
 * the intended form — confirm and switch before release.
 *
 * @param {Object} params - Query parameters for the call.
 * @returns {*} Whatever `request` returns for this call.
 */
export function minerUBbox(params) {
  const options = {
    url: 'http://192.168.8.165:7196/document/mineru/bbox_json',
    method: 'get',
    params
  }
  return request(options)
}
|
||||
/**
 * MinerU: build the URL of the Markdown export for a document.
 *
 * Unlike its sibling API helpers this function performs no request; it only
 * returns the URL string so the caller can fetch it directly.
 *
 * The document id is percent-encoded so that ids containing reserved
 * characters (`&`, `=`, `#`, spaces, ...) cannot break or extend the query
 * string. Plain numeric ids produce exactly the same URL as before.
 *
 * NOTE(review): the host is a hard-coded LAN address; the commented-out
 * `getUrl('/document/mineru/md')` variant that used to sit here looks like
 * the intended form — confirm and switch before release.
 *
 * @param {{ documentId: (string|number) }} params - Identifies the document.
 * @returns {string} Absolute URL of the Markdown resource.
 */
export function minerUMarkDown(params) {
  const documentId = encodeURIComponent(params.documentId)
  return `http://192.168.8.165:7196/document/mineru/md?documentId=${documentId}`
}
|
||||
|
||||
@@ -142,6 +142,9 @@
|
||||
.lineH35 {
|
||||
line-height: 35px;
|
||||
}
|
||||
.lineH25 {
|
||||
line-height: 25px;
|
||||
}
|
||||
.lineH40 {
|
||||
line-height: 40px;
|
||||
}
|
||||
@@ -258,3 +261,90 @@ body .el-collapse-item__wrap {
|
||||
.cursor-pointer {
|
||||
cursor: pointer;
|
||||
}
|
||||
.view-body {
|
||||
text-align: left;
|
||||
font-size: 14px;
|
||||
div{
|
||||
outline: unset;
|
||||
|
||||
}
|
||||
|
||||
/* 添加一些基本的样式以美化 Markdown 内容 */
|
||||
|
||||
p {
|
||||
font-size: 16px;
|
||||
line-height: 1.6;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
margin-top: 24px;
|
||||
margin-bottom: 12px;
|
||||
border-bottom: 1px dashed #cecece;
|
||||
padding:5px;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
margin: 16px 0;
|
||||
padding-left: 32px;
|
||||
}
|
||||
|
||||
li {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
a {
|
||||
color: #007BFF;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
code {
|
||||
background-color: #f1f1f1;
|
||||
padding: 4px 8px;
|
||||
border-radius: 4px;
|
||||
font-family: 'Courier New', Courier, monospace;
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
pre {
|
||||
background-color: #f1f1f1;
|
||||
padding: 16px;
|
||||
border-radius: 4px;
|
||||
overflow-x: auto;
|
||||
font-family: 'Courier New', Courier, monospace;
|
||||
}
|
||||
|
||||
pre code {
|
||||
background-color: transparent;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
table {
|
||||
border: 1px solid #f9f9f9;
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
background: linear-gradient(to bottom, #ffffff, #f9f9f9);
|
||||
}
|
||||
|
||||
th, td {
|
||||
border: 1px solid #ccc;
|
||||
padding: 8px;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
th {
|
||||
background-color: #f2f2f2;
|
||||
}
|
||||
|
||||
// 添加斑马条纹效果
|
||||
tr:nth-child(even) {
|
||||
background-color: #f9fafc;
|
||||
}
|
||||
}
|
||||
|
||||
173
src/components/RenderMinerU/index.vue
Normal file
173
src/components/RenderMinerU/index.vue
Normal file
@@ -0,0 +1,173 @@
|
||||
<template>
|
||||
<div style="height: 100%;">
|
||||
<div class="flex" style="height:calc(100% - 20px);flex:1">
|
||||
<iframe
|
||||
id="iframe"
|
||||
:src="`${iframeSrc}/pdfjs-dist/web/viewer.html?file=${encodeURIComponent(prdUrl)}`"
|
||||
class="miner-u el-card is-always-shadow ml20"
|
||||
></iframe>
|
||||
<div style="flex:1;max-width: 800px;" class="mh20 miner-u-md">
|
||||
<el-tabs type="border-card" style="height: 100%;overflow: hidden" @tab-click="changeTab">
|
||||
<el-tab-pane label="预览" style="overflow:scroll;">
|
||||
<div v-html="markdownHtml" class="view-body"></div>
|
||||
</el-tab-pane>
|
||||
<el-tab-pane label="编辑">
|
||||
<div class="lineH25" contenteditable id="md-editor" @blur="emitMarkDown">{{ markdown }}</div>
|
||||
</el-tab-pane>
|
||||
</el-tabs>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
<script>
|
||||
import { minerUBbox, minerUMarkDown } from '@/api/generatedApi/index'
|
||||
import { DEFAULT_COLOR_SECTION, PDF_COLOR_PICKER } from './pdf-color'
|
||||
import MarkdownIt from 'markdown-it'
|
||||
import markdownItKatex from 'markdown-it-katex'
|
||||
const md = new MarkdownIt({
|
||||
html: true
|
||||
}).use(markdownItKatex)
|
||||
/**
 * RenderMinerU component options.
 *
 * Shows a PDF inside an embedded pdf.js viewer iframe next to a
 * preview/edit pane for the Markdown extracted from it. Bbox data and
 * Markdown are loaded on creation; the iframe is driven via `postMessage`.
 */
export default {
  name: 'index',
  data() {
    return {
      prdUrl: ``,
      iframeSrc: window.location.href,
      bboxList: [],
      markdown: '',
      md,
      markdownHtml: '',
      // Last page reported by the viewer through a `pageNumDetail` message.
      // Declared here because the message handler writes `page.value`;
      // without it that write throws a TypeError.
      page: { value: 1 }
    }
  },
  props: {},
  watch: {},
  components: {},
  filters: {},
  methods: {
    /**
     * Emit the current editor text to the parent as
     * `getMarkDownIt` with payload `{ innerText }`.
     */
    emitMarkDown() {
      const text = document.getElementById('md-editor').innerText
      this.$emit('getMarkDownIt', { innerText: text })
    },

    /**
     * Convert a list of raw blocks into bbox descriptors
     * (`{ type, bbox, color }`), picking the colour by block type and
     * falling back to the default colour for unknown types.
     *
     * @param {Array|undefined|null} blocks - Raw blocks; a missing list yields `[]`.
     * @returns {Array<Object>} One descriptor per block.
     */
    toBboxes(blocks) {
      return (blocks || []).map(block => ({
        type: block.type,
        bbox: block.bbox,
        color: PDF_COLOR_PICKER[block.type] || DEFAULT_COLOR_SECTION
      }))
    },

    /**
     * Add a `bboxes` array to every item, built from its `preproc_blocks`
     * followed by its `discarded_blocks`. The input items are not mutated.
     *
     * @param {Array<Object>} data - Items as returned by the bbox endpoint.
     * @returns {Array<Object>} Copies of the items with `bboxes` added;
     *   `[]` when `data` is not an array.
     */
    formatJson(data) {
      if (!Array.isArray(data)) {
        return []
      }
      return data.map(item => ({
        ...item,
        bboxes: [
          // Pre-processed blocks first ...
          ...this.toBboxes(item.preproc_blocks),
          // ... then the discarded ones, same order as before.
          ...this.toBboxes(item.discarded_blocks)
        ]
      }))
    },

    /**
     * Load the bbox data and store it, with colours attached, in `bboxList`.
     * NOTE(review): `documentId` is hard-coded to 1 — presumably a
     * placeholder until the id is passed in; confirm.
     */
    getPDFDetailBbox() {
      minerUBbox({ documentId: 1 })
        .then(res => {
          const items = res && res.content && res.content.content
          // Deep-copy the payload (as before) so the response is never shared;
          // skip the copy when the payload is missing, which would throw.
          const copy = Array.isArray(items) ? JSON.parse(JSON.stringify(items)) : []
          this.bboxList = this.formatJson(copy)
        })
        .catch(error => {
          // Previously an unhandled rejection; surface it instead.
          console.error('Failed to load MinerU bbox data', error)
        })
    },

    /** Re-render the preview from the current editor text on tab clicks. */
    changeTab() {
      const text = document.getElementById('md-editor').innerText
      this.markdownHtml = md.render(text)
    },

    /**
     * Fetch the Markdown text and render it into `markdownHtml`.
     * NOTE(review): `documentId` is hard-coded to 1 — presumably a
     * placeholder; confirm.
     */
    async getPDFDetailMarkDown() {
      try {
        const response = await fetch(minerUMarkDown({ documentId: 1 }))
        if (!response.ok) {
          // Do not render an error page body as if it were the document.
          throw new Error(`Failed to load markdown: HTTP ${response.status}`)
        }
        this.markdown = await response.text()
        this.markdownHtml = md.render(this.markdown)
      } catch (error) {
        console.error(error)
      }
    },

    /**
     * Post `{ type, data: message }` to the viewer iframe, if it exists.
     * NOTE(review): the target origin is '*'; consider restricting it.
     *
     * @param {string} type - Message type understood by the viewer.
     * @param {*} message - Payload sent as `data`.
     */
    sendMessageToIframe(type, message) {
      const iframe = document.getElementById('iframe')
      // Only post when the iframe is present and has a window to receive it.
      if (iframe && iframe.contentWindow) {
        iframe.contentWindow.postMessage({ type, data: message }, '*')
      }
    },

    /**
     * Handle `message` events posted to this window.
     * Kept as a method so the exact same listener can be removed on destroy.
     *
     * @param {MessageEvent} event - Incoming message event.
     */
    handleIframeMessage(event) {
      // Other senders may post messages with no payload at all.
      const payload = event.data || {}

      // Origin check for status messages.
      // NOTE(review): Vue CLI usually sets BASE_URL to the public path
      // (e.g. '/'), not an origin, so this comparison may never be true —
      // confirm the intended value.
      if (event.origin + '/' === process.env.BASE_URL) {
        switch (payload.status) {
          case 'loaded':
            // Viewer is ready: hand over the bbox data and clear the title.
            this.sendMessageToIframe('initExtractLayerData', this.bboxList)
            this.sendMessageToIframe('title', '')
            break
        }
      }

      if (payload.pageNum) {
        this.sendMessageToIframe('pageChange', payload.pageNum)
      }

      if (payload.pageNumDetail) {
        this.page.value = payload.pageNumDetail
        this.sendMessageToIframe('pageNumDetail', payload.pageNumDetail)
      }
    }
  },
  created() {
    this.getPDFDetailBbox()
    this.getPDFDetailMarkDown()
  },
  mounted() {
    // Listen for postMessage events coming from the iframe.
    window.addEventListener('message', this.handleIframeMessage)
  },
  beforeDestroy() {
    // Remove the listener so destroyed instances stop reacting to messages.
    window.removeEventListener('message', this.handleIframeMessage)
  },
  computed: {}
}
|
||||
</script>
|
||||
<style scoped lang="scss">
|
||||
.miner-u {
|
||||
outline: none;
|
||||
border: none;
|
||||
flex: 1;
|
||||
border-radius: unset;
|
||||
//width:500px;
|
||||
//height:100%
|
||||
}
|
||||
|
||||
#md-editor {
|
||||
//超过宽度自动折行
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
overflow: auto;
|
||||
outline: none;
|
||||
border: none;
|
||||
}
|
||||
.miner-u-md {
|
||||
/deep/ .el-tabs--border-card > .el-tabs__content {
|
||||
height: calc(100% - 30px);
|
||||
overflow: auto;
|
||||
}
|
||||
.tabs__content {
|
||||
}
|
||||
}
|
||||
</style>
|
||||
37
src/components/RenderMinerU/pdf-color.js
Normal file
37
src/components/RenderMinerU/pdf-color.js
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
 * Build an immutable `{ line, fill }` colour pair.
 * The pairs are shared by reference between all consumers, so they are
 * frozen to stop one consumer from changing the colour for everyone.
 * NOTE(review): `line` / `fill` are presumably the outline and fill colours
 * of a highlighted region — confirm against the viewer code.
 */
const colorPair = (line, fill) => Object.freeze({ line, fill })

/**
 * Colour pairs keyed by block type (looked up as `PDF_COLOR_PICKER[block.type]`).
 * Each `fill` is the same RGB value as `line` at 0.4 alpha.
 */
export const PDF_COLOR_PICKER = Object.freeze({
  title: colorPair('rgba(121, 124, 255, 1)', 'rgba(121, 124, 255, 0.4)'),
  text: colorPair('rgba(230, 122, 171, 1)', 'rgba(230, 122, 171, 0.4)'),
  interline_equation: colorPair('rgba(240, 240, 124, 1)', 'rgba(240, 240, 124, 0.4)'),
  discarded: colorPair('rgba(164,164,164,1)', 'rgba(164,164,164,0.4)'),
  image: colorPair('rgba(149, 226, 115, 1)', 'rgba(149, 226, 115, 0.4)'),
  table: colorPair('rgba(230, 113, 230, 1)', 'rgba(230, 113, 230, 0.4)'),
  inline_equation: colorPair('rgba(150, 232, 172, 1)', 'rgba(150, 232, 172, 0.4)')
})

/** Colour pair used when a block type has no entry in `PDF_COLOR_PICKER`. */
export const DEFAULT_COLOR_SECTION = colorPair('rgba(166, 113, 230, 1)', 'rgba(166, 113, 230, 0.4)')

export const PDF_TEMPLATE_URL_KEY = 't'
|
||||
@@ -11,6 +11,7 @@ import RenderTable from './components/RenderTable'
|
||||
import RenderSwiper from './components/RenderSwiper'
|
||||
import VueEditor from './components/VueEditor'
|
||||
import MavonEditor from './components/MavonEditor'
|
||||
import RenderMinerU from '@/components/RenderMinerU/index.vue'
|
||||
import utils from '@/assets/js/common'
|
||||
// 生成的数据交互api
|
||||
import generatedApi from '@/api/generatedApi'
|
||||
@@ -32,6 +33,7 @@ Vue.use(ElementUI, { locale })
|
||||
//二次封装的el-table
|
||||
Vue.component('RTable', RenderTable)
|
||||
Vue.component('RSwiper', RenderSwiper)
|
||||
Vue.component('RMinerU', RenderMinerU)
|
||||
// 富文本编辑器
|
||||
Vue.component('VEditor', VueEditor)
|
||||
// 富文本编辑器 可视化代码
|
||||
|
||||
@@ -1,83 +1,89 @@
|
||||
<template>
|
||||
<div class='container create-container'>
|
||||
<div class="container create-container">
|
||||
<el-card shadow="hover">
|
||||
<div slot="header" class="clearfix">
|
||||
<h3>创建知识库</h3>
|
||||
|
||||
</div>
|
||||
<div class='card-body'>
|
||||
<div class="card-body">
|
||||
<el-steps :active="active" simple finish-status="success">
|
||||
<el-step title="文件上传/预处理"></el-step>
|
||||
<el-step title="拆分配置"></el-step>
|
||||
<el-step title="题词配置"></el-step>
|
||||
</el-steps>
|
||||
|
||||
<div class='components'>
|
||||
<step-preprocessing v-if='active===0' @getForm='getForm'></step-preprocessing>
|
||||
<split-config v-if='active===1'></split-config>
|
||||
<words v-if='active===2'></words>
|
||||
<div class="components">
|
||||
<step-preprocessing v-if="active === 0" @getForm="getForm"></step-preprocessing>
|
||||
<split-config v-if="active === 1"></split-config>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class='card-bottom'>
|
||||
<el-button type="primary" size='medium' @click='fetchApi'>一键处理</el-button>
|
||||
<el-button type="primary" size='medium' @click='active++' v-if='active<2'>下一步</el-button>
|
||||
<el-button type="primary" size='medium' v-if='active === 2'>确定</el-button>
|
||||
<el-button type="primary" size='medium' @click='active--' v-if='active>=1'>上一步</el-button>
|
||||
<el-button type="primary" size='medium' >取消</el-button>
|
||||
<div class="card-bottom">
|
||||
<el-button type="primary" size="medium" @click="fetchApi">一键处理</el-button>
|
||||
<el-button type="primary" size="medium" @click="active++" v-if="active < 2">下一步</el-button>
|
||||
<el-button type="primary" size="medium" v-if="active === 2">确定</el-button>
|
||||
<el-button type="primary" size="medium" @click="active--" v-if="active >= 1">上一步</el-button>
|
||||
<el-button type="primary" size="medium">取消</el-button>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
<el-drawer :visible.sync="visible" size="80%" title="预处理结果预览">
|
||||
<div class="mv10 mh20 text-right">
|
||||
<el-button type="primary" size="medium">保存并处理</el-button>
|
||||
<el-button size="medium">取消</el-button>
|
||||
</div>
|
||||
<div style="height:calc(100% - 55px);">
|
||||
<r-miner-u></r-miner-u>
|
||||
</div>
|
||||
</el-drawer>
|
||||
</div>
|
||||
</template>
|
||||
<script>
|
||||
import StepPreprocessing from './components/preprocessing.vue'
|
||||
import SplitConfig from '@/views/knowledge/detail/components/split/SplitConfig.vue'
|
||||
import Words from '@/views/knowledge/detail/components/words/Index.vue'
|
||||
// import StepC
|
||||
export default {
|
||||
name: 'create',
|
||||
data() {
|
||||
return {
|
||||
visible: true,
|
||||
active: 0
|
||||
}
|
||||
},
|
||||
props: {},
|
||||
watch: {},
|
||||
components: {
|
||||
Words,
|
||||
SplitConfig,
|
||||
StepPreprocessing
|
||||
},
|
||||
filters: {},
|
||||
methods: {
|
||||
getForm() {},
|
||||
fetchApi(){
|
||||
|
||||
},
|
||||
},
|
||||
created() {
|
||||
},
|
||||
mounted() {
|
||||
fetchApi() {}
|
||||
},
|
||||
created() {},
|
||||
mounted() {},
|
||||
computed: {}
|
||||
}
|
||||
</script>
|
||||
<style scoped lang='scss'>
|
||||
<style scoped lang="scss">
|
||||
/deep/.el-drawer__header {
|
||||
margin-bottom: unset;
|
||||
border-bottom: 1px solid #eee;
|
||||
padding-bottom: 20px;
|
||||
}
|
||||
|
||||
/deep/ .card-bottom {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
padding-top: 20px;
|
||||
padding-top: 10px;
|
||||
&::after {
|
||||
content: '';
|
||||
border-top: 1px solid #ebeef5;
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width:100%
|
||||
width: 100%;
|
||||
}
|
||||
& .el-button {
|
||||
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -47,6 +47,15 @@ module.exports = {
|
||||
//在webpack的名称字段中提供应用程序的标题,以便
|
||||
//可以在index.html中对其进行访问以注入正确的标题。
|
||||
name: name,
|
||||
module: {
|
||||
rules: [
|
||||
{
|
||||
test: /\.mjs$/,
|
||||
include: /node_modules/,
|
||||
type: 'javascript/auto',
|
||||
},
|
||||
],
|
||||
},
|
||||
resolve: {
|
||||
alias: {
|
||||
'@': resolve('src')
|
||||
|
||||
Reference in New Issue
Block a user