Files
ebiz-ai-knowledge-manage/src/components/RenderMinerU/index.vue
陈昱达 2171440a37 feat(components): 优化 PDF 渲染和 Markdown 处理
- 在 RenderMinerU 组件中添加 loading 状态,提升用户体验
- 重构 Markdown 数据处理逻辑,使用数组替代对象存储页面内容
- 优化 PDF 详细信息获取流程,支持异步加载
- 在 lineEcharts 组件中增加空数据处理,确保图表正确显示
2025-05-08 15:28:24 +08:00

888 lines
26 KiB
Vue
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<template>
<div style="height: 100%;">
<div class="flex align-items-c justify-content-b mb10">
<div class="fileName flex align-items-c">
<svg-icon
icon-class="pdf"
style="width: 20px;height: 20px"
class-name="mr10"
/>
{{ fileName }}
</div>
<div style="margin-left: 50px">
<el-radio-group v-model="tab" @change="changeTab" class="group-medium">
<el-radio-button label="0" name="0">预览</el-radio-button>
<el-radio-button label="1" name="1">编辑</el-radio-button>
</el-radio-group>
</div>
<div class=" text-right" v-if="isEdit">
<!-- 重试按钮 -->
<el-button
size="medium"
class="default"
@click="retryMiner"
:disabled="finishedMiner"
style="margin-right: 10px;"
plain
>重试</el-button
>
<!-- 保存并处理按钮 -->
<el-button
type="primary"
size="medium"
@click="saveMarkDown"
:disabled="finishedMiner"
>保存并处理</el-button
>
<el-button
type="primary"
size="medium"
class="line-button"
@click="uploadKnowledge"
:disabled="finishedMiner"
>直接上传至知识库</el-button
>
</div>
</div>
<div
:class="!isEdit ? 'mt10 flex' : 'flex'"
style="height:calc(100%);flex:1"
v-loading="finishenEnd"
element-loading-text="读取文档中..."
>
<iframe
v-if="isShowPdf"
id="iframe"
ref="iframe"
:src="
`${iframeSrc}/pdfjs-dist/web/viewer.html?file=${encodeURIComponent(
prdUrl
)}`
"
class="ebiz-pdf el-card "
></iframe>
<div
style="flex:1;width:50%"
class="ml10 ebiz-pdf-md"
v-loading="finishedMiner"
element-loading-text="正在识别中..."
>
<div class="el-card ebiz-pdf" style="height: 100%;overflow: hidden">
<div ref="scrollView" v-show="tab === '0'">
<div
class="view-body"
id="viewBody"
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden"
v-html="markdownHtml"
></div>
</div>
<div :disabled="!isEdit" v-if="isEdit" v-show="tab === '1'">
<div
class="lineHeight25 view-body"
contenteditable
id="md-editor"
ref="mdEditor"
@blur="emitMarkDown"
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden "
v-html="markdown"
></div>
</div>
</div>
</div>
</div>
</div>
</template>
<script>
import {
getPdfUrl,
minerUBbox,
minerUMarkDown,
minerUMarkDownUpdate,
minerURetry,
minerUQuery,
imageRetry,
getMd_info,
mdIndex
} from '@/api/generatedApi/index'
import { DEFAULT_COLOR_SECTION, PDF_COLOR_PICKER } from './pdf-color'
import MarkdownIt from 'markdown-it'
import markdownItKatex from 'markdown-it-katex'
import {
contentPage,
contentUpdate,
documentPdfStatus,
preprocessEmbedding
} from '@/api/generatedApi/pdfApi'
// Shared markdown-it renderer used for the preview pane.
// `html: true` is the markdown-it option that lets raw HTML in the source
// (e.g. the <table> markup handled below) pass through to the rendered output;
// the imported markdown-it-katex plugin is registered via `.use()`.
const md = new MarkdownIt({
html: true
}).use(markdownItKatex)
export default {
name: 'index',
data() {
return {
mdJsons: {},
finishenEnd: false,
fileName: '',
recordId: '', //pdf 记录的id
endEmit: false,
tab: '0',
mdPges: 0,
tableIdCounter: 0,
finishedMiner: false,
prdUrl: ``,
iframeSrc: window.location.origin,
bboxList: [],
// 对照表
selectionTable: [], // 表格对照表 记录表格与image_path 关系
selectionImagePath: '', // 当前表格对应的图片路径
markdown: '',
md,
markdownHtml: '',
page: 1,
copyTable: null,
tableActionButtons: [
{
label: '重新识别',
icon: 'el-icon-view',
name: 'retry',
click: tableElement => {
// 获取的 识别图片
this.selectionImagePath = ''
let chooseItem = this.findMatchingTable(tableElement)
if (!this.selectionImagePath) {
this.$message.error('未能识别当前表格图片')
return false
} else {
// this.finishedMiner = true
this.updateTableAttributes(tableElement, chooseItem)
let loading = this.$loading({
target: tableElement,
lockScroll: false,
// spinner: 'element-loading-spinner',
// background: 'rgba(0, 0, 0, 0.8)',
text: 'AI模型分析中....'
})
this.retryMinerImage(chooseItem, loading, tableElement)
}
}
},
{
label: '编辑',
name: 'edit',
icon: 'el-icon-edit-outline',
click: tableElement => {
this.copyTable = tableElement.innerHTML
tableElement.classList.remove('m-view')
tableElement.setAttribute('contenteditable', 'true')
let buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
buttonContainer.innerHTML = null
this.generateButton(
tableElement,
buttonContainer,
this.tableActionConfirm
)
}
},
{
label: '撤销',
icon: 'el-icon-refresh-left',
name: 'refresh',
click: tableElement => {
this.$messageBox(
() => {
let loading = this.$loading({
target: tableElement,
lockScroll: false,
text: '正在撤销中....'
})
let chooseItem = this.findMatchingTable(tableElement)
let tableMatch = chooseItem.html.match(
/<table>([\s\S]*)<\/table>/
)
if (tableMatch) {
let div = document.createElement('table')
div.innerHTML = tableMatch[1]
setTimeout(() => {
loading.close()
tableElement.innerHTML = tableMatch[1]
}, 1000)
}
},
'是否撤销当前识别内容?',
'warning',
'撤销提醒'
)
}
}
],
tableActionConfirm: [
{
label: '返回',
icon: 'el-icon-refresh-left',
name: 'back',
click: tableElement => {
tableElement.classList.add('m-view')
tableElement.setAttribute('contenteditable', 'false')
let buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
buttonContainer.innerHTML = null
this.generateButton(
tableElement,
buttonContainer,
this.tableActionButtons
)
}
}
// {
// label: '取消',
// icon: 'el-icon-edit',
// click: tableElement => {}
// }
]
}
},
// Public inputs of the component.
props: {
// Not read anywhere in this file.
visible: {
type: Boolean,
default: false
},
// Id of the document to display; watched below to load status and the PDF URL.
// NOTE(review): the default is a hard-coded id — looks like a development
// leftover; confirm whether an empty default is intended.
documentId: {
type: String,
default: '1361351897324294144'
},
// When true the action buttons and the editable pane are rendered.
isEdit: {
type: Boolean,
default: true
},
// When true the pdf.js iframe is rendered next to the markdown pane.
isShowPdf: {
type: Boolean,
default: true
}
},
watch: {
documentId: {
handler(newVal, oldVal) {
if (newVal) {
this.getMinerUStatus()
this.prdUrl = getPdfUrl({ documentId: newVal })
}
},
immediate: true
},
page: {
handler(newVal, oldVal) {
if (newVal) {
// this.changePage(newVal, this.tab)
this.getPDFDetailMarkDown()
}
}
}
},
components: {},
filters: {},
methods: {
// 上传文档到知识库
uploadKnowledge() {
this.$router.push({
path: '/knowledge/reviewKnowledge',
query: {
documentId: this.documentId,
datasetId: this.$route.query.datasetId,
active: '1'
}
})
// preprocessEmbedding({ documentId: this.documentId }).then(res => {
// if (res) {
// this.$message.success('上传成功')
// this.$router.push({
// path: '/knowledge/detail/segments',
// query: {
// documentId: this.documentId,
// datasetId: this.$route.query.datasetId
// }
// })
// }
// })
},
//changePage
// 分页发生改变时
changePage(page) {
let documentId = document.getElementById(`view-code-${page - 1}`)
let viewBody = document.getElementById('viewBody')
if (this.tab === '1') {
documentId = document.getElementById(`ebiz-code-${page - 1}`)
viewBody = document.getElementById('md-editor')
}
if (documentId) {
viewBody.scrollTo({
top: documentId.offsetTop - 130,
behavior: 'smooth'
})
}
},
//重新识别表格
retryMinerImage(chooseItem, loading, tableElement) {
imageRetry({
documentId: this.documentId,
imgName: chooseItem.image_path
}).then(res => {
if (res) {
loading.close()
let path = res.content.content
let tableMatch = path.match(/<table>([\s\S]*)<\/table>/)
if (tableMatch) {
tableElement.innerHTML = tableMatch[1]
}
}
})
},
// 生成视觉模型按钮
generateTableButtons() {
this.$nextTick(() => {
const mdHtml = document.getElementById('md-editor')
// 监听鼠标悬停事件,为表格元素添加浮层按钮
mdHtml.addEventListener('mouseover', e => {
const tableElement = e.target.closest('table')
if (tableElement) {
// 检查是否已经存在按钮容器,避免重复创建
let buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
if (!buttonContainer) {
buttonContainer = document.createElement('div')
buttonContainer.style.position = 'absolute'
buttonContainer.style.pointerEvents = 'none'
buttonContainer.style.zIndex = '9999'
buttonContainer.className = 'md-editor-setting'
buttonContainer.setAttribute('contenteditable', 'false')
// 调用生成按钮的方法
this.generateButton(tableElement, buttonContainer)
// 设置按钮位置在表格正中间浮动
const rect = tableElement.getBoundingClientRect()
buttonContainer.style.left = `10px` // 调整位置
buttonContainer.style.top = `-20px` // 调整位置
tableElement.appendChild(buttonContainer)
}
}
})
// 监听鼠标离开事件,移除浮层按钮
mdHtml.addEventListener('mouseout', e => {
const tableElement = e.target.closest('table')
if (!tableElement || !tableElement.contains(e.relatedTarget)) {
const buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
if (buttonContainer) {
buttonContainer.remove()
}
}
})
})
},
// 封装 按钮生成事件
generateButton(tableElement, buttonContainer, actionButtons) {
// 获取表格的可编辑状态
let contenteditable = tableElement.getAttribute('contenteditable')
// 根据可编辑状态选择按钮配置
let buttons = !actionButtons
? contenteditable === 'false'
? this.tableActionButtons
: this.tableActionConfirm
: actionButtons
let pathId = tableElement.getAttribute('data-path-id')
// 循环按钮配置,动态生成按钮并绑定点击事件
for (let i = 0; i < buttons.length; i++) {
const icon = document.createElement('i')
const button = document.createElement('button')
icon.className = `${buttons[i].icon} public-icon`
button.appendChild(icon)
// 悬浮提示
button.setAttribute('title', buttons[i].label)
button.className =
'el-button el-button--primary el-button--mini editor-button is-plain'
button.style.pointerEvents = 'auto'
button.addEventListener('click', () => {
buttons[i].click(tableElement)
})
buttonContainer.appendChild(button)
}
},
// 查找匹配的表格
findMatchingTable(tableElement) {
let chooseItem = null
let pathId = tableElement.getAttribute('data-path-id')
if (pathId) {
let pathType = tableElement.getAttribute('data-path-type')
this.selectionImagePath = pathId + pathType
chooseItem = this.selectionTable.find(item => {
if (item.image_path === this.selectionImagePath) {
return item
}
return false
})
} else {
// 如果没有路径 ID则通过表格内容匹配对照表
let tableText = this.getTableText(tableElement)
chooseItem = this.selectionTable.find(item => {
if (item.html) {
let itemText = this.getTableTextFromHtml(item.html)
return tableText === itemText
}
return false
})
if (chooseItem) {
this.selectionImagePath = chooseItem.image_path
}
}
return chooseItem
},
// 获取表格文本
getTableText(tableElement) {
let stringTable = tableElement.innerHTML
let tbodyMatch = stringTable.match(/<tbody>([\s\S]*)<\/tbody>/)
if (tbodyMatch) {
let newTable = document.createElement('table')
newTable.innerHTML = tbodyMatch[1]
return newTable.innerText
}
return ''
},
// 从 HTML 中获取表格文本
getTableTextFromHtml(html) {
let tableMatch = html.match(/<table>([\s\S]*)<\/table>/)
if (tableMatch) {
let domTable = document.createElement('table')
domTable.innerHTML = tableMatch[1]
return domTable.innerText
}
return ''
},
// 更新表格属性
updateTableAttributes(tableElement, chooseItem) {
tableElement.setAttribute(
'data-path-id',
this.selectionImagePath.replace(/\.[^/.]+$/, '')
)
let fileType = this.selectionImagePath.match(/\.[^/.]+$/)[0]
tableElement.setAttribute('data-path-type', fileType)
},
// 获取bbox 的 表格图片生成对照表
extractTableData(table) {
;(table.blocks ? table.blocks : []).map(lines => {
lines.lines.map(spans => {
spans.spans.map(span => {
this.selectionTable.push({
html: span.html,
image_path: span.image_path
})
})
})
})
},
// 导出
emitMarkDown() {
let pre = document.getElementById('md-editor').innerText
this.$emit('getMarkDownIt', { innerText: pre })
},
// 保存markdown
async saveMarkDown() {
let promises = []
this.mdJsons.map(item => {
let promise = contentUpdate({
documentId: this.documentId,
pageIndex: item.key,
newMd: item.value
})
promises.push(promise)
})
try {
await Promise.all(promises)
this.$emit('saveMarkDown', true)
this.$message.success('保存成功')
} catch (error) {
this.$message.error('保存失败')
console.error(error)
}
},
// 给文件增加色块
formatJson(data) {
return data.map(item => {
let bboxes = []
// 处理预处理块
if (item.preproc_blocks && item.preproc_blocks.length > 0) {
item.preproc_blocks.forEach(block => {
bboxes.push({
type: block.type,
bbox: block.bbox,
color: PDF_COLOR_PICKER[block.type] || DEFAULT_COLOR_SECTION
})
})
}
// 处理丢弃块
if (item.discarded_blocks && item.discarded_blocks.length > 0) {
item.discarded_blocks.forEach(block => {
bboxes.push({
type: block.type,
bbox: block.bbox,
color: PDF_COLOR_PICKER[block.type] || DEFAULT_COLOR_SECTION
})
})
}
return {
...item,
bboxes
}
})
},
// 填充对照表
fillSelectionTable(selection) {
selection.map(item => {
if (item.preproc_blocks && item.preproc_blocks.length > 0) {
item.preproc_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
// 处理丢弃块
if (item.discarded_blocks && item.discarded_blocks.length > 0) {
item.discarded_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
})
},
// bbox 解析 传递 颜色
getPDFDetailBbox() {
minerUBbox({ documentId: this.documentId }).then(res => {
this.bboxList = this.formatJson(
JSON.parse(JSON.stringify(res.content.content.pdf_info))
)
this.fillSelectionTable(
JSON.parse(JSON.stringify(res.content.content.pdf_info))
)
// this.$refs.iframe 重新刷新iframe
this.$refs.iframe.contentWindow.location.reload()
this.getPDFDetailMarkDown()
})
},
// 渲染markdown
renderMarkDown() {
this.markdown = this.markdown
.replace(/<table/g, () => {
const uniqueId = `table-${this.tableIdCounter++}`
return `<table contenteditable='false' class="m-view"`
})
.replace(/<script/g, '< script')
this.markdownHtml = this.md.render(
this.markdown.replace(/class="m-view"/g, '')
)
},
// tab 切换
changeTab(evt) {
let editor = document.getElementById('md-editor')
// pre 获取 table的html 给table 增加 对照表的path id
let pre = editor.innerHTML
let tables = editor.querySelectorAll('table')
if (tables) {
tables.forEach(item => {
let pathId = item.getAttribute('data-path-id')
if (!pathId) {
let ite = this.findMatchingTable(item)
if (ite) {
item.setAttribute(
'data-path-id',
ite.image_path.replace(/\.[^/.]+$/, '')
)
item.setAttribute(
'data-path-type',
ite.image_path.match(/\.[^/.]+$/)[0]
)
}
}
})
}
this.mdJsons.map(item => {
if (item.key === this.page - 1) {
item.value = pre
}
})
// this.mdJsons[this.page - 1] = pre
this.markdownHtml = md.render(
this.mdJsons
.find(item => item.key === this.page - 1)
.value.replace(/class="m-view"/g, '')
)
},
// 初始md 文档
async getPDFDetailMarkDown() {
if (this.mdJsons.length > 0) {
let findValue = this.mdJsons.find(item => item.key === this.page - 1)
.value
if (findValue) {
this.markdown = findValue
} else {
// 将所有md 更新到本地 后续所有操作都将在本地进行处理
this.finishenEnd = true
let values = await mdIndex({
index: this.page - 1,
documentId: this.documentId
})
this.mdJsons.push({
key: this.page - 1,
value: values
})
// this.markdown = this.mdJsons[this.page - 1].value
this.finishenEnd = false
}
} else {
// 将所有md 更新到本地 后续所有操作都将在本地进行处理
this.finishenEnd = true
let array = []
for (let i = 0; i < this.mdPges; i++) {
// if (i == 3) {
// array.push({
// key: i,
// value: '401'
// })
// } else {
let values = await mdIndex({
index: i,
documentId: this.documentId
})
array.push({
key: i,
value: values
})
// }
}
this.mdJsons = array
this.markdown = this.mdJsons.find(
item => item.key === this.page - 1
).value
// this.markdown = this.mdJsons[this.page - 1].value
this.finishenEnd = false
}
// if (
// this.mdJsons.length > 0 &&
// this.mdJsons.find(item => item.key === this.page - 1).value
// ) {
// this.markdown = this.mdJsons.find(
// item => item.key === this.page - 1
// ).value
// } else {
//
// }
this.renderMarkDown()
},
// 向 iframe 发送消息
sendMessageToIframe(type, message) {
// 获取 iframe 元素
const iframe = document.getElementById('iframe')
// 检查 iframe 是否存在并且可以发送消息
if (iframe && iframe.contentWindow) {
iframe.contentWindow.postMessage(
{
type,
data: message
},
'*'
)
}
},
// 获取识别状态
getMinerUStatus() {
// INITIAL(0, "等待中"),
// PROCESSING(1, "处理中"),
// SUCCESS(2, "处理成功"),
// FAILURE(3, "处理失败")
this.finishedMiner = true
documentPdfStatus({ documentId: this.documentId }).then(res => {
if (res) {
this.mdPges = res.content.content.pages
this.fileName = res.content.content.name
this.recordId = res.content.content.recordId
switch (res.content.content.status) {
case 0:
case '0':
case 1:
case '1':
setTimeout(() => {
this.getMinerUStatus()
}, 5000)
break
case 2:
case '2':
this.finishedMiner = false
this.getPDFDetailBbox()
break
case 3:
case '3':
this.finishedMiner = false
this.$message({
message: '识别失败',
type: 'error'
})
break
default:
this.finishedMiner = false
this.$message({
message: '处理异常,请重新识别',
type: 'error'
})
break
}
}
})
},
// 重试方法
retryMiner() {
this.endEmit = true
minerURetry({ documentId: this.documentId }).then(res => {
if (res) {
this.$message({
message: '正在重新预处理',
type: 'success'
})
this.finishedMiner = true
this.getMinerUStatus() // 重新查询状态
}
})
},
// 处理滚动事件
handleScroll() {
let viewBody = this.$refs.viewBody
let viewCodes = this.$refs.viewBody.querySelectorAll('view-code')
if (this.tab === '1') {
viewBody = this.$refs.mdEditor
viewCodes = this.$refs.mdEditor.querySelectorAll('ebiz-code')
}
const viewBodyTop = viewBody.scrollTop
const viewBodyHeight = viewBody.clientHeight
let currentPage = this.page
for (let i = 0; i < viewCodes.length; i++) {
const viewCode = viewCodes[i]
const viewCodeTop = viewCode.offsetTop
if (
viewCodeTop >= viewBodyTop &&
viewCodeTop < viewBodyTop + viewBodyHeight / 2
) {
currentPage = i + 1
break
}
}
if (currentPage !== this.page) {
this.page = currentPage
this.sendMessageToIframe('setPage', currentPage)
}
}
},
created() {},
mounted() {
// 监听 iframe 的 postMessage 事件
window.addEventListener('message', event => {
// 检查消息来源是否合法
if (event.origin === window.location.origin) {
// 根据消息状态执行不同操作
switch (event.data.status) {
case 'loaded':
// 格式化 JSON 数据并发送给 iframe
this.sendMessageToIframe('initExtractLayerData', this.bboxList)
this.sendMessageToIframe('title', '')
break
}
}
if (event.data.pageNum) {
const num = event.data.pageNum || 1
this.sendMessageToIframe('pageChange', num)
}
if (event.data.pageNumDetail) {
const pageNumDetail = event.data.pageNumDetail || 1
this.page = pageNumDetail
this.sendMessageToIframe('pageNumDetail', pageNumDetail)
}
})
this.generateTableButtons()
//
// this.$refs.viewBody.onscroll = this.handleScroll
// this.$refs.mdEditor.onscroll = this.handleScroll
},
computed: {}
}
</script>
<style scoped lang="scss">
.lineHeight25 {
line-height: 25px;
}
.fileName {
font-family: PingFangSC, PingFang SC;
font-weight: 600;
font-size: 12px;
color: #3a3f4f;
line-height: 17px;
text-align: left;
font-style: normal;
}
.ebiz-pdf {
outline: none;
border-radius: 7px;
flex: 1;
}
#md-editor {
//超过宽度自动折行
white-space: pre-wrap;
word-wrap: break-word;
overflow: auto;
outline: none;
border: none;
}
.ebiz-pdf-md {
border-radius: 7px;
box-shadow: unset;
/deep/ .el-tabs--border-card {
box-shadow: unset;
border-radius: 7px;
& .el-tabs__header {
border: none;
}
& .el-tabs__content {
overflow: auto;
}
}
//height: calc(100% - 30px);
.tabs__content {
}
}
</style>