feat(api): 新增 PDF 文档处理相关 API

- 新增 preprocessEmbedding、contentPage、documentPdfStatus 和 contentUpdate 等 PDF 处理相关 API
- 重构 mdIndex API,添加 zixi 参数以支持不同服务
- 优化 button 组件样式,增加禁用状态样式
- 更新 RenderMinerU 组件,支持 PDF 文档的预览和编辑功能
- 新增 index2.vue 文件,实现 PDF 文档的详细编辑界面
- 更新 knowledge/detail/create.vue,修改默认文档 ID
This commit is contained in:
陈昱达
2025-04-27 18:20:12 +08:00
parent cdf2d6bf3c
commit 9285c3b2f8
5 changed files with 967 additions and 113 deletions

View File

@@ -59,7 +59,7 @@ export function datasetsExTaskPages(data) {
//minerU 获取bbox
export function minerUBbox(params) {
return request({
url: getUrl('/document/mineru/bbox_json', 'zixi'),
url: getUrl('/document/mineru/bbox_json'),
method: 'get',
params: params
})
@@ -67,7 +67,7 @@ export function minerUBbox(params) {
//minerU 重试
export function minerURetry(params) {
return request({
url: getUrl('/document/mineru/retry', 'zixi'),
url: getUrl('/document/mineru/retry'),
method: 'get',
params: params,
noLoading: true
@@ -76,7 +76,7 @@ export function minerURetry(params) {
//minerU 获取文档处理状态
export function minerUQuery(params) {
return request({
url: getUrl('/dataset/document/query', 'zixi'),
url: getUrl('/dataset/document/query'),
method: 'get',
params: params,
noLoading: true
@@ -84,17 +84,17 @@ export function minerUQuery(params) {
}
//minerU 获取markdown
export function minerUMarkDown(params) {
return getUrl(`/document/mineru/md?documentId=${params.documentId}`, 'zixi')
return getUrl(`/document/mineru/md?documentId=${params.documentId}`)
}
// minerU 获取pdf
export function getPdfUrl(params) {
return getUrl(`/datasetDocumentEx/preview?id=${params.documentId}`, 'zixi')
return getUrl(`/datasetDocumentEx/preview?id=${params.documentId}`)
}
export function minerUMarkDownUpdate(data) {
return request({
url: getUrl(`/document/mineru/md/update`, 'zixi'),
url: getUrl(`/document/mineru/md/update`),
method: 'post',
data
})
@@ -209,7 +209,7 @@ export function saveContentToDocument(data) {
export function uploadFileByCustom(data) {
return request({
url: getUrl(`/datasetDocumentEx/upload/custom`, 'zixi'),
url: getUrl(`/datasetDocumentEx/upload/custom`),
method: 'post',
data,
headers: {

View File

@@ -0,0 +1,34 @@
import request from '@/assets/js/utils/request'
import getUrl from '@/assets/js/utils/get-url'
/**
 * Issues a POST request to `/datasetDocumentEx/preprocess/embedding`.
 *
 * @param {*} data - Request body; forwarded unchanged as `data`.
 * @returns {*} Whatever the shared `request` helper returns for this call.
 */
export function preprocessEmbedding(data) {
  const url = getUrl('/datasetDocumentEx/preprocess/embedding')
  return request({ url, method: 'post', data })
}
/**
 * Issues a GET request to `/document/mineru/content/page`.
 *
 * @param {*} data - Forwarded unchanged as the request's `params`.
 * @returns {*} Whatever the shared `request` helper returns for this call.
 */
export function contentPage(data) {
  const url = getUrl('/document/mineru/content/page')
  return request({ url, method: 'get', params: data })
}
/**
 * Issues a GET request to `/document/mineru/status`.
 *
 * @param {*} data - Forwarded unchanged as the request's `params`.
 * @returns {*} Whatever the shared `request` helper returns for this call.
 */
export function documentPdfStatus(data) {
  const url = getUrl('/document/mineru/status')
  return request({ url, method: 'get', params: data })
}
/**
 * Issues a POST request to `/document/mineru/md/page/update`.
 *
 * @param {*} data - Request body; forwarded unchanged as `data`.
 * @returns {*} Whatever the shared `request` helper returns for this call.
 */
export function contentUpdate(data) {
  const url = getUrl('/document/mineru/md/page/update')
  return request({ url, method: 'post', data })
}

View File

@@ -97,6 +97,10 @@
border: none;
color: #fff;
}
&.is-disabled {
background: var(--color-primary-disabled);
border-color: transparent;
}
}
&.el-button--medium {
padding: 8px 20px;

View File

@@ -34,6 +34,14 @@
:disabled="finishedMiner"
>保存并处理</el-button
>
<el-button
type="primary"
size="medium"
class="line-button"
@click="uploadKnowledge"
:disabled="finishedMiner"
>直接上传至知识库</el-button
>
</div>
</div>
@@ -53,7 +61,7 @@
class="ebiz-pdf el-card "
></iframe>
<div
style="flex:1;"
style="flex:1;width:50%"
class="ml10 ebiz-pdf-md"
v-loading="finishedMiner"
element-loading-text="正在识别中..."
@@ -61,22 +69,21 @@
<div class="el-card ebiz-pdf" style="height: 100%;overflow: hidden">
<div ref="scrollView" v-show="tab === '0'">
<div
v-html="markdownHtml"
class="view-body"
id="viewBody"
ref="viewBody"
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden "
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden"
v-html="markdownHtml"
></div>
</div>
<div :disabled="!isEdit" v-if="isEdit" v-show="tab === '1'">
<div
class="lineH25 view-body"
class="lineHeight25 view-body"
contenteditable
id="md-editor"
ref="mdEditor"
@blur="emitMarkDown"
v-html="markdown"
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden "
v-html="markdown"
></div>
</div>
</div>
@@ -99,6 +106,12 @@ import {
import { DEFAULT_COLOR_SECTION, PDF_COLOR_PICKER } from './pdf-color'
import MarkdownIt from 'markdown-it'
import markdownItKatex from 'markdown-it-katex'
import {
contentPage,
contentUpdate,
documentPdfStatus,
preprocessEmbedding
} from '@/api/generatedApi/pdfApi'
const md = new MarkdownIt({
html: true
}).use(markdownItKatex)
@@ -107,7 +120,10 @@ export default {
name: 'index',
data() {
return {
mdJsons: {},
fileName: '',
recordId: '', //pdf 记录的id
endEmit: false,
tab: '0',
mdPges: 0,
@@ -260,7 +276,9 @@ export default {
page: {
handler(newVal, oldVal) {
if (newVal) {
this.changePage(newVal, this.tab)
// this.changePage(newVal, this.tab)
this.getPDFDetailMarkDown()
}
}
}
@@ -268,6 +286,21 @@ export default {
components: {},
filters: {},
methods: {
uploadKnowledge() {
preprocessEmbedding({ documentId: this.documentId }).then(res => {
if (res) {
this.$message.success('上传成功')
this.$router.push({
path: '/knowledge/detail/segments',
query: {
documentId: this.documentId,
datasetId: this.$route.query.datasetId
}
})
}
})
},
//changePage
// 分页发生改变时
changePage(page) {
@@ -375,6 +408,7 @@ export default {
buttonContainer.appendChild(button)
}
},
// 查找匹配的表格
findMatchingTable(tableElement) {
let chooseItem = null
@@ -447,41 +481,30 @@ export default {
})
})
},
// 填充对照表
fillSelectionTable(selection) {
selection.map(item => {
if (item.preproc_blocks && item.preproc_blocks.length > 0) {
item.preproc_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
// 处理丢弃块
if (item.discarded_blocks && item.discarded_blocks.length > 0) {
item.discarded_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
})
},
// 导出
emitMarkDown() {
let pre = document.getElementById('md-editor').innerText
this.$emit('getMarkDownIt', { innerText: pre })
},
// 保存markdown
saveMarkDown() {
let pre = document.getElementById('md-editor').innerHTML
// pre + ebiz-code标签
minerUMarkDownUpdate({
async saveMarkDown() {
let promises = []
for (let item in this.mdJsons) {
let promise = contentUpdate({
documentId: this.documentId,
newMd: pre
}).then(res => {
this.$emit('saveMarkDown', true)
pageIndex: item,
newMd: this.mdJsons[item]
})
promises.push(promise)
}
try {
await Promise.all(promises)
this.$emit('saveMarkDown', true)
this.$message.success('保存成功')
} catch (error) {
this.$message.error('保存失败')
console.error(error)
}
},
// 给文件增加色块
formatJson(data) {
@@ -514,37 +537,43 @@ export default {
}
})
},
// 填充对照表
fillSelectionTable(selection) {
selection.map(item => {
if (item.preproc_blocks && item.preproc_blocks.length > 0) {
item.preproc_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
// 处理丢弃块
if (item.discarded_blocks && item.discarded_blocks.length > 0) {
item.discarded_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
})
},
// bbox 解析 传递 颜色
getPDFDetailBbox() {
minerUBbox({ documentId: this.documentId }).then(res => {
this.bboxList = this.formatJson(
JSON.parse(JSON.stringify(res.content.content))
JSON.parse(JSON.stringify(res.content.content.pdf_info))
)
this.fillSelectionTable(JSON.parse(JSON.stringify(res.content.content)))
this.fillSelectionTable(
JSON.parse(JSON.stringify(res.content.content.pdf_info))
)
// this.$refs.iframe 重新刷新iframe
this.$refs.iframe.contentWindow.location.reload()
this.getPDFDetailMarkDown()
})
},
// 获取md分页
getMdPage(back) {
let responseText = ''
getMd_info({ documentId: this.documentId }).then(async res => {
if (res) {
this.mdPges = res.content.content.indexList
for (let i = 0; i < this.mdPges.length; i++) {
let text = await mdIndex({ index: i, documentId: this.documentId })
responseText += `<ebiz-code id='ebiz-code-${i}'></ebiz-code>
${text}`
}
back(responseText)
}
})
},
// 渲染markdown
renderMarkDown() {
this.markdown = this.markdown
@@ -555,9 +584,7 @@ ${text}`
.replace(/<script/g, '< script')
this.markdownHtml = this.md.render(
this.markdown
.replace(/class="m-view"/g, '')
.replace(/ebiz-code/g, 'view-code')
this.markdown.replace(/class="m-view"/g, '')
)
},
// tab 切换
@@ -584,31 +611,24 @@ ${text}`
}
})
}
// 给 copyMdHtml 里面的table 增加 class m-view
// copyMdHtml = copyMdHtml.re
this.mdJsons[this.page - 1] = pre
this.markdownHtml = md.render(
pre.replace(/class="m-view"/g, '').replace(/ebiz-code/g, 'view-code')
this.mdJsons[this.page - 1].replace(/class="m-view"/g, '')
)
setTimeout(() => {
this.changePage(this.page, evt)
}, 100)
},
// 初始md 文档
async getPDFDetailMarkDown() {
// responseText 判断是否包含ebiz-code
const response = await fetch(
minerUMarkDown({ documentId: this.documentId })
)
this.markdown = await response.text()
// this.markdown 包含 ebiz-code
if (this.markdown.indexOf('<ebiz-code ') < 0) {
this.getMdPage(responseText => {
this.markdown = responseText
this.renderMarkDown()
})
if (this.mdJsons[this.page - 1]) {
this.markdown = this.mdJsons[this.page - 1]
} else {
this.renderMarkDown()
this.mdJsons[this.page - 1] = await mdIndex({
index: this.page - 1,
documentId: this.documentId
})
this.markdown = this.mdJsons[this.page - 1]
}
this.renderMarkDown()
},
// 向 iframe 发送消息
sendMessageToIframe(type, message) {
@@ -627,49 +647,61 @@ ${text}`
},
// 获取识别状态
getMinerUStatus() {
// INITIAL(0, "等待中"),
// PROCESSING(1, "处理中"),
// SUCCESS(2, "处理成功"),
// FAILURE(3, "处理失败")
this.finishedMiner = true
minerUQuery({ id: this.documentId }).then(res => {
let mineruStatus = res.content.content.mineruStatus
this.fileName = res.content.content.fileName
switch (mineruStatus) {
documentPdfStatus({ documentId: this.documentId }).then(res => {
if (res) {
this.mdPges = res.content.content.pages
this.fileName = res.content.content.name
this.recordId = res.content.content.recordId
switch (res.content.content.status) {
case 0:
case '0':
case 1:
case '1':
setTimeout(() => {
this.getMinerUStatus()
}, 5000)
break
case 1:
case '1':
case 2:
case '2':
this.finishedMiner = false
this.getPDFDetailBbox()
break
case 2:
case '2':
case -1:
case '-1':
case 3:
case '3':
this.finishedMiner = false
this.$message({
message: '识别失败',
type: 'error'
})
break
default:
this.finishedMiner = false
this.$message({
message: '处理异常,请重新识别',
type: 'error'
})
break
}
}
})
},
// 重试方法
retryMiner() {
this.endEmit = true
minerURetry({ documentId: this.documentId })
.then(res => {
minerURetry({ documentId: this.documentId }).then(res => {
if (res) {
this.$message({
message: '正在重新预处理',
type: 'success'
})
this.finishedMiner = true
this.getMinerUStatus() // 重新查询状态
})
.catch(() => {
this.$message.error('重试操作失败')
}
})
},
@@ -738,6 +770,9 @@ ${text}`
}
</script>
<style scoped lang="scss">
.lineHeight25 {
line-height: 25px;
}
.fileName {
font-family: PingFangSC, PingFang SC;
font-weight: 600;

View File

@@ -0,0 +1,781 @@
<template>
<div style="height: 100%;">
<div class="flex align-items-c justify-content-b mb10">
<div class="fileName flex align-items-c">
<svg-icon
icon-class="pdf"
style="width: 20px;height: 20px"
class-name="mr10"
/>
{{ fileName }}
</div>
<div style="margin-left: 50px">
<el-radio-group v-model="tab" @change="changeTab" class="group-medium">
<el-radio-button label="0" name="0">预览</el-radio-button>
<el-radio-button label="1" name="1">编辑</el-radio-button>
</el-radio-group>
</div>
<div class=" text-right" v-if="isEdit">
<!-- 重试按钮 -->
<el-button
size="medium"
class="default"
@click="retryMiner"
:disabled="finishedMiner"
style="margin-right: 10px;"
plain
>重试</el-button
>
<!-- 保存并处理按钮 -->
<el-button
type="primary"
size="medium"
@click="saveMarkDown"
:disabled="finishedMiner"
>保存并处理</el-button
>
</div>
</div>
<div
:class="!isEdit ? 'mt10 flex' : 'flex'"
style="height:calc(100%);flex:1"
>
<iframe
v-if="isShowPdf"
id="iframe"
ref="iframe"
:src="
`${iframeSrc}/pdfjs-dist/web/viewer.html?file=${encodeURIComponent(
prdUrl
)}`
"
class="ebiz-pdf el-card "
></iframe>
<div
style="flex:1;"
class="ml10 ebiz-pdf-md"
v-loading="finishedMiner"
element-loading-text="正在识别中..."
>
<div class="el-card ebiz-pdf" style="height: 100%;overflow: hidden">
<div ref="scrollView" v-show="tab === '0'">
<div
v-html="markdownHtml"
class="view-body"
id="viewBody"
ref="viewBody"
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden "
></div>
</div>
<div :disabled="!isEdit" v-if="isEdit" v-show="tab === '1'">
<div
class="lineH25 view-body"
contenteditable
id="md-editor"
ref="mdEditor"
@blur="emitMarkDown"
v-html="markdown"
style="height:calc(100vh - 180px);overflow-y: scroll;overflow-x:hidden "
></div>
</div>
</div>
</div>
</div>
</div>
</template>
<script>
import {
getPdfUrl,
minerUBbox,
minerUMarkDown,
minerUMarkDownUpdate,
minerURetry,
minerUQuery,
imageRetry,
getMd_info,
mdIndex
} from '@/api/generatedApi/index'
import { DEFAULT_COLOR_SECTION, PDF_COLOR_PICKER } from './pdf-color'
import MarkdownIt from 'markdown-it'
import markdownItKatex from 'markdown-it-katex'
const md = new MarkdownIt({
html: true
}).use(markdownItKatex)
export default {
name: 'index',
data() {
return {
fileName: '',
endEmit: false,
tab: '0',
mdPges: 0,
tableIdCounter: 0,
finishedMiner: false,
prdUrl: ``,
iframeSrc: window.location.origin,
bboxList: [],
// 对照表
selectionTable: [], // 表格对照表 记录表格与image_path 关系
selectionImagePath: '', // 当前表格对应的图片路径
markdown: '',
md,
markdownHtml: '',
page: 1,
copyTable: null,
tableActionButtons: [
{
label: '重新识别',
icon: 'el-icon-view',
name: 'retry',
click: tableElement => {
// 获取的 识别图片
this.selectionImagePath = ''
let chooseItem = this.findMatchingTable(tableElement)
if (!this.selectionImagePath) {
this.$message.error('未能识别当前表格图片')
return false
} else {
// this.finishedMiner = true
this.updateTableAttributes(tableElement, chooseItem)
let loading = this.$loading({
target: tableElement,
lockScroll: false,
// spinner: 'element-loading-spinner',
// background: 'rgba(0, 0, 0, 0.8)',
text: 'AI模型分析中....'
})
this.retryMinerImage(chooseItem, loading, tableElement)
}
}
},
{
label: '编辑',
name: 'edit',
icon: 'el-icon-edit-outline',
click: tableElement => {
this.copyTable = tableElement.innerHTML
tableElement.classList.remove('m-view')
tableElement.setAttribute('contenteditable', 'true')
let buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
buttonContainer.innerHTML = null
this.generateButton(
tableElement,
buttonContainer,
this.tableActionConfirm
)
}
},
{
label: '撤销',
icon: 'el-icon-refresh-left',
name: 'refresh',
click: tableElement => {
this.$messageBox(
() => {
let loading = this.$loading({
target: tableElement,
lockScroll: false,
text: '正在撤销中....'
})
let chooseItem = this.findMatchingTable(tableElement)
let tableMatch = chooseItem.html.match(
/<table>([\s\S]*)<\/table>/
)
if (tableMatch) {
let div = document.createElement('table')
div.innerHTML = tableMatch[1]
setTimeout(() => {
loading.close()
tableElement.innerHTML = tableMatch[1]
}, 1000)
}
},
'是否撤销当前识别内容?',
'warning',
'撤销提醒'
)
}
}
],
tableActionConfirm: [
{
label: '返回',
icon: 'el-icon-refresh-left',
name: 'back',
click: tableElement => {
tableElement.classList.add('m-view')
tableElement.setAttribute('contenteditable', 'false')
let buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
buttonContainer.innerHTML = null
this.generateButton(
tableElement,
buttonContainer,
this.tableActionButtons
)
}
}
// {
// label: '取消',
// icon: 'el-icon-edit',
// click: tableElement => {}
// }
]
}
},
// Props accepted by the component.
props: {
// Boolean flag, false by default.
// NOTE(review): not referenced in the visible template or script — confirm it is still needed.
visible: {
type: Boolean,
default: false
},
// Id of the document to display. The documentId watcher (immediate) uses it
// to query the recognition status and to build the PDF preview URL.
// NOTE(review): the default is a hard-coded id — confirm this is intended outside of debugging.
documentId: {
type: String,
default: '1361351897324294144'
},
// When true, the retry/save button bar and the editable markdown pane are rendered (v-if in the template).
isEdit: {
type: Boolean,
default: true
},
// When true, the PDF viewer iframe is rendered (v-if in the template).
isShowPdf: {
type: Boolean,
default: true
}
},
watch: {
documentId: {
handler(newVal, oldVal) {
if (newVal) {
this.getMinerUStatus()
this.prdUrl = getPdfUrl({ documentId: newVal })
}
},
immediate: true
},
page: {
handler(newVal, oldVal) {
if (newVal) {
this.changePage(newVal, this.tab)
}
}
}
},
components: {},
filters: {},
methods: {
//changePage
// 分页发生改变时
changePage(page) {
let documentId = document.getElementById(`view-code-${page - 1}`)
let viewBody = document.getElementById('viewBody')
if (this.tab === '1') {
documentId = document.getElementById(`ebiz-code-${page - 1}`)
viewBody = document.getElementById('md-editor')
}
if (documentId) {
viewBody.scrollTo({
top: documentId.offsetTop - 130,
behavior: 'smooth'
})
}
},
//重新识别表格
retryMinerImage(chooseItem, loading, tableElement) {
imageRetry({
documentId: this.documentId,
imgName: chooseItem.image_path
}).then(res => {
if (res) {
loading.close()
let path = res.content.content
let tableMatch = path.match(/<table>([\s\S]*)<\/table>/)
if (tableMatch) {
tableElement.innerHTML = tableMatch[1]
}
}
})
},
// 生成视觉模型按钮
generateTableButtons() {
this.$nextTick(() => {
const mdHtml = document.getElementById('md-editor')
// 监听鼠标悬停事件,为表格元素添加浮层按钮
mdHtml.addEventListener('mouseover', e => {
const tableElement = e.target.closest('table')
if (tableElement) {
// 检查是否已经存在按钮容器,避免重复创建
let buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
if (!buttonContainer) {
buttonContainer = document.createElement('div')
buttonContainer.style.position = 'absolute'
buttonContainer.style.pointerEvents = 'none'
buttonContainer.style.zIndex = '9999'
buttonContainer.className = 'md-editor-setting'
buttonContainer.setAttribute('contenteditable', 'false')
// 调用生成按钮的方法
this.generateButton(tableElement, buttonContainer)
// 设置按钮位置在表格正中间浮动
const rect = tableElement.getBoundingClientRect()
buttonContainer.style.left = `10px` // 调整位置
buttonContainer.style.top = `-20px` // 调整位置
tableElement.appendChild(buttonContainer)
}
}
})
// 监听鼠标离开事件,移除浮层按钮
mdHtml.addEventListener('mouseout', e => {
const tableElement = e.target.closest('table')
if (!tableElement || !tableElement.contains(e.relatedTarget)) {
const buttonContainer = tableElement.querySelector(
'.md-editor-setting'
)
if (buttonContainer) {
buttonContainer.remove()
}
}
})
})
},
// 封装 按钮生成事件
generateButton(tableElement, buttonContainer, actionButtons) {
// 获取表格的可编辑状态
let contenteditable = tableElement.getAttribute('contenteditable')
// 根据可编辑状态选择按钮配置
let buttons = !actionButtons
? contenteditable === 'false'
? this.tableActionButtons
: this.tableActionConfirm
: actionButtons
let pathId = tableElement.getAttribute('data-path-id')
// 循环按钮配置,动态生成按钮并绑定点击事件
for (let i = 0; i < buttons.length; i++) {
const icon = document.createElement('i')
const button = document.createElement('button')
icon.className = `${buttons[i].icon} public-icon`
button.appendChild(icon)
// 悬浮提示
button.setAttribute('title', buttons[i].label)
button.className =
'el-button el-button--primary el-button--mini editor-button is-plain'
button.style.pointerEvents = 'auto'
button.addEventListener('click', () => {
buttons[i].click(tableElement)
})
buttonContainer.appendChild(button)
}
},
// 查找匹配的表格
findMatchingTable(tableElement) {
let chooseItem = null
let pathId = tableElement.getAttribute('data-path-id')
if (pathId) {
let pathType = tableElement.getAttribute('data-path-type')
this.selectionImagePath = pathId + pathType
chooseItem = this.selectionTable.find(item => {
if (item.image_path === this.selectionImagePath) {
return item
}
return false
})
} else {
// 如果没有路径 ID则通过表格内容匹配对照表
let tableText = this.getTableText(tableElement)
chooseItem = this.selectionTable.find(item => {
if (item.html) {
let itemText = this.getTableTextFromHtml(item.html)
return tableText === itemText
}
return false
})
if (chooseItem) {
this.selectionImagePath = chooseItem.image_path
}
}
return chooseItem
},
// 获取表格文本
getTableText(tableElement) {
let stringTable = tableElement.innerHTML
let tbodyMatch = stringTable.match(/<tbody>([\s\S]*)<\/tbody>/)
if (tbodyMatch) {
let newTable = document.createElement('table')
newTable.innerHTML = tbodyMatch[1]
return newTable.innerText
}
return ''
},
// 从 HTML 中获取表格文本
getTableTextFromHtml(html) {
let tableMatch = html.match(/<table>([\s\S]*)<\/table>/)
if (tableMatch) {
let domTable = document.createElement('table')
domTable.innerHTML = tableMatch[1]
return domTable.innerText
}
return ''
},
// 更新表格属性
updateTableAttributes(tableElement, chooseItem) {
tableElement.setAttribute(
'data-path-id',
this.selectionImagePath.replace(/\.[^/.]+$/, '')
)
let fileType = this.selectionImagePath.match(/\.[^/.]+$/)[0]
tableElement.setAttribute('data-path-type', fileType)
},
// 获取bbox 的 表格图片生成对照表
extractTableData(table) {
;(table.blocks ? table.blocks : []).map(lines => {
lines.lines.map(spans => {
spans.spans.map(span => {
this.selectionTable.push({
html: span.html,
image_path: span.image_path
})
})
})
})
},
// 填充对照表
fillSelectionTable(selection) {
selection.map(item => {
if (item.preproc_blocks && item.preproc_blocks.length > 0) {
item.preproc_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
// 处理丢弃块
if (item.discarded_blocks && item.discarded_blocks.length > 0) {
item.discarded_blocks.forEach(block => {
if (block.type === 'table') {
this.extractTableData(block)
}
})
}
})
},
// 导出
emitMarkDown() {
let pre = document.getElementById('md-editor').innerText
this.$emit('getMarkDownIt', { innerText: pre })
},
// 保存markdown
saveMarkDown() {
let pre = document.getElementById('md-editor').innerHTML
// pre + ebiz-code标签
minerUMarkDownUpdate({
documentId: this.documentId,
newMd: pre
}).then(res => {
this.$emit('saveMarkDown', true)
})
},
// 给文件增加色块
formatJson(data) {
return data.map(item => {
let bboxes = []
// 处理预处理块
if (item.preproc_blocks && item.preproc_blocks.length > 0) {
item.preproc_blocks.forEach(block => {
bboxes.push({
type: block.type,
bbox: block.bbox,
color: PDF_COLOR_PICKER[block.type] || DEFAULT_COLOR_SECTION
})
})
}
// 处理丢弃块
if (item.discarded_blocks && item.discarded_blocks.length > 0) {
item.discarded_blocks.forEach(block => {
bboxes.push({
type: block.type,
bbox: block.bbox,
color: PDF_COLOR_PICKER[block.type] || DEFAULT_COLOR_SECTION
})
})
}
return {
...item,
bboxes
}
})
},
// bbox 解析 传递 颜色
getPDFDetailBbox() {
minerUBbox({ documentId: this.documentId }).then(res => {
this.bboxList = this.formatJson(
JSON.parse(JSON.stringify(res.content.content))
)
this.fillSelectionTable(JSON.parse(JSON.stringify(res.content.content)))
// this.$refs.iframe 重新刷新iframe
this.$refs.iframe.contentWindow.location.reload()
this.getPDFDetailMarkDown()
})
},
// 获取md分页
getMdPage(back) {
let responseText = ''
getMd_info({ documentId: this.documentId }).then(async res => {
if (res) {
this.mdPges = res.content.content.indexList
for (let i = 0; i < this.mdPges.length; i++) {
let text = await mdIndex({ index: i, documentId: this.documentId })
responseText += `<ebiz-code id='ebiz-code-${i}'></ebiz-code>
${text}`
}
back(responseText)
}
})
},
// 渲染markdown
renderMarkDown() {
this.markdown = this.markdown
.replace(/<table/g, () => {
const uniqueId = `table-${this.tableIdCounter++}`
return `<table contenteditable='false' class="m-view"`
})
.replace(/<script/g, '< script')
this.markdownHtml = this.md.render(
this.markdown
.replace(/class="m-view"/g, '')
.replace(/ebiz-code/g, 'view-code')
)
},
// tab 切换
changeTab(evt) {
let editor = document.getElementById('md-editor')
// pre 获取 table的html 给table 增加 对照表的path id
let pre = editor.innerHTML
let tables = editor.querySelectorAll('table')
if (tables) {
tables.forEach(item => {
let pathId = item.getAttribute('data-path-id')
if (!pathId) {
let ite = this.findMatchingTable(item)
if (ite) {
item.setAttribute(
'data-path-id',
ite.image_path.replace(/\.[^/.]+$/, '')
)
item.setAttribute(
'data-path-type',
ite.image_path.match(/\.[^/.]+$/)[0]
)
}
}
})
}
// 给 copyMdHtml 里面的table 增加 class m-view
// copyMdHtml = copyMdHtml.re
this.markdownHtml = md.render(
pre.replace(/class="m-view"/g, '').replace(/ebiz-code/g, 'view-code')
)
setTimeout(() => {
this.changePage(this.page, evt)
}, 100)
},
// 初始md 文档
async getPDFDetailMarkDown() {
// responseText 判断是否包含ebiz-code
const response = await fetch(
minerUMarkDown({ documentId: this.documentId })
)
this.markdown = await response.text()
// this.markdown 包含 ebiz-code
if (this.markdown.indexOf('<ebiz-code ') < 0) {
this.getMdPage(responseText => {
this.markdown = responseText
this.renderMarkDown()
})
} else {
this.renderMarkDown()
}
},
// 向 iframe 发送消息
sendMessageToIframe(type, message) {
// 获取 iframe 元素
const iframe = document.getElementById('iframe')
// 检查 iframe 是否存在并且可以发送消息
if (iframe && iframe.contentWindow) {
iframe.contentWindow.postMessage(
{
type,
data: message
},
'*'
)
}
},
// 获取识别状态
getMinerUStatus() {
this.finishedMiner = true
minerUQuery({ id: this.documentId }).then(res => {
let mineruStatus = res.content.content.mineruStatus
this.fileName = res.content.content.fileName
switch (mineruStatus) {
case 0:
case '0':
setTimeout(() => {
this.getMinerUStatus()
}, 5000)
break
case 1:
case '1':
this.finishedMiner = false
this.getPDFDetailBbox()
break
case 2:
case '2':
case -1:
case '-1':
this.finishedMiner = false
this.$message({
message: '识别失败',
type: 'error'
})
break
}
})
},
// 重试方法
retryMiner() {
this.endEmit = true
minerURetry({ documentId: this.documentId })
.then(res => {
this.$message({
message: '正在重新预处理',
type: 'success'
})
this.finishedMiner = true
this.getMinerUStatus() // 重新查询状态
})
.catch(() => {
this.$message.error('重试操作失败')
})
},
// 处理滚动事件
handleScroll() {
let viewBody = this.$refs.viewBody
let viewCodes = this.$refs.viewBody.querySelectorAll('view-code')
if (this.tab === '1') {
viewBody = this.$refs.mdEditor
viewCodes = this.$refs.mdEditor.querySelectorAll('ebiz-code')
}
const viewBodyTop = viewBody.scrollTop
const viewBodyHeight = viewBody.clientHeight
let currentPage = this.page
for (let i = 0; i < viewCodes.length; i++) {
const viewCode = viewCodes[i]
const viewCodeTop = viewCode.offsetTop
if (
viewCodeTop >= viewBodyTop &&
viewCodeTop < viewBodyTop + viewBodyHeight / 2
) {
currentPage = i + 1
break
}
}
if (currentPage !== this.page) {
this.page = currentPage
this.sendMessageToIframe('setPage', currentPage)
}
}
},
created() {},
mounted() {
// 监听 iframe 的 postMessage 事件
window.addEventListener('message', event => {
// 检查消息来源是否合法
if (event.origin === window.location.origin) {
// 根据消息状态执行不同操作
switch (event.data.status) {
case 'loaded':
// 格式化 JSON 数据并发送给 iframe
this.sendMessageToIframe('initExtractLayerData', this.bboxList)
this.sendMessageToIframe('title', '')
break
}
}
if (event.data.pageNum) {
const num = event.data.pageNum || 1
this.sendMessageToIframe('pageChange', num)
}
if (event.data.pageNumDetail) {
const pageNumDetail = event.data.pageNumDetail || 1
this.page = pageNumDetail
this.sendMessageToIframe('pageNumDetail', pageNumDetail)
}
})
this.generateTableButtons()
//
// this.$refs.viewBody.onscroll = this.handleScroll
// this.$refs.mdEditor.onscroll = this.handleScroll
},
computed: {}
}
</script>
<style scoped lang="scss">
.fileName {
font-family: PingFangSC, PingFang SC;
font-weight: 600;
font-size: 12px;
color: #3a3f4f;
line-height: 17px;
text-align: left;
font-style: normal;
}
.ebiz-pdf {
outline: none;
border-radius: 7px;
flex: 1;
}
#md-editor {
//超过宽度自动折行
white-space: pre-wrap;
word-wrap: break-word;
overflow: auto;
outline: none;
border: none;
}
.ebiz-pdf-md {
border-radius: 7px;
box-shadow: unset;
/deep/ .el-tabs--border-card {
box-shadow: unset;
border-radius: 7px;
& .el-tabs__header {
border: none;
}
& .el-tabs__content {
overflow: auto;
}
}
//height: calc(100% - 30px);
.tabs__content {
}
}
</style>