fix: code block segmentation problem in Markdown documents (#6465)

This commit is contained in:
灰灰
2024-07-25 17:24:37 +08:00
committed by GitHub
parent 16b4f560cd
commit 5e4ac11df3

View File

@@ -54,8 +54,16 @@ class MarkdownExtractor(BaseExtractor):
current_header = None
current_text = ""
code_block_flag = False
for line in lines:
if line.startswith("```"):
code_block_flag = not code_block_flag
current_text += line + "\n"
continue
if code_block_flag:
current_text += line + "\n"
continue
header_match = re.match(r"^#+\s", line)
if header_match:
if current_header is not None: