From e785cbb81d8ce771c419888516394f6b5d1bf2b6 Mon Sep 17 00:00:00 2001
From: fishisnow <fishisnow2021@gmail.com>
Date: Wed, 19 Jun 2024 12:36:40 +0800
Subject: [PATCH] Fix: sign each image-preview URL separately when a segment contains multiple images (#5376)

Co-authored-by: huangyusong <huangyusong@yingzi.com>
---
 api/models/dataset.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/api/models/dataset.py b/api/models/dataset.py
index 9f8b15be1..09e18ab53 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -425,9 +425,9 @@ class DocumentSegment(db.Model):
     def get_sign_content(self):
         pattern = r"/files/([a-f0-9\-]+)/image-preview"
         text = self.content
-        match = re.search(pattern, text)
-
-        if match:
+        matches = re.finditer(pattern, text)
+        signed_urls = []
+        for match in matches:
             upload_file_id = match.group(1)
             nonce = os.urandom(16).hex()
             timestamp = str(int(time.time()))
@@ -437,8 +437,15 @@ class DocumentSegment(db.Model):
             encoded_sign = base64.urlsafe_b64encode(sign).decode()
 
             params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
-            replacement = r"\g<0>?{params}".format(params=params)
-            text = re.sub(pattern, replacement, text)
+            signed_url = f"{match.group(0)}?{params}"
+            signed_urls.append((match.start(), match.end(), signed_url))
+
+        # Splice each signed URL into the text; offset tracks how much earlier insertions shifted later match positions
+        offset = 0
+        for start, end, signed_url in signed_urls:
+            text = text[:start + offset] + signed_url + text[end + offset:]
+            offset += len(signed_url) - (end - start)
+
         return text