Convert audio from WAV to MP3 (#552)

2025-12-09 19:06:51 +08:00 · 2023-07-12 17:18:56 +08:00
parent b91e226063
commit 397a92f2ee
6 changed files with 64 additions and 27 deletions
--- a/api/services/audio_service.py
+++ b/api/services/audio_service.py
@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServ
 from core.llm.whisper import Whisper
 from models.provider import ProviderName

-FILE_SIZE_LIMIT = 1 * 1024 * 1024
+FILE_SIZE = 15
+FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
 ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']

 class AudioService:
@@ -23,17 +24,17 @@ class AudioService:
        file_size = len(file_content)

        if file_size > FILE_SIZE_LIMIT:
-            message = f"({file_size} > {FILE_SIZE_LIMIT})"
+            message = f"Audio size larger than {FILE_SIZE} mb"
            raise AudioTooLargeServiceError(message)
        
        provider_name = LLMBuilder.get_default_provider(tenant_id)
        if provider_name != ProviderName.OPENAI.value:
-            raise ProviderNotSupportSpeechToTextServiceError('haha')
+            raise ProviderNotSupportSpeechToTextServiceError()

        provider_service = LLMProviderService(tenant_id, provider_name)

        buffer = io.BytesIO(file_content)
-        buffer.name = 'temp.wav'
+        buffer.name = 'temp.mp3'

        return Whisper(provider_service.provider).transcribe(buffer)

--- a/api/services/errors/audio.py
+++ b/api/services/errors/audio.py
@@ -1,23 +1,13 @@
-from services.errors.base import BaseServiceError
-
-class NoAudioUploadedServiceError(BaseServiceError):
-    error_code = 'no_audio_uploaded'
-    description = "Please upload your audio."
-    code = 400
+class NoAudioUploadedServiceError(Exception):
+    pass


-class AudioTooLargeServiceError(BaseServiceError):
-    error_code = 'audio_too_large'
-    description = "Audio size exceeded. {message}"
-    code = 413
+class AudioTooLargeServiceError(Exception):
+    pass


-class UnsupportedAudioTypeServiceError(BaseServiceError):
-    error_code = 'unsupported_audio_type'
-    description = "Audio type not allowed."
-    code = 415
+class UnsupportedAudioTypeServiceError(Exception):
+    pass

-class ProviderNotSupportSpeechToTextServiceError(BaseServiceError):
-    error_code = 'provider_not_support_speech_to_text'
-    description = "Provider not support speech to text. {message}"
-    code = 400
+class ProviderNotSupportSpeechToTextServiceError(Exception):
+    pass