Feat/chat support voice input (#532)

2025-12-09 19:06:51 +08:00 · 2023-07-07 17:50:42 +08:00
parent feebb5dd1f
commit a03a92e9db
70 changed files with 1420 additions and 26 deletions
--- a/api/controllers/console/init.py
+++ b/api/controllers/console/init.py
@@ -9,7 +9,7 @@ api = ExternalApi(bp)
 from . import setup, version, apikey, admin

 # Import app controllers
-from .app import app, site, completion, model_config, statistic, conversation, message, generator
+from .app import app, site, completion, model_config, statistic, conversation, message, generator, audio

 # Import auth controllers
 from .auth import login, oauth, data_source_oauth
@@ -21,4 +21,4 @@ from .datasets import datasets, datasets_document, datasets_segments, file, hit_
 from .workspace import workspace, members, providers, account

 # Import explore controllers
-from .explore import installed_app, recommended_app, completion, conversation, message, parameter, saved_message
+from .explore import installed_app, recommended_app, completion, conversation, message, parameter, saved_message, audio
--- a/api/controllers/console/app/app.py
+++ b/api/controllers/console/app/app.py
@@ -22,6 +22,7 @@ model_config_fields = {
    'opening_statement': fields.String,
    'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
    'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'),
+    'speech_to_text': fields.Raw(attribute='speech_to_text_dict'),
    'more_like_this': fields.Raw(attribute='more_like_this_dict'),
    'model': fields.Raw(attribute='model_dict'),
    'user_input_form': fields.Raw(attribute='user_input_form_list'),
@@ -144,6 +145,7 @@ class AppListApi(Resource):
                opening_statement=model_configuration['opening_statement'],
                suggested_questions=json.dumps(model_configuration['suggested_questions']),
                suggested_questions_after_answer=json.dumps(model_configuration['suggested_questions_after_answer']),
+                speech_to_text=json.dumps(model_configuration['speech_to_text']),
                more_like_this=json.dumps(model_configuration['more_like_this']),
                model=json.dumps(model_configuration['model']),
                user_input_form=json.dumps(model_configuration['user_input_form']),
@@ -434,6 +436,7 @@ class AppCopy(Resource):
            opening_statement=app_config.opening_statement,
            suggested_questions=app_config.suggested_questions,
            suggested_questions_after_answer=app_config.suggested_questions_after_answer,
+            speech_to_text=app_config.speech_to_text,
            more_like_this=app_config.more_like_this,
            model=app_config.model,
            user_input_form=app_config.user_input_form,
--- a/api/controllers/console/app/audio.py
+++ b/api/controllers/console/app/audio.py
@@ -0,0 +1,69 @@
+# -*- coding:utf-8 -*-
+import logging
+
+from flask import request
+from flask_login import login_required
+from werkzeug.exceptions import InternalServerError, NotFound
+
+import services
+from controllers.console import api
+from controllers.console.app import _get_app
+from controllers.console.app.error import AppUnavailableError, \
+    ProviderNotInitializeError, CompletionRequestError, ProviderQuotaExceededError, \
+    ProviderModelCurrentlyNotSupportError, NoAudioUploadedError, AudioTooLargeError, \
+    UnsupportedAudioTypeError, ProviderNotSupportSpeechToTextError
+from controllers.console.setup import setup_required
+from controllers.console.wraps import account_initialization_required
+from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthorizationError, LLMAPIConnectionError, \
+    LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
+from flask_restful import Resource
+from services.audio_service import AudioService
+from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServiceError, \
+    UnsupportedAudioTypeServiceError, ProviderNotSupportSpeechToTextServiceError
+
+
+class ChatMessageAudioApi(Resource):
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self, app_id):
+        app_id = str(app_id)
+        app_model = _get_app(app_id, 'chat')
+
+        file = request.files['file']
+
+        try:
+            response = AudioService.transcript(
+                tenant_id=app_model.tenant_id,
+                file=file,
+            )
+
+            return response
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError:
+            raise ProviderNotInitializeError()
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
+                LLMRateLimitError, LLMAuthorizationError) as e:
+            raise CompletionRequestError(str(e))
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+        
+
+api.add_resource(ChatMessageAudioApi, '/apps/<uuid:app_id>/audio-to-text')
--- a/api/controllers/console/app/error.py
+++ b/api/controllers/console/app/error.py
@@ -49,3 +49,27 @@ class AppMoreLikeThisDisabledError(BaseHTTPException):
    error_code = 'app_more_like_this_disabled'
    description = "The 'More like this' feature is disabled. Please refresh your page."
    code = 403
+
+
+class NoAudioUploadedError(BaseHTTPException):
+    error_code = 'no_audio_uploaded'
+    description = "Please upload your audio."
+    code = 400
+
+
+class AudioTooLargeError(BaseHTTPException):
+    error_code = 'audio_too_large'
+    description = "Audio size exceeded. {message}"
+    code = 413
+
+
+class UnsupportedAudioTypeError(BaseHTTPException):
+    error_code = 'unsupported_audio_type'
+    description = "Audio type not allowed."
+    code = 415
+
+
+class ProviderNotSupportSpeechToTextError(BaseHTTPException):
+    error_code = 'provider_not_support_speech_to_text'
+    description = "Provider not support speech to text."
+    code = 400
--- a/api/controllers/console/app/model_config.py
+++ b/api/controllers/console/app/model_config.py
@@ -41,6 +41,7 @@ class ModelConfigResource(Resource):
            opening_statement=model_configuration['opening_statement'],
            suggested_questions=json.dumps(model_configuration['suggested_questions']),
            suggested_questions_after_answer=json.dumps(model_configuration['suggested_questions_after_answer']),
+            speech_to_text=json.dumps(model_configuration['speech_to_text']),
            more_like_this=json.dumps(model_configuration['more_like_this']),
            model=json.dumps(model_configuration['model']),
            user_input_form=json.dumps(model_configuration['user_input_form']),
--- a/api/controllers/console/explore/audio.py
+++ b/api/controllers/console/explore/audio.py
@@ -0,0 +1,66 @@
+# -*- coding:utf-8 -*-
+import logging
+
+from flask import request
+from werkzeug.exceptions import InternalServerError
+
+import services
+from controllers.console import api
+from controllers.console.app.error import AppUnavailableError, ProviderNotInitializeError, \
+    ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError, CompletionRequestError, \
+    NoAudioUploadedError, AudioTooLargeError, \
+    UnsupportedAudioTypeError, ProviderNotSupportSpeechToTextError
+from controllers.console.explore.wraps import InstalledAppResource
+from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthorizationError, LLMAPIConnectionError, \
+    LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
+from services.audio_service import AudioService
+from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServiceError, \
+    UnsupportedAudioTypeServiceError, ProviderNotSupportSpeechToTextServiceError
+from models.model import AppModelConfig
+
+
+class ChatAudioApi(InstalledAppResource):
+    def post(self, installed_app):
+        app_model = installed_app.app
+        app_model_config: AppModelConfig = app_model.app_model_config
+
+        if not app_model_config.speech_to_text_dict['enabled']:
+            raise AppUnavailableError()
+
+        file = request.files['file']
+
+        try:
+            response = AudioService.transcript(
+                tenant_id=app_model.tenant_id,
+                file=file,
+            )
+
+            return response
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError:
+            raise ProviderNotInitializeError()
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
+                LLMRateLimitError, LLMAuthorizationError) as e:
+            raise CompletionRequestError(str(e))
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+        
+
+api.add_resource(ChatAudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text', endpoint='installed_app_audio')
--- a/api/controllers/console/explore/parameter.py
+++ b/api/controllers/console/explore/parameter.py
@@ -21,6 +21,7 @@ class AppParameterApi(InstalledAppResource):
        'opening_statement': fields.String,
        'suggested_questions': fields.Raw,
        'suggested_questions_after_answer': fields.Raw,
+        'speech_to_text': fields.Raw,
        'more_like_this': fields.Raw,
        'user_input_form': fields.Raw,
    }
@@ -35,6 +36,7 @@ class AppParameterApi(InstalledAppResource):
            'opening_statement': app_model_config.opening_statement,
            'suggested_questions': app_model_config.suggested_questions_list,
            'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
+            'speech_to_text': app_model_config.speech_to_text_dict,
            'more_like_this': app_model_config.more_like_this_dict,
            'user_input_form': app_model_config.user_input_form_list
        }