mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-10 03:16:51 +08:00
Feat/chat support voice input (#532)
This commit is contained in:
@@ -4,6 +4,7 @@ import uuid
|
||||
from core.constant import llm_constant
|
||||
from models.account import Account
|
||||
from services.dataset_service import DatasetService
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
|
||||
|
||||
class AppModelConfigService:
|
||||
@@ -109,6 +110,26 @@ class AppModelConfigService:
|
||||
if not isinstance(config["suggested_questions_after_answer"]["enabled"], bool):
|
||||
raise ValueError("enabled in suggested_questions_after_answer must be of boolean type")
|
||||
|
||||
# speech_to_text
|
||||
if 'speech_to_text' not in config or not config["speech_to_text"]:
|
||||
config["speech_to_text"] = {
|
||||
"enabled": False
|
||||
}
|
||||
|
||||
if not isinstance(config["speech_to_text"], dict):
|
||||
raise ValueError("speech_to_text must be of dict type")
|
||||
|
||||
if "enabled" not in config["speech_to_text"] or not config["speech_to_text"]["enabled"]:
|
||||
config["speech_to_text"]["enabled"] = False
|
||||
|
||||
if not isinstance(config["speech_to_text"]["enabled"], bool):
|
||||
raise ValueError("enabled in speech_to_text must be of boolean type")
|
||||
|
||||
provider_name = LLMBuilder.get_default_provider(account.current_tenant_id)
|
||||
|
||||
if config["speech_to_text"]["enabled"] and provider_name != 'openai':
|
||||
raise ValueError("provider not support speech to text")
|
||||
|
||||
# more_like_this
|
||||
if 'more_like_this' not in config or not config["more_like_this"]:
|
||||
config["more_like_this"] = {
|
||||
@@ -277,6 +298,7 @@ class AppModelConfigService:
|
||||
"opening_statement": config["opening_statement"],
|
||||
"suggested_questions": config["suggested_questions"],
|
||||
"suggested_questions_after_answer": config["suggested_questions_after_answer"],
|
||||
"speech_to_text": config["speech_to_text"],
|
||||
"more_like_this": config["more_like_this"],
|
||||
"model": {
|
||||
"provider": config["model"]["provider"],
|
||||
|
||||
43
api/services/audio_service.py
Normal file
43
api/services/audio_service.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import io
|
||||
from werkzeug.datastructures import FileStorage
|
||||
from core.llm.llm_builder import LLMBuilder
|
||||
from core.llm.provider.llm_provider_service import LLMProviderService
|
||||
from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServiceError, UnsupportedAudioTypeServiceError, ProviderNotSupportSpeechToTextServiceError
|
||||
from core.llm.whisper import Whisper
|
||||
from models.provider import ProviderName
|
||||
|
||||
FILE_SIZE_LIMIT = 1 * 1024 * 1024
|
||||
ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
|
||||
|
||||
class AudioService:
|
||||
@classmethod
|
||||
def transcript(cls, tenant_id: str, file: FileStorage):
|
||||
if file is None:
|
||||
raise NoAudioUploadedServiceError()
|
||||
|
||||
extension = file.mimetype
|
||||
if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
|
||||
raise UnsupportedAudioTypeServiceError()
|
||||
|
||||
file_content = file.read()
|
||||
file_size = len(file_content)
|
||||
|
||||
if file_size > FILE_SIZE_LIMIT:
|
||||
message = f"({file_size} > {FILE_SIZE_LIMIT})"
|
||||
raise AudioTooLargeServiceError(message)
|
||||
|
||||
provider_name = LLMBuilder.get_default_provider(tenant_id)
|
||||
if provider_name != ProviderName.OPENAI.value:
|
||||
raise ProviderNotSupportSpeechToTextServiceError('haha')
|
||||
|
||||
provider_service = LLMProviderService(tenant_id, provider_name)
|
||||
|
||||
buffer = io.BytesIO(file_content)
|
||||
buffer.name = 'temp.wav'
|
||||
|
||||
return Whisper(provider_service.provider).transcribe(buffer)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
__all__ = [
|
||||
'base', 'conversation', 'message', 'index', 'app_model_config', 'account', 'document', 'dataset',
|
||||
'app', 'completion'
|
||||
'app', 'completion', 'audio'
|
||||
]
|
||||
|
||||
from . import *
|
||||
|
||||
23
api/services/errors/audio.py
Normal file
23
api/services/errors/audio.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from services.errors.base import BaseServiceError
|
||||
|
||||
class NoAudioUploadedServiceError(BaseServiceError):
|
||||
error_code = 'no_audio_uploaded'
|
||||
description = "Please upload your audio."
|
||||
code = 400
|
||||
|
||||
|
||||
class AudioTooLargeServiceError(BaseServiceError):
|
||||
error_code = 'audio_too_large'
|
||||
description = "Audio size exceeded. {message}"
|
||||
code = 413
|
||||
|
||||
|
||||
class UnsupportedAudioTypeServiceError(BaseServiceError):
|
||||
error_code = 'unsupported_audio_type'
|
||||
description = "Audio type not allowed."
|
||||
code = 415
|
||||
|
||||
class ProviderNotSupportSpeechToTextServiceError(BaseServiceError):
|
||||
error_code = 'provider_not_support_speech_to_text'
|
||||
description = "Provider not support speech to text. {message}"
|
||||
code = 400
|
||||
Reference in New Issue
Block a user