mirror of
http://112.124.100.131/huang.ze/ebiz-dify-ai.git
synced 2025-12-21 00:36:53 +08:00
ISSUE=11042: add tts model in siliconflow (#11043)
This commit is contained in:
@@ -24,4 +24,3 @@
|
|||||||
- meta-llama/Meta-Llama-3.1-8B-Instruct
|
- meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||||
- google/gemma-2-27b-it
|
- google/gemma-2-27b-it
|
||||||
- google/gemma-2-9b-it
|
- google/gemma-2-9b-it
|
||||||
- deepseek-ai/DeepSeek-V2-Chat
|
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ supported_model_types:
|
|||||||
- text-embedding
|
- text-embedding
|
||||||
- rerank
|
- rerank
|
||||||
- speech2text
|
- speech2text
|
||||||
|
- tts
|
||||||
configurate_methods:
|
configurate_methods:
|
||||||
- predefined-model
|
- predefined-model
|
||||||
- customizable-model
|
- customizable-model
|
||||||
|
|||||||
@@ -0,0 +1,37 @@
|
|||||||
|
# Predefined model declaration for the SiliconFlow text-to-speech model
# fishaudio/fish-speech-1.4.
model: fishaudio/fish-speech-1.4
model_type: tts
model_properties:
  # Must match the `mode` of one of the entries under `voices` below.
  # NOTE(review): presumably the voice used when the caller supplies none or
  # an unsupported one - confirm against the provider's TTS model class.
  default_voice: 'fishaudio/fish-speech-1.4:alex'
  # Each voice is identified by `mode` ("<model>:<voice id>"); `name` is the
  # display label and `language` lists the language codes declared for it.
  # In the labels, 男声 means "male voice" and 女声 means "female voice".
  voices:
    - mode: "fishaudio/fish-speech-1.4:alex"
      name: "Alex(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:benjamin"
      name: "Benjamin(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:charles"
      name: "Charles(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:david"
      name: "David(男声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:anna"
      name: "Anna(女声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:bella"
      name: "Bella(女声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:claire"
      name: "Claire(女声)"
      language: [ "zh-Hans", "en-US" ]
    - mode: "fishaudio/fish-speech-1.4:diana"
      name: "Diana(女声)"
      language: [ "zh-Hans", "en-US" ]
  # Audio container format declared for this model.
  audio_type: 'mp3'
  # NOTE(review): presumably an upper bound on concurrent synthesis workers -
  # verify that the runtime actually reads this value.
  max_workers: 5
  # stream: false
pricing:
  # NOTE(review): price values are quoted strings; `unit` looks like a
  # multiplier applied to `input`/`output` - confirm against the pricing
  # schema used by the model runtime.
  input: '0.015'
  output: '0'
  unit: '0.001'
  currency: RMB
|
||||||
105
api/core/model_runtime/model_providers/siliconflow/tts/tts.py
Normal file
105
api/core/model_runtime/model_providers/siliconflow/tts/tts.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
import concurrent.futures
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
from core.model_runtime.errors.invoke import InvokeBadRequestError
|
||||||
|
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||||
|
from core.model_runtime.model_providers.__base.tts_model import TTSModel
|
||||||
|
from core.model_runtime.model_providers.openai._common import _CommonOpenAI
|
||||||
|
|
||||||
|
|
||||||
|
class SiliconFlowText2SpeechModel(_CommonOpenAI, TTSModel):
    """
    Model class for SiliconFlow text-to-speech model.

    Requests are sent through the OpenAI client, pointed at the SiliconFlow
    endpoint by ``_add_custom_parameters``.
    """

    def _invoke(
        self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, user: Optional[str] = None
    ) -> Any:
        """
        _invoke text2speech model

        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param voice: model timbre; replaced by the model's default voice when
            empty or not supported by the model
        :param user: unique user id
        :return: generator yielding the synthesized audio as mp3 byte chunks
        """
        voice = self._resolve_voice(model=model, credentials=credentials, voice=voice)
        return self._tts_invoke_streaming(model=model, credentials=credentials, content_text=content_text, voice=voice)

    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
        """
        validate credentials text2speech model

        :param model: model name
        :param credentials: model credentials
        :param user: unique user id
        :return: None
        :raises CredentialsValidateFailedError: if the probe request fails
        """
        try:
            stream = self._tts_invoke_streaming(
                model=model,
                credentials=credentials,
                content_text="Hello SiliconFlow!",
                voice=self._get_model_default_voice(model, credentials),
            )
            try:
                # _tts_invoke_streaming is a generator: nothing is executed
                # (and no request is sent) until it is advanced, so pull the
                # first chunk to actually exercise the credentials.
                next(stream, None)
            finally:
                # Closing the generator unwinds its ``with`` blocks and
                # releases the HTTP response without reading the whole audio.
                stream.close()
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex

    def _resolve_voice(self, model: str, credentials: dict, voice: str) -> str:
        """
        Return ``voice`` if the model supports it, otherwise the default voice.

        :param model: model name
        :param credentials: model credentials
        :param voice: requested model timbre (may be empty)
        :return: a voice identifier to send with the request
        """
        supported_voices = {
            item.get("value") for item in self.get_tts_model_voices(model=model, credentials=credentials)
        }
        if not voice or voice not in supported_voices:
            return self._get_model_default_voice(model, credentials)
        return voice

    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> Any:
        """
        _tts_invoke_streaming text2speech model

        This is a generator function: the request is only issued once the
        returned generator is iterated.

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param voice: model timbre
        :return: generator yielding the synthesized audio as mp3 byte chunks
        :raises InvokeBadRequestError: if preparing or performing the request fails
        """
        try:
            # doc: https://docs.siliconflow.cn/capabilities/text-to-speech
            self._add_custom_parameters(credentials)
            credentials_kwargs = self._to_credential_kwargs(credentials)
            client = OpenAI(**credentials_kwargs)
            voice = self._resolve_voice(model=model, credentials=credentials, voice=voice)
            create_speech = client.audio.speech.with_streaming_response.create

            if len(content_text) > 4096:
                # Long input is split into pieces of at most 4096 characters
                # and the audio of each piece is emitted in the original order.
                sentences = self._split_text_into_sentences(content_text, max_length=4096)
                # ThreadPoolExecutor rejects max_workers=0, so keep at least
                # one worker even if the splitter returns nothing.
                worker_count = max(1, min(3, len(sentences)))
                with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
                    futures = [
                        executor.submit(
                            create_speech,
                            model=model,
                            response_format="mp3",
                            input=sentence,
                            voice=voice,
                        )
                        for sentence in sentences
                    ]
                    for future in futures:
                        # Enter the streaming response as a context manager so
                        # it is closed once its bytes have been forwarded.
                        with future.result() as response:
                            yield from response.iter_bytes(1024)
            else:
                with create_speech(
                    model=model, voice=voice, response_format="mp3", input=content_text.strip()
                ) as response:
                    yield from response.iter_bytes(1024)
        except Exception as ex:
            raise InvokeBadRequestError(str(ex)) from ex

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """
        Add the OpenAI-style entries to ``credentials`` for SiliconFlow.

        The dict is modified in place: the API base is set to the SiliconFlow
        endpoint and ``api_key`` is copied to ``openai_api_key``.

        :param credentials: model credentials; must contain ``api_key``
        :raises KeyError: if ``api_key`` is missing
        """
        credentials["openai_api_base"] = "https://api.siliconflow.cn"
        credentials["openai_api_key"] = credentials["api_key"]
|
||||||
Reference in New Issue
Block a user