This commit is contained in:
161
tts/cosyvoice_engine.py
Normal file
161
tts/cosyvoice_engine.py
Normal file
@ -0,0 +1,161 @@
|
||||
"""
|
||||
CosyVoice 引擎实现
|
||||
|
||||
支持本地部署的 CosyVoice API 服务
|
||||
"""
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from io import BytesIO
|
||||
from .base import TTSEngine
|
||||
from utils.logger import logger
|
||||
|
||||
|
||||
class CosyVoiceEngine(TTSEngine):
    """TTS engine backed by a self-hosted CosyVoice HTTP API.

    Each synthesis call posts the text and a zero-shot speaker ID to the
    configured endpoint and returns the raw response body as audio bytes.
    """

    def __init__(
        self,
        api_url: str = "http://192.168.1.200:8000/tts/zero_shot",
        timeout: float = 3600.0,
    ) -> None:
        """Store the endpoint configuration for later synthesis calls.

        Args:
            api_url: URL of the CosyVoice zero-shot synthesis endpoint.
            timeout: HTTP request timeout in seconds.
        """
        self.api_url = api_url
        self.timeout = timeout
        self.engine_name = "cosyvoice"
        self.engine_version = "1.0.0"
        logger.info(
            f"Initialized {self.engine_name} engine with API URL: {api_url}"
        )

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` into audio through the CosyVoice API.

        Args:
            text: Text to synthesize. Must contain non-whitespace characters.
            language: Language code. It is only logged; it is not sent to
                the API.
            voice: Speaker ID, sent to the API as ``zero_shot_spk_id``.
                Required.
            rate: Speaking rate. Not supported yet; non-default values are
                ignored (a warning is logged).
            pitch: Pitch. Not supported yet; non-default values are ignored
                (a warning is logged).

        Returns:
            A ``BytesIO`` positioned at offset 0 that holds the response
            body returned by the API.

        Raises:
            ValueError: If ``voice`` or ``text`` is empty, if the API
                responds with an HTTP error status, or if the request
                itself fails. For API failures the underlying ``httpx``
                exception is attached as ``__cause__``.
        """
        if not voice:
            raise ValueError("voice (zero_shot_spk_id) is required for CosyVoice")
        if not text or not text.strip():
            # Fail fast instead of spending a network round-trip on a
            # request that carries nothing to synthesize.
            raise ValueError("text must not be empty for CosyVoice synthesis")

        if rate != 1.0 or pitch != 1.0:
            # The request below carries no rate/pitch fields, so tell the
            # caller their values have no effect rather than dropping them
            # silently.
            logger.warning(
                f"CosyVoice does not support rate/pitch yet; "
                f"ignoring rate={rate}, pitch={pitch}"
            )

        logger.debug(
            f"Synthesizing text with CosyVoice - "
            f"voice={voice}, language={language}"
        )

        form_data = {
            "text": text,
            "zero_shot_spk_id": voice,
        }

        logger.debug(f"Calling CosyVoice API: {self.api_url}")
        # Log the text length rather than the text itself: the payload can
        # be long and may contain user content that should not end up in
        # log files.
        logger.debug(
            f"Request form data: zero_shot_spk_id={voice}, "
            f"text_length={len(text)}"
        )

        try:
            # A short-lived client per call: the context manager closes the
            # connection, so the engine holds no client state to clean up.
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    self.api_url,
                    data=form_data,
                )
                response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.error(
                f"CosyVoice API error: HTTP {e.response.status_code} - {e.response.text}"
            )
            raise ValueError(
                f"CosyVoice API error: HTTP {e.response.status_code}"
            ) from e
        except httpx.RequestError as e:
            logger.error(f"CosyVoice API request failed: {str(e)}")
            raise ValueError(f"Failed to connect to CosyVoice API: {str(e)}") from e
        except Exception as e:
            # Anything unexpected is logged for diagnosis and propagated
            # unchanged.
            logger.error(f"Error synthesizing text with CosyVoice: {str(e)}")
            raise

        payload = response.content
        # A freshly constructed BytesIO already starts at offset 0.
        audio_data = BytesIO(payload)
        logger.debug(
            f"Successfully synthesized text. Audio size: {len(payload)} bytes"
        )
        return audio_data

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """Return example speaker entries for this engine.

        The list is hard-coded and is not fetched from the API.

        Args:
            language: Language code. It is only logged and does not affect
                the returned list.

        Returns:
            A new list of dicts, each with ``name``, ``voice_id`` and
            ``description`` keys.
        """
        example_speakers = [
            {
                "name": "默认发音人1",
                "voice_id": "default_speaker_1",
                "description": "CosyVoice 默认发音人示例",
            },
            {
                "name": "默认发音人2",
                "voice_id": "default_speaker_2",
                "description": "CosyVoice 默认发音人示例",
            },
        ]

        logger.debug(
            f"Returning example speakers for CosyVoice (language: {language})"
        )
        return example_speakers

    def get_engine_name(self) -> str:
        """Return the engine name."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version string."""
        return self.engine_version

    async def close(self) -> None:
        """Do nothing beyond logging a debug message.

        Deprecated: ``synthesize`` opens and closes its own HTTP client on
        every call, so there is no persistent client to close.
        """
        logger.debug("CosyVoice HTTP client close() called (no-op)")
|
||||
Reference in New Issue
Block a user