162 lines
4.9 KiB
Python
162 lines
4.9 KiB
Python
"""
CosyVoice engine implementation.

Supports a locally deployed CosyVoice API service.
"""
|
||
import httpx
|
||
from typing import Optional
|
||
from io import BytesIO
|
||
from .base import TTSEngine
|
||
from utils.logger import logger
|
||
|
||
|
||
class CosyVoiceEngine(TTSEngine):
    """TTS engine that delegates synthesis to a CosyVoice HTTP API service.

    Speech is produced by POSTing form data (``text`` and
    ``zero_shot_spk_id``) to the configured endpoint; the raw response body
    is returned to the caller as the audio payload.
    """

    def __init__(
        self,
        api_url: str = "http://192.168.1.200:8000/tts/zero_shot",
        timeout: float = 3600.0,
    ):
        """Initialize the CosyVoice engine.

        Args:
            api_url: URL of the CosyVoice zero-shot TTS endpoint.
            timeout: HTTP request timeout in seconds.
        """
        self.api_url = api_url
        self.timeout = timeout
        self.engine_name = "cosyvoice"
        self.engine_version = "1.0.0"
        logger.info(
            f"Initialized {self.engine_name} engine with API URL: {api_url}"
        )

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` into speech through the CosyVoice API.

        Args:
            text: Text to synthesize. Must not be empty or whitespace-only.
            language: Language code, default ``zh-CN``. Only logged; it is
                not sent to the API.
            voice: Speaker ID, sent to the API as ``zero_shot_spk_id``.
            rate: Speaking rate, 1.0 is normal speed (not supported yet;
                currently ignored).
            pitch: Pitch, 1.0 is normal pitch (not supported yet; currently
                ignored).

        Returns:
            A ``BytesIO`` positioned at offset 0 holding the response body
            returned by the API.

        Raises:
            ValueError: If ``voice`` or ``text`` is empty, if the API answers
                with an HTTP error status, or if the request itself fails
                (connection error, timeout, ...). For API failures the
                original ``httpx`` exception is attached as ``__cause__``.
        """
        if not voice:
            raise ValueError("voice (zero_shot_spk_id) is required for CosyVoice")
        # Reject empty input up front instead of spending a (potentially very
        # long) request on text that cannot produce any audio.
        if not text or not text.strip():
            raise ValueError("text is required for CosyVoice")

        logger.debug(
            "Synthesizing text with CosyVoice - "
            f"voice={voice}, language={language}"
        )

        # Request parameters, sent as form fields.
        form_data = {
            "text": text,
            "zero_shot_spk_id": voice,
        }

        logger.debug(f"Calling CosyVoice API: {self.api_url}")
        logger.debug(f"Request form data: {form_data}")

        try:
            # A short-lived client per call: the context manager guarantees
            # the connection is released even when the request fails.
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    self.api_url,
                    data=form_data,
                )
                # Turn 4xx/5xx responses into HTTPStatusError.
                response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.error(
                f"CosyVoice API error: HTTP {e.response.status_code} - {e.response.text}"
            )
            raise ValueError(
                f"CosyVoice API error: HTTP {e.response.status_code}"
            ) from e
        except httpx.RequestError as e:
            logger.error(f"CosyVoice API request failed: {str(e)}")
            raise ValueError(f"Failed to connect to CosyVoice API: {str(e)}") from e
        except Exception as e:
            # Anything unexpected is logged and propagated unchanged.
            logger.error(f"Error synthesizing text with CosyVoice: {str(e)}")
            raise

        content = response.content
        # A BytesIO built from initial bytes already starts at position 0.
        audio_data = BytesIO(content)

        logger.debug(
            f"Successfully synthesized text. Audio size: {len(content)} bytes"
        )
        return audio_data

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """Return the list of supported voices.

        Args:
            language: Language code. Only used in the debug log; the returned
                list does not depend on it.

        Returns:
            A hard-coded list of example speaker entries, each a dict with
            ``name``, ``voice_id`` and ``description`` keys.
        """
        # CosyVoice synthesizes with zero-shot speakers, so only example
        # entries are returned here; they could instead be loaded from a
        # configuration file.
        example_speakers = [
            {
                "name": "默认发音人1",
                "voice_id": "default_speaker_1",
                "description": "CosyVoice 默认发音人示例",
            },
            {
                "name": "默认发音人2",
                "voice_id": "default_speaker_2",
                "description": "CosyVoice 默认发音人示例",
            },
        ]

        logger.debug(
            f"Returning example speakers for CosyVoice (language: {language})"
        )
        return example_speakers

    def get_engine_name(self) -> str:
        """Return the engine name."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version."""
        return self.engine_version

    async def close(self) -> None:
        """Close the HTTP client connection.

        Deprecated: closing is no longer required because ``synthesize``
        creates and disposes of its own client per call. This method only
        emits a debug log entry.
        """
        logger.debug("CosyVoice HTTP client close() called (no-op)")
|