Files
meme/tts/cosyvoice_engine.py
konjacpotato 6772699cfe
Some checks failed
Gitea Actions Demo / deploy (push) Failing after 2s
commit code
2025-12-29 19:34:39 +08:00

162 lines
4.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
CosyVoice 引擎实现
支持本地部署的 CosyVoice API 服务
"""
import httpx
from typing import Optional
from io import BytesIO
from .base import TTSEngine
from utils.logger import logger
class CosyVoiceEngine(TTSEngine):
    """TTS engine that delegates synthesis to a CosyVoice HTTP API.

    Each synthesis request is sent as a form-encoded POST to the configured
    endpoint, and the raw response body is handed back as an in-memory
    buffer.
    """

    def __init__(
        self,
        api_url: str = "http://192.168.1.200:8000/tts/zero_shot",
        timeout: float = 3600.0,
    ):
        """Record the endpoint settings and log that the engine is ready.

        Args:
            api_url: URL of the CosyVoice synthesis endpoint.
            timeout: Timeout value passed to the HTTP client created for
                each request (documented by the original author as seconds).
        """
        self.engine_name = "cosyvoice"
        self.engine_version = "1.0.0"
        self.api_url = api_url
        self.timeout = timeout
        logger.info(
            f"Initialized {self.engine_name} engine with API URL: {api_url}"
        )

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` into audio through the CosyVoice API.

        Args:
            text: Text to synthesize; sent as the ``text`` form field.
            language: Language code. It is only written to the debug log and
                is not part of the request.
            voice: Speaker ID, sent as the ``zero_shot_spk_id`` form field.
                Required.
            rate: Speaking rate. Accepted for interface compatibility but
                not used by this engine.
            pitch: Pitch. Accepted for interface compatibility but not used
                by this engine.

        Returns:
            A ``BytesIO`` positioned at offset 0 that wraps the response body.

        Raises:
            ValueError: If ``voice`` is empty, if the API answers with an
                error status, or if the request itself fails. In the latter
                two cases the underlying ``httpx`` exception is attached as
                ``__cause__``.
            Exception: Any other error is logged and re-raised unchanged.
        """
        if not voice:
            raise ValueError("voice (zero_shot_spk_id) is required for CosyVoice")

        try:
            logger.debug(
                f"Synthesizing text with CosyVoice - voice={voice}, language={language}"
            )

            # Form fields expected by the zero-shot endpoint.
            payload = {"text": text, "zero_shot_spk_id": voice}
            logger.debug(f"Calling CosyVoice API: {self.api_url}")
            logger.debug(f"Request form data: {payload}")

            # A short-lived client per call: the context manager closes it,
            # so the engine holds no connection state between requests.
            async with httpx.AsyncClient(timeout=self.timeout) as http:
                reply = await http.post(self.api_url, data=payload)

                # Turn 4xx/5xx answers into httpx.HTTPStatusError.
                reply.raise_for_status()

                raw = reply.content
                audio = BytesIO(raw)
                audio.seek(0)
                logger.debug(
                    f"Successfully synthesized text. Audio size: {len(raw)} bytes"
                )
                return audio
        except httpx.RequestError as exc:
            # Transport-level failure (connection, timeout, ...).
            logger.error(f"CosyVoice API request failed: {exc!s}")
            raise ValueError(f"Failed to connect to CosyVoice API: {exc!s}") from exc
        except httpx.HTTPStatusError as exc:
            # The server answered, but with an error status code.
            status = exc.response.status_code
            logger.error(
                f"CosyVoice API error: HTTP {status} - {exc.response.text}"
            )
            raise ValueError(f"CosyVoice API error: HTTP {status}") from exc
        except Exception as exc:
            # Boundary handler: record the failure, then let it propagate.
            logger.error(f"Error synthesizing text with CosyVoice: {exc!s}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """Return the list of example speakers.

        The entries are hard-coded placeholders; nothing is queried from the
        API.

        Args:
            language: Language code. It is only written to the debug log.

        Returns:
            A new list of dicts, each with ``name``, ``voice_id`` and
            ``description`` keys.
        """
        shared_description = "CosyVoice 默认发音人示例"
        placeholder_speakers = (
            ("默认发音人1", "default_speaker_1"),
            ("默认发音人2", "default_speaker_2"),
        )
        speakers = [
            {
                "name": display_name,
                "voice_id": speaker_id,
                "description": shared_description,
            }
            for display_name, speaker_id in placeholder_speakers
        ]
        logger.debug(
            f"Returning example speakers for CosyVoice (language: {language})"
        )
        return speakers

    def get_engine_name(self) -> str:
        """Return the engine name set in ``__init__``."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version set in ``__init__``."""
        return self.engine_version

    async def close(self) -> None:
        """Do nothing except emit a debug log entry.

        Deprecated: ``synthesize`` opens and closes its own HTTP client on
        every call, so there is no client left to close here.
        """
        logger.debug("CosyVoice HTTP client close() called (no-op)")