""" CosyVoice 引擎实现 支持本地部署的 CosyVoice API 服务 """ import httpx from typing import Optional from io import BytesIO from .base import TTSEngine from utils.logger import logger class CosyVoiceEngine(TTSEngine): """ CosyVoice 引擎实现 调用本地部署的 CosyVoice API 服务进行语音合成。 """ def __init__( self, api_url: str = "http://192.168.1.200:8000/tts/zero_shot", timeout: float = 3600.0, ): """ 初始化 CosyVoice 引擎 Args: api_url: CosyVoice API 地址,默认为本地部署地址 timeout: HTTP 请求超时时间(秒) """ self.api_url = api_url self.timeout = timeout self.engine_name = "cosyvoice" self.engine_version = "1.0.0" logger.info( f"Initialized {self.engine_name} engine with API URL: {api_url}" ) async def synthesize( self, text: str, language: str = "zh-CN", voice: Optional[str] = None, rate: float = 1.0, pitch: float = 1.0, ) -> BytesIO: """ 使用 CosyVoice 将文本合成为语音 Args: text: 要合成的文本 language: 语言代码,默认 zh-CN (中文)。注:CosyVoice 主要支持中文 voice: 声音/发音人 ID (zero_shot_spk_id) rate: 语速,1.0 为正常速度(暂不支持) pitch: 音调,1.0 为正常音调(暂不支持) Returns: BytesIO 对象,包含合成后的音频数据 Raises: ValueError: 如果 voice 参数为空 httpx.HTTPError: 如果 API 请求失败 """ if not voice: raise ValueError("voice (zero_shot_spk_id) is required for CosyVoice") try: logger.debug( f"Synthesizing text with CosyVoice - " f"voice={voice}, language={language}" ) # 构建请求参数 form_data = { "text": text, "zero_shot_spk_id": voice, } logger.debug(f"Calling CosyVoice API: {self.api_url}") logger.debug(f"Request form data: {form_data}") # 使用 httpx.AsyncClient 作为上下文管理器 async with httpx.AsyncClient(timeout=self.timeout) as client: response = await client.post( self.api_url, data=form_data, ) # 检查响应状态 response.raise_for_status() # 获取音频数据 audio_data = BytesIO(response.content) audio_data.seek(0) logger.debug( f"Successfully synthesized text. Audio size: {audio_data.getbuffer().nbytes} bytes" ) return audio_data except httpx.HTTPStatusError as e: logger.error( f"CosyVoice API error: HTTP {e.response.status_code} - {e.response.text}" ) raise ValueError( f"CosyVoice API error: HTTP {e.response.status_code}" ) from e except httpx.RequestError as e: logger.error(f"CosyVoice API request failed: {str(e)}") raise ValueError(f"Failed to connect to CosyVoice API: {str(e)}") from e except Exception as e: logger.error(f"Error synthesizing text with CosyVoice: {str(e)}") raise async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]: """ 获取支持的声音列表 Args: language: 语言代码(CosyVoice 主要支持中文) Returns: 声音列表。由于 CosyVoice 的 zero_shot 特性, 返回示例发音人信息 """ # CosyVoice 支持 zero_shot 发音人合成 # 可以返回一些常见的发音人示例或从配置文件加载 example_speakers = [ { "name": "默认发音人1", "voice_id": "default_speaker_1", "description": "CosyVoice 默认发音人示例", }, { "name": "默认发音人2", "voice_id": "default_speaker_2", "description": "CosyVoice 默认发音人示例", }, ] logger.debug( f"Returning example speakers for CosyVoice (language: {language})" ) return example_speakers def get_engine_name(self) -> str: """获取引擎名称""" return self.engine_name def get_engine_version(self) -> str: """获取引擎版本""" return self.engine_version async def close(self) -> None: """ 关闭 HTTP 客户端连接 已弃用:不再需要关闭客户端。 """ logger.debug("CosyVoice HTTP client close() called (no-op)")