# meme/tts/cosyvoice_engine.py

"""
CosyVoice 引擎实现

支持本地部署的 CosyVoice API 服务
"""
import httpx
from typing import Optional
from io import BytesIO
from .base import TTSEngine
from utils.logger import logger


class CosyVoiceEngine(TTSEngine):
    """
    TTS engine backed by a CosyVoice HTTP API service.

    Each synthesis request is sent as a form-encoded POST to the configured
    endpoint, and the raw response body is returned as the audio payload.
    """

    def __init__(
        self,
        api_url: str = "http://192.168.1.200:8000/tts/zero_shot",
        timeout: float = 3600.0,
    ):
        """
        Initialize the CosyVoice engine.

        Args:
            api_url: URL of the CosyVoice synthesis endpoint.
            timeout: HTTP request timeout, in seconds.
        """
        self.api_url = api_url
        self.timeout = timeout
        self.engine_name = "cosyvoice"
        self.engine_version = "1.0.0"
        logger.info(
            f"Initialized {self.engine_name} engine with API URL: {api_url}"
        )

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """
        Synthesize speech for ``text`` through the CosyVoice API.

        Args:
            text: Text to synthesize. Must not be empty or whitespace-only.
            language: Language code. It is only logged; it is not sent to
                the API.
            voice: Speaker ID, sent to the API as ``zero_shot_spk_id``.
                Required.
            rate: Speaking rate. Not sent to the API (currently unsupported).
            pitch: Pitch. Not sent to the API (currently unsupported).

        Returns:
            A ``BytesIO`` positioned at offset 0 that holds the response body
            returned by the API.

        Raises:
            ValueError: If ``voice`` is empty, if ``text`` is empty or blank,
                if the API answers with an HTTP error status, or if the
                request itself fails (connection error, timeout, ...). For
                the HTTP cases the underlying ``httpx`` exception is chained
                as ``__cause__``.
        """
        # Validate inputs up front so that no request is made for input that
        # cannot produce audio. The voice check stays first to preserve the
        # error reported when both arguments are missing.
        if not voice:
            raise ValueError("voice (zero_shot_spk_id) is required for CosyVoice")
        if not text or not text.strip():
            raise ValueError("text is required for CosyVoice synthesis")

        # rate/pitch are part of the engine interface but are not forwarded;
        # leave a trace so callers can tell why their settings had no effect.
        if rate != 1.0 or pitch != 1.0:
            logger.debug(
                f"CosyVoice ignores rate/pitch (rate={rate}, pitch={pitch})"
            )

        try:
            logger.debug(
                f"Synthesizing text with CosyVoice - "
                f"voice={voice}, language={language}"
            )

            # Only these two fields are sent to the endpoint.
            form_data = {
                "text": text,
                "zero_shot_spk_id": voice,
            }

            logger.debug(f"Calling CosyVoice API: {self.api_url}")
            logger.debug(f"Request form data: {form_data}")

            # A short-lived client per request: it is closed as soon as the
            # response has been read, so nothing has to be released later.
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    self.api_url,
                    data=form_data,
                )

            # Turn 4xx/5xx answers into HTTPStatusError (handled below).
            response.raise_for_status()

            # A BytesIO built from initial bytes already starts at offset 0.
            payload = response.content
            audio_data = BytesIO(payload)

            logger.debug(
                f"Successfully synthesized text. Audio size: {len(payload)} bytes"
            )
            return audio_data

        except httpx.HTTPStatusError as e:
            logger.error(
                f"CosyVoice API error: HTTP {e.response.status_code} - {e.response.text}"
            )
            raise ValueError(
                f"CosyVoice API error: HTTP {e.response.status_code}"
            ) from e
        except httpx.RequestError as e:
            logger.error(f"CosyVoice API request failed: {str(e)}")
            raise ValueError(f"Failed to connect to CosyVoice API: {str(e)}") from e
        except Exception as e:
            # Anything unexpected is logged and propagated unchanged.
            logger.error(f"Error synthesizing text with CosyVoice: {str(e)}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """
        Return the list of example speakers.

        The list is hard-coded; no request is made to the API and
        ``language`` is only used in the debug log message.

        Args:
            language: Language code (logged only).

        Returns:
            A new list of dicts, each with ``name``, ``voice_id`` and
            ``description`` keys.
        """
        # Placeholder entries; a fresh list is built on every call, so
        # callers may mutate the result freely.
        example_speakers = [
            {
                "name": "默认发音人1",
                "voice_id": "default_speaker_1",
                "description": "CosyVoice 默认发音人示例",
            },
            {
                "name": "默认发音人2",
                "voice_id": "default_speaker_2",
                "description": "CosyVoice 默认发音人示例",
            },
        ]

        logger.debug(
            f"Returning example speakers for CosyVoice (language: {language})"
        )
        return example_speakers

    def get_engine_name(self) -> str:
        """Return the engine name."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version."""
        return self.engine_version

    async def close(self) -> None:
        """
        Close the engine.

        Deprecated: this is a no-op that only emits a debug log, because
        ``synthesize`` opens and closes its own HTTP client per request.
        """
        logger.debug("CosyVoice HTTP client close() called (no-op)")