""" Edge-TTS 引擎实现 """ import edge_tts from typing import Optional from io import BytesIO from .base import TTSEngine from utils.logger import logger class EdgeTTSEngine(TTSEngine): """ Microsoft Edge TTS 引擎实现 支持多种语言和声音,免费使用。 """ def __init__(self): """初始化 Edge TTS 引擎""" self.engine_name = "edge-tts" self.engine_version = "1.0.0" logger.info(f"Initialized {self.engine_name} engine") async def synthesize( self, text: str, language: str = "zh-CN", voice: Optional[str] = None, rate: float = 1.0, pitch: float = 1.0, ) -> BytesIO: """ 使用 Edge TTS 将文本合成为语音 Args: text: 要合成的文本 language: 语言代码,默认 zh-CN (中文) voice: 声音 ID,如果为 None 则使用语言默认声音 rate: 语速,1.0 为正常速度 pitch: 音调,1.0 为正常音调 Returns: BytesIO 对象,包含合成后的 MP3 音频数据 """ try: # 如果没有指定声音,使用语言默认声音 if voice is None: voice = self._get_default_voice(language) logger.debug( f"Synthesizing text with Edge TTS - " f"language={language}, voice={voice}, rate={rate}, pitch={pitch}" ) # 构建速率和音调字符串(+/-值的百分比形式) rate_str = f"{(rate - 1) * 100:+.0f}%" pitch_str = f"{(pitch - 1) * 100:+.0f}Hz" # 创建 Edge TTS 客户端并合成 communicate = edge_tts.Communicate( text=text, voice=voice, rate=rate_str, pitch=pitch_str, ) # 收集所有音频数据块 audio_data = BytesIO() async for chunk in communicate.stream(): if chunk["type"] == "audio": audio_data.write(chunk["data"]) audio_data.seek(0) logger.debug( f"Successfully synthesized text. Audio size: {audio_data.getbuffer().nbytes} bytes" ) return audio_data except Exception as e: logger.error(f"Error synthesizing text with Edge TTS: {str(e)}") raise async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]: """ 获取指定语言支持的声音列表 Args: language: 语言代码,例如 'zh-CN'、'en-US' 等 Returns: 声音列表,包含 name、voice_id、locale 等信息 """ try: logger.debug(f"Fetching supported voices for language: {language}") voices = await edge_tts.list_voices() # 筛选指定语言的声音 filtered_voices = [ { "name": voice.get("ShortName", ""), "voice_id": voice.get("ShortName", ""), "locale": voice.get("Locale", ""), "display_name": voice.get("DisplayName", ""), "gender": voice.get("Gender", ""), } for voice in voices if voice.get("Locale", "").startswith(language.split("-")[0]) ] logger.debug(f"Found {len(filtered_voices)} voices for language {language}") return filtered_voices except Exception as e: logger.error(f"Error fetching supported voices: {str(e)}") raise def get_engine_name(self) -> str: """获取引擎名称""" return self.engine_name def get_engine_version(self) -> str: """获取引擎版本""" return self.engine_version @staticmethod def _get_default_voice(language: str) -> str: """ 获取指定语言的默认声音 Args: language: 语言代码 Returns: 默认声音 ID """ # 预定义的语言默认声音映射表 default_voices = { "zh-CN": "zh-CN-XiaoxiaoNeural", # 中文(简体)- 晓晓 "zh-TW": "zh-TW-HsiaoChen", # 中文(繁体) "en-US": "en-US-AriaNeural", # 英文(美国) "en-GB": "en-GB-SoniaNeural", # 英文(英国) "ja-JP": "ja-JP-NanamiNeural", # 日语 "ko-KR": "ko-KR-SunHiNeural", # 韩语 "fr-FR": "fr-FR-CelesteNeural", # 法语 "de-DE": "de-DE-ConraadNeural", # 德语 "es-ES": "es-ES-AlvaroNeural", # 西班牙语 "ru-RU": "ru-RU-DmitryNeural", # 俄语 } return default_voices.get(language, "zh-CN-XiaoxiaoNeural")