Files
meme/tts/edge_tts_engine.py
2025-11-28 20:27:10 +08:00

151 lines
4.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Edge-TTS 引擎实现
"""
import edge_tts
from typing import Optional
from io import BytesIO
from .base import TTSEngine
from utils.logger import logger
class EdgeTTSEngine(TTSEngine):
    """TTS engine implementation backed by Microsoft Edge TTS (``edge_tts``).

    Supports multiple languages and voices and is free to use.
    """

    # Voice used when the requested language has no entry in _DEFAULT_VOICES.
    _FALLBACK_VOICE = "zh-CN-XiaoxiaoNeural"

    # Default voice (edge-tts "ShortName") for each supported language code.
    _DEFAULT_VOICES = {
        "zh-CN": "zh-CN-XiaoxiaoNeural",  # Chinese (Simplified) - Xiaoxiao
        "zh-TW": "zh-TW-HsiaoChenNeural",  # Chinese (Traditional)
        "en-US": "en-US-AriaNeural",  # English (United States)
        "en-GB": "en-GB-SoniaNeural",  # English (United Kingdom)
        "ja-JP": "ja-JP-NanamiNeural",  # Japanese
        "ko-KR": "ko-KR-SunHiNeural",  # Korean
        # NOTE(review): the previous value, fr-FR-CelesteNeural, does not
        # appear in the Edge voice list; confirm with `edge-tts --list-voices`.
        "fr-FR": "fr-FR-DeniseNeural",  # French
        "de-DE": "de-DE-ConradNeural",  # German
        "es-ES": "es-ES-AlvaroNeural",  # Spanish
        "ru-RU": "ru-RU-DmitryNeural",  # Russian
    }

    def __init__(self):
        """Initialize the engine and record its name and version."""
        self.engine_name = "edge-tts"
        self.engine_version = "1.0.0"
        logger.info(f"Initialized {self.engine_name} engine")

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` into speech using Edge TTS.

        Args:
            text: The text to synthesize.
            language: Language code, defaults to ``zh-CN`` (Chinese). Only
                used to pick a default voice when ``voice`` is not given.
            voice: Voice ID. When ``None`` or empty, the default voice for
                ``language`` is used.
            rate: Speaking rate multiplier; 1.0 is normal speed. Converted
                to a signed percentage offset (1.5 -> ``+50%``).
            pitch: Pitch factor; 1.0 is normal pitch. Converted to a signed
                Hz offset of ``(pitch - 1) * 100`` (1.1 -> ``+10Hz``).

        Returns:
            A ``BytesIO`` positioned at offset 0 that holds the concatenated
            audio chunks streamed by edge_tts (MP3 data, per the original
            documentation of this method).

        Raises:
            Exception: Any error raised by edge_tts is logged and re-raised
                unchanged.
        """
        try:
            # Fall back to the language's default voice when none is given.
            # An empty string is treated the same as None.
            if not voice:
                voice = self._get_default_voice(language)

            logger.debug(
                f"Synthesizing text with Edge TTS - "
                f"language={language}, voice={voice}, rate={rate}, pitch={pitch}"
            )

            # edge_tts expects explicitly signed offsets, hence the "+" flag.
            rate_str = f"{(rate - 1) * 100:+.0f}%"
            pitch_str = f"{(pitch - 1) * 100:+.0f}Hz"

            communicate = edge_tts.Communicate(
                text=text,
                voice=voice,
                rate=rate_str,
                pitch=pitch_str,
            )

            # The stream interleaves audio with other chunk types; keep
            # only the audio payloads.
            audio_data = BytesIO()
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data.write(chunk["data"])

            # Rewind so callers can read from the start.
            audio_data.seek(0)
            logger.debug(
                f"Successfully synthesized text. Audio size: {audio_data.getbuffer().nbytes} bytes"
            )
            return audio_data
        except Exception as e:
            logger.error(f"Error synthesizing text with Edge TTS: {str(e)}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """Return the voices available for the given language.

        Voices are matched on the primary language subtag only, so
        ``zh-CN`` returns every ``zh-*`` voice. The comparison is exact on
        that subtag (``fi`` does not match ``fil-PH``) and case-insensitive.
        An empty ``language`` applies no filter.

        Args:
            language: Language code, e.g. ``'zh-CN'`` or ``'en-US'``.

        Returns:
            A list of dicts with the keys ``name``, ``voice_id``,
            ``locale``, ``display_name`` and ``gender``.

        Raises:
            Exception: Any error raised while fetching the voice list is
                logged and re-raised unchanged.
        """
        try:
            logger.debug(f"Fetching supported voices for language: {language}")
            voices = await edge_tts.list_voices()

            primary = language.split("-")[0].lower()
            filtered_voices = [
                {
                    "name": voice.get("ShortName", ""),
                    "voice_id": voice.get("ShortName", ""),
                    "locale": voice.get("Locale", ""),
                    # edge_tts exposes the human-readable name as
                    # "FriendlyName"; "DisplayName" is kept as a fallback.
                    "display_name": voice.get(
                        "FriendlyName", voice.get("DisplayName", "")
                    ),
                    "gender": voice.get("Gender", ""),
                }
                for voice in voices
                if not primary
                or voice.get("Locale", "").split("-")[0].lower() == primary
            ]

            logger.debug(f"Found {len(filtered_voices)} voices for language {language}")
            return filtered_voices
        except Exception as e:
            logger.error(f"Error fetching supported voices: {str(e)}")
            raise

    def get_engine_name(self) -> str:
        """Return the engine name."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version."""
        return self.engine_version

    @staticmethod
    def _get_default_voice(language: str) -> str:
        """Return the default voice ID for ``language``.

        Args:
            language: Language code, matched exactly against the
                predefined table (e.g. ``'en-US'``).

        Returns:
            The default voice ID for the language, or
            ``zh-CN-XiaoxiaoNeural`` when the language is not in the table.
        """
        return EdgeTTSEngine._DEFAULT_VOICES.get(
            language, EdgeTTSEngine._FALLBACK_VOICE
        )