add edge tts

This commit is contained in:
2025-11-28 20:27:10 +08:00
parent f796a3833b
commit 87160c5265
20 changed files with 3589 additions and 3 deletions

150
tts/edge_tts_engine.py Normal file
View File

@ -0,0 +1,150 @@
"""
Edge-TTS 引擎实现
"""
import edge_tts
from typing import Optional
from io import BytesIO
from .base import TTSEngine
from utils.logger import logger
class EdgeTTSEngine(TTSEngine):
    """Text-to-speech engine backed by the ``edge_tts`` package.

    Synthesizes speech through ``edge_tts.Communicate`` and lists the
    available voices through ``edge_tts.list_voices``.
    """

    # Voice used when a language has no entry in ``_DEFAULT_VOICES``.
    _FALLBACK_VOICE = "zh-CN-XiaoxiaoNeural"

    # Default voice per locale. Every value must be a valid Edge short name
    # ("<lang>-<REGION>-<Name>Neural"); anything else is rejected by edge-tts.
    _DEFAULT_VOICES = {
        "zh-CN": "zh-CN-XiaoxiaoNeural",    # Chinese (Simplified) - Xiaoxiao
        "zh-TW": "zh-TW-HsiaoChenNeural",   # Chinese (Traditional)
        "en-US": "en-US-AriaNeural",        # English (United States)
        "en-GB": "en-GB-SoniaNeural",       # English (United Kingdom)
        "ja-JP": "ja-JP-NanamiNeural",      # Japanese
        "ko-KR": "ko-KR-SunHiNeural",       # Korean
        "fr-FR": "fr-FR-DeniseNeural",      # French
        "de-DE": "de-DE-ConradNeural",      # German
        "es-ES": "es-ES-AlvaroNeural",      # Spanish
        "ru-RU": "ru-RU-DmitryNeural",      # Russian
    }

    def __init__(self):
        """Set the engine name/version attributes and log the initialization."""
        self.engine_name = "edge-tts"
        self.engine_version = "1.0.0"
        logger.info(f"Initialized {self.engine_name} engine")

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` to speech with Edge TTS.

        Args:
            text: The text to synthesize.
            language: Language code used to pick a default voice when
                ``voice`` is None. Defaults to ``zh-CN``.
            voice: Voice ID. When None, the default voice for ``language``
                is used.
            rate: Speaking rate multiplier; 1.0 is normal speed. It is sent
                to edge-tts as a signed percentage offset, e.g. 1.2 -> "+20%".
            pitch: Pitch factor; 1.0 is normal pitch. It is sent to edge-tts
                as a signed offset in Hz of ``(pitch - 1) * 100``.

        Returns:
            A ``BytesIO`` positioned at offset 0 that holds the concatenated
            audio chunks streamed by edge-tts (MP3 data per the original
            author's documentation).

        Raises:
            Exception: Any error raised while synthesizing is logged and
                re-raised unchanged.
        """
        try:
            # Fall back to the language's default voice when none is given.
            if voice is None:
                voice = self._get_default_voice(language)
            logger.debug(
                f"Synthesizing text with Edge TTS - "
                f"language={language}, voice={voice}, rate={rate}, pitch={pitch}"
            )

            # edge-tts expects signed offsets relative to the normal value.
            rate_str = f"{(rate - 1) * 100:+.0f}%"
            pitch_str = f"{(pitch - 1) * 100:+.0f}Hz"

            communicate = edge_tts.Communicate(
                text=text,
                voice=voice,
                rate=rate_str,
                pitch=pitch_str,
            )

            # The stream also yields non-audio chunks; keep only audio data.
            audio_data = BytesIO()
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data.write(chunk["data"])

            # After sequential writes the position equals the total size, so
            # read it before rewinding instead of exporting the buffer.
            audio_size = audio_data.tell()
            audio_data.seek(0)
            logger.debug(
                f"Successfully synthesized text. Audio size: {audio_size} bytes"
            )
            return audio_data
        except Exception as e:
            logger.error(f"Error synthesizing text with Edge TTS: {str(e)}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """Return the voices available for the language of ``language``.

        Matching is done on the primary language subtag only, so ``zh-CN``
        also returns ``zh-TW`` / ``zh-HK`` voices. The subtag is compared for
        equality (case-insensitively) so that e.g. ``fi`` does not match
        ``fil-PH``. An empty ``language`` returns every voice.

        Args:
            language: Language code such as ``zh-CN`` or ``en-US``.

        Returns:
            A list of dicts with the keys ``name``, ``voice_id``, ``locale``,
            ``display_name`` and ``gender``. Missing source fields become
            empty strings.

        Raises:
            Exception: Any error raised while fetching the voice list is
                logged and re-raised unchanged.
        """
        try:
            logger.debug(f"Fetching supported voices for language: {language}")
            voices = await edge_tts.list_voices()

            wanted = language.split("-")[0].lower()
            filtered_voices = [
                {
                    "name": voice.get("ShortName", ""),
                    "voice_id": voice.get("ShortName", ""),
                    "locale": voice.get("Locale", ""),
                    # The voice list exposes the human-readable name as
                    # "FriendlyName"; "DisplayName" is kept as a fallback.
                    "display_name": (
                        voice.get("FriendlyName") or voice.get("DisplayName", "")
                    ),
                    "gender": voice.get("Gender", ""),
                }
                for voice in voices
                if not wanted
                or voice.get("Locale", "").split("-")[0].lower() == wanted
            ]
            logger.debug(f"Found {len(filtered_voices)} voices for language {language}")
            return filtered_voices
        except Exception as e:
            logger.error(f"Error fetching supported voices: {str(e)}")
            raise

    def get_engine_name(self) -> str:
        """Return the engine name."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version."""
        return self.engine_version

    @staticmethod
    def _get_default_voice(language: str) -> str:
        """Return the default voice ID for ``language``.

        Args:
            language: Language code, e.g. ``en-US``.

        Returns:
            The voice mapped to ``language``, or ``zh-CN-XiaoxiaoNeural``
            when the language has no mapping.
        """
        return EdgeTTSEngine._DEFAULT_VOICES.get(
            language, EdgeTTSEngine._FALLBACK_VOICE
        )