151 lines
4.8 KiB
Python
151 lines
4.8 KiB
Python
"""Edge-TTS engine implementation."""
|
||
import edge_tts
|
||
from typing import Optional
|
||
from io import BytesIO
|
||
from .base import TTSEngine
|
||
from utils.logger import logger
|
||
|
||
|
||
class EdgeTTSEngine(TTSEngine):
    """Text-to-speech engine backed by Microsoft Edge TTS (``edge_tts``).

    Supports multiple languages and voices and is free to use.
    """

    # Voice returned when nothing in ``_DEFAULT_VOICES`` matches the request.
    _FALLBACK_VOICE = "zh-CN-XiaoxiaoNeural"

    # Default voice per locale. Values must be valid Edge voice short names
    # (``<locale>-<Name>Neural``); the first entry of each language also
    # serves as the default for that language's other regions.
    _DEFAULT_VOICES = {
        "zh-CN": "zh-CN-XiaoxiaoNeural",  # Chinese (Simplified) - Xiaoxiao
        "zh-TW": "zh-TW-HsiaoChenNeural",  # Chinese (Traditional)
        "en-US": "en-US-AriaNeural",  # English (United States)
        "en-GB": "en-GB-SoniaNeural",  # English (United Kingdom)
        "ja-JP": "ja-JP-NanamiNeural",  # Japanese
        "ko-KR": "ko-KR-SunHiNeural",  # Korean
        "fr-FR": "fr-FR-DeniseNeural",  # French
        "de-DE": "de-DE-ConradNeural",  # German
        "es-ES": "es-ES-AlvaroNeural",  # Spanish
        "ru-RU": "ru-RU-DmitryNeural",  # Russian
    }

    def __init__(self):
        """Set the engine name/version attributes and log the initialization."""
        self.engine_name = "edge-tts"
        self.engine_version = "1.0.0"
        logger.info(f"Initialized {self.engine_name} engine")

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` to speech with Edge TTS.

        Args:
            text: Text to synthesize.
            language: Language code used to pick a default voice when
                ``voice`` is None. Defaults to ``"zh-CN"``.
            voice: Voice ID. When None, the default voice for ``language``
                is used.
            rate: Speaking-rate multiplier; 1.0 is normal speed. Sent to
                Edge TTS as a signed percentage (1.2 -> ``"+20%"``).
            pitch: Pitch factor; 1.0 is normal pitch. Sent to Edge TTS as a
                signed offset in Hz, computed as ``(pitch - 1) * 100``
                (1.1 -> ``"+10Hz"``).

        Returns:
            A ``BytesIO`` positioned at offset 0 that holds the audio bytes
            streamed back by Edge TTS (MP3, per the original documentation).

        Raises:
            Exception: Any error raised by ``edge_tts`` is logged and
                re-raised unchanged.
        """
        try:
            # Fall back to the language's default voice when none is given.
            if voice is None:
                voice = self._get_default_voice(language)

            logger.debug(
                f"Synthesizing text with Edge TTS - "
                f"language={language}, voice={voice}, rate={rate}, pitch={pitch}"
            )

            # Rate is a signed percentage; pitch is a signed Hz offset.
            rate_str = f"{(rate - 1) * 100:+.0f}%"
            pitch_str = f"{(pitch - 1) * 100:+.0f}Hz"

            communicate = edge_tts.Communicate(
                text=text,
                voice=voice,
                rate=rate_str,
                pitch=pitch_str,
            )

            # The stream yields several chunk types; keep only audio payloads.
            audio_data = BytesIO()
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data.write(chunk["data"])

            # The write position after appending equals the total byte count.
            audio_size = audio_data.tell()
            audio_data.seek(0)
            logger.debug(
                f"Successfully synthesized text. Audio size: {audio_size} bytes"
            )
            return audio_data

        except Exception as e:
            logger.error(f"Error synthesizing text with Edge TTS: {str(e)}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """Return the Edge TTS voices available for a language.

        Voices are matched on the primary language subtag, so ``"zh-CN"``
        returns every ``zh-*`` voice. The comparison is an exact subtag
        match (case-insensitive), so ``"fi-FI"`` does not pick up
        ``fil-PH`` voices. An empty ``language`` returns all voices.

        Args:
            language: Language code such as ``"zh-CN"`` or ``"en-US"``.

        Returns:
            A list of dicts with the keys ``name``, ``voice_id``, ``locale``,
            ``display_name`` and ``gender``.

        Raises:
            Exception: Any error raised while fetching the voice list is
                logged and re-raised unchanged.
        """
        try:
            logger.debug(f"Fetching supported voices for language: {language}")
            voices = await edge_tts.list_voices()

            # Hoisted out of the comprehension: the target subtag is constant.
            target_lang = language.split("-")[0].lower()

            filtered_voices = [
                {
                    "name": voice.get("ShortName", ""),
                    "voice_id": voice.get("ShortName", ""),
                    "locale": voice.get("Locale", ""),
                    # edge_tts voice entries are expected to expose
                    # "FriendlyName" rather than "DisplayName"; try both so
                    # the field is not always empty.
                    "display_name": voice.get("DisplayName")
                    or voice.get("FriendlyName", ""),
                    "gender": voice.get("Gender", ""),
                }
                for voice in voices
                if not target_lang
                or voice.get("Locale", "").split("-")[0].lower() == target_lang
            ]

            logger.debug(f"Found {len(filtered_voices)} voices for language {language}")
            return filtered_voices

        except Exception as e:
            logger.error(f"Error fetching supported voices: {str(e)}")
            raise

    def get_engine_name(self) -> str:
        """Return the engine name."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version."""
        return self.engine_version

    @staticmethod
    def _get_default_voice(language: str) -> str:
        """Return the default voice ID for a language code.

        Lookup order: an exact locale match, then the first configured
        voice sharing the same primary language subtag (``"en-AU"`` ->
        the ``en-US`` voice), then the global fallback voice.

        Args:
            language: Language code such as ``"zh-CN"``.

        Returns:
            The default voice ID.
        """
        table = EdgeTTSEngine._DEFAULT_VOICES

        exact = table.get(language)
        if exact is not None:
            return exact

        if isinstance(language, str) and language:
            # Accept both "en-AU" and "en_AU" style codes.
            primary = language.replace("_", "-").split("-")[0].lower()
            for locale, voice_id in table.items():
                if locale.split("-")[0].lower() == primary:
                    return voice_id

        return EdgeTTSEngine._FALLBACK_VOICE
|