add edge tts

This commit is contained in:
2025-11-28 20:27:10 +08:00
parent f796a3833b
commit 87160c5265
20 changed files with 3589 additions and 3 deletions

342
tts/README.md Normal file
View File

@ -0,0 +1,342 @@
"""
TTS 模块文档
本模块提供文本转语音(Text-to-Speech)的统一接口,支持多引擎扩展架构。
"""
# TTS 模块使用指南
## 模块结构
```
tts/
├── __init__.py # 模块入口
├── base.py # TTS 引擎基类(抽象接口)
├── edge_tts_engine.py # Edge-TTS 引擎实现
├── factory.py # TTS 引擎工厂类
├── service.py # 高级 TTS 服务接口
├── examples.py # 使用示例
└── README.md # 本文档
```
## 快速开始
### 1. 安装依赖
```bash
pip install edge-tts
```
### 2. 配置 TTS 引擎
在 `.env` 文件中配置:
```env
# TTS 引擎配置
TTS_ENGINE=edge-tts # 使用的 TTS 引擎
TTS_LANGUAGE=zh-CN # 默认语言
TTS_VOICE= # 默认声音(为空使用引擎默认)
TTS_RATE=1.0 # 语速(1.0 为正常)
TTS_PITCH=1.0 # 音调(1.0 为正常)
```
### 3. 基本使用
#### 方法一:使用高级服务(推荐)
```python
from tts.service import TTSService
import asyncio
async def main():
# 使用默认配置合成语音
audio = await TTSService.synthesize("你好,世界!")
# 自定义参数
audio = await TTSService.synthesize(
"Hello, World!",
language="en-US",
rate=1.2 # 快速
)
# 获取支持的声音
voices = await TTSService.get_supported_voices()
# 获取引擎信息
info = TTSService.get_engine_info()
asyncio.run(main())
```
#### 方法二:直接使用引擎工厂
```python
from tts.factory import TTSEngineFactory
import asyncio
async def main():
# 创建引擎实例
engine = TTSEngineFactory.create("edge-tts")
# 合成语音
audio = await engine.synthesize(
"你好,世界!",
language="zh-CN"
)
# 获取支持的声音
voices = await engine.get_supported_voices("zh-CN")
asyncio.run(main())
```
#### 方法三:直接使用引擎
```python
from tts.edge_tts_engine import EdgeTTSEngine
import asyncio
async def main():
engine = EdgeTTSEngine()
audio = await engine.synthesize(
"你好,世界!",
voice="zh-CN-XiaoxiaoNeural",
language="zh-CN"
)
asyncio.run(main())
```
## API 文档
### TTSService(推荐使用)
高级服务接口,自动使用配置文件中的设置。
```python
async def synthesize(
text: str,
language: Optional[str] = None,
voice: Optional[str] = None,
rate: Optional[float] = None,
pitch: Optional[float] = None,
) -> BytesIO:
"""将文本合成为语音"""
async def get_supported_voices(language: Optional[str] = None) -> list[dict]:
"""获取支持的声音列表"""
def get_engine_info() -> dict:
"""获取引擎信息"""
def reset_engine() -> None:
"""重置引擎(仅在切换引擎时需要)"""
```
### TTSEngineFactory
引擎工厂类,管理引擎的创建和生命周期。
```python
@classmethod
def create(engine_type: str | TTSEngineType) -> TTSEngine:
"""创建引擎实例(单例模式)"""
@classmethod
def register_engine(engine_type: str, engine_class: type[TTSEngine]) -> None:
"""注册新的引擎类型"""
@classmethod
def get_supported_engines() -> list[str]:
"""获取所有支持的引擎"""
```
### TTSEngine(基类)
所有引擎必须实现的接口。
```python
async def synthesize(
text: str,
language: str = "zh-CN",
voice: Optional[str] = None,
rate: float = 1.0,
pitch: float = 1.0,
) -> BytesIO:
"""将文本合成为语音"""
async def get_supported_voices(language: str = "zh-CN") -> list[dict]:
"""获取支持的声音"""
def get_engine_name() -> str:
"""获取引擎名称"""
def get_engine_version() -> str:
"""获取引擎版本"""
```
## 支持的语言和声音
### Edge-TTS 支持的主要语言
- **中文(简体)**: zh-CN - 晓晓 (zh-CN-XiaoxiaoNeural)
- **中文(繁体)**: zh-TW
- **英文(美国)**: en-US - Aria (en-US-AriaNeural)
- **英文(英国)**: en-GB - Sonia (en-GB-SoniaNeural)
- **日语**: ja-JP
- **韩语**: ko-KR
- **法语**: fr-FR
- **德语**: de-DE
- **西班牙语**: es-ES
- **俄语**: ru-RU
### 获取完整的声音列表
```python
from tts.service import TTSService
import asyncio
async def main():
voices = await TTSService.get_supported_voices("zh-CN")
for voice in voices:
print(f"{voice['display_name']}: {voice['name']}")
asyncio.run(main())
```
## 扩展新的 TTS 引擎
### 步骤 1:创建引擎类
创建新文件 `tts/new_engine.py`:
```python
from .base import TTSEngine
from typing import Optional
from io import BytesIO
class NewTTSEngine(TTSEngine):
"""新的 TTS 引擎实现"""
async def synthesize(
self,
text: str,
language: str = "zh-CN",
voice: Optional[str] = None,
rate: float = 1.0,
pitch: float = 1.0,
) -> BytesIO:
# 实现合成逻辑
pass
async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
# 实现获取声音列表
pass
def get_engine_name(self) -> str:
return "new-engine"
def get_engine_version(self) -> str:
return "1.0.0"
```
### 步骤 2:在工厂中注册
编辑 `tts/factory.py`:
```python
from .new_engine import NewTTSEngine
class TTSEngineType(Enum):
EDGE_TTS = "edge-tts"
NEW_ENGINE = "new-engine" # 添加新引擎
class TTSEngineFactory:
_engines = {
TTSEngineType.EDGE_TTS: EdgeTTSEngine,
TTSEngineType.NEW_ENGINE: NewTTSEngine, # 注册引擎类
}
```
### 步骤 3:更新配置
在 `.env` 中配置使用新引擎:
```env
TTS_ENGINE=new-engine
```
### 步骤 4:使用新引擎
```python
from tts.service import TTSService
# TTSService 会自动使用配置中的引擎
audio = await TTSService.synthesize("Hello, World!")
```
## REST API 端点
### 1. 合成语音
```http
POST /api/v1/tts/synthesize
Content-Type: application/json
{
"text": "你好,世界!",
"language": "zh-CN",
"voice": null,
"rate": 1.0,
"pitch": 1.0
}
```
### 2. 获取声音列表
```http
GET /api/v1/tts/voices?language=zh-CN
```
### 3. 获取支持的引擎
```http
GET /api/v1/tts/engines
```
### 4. 获取引擎信息
```http
GET /api/v1/tts/engine-info
```
## 性能优化
1. **引擎缓存**:TTSEngineFactory 使用单例模式缓存引擎实例
2. **异步处理**:所有 IO 操作都是异步的,支持高并发
3. **配置缓存**:从配置文件读取的设置只在初始化时加载一次
## 错误处理
```python
from tts.service import TTSService
try:
audio = await TTSService.synthesize("文本")
except Exception as e:
print(f"TTS 合成失败: {e}")
```
## 许可证
参考主项目许可证
## 更新日志
### v1.0.0 (初始版本)
- ✅ Edge-TTS 引擎实现
- ✅ 工厂模式支持引擎扩展
- ✅ 高级服务接口
- ✅ REST API 支持
- ✅ 多语言支持

19
tts/__init__.py Normal file
View File

@ -0,0 +1,19 @@
"""
TTS (Text-to-Speech) 模块
提供统一的 TTS 引擎接口,支持多个 TTS 引擎的扩展。
当前支持: Edge-TTS
"""
from .base import TTSEngine
from .edge_tts_engine import EdgeTTSEngine
from .factory import TTSEngineFactory, TTSEngineType
from .service import TTSService
# Names re-exported as the public API of the tts package; each one is
# imported from a submodule directly above.
__all__ = [
"TTSEngine",
"EdgeTTSEngine",
"TTSEngineFactory",
"TTSEngineType",
"TTSService",
]

71
tts/base.py Normal file
View File

@ -0,0 +1,71 @@
"""
TTS 引擎基础接口定义
"""
from abc import ABC, abstractmethod
from typing import Optional
from io import BytesIO
class TTSEngine(ABC):
    """Abstract contract that every text-to-speech engine must fulfil.

    Concrete engines subclass this and provide all four abstract methods;
    a subclass that leaves any of them out cannot be instantiated.
    """

    @abstractmethod
    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Turn ``text`` into speech audio.

        Args:
            text: The text to speak.
            language: Language code; defaults to ``zh-CN`` (Chinese).
            voice: Voice / speaker ID. ``None`` asks the engine to pick
                its default voice.
            rate: Speaking-rate multiplier, 1.0 being normal speed
                (typically within 0.5-2.0).
            pitch: Pitch multiplier, 1.0 being normal pitch
                (typically within 0.5-2.0).

        Returns:
            A ``BytesIO`` holding the synthesized audio data.
        """
        ...

    @abstractmethod
    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """List the voices available for ``language``.

        Args:
            language: Language code to look up.

        Returns:
            A list of dictionaries, one per voice, carrying fields such as
            ``name`` and ``voice_id``.
        """
        ...

    @abstractmethod
    def get_engine_name(self) -> str:
        """Return the engine's name."""
        ...

    @abstractmethod
    def get_engine_version(self) -> str:
        """Return the engine's version string."""
        ...

150
tts/edge_tts_engine.py Normal file
View File

@ -0,0 +1,150 @@
"""
Edge-TTS 引擎实现
"""
import edge_tts
from typing import Optional
from io import BytesIO
from .base import TTSEngine
from utils.logger import logger
class EdgeTTSEngine(TTSEngine):
    """TTS engine backed by the ``edge_tts`` package (Microsoft Edge voices).

    Text is sent through ``edge_tts.Communicate`` and the streamed audio
    chunks are collected into an in-memory buffer.
    """

    # Voice used when the caller passes no explicit voice, keyed by locale.
    # IDs must match the ``ShortName`` values reported by edge_tts.
    _DEFAULT_VOICES = {
        "zh-CN": "zh-CN-XiaoxiaoNeural",  # Chinese (Simplified) - Xiaoxiao
        "zh-TW": "zh-TW-HsiaoChenNeural",  # Chinese (Traditional)
        "en-US": "en-US-AriaNeural",  # English (United States)
        "en-GB": "en-GB-SoniaNeural",  # English (United Kingdom)
        "ja-JP": "ja-JP-NanamiNeural",  # Japanese
        "ko-KR": "ko-KR-SunHiNeural",  # Korean
        "fr-FR": "fr-FR-DeniseNeural",  # French
        "de-DE": "de-DE-ConradNeural",  # German
        "es-ES": "es-ES-AlvaroNeural",  # Spanish
        "ru-RU": "ru-RU-DmitryNeural",  # Russian
    }

    # Last-resort voice when neither the locale nor its language is mapped.
    _FALLBACK_VOICE = "zh-CN-XiaoxiaoNeural"

    def __init__(self):
        """Set the engine name/version attributes and log the initialization."""
        self.engine_name = "edge-tts"
        self.engine_version = "1.0.0"
        logger.info(f"Initialized {self.engine_name} engine")

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` to speech with Edge TTS.

        Args:
            text: The text to speak.
            language: Language code, used only to choose a default voice
                when ``voice`` is ``None``.
            voice: Voice ID; ``None`` selects the default for ``language``.
            rate: Speed multiplier, 1.0 is normal. Sent to edge_tts as a
                signed percentage offset (1.2 -> "+20%").
            pitch: Pitch multiplier, 1.0 is normal. Sent to edge_tts as a
                signed Hz offset of ``(pitch - 1) * 100`` (1.1 -> "+10Hz").

        Returns:
            A ``BytesIO`` positioned at offset 0 containing the audio bytes
            received from the service (MP3 according to the original
            module documentation).

        Raises:
            Exception: Any error raised by edge_tts is logged and re-raised
                unchanged.
        """
        try:
            # Fall back to the per-language default voice.
            if voice is None:
                voice = self._get_default_voice(language)

            logger.debug(
                f"Synthesizing text with Edge TTS - "
                f"language={language}, voice={voice}, rate={rate}, pitch={pitch}"
            )

            # edge_tts expects signed offset strings, not multipliers.
            rate_str = f"{(rate - 1) * 100:+.0f}%"
            pitch_str = f"{(pitch - 1) * 100:+.0f}Hz"

            communicate = edge_tts.Communicate(
                text=text,
                voice=voice,
                rate=rate_str,
                pitch=pitch_str,
            )

            # The stream interleaves audio with other chunk types; keep audio only.
            audio_data = BytesIO()
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data.write(chunk["data"])

            # The write position equals the total size before rewinding.
            audio_size = audio_data.tell()
            audio_data.seek(0)

            logger.debug(
                f"Successfully synthesized text. Audio size: {audio_size} bytes"
            )
            return audio_data
        except Exception as e:
            logger.error(f"Error synthesizing text with Edge TTS: {str(e)}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """List the Edge TTS voices for the language of ``language``.

        Matching is done on the primary language subtag, so ``zh-CN``
        returns every ``zh-*`` voice. The subtag is compared for equality
        rather than by prefix, so ``fi-FI`` does not also match ``fil-PH``.
        An empty ``language`` returns all voices.

        Args:
            language: Language code, e.g. ``zh-CN`` or ``en-US``.

        Returns:
            A list of dictionaries with the keys ``name``, ``voice_id``,
            ``locale``, ``display_name`` and ``gender``.

        Raises:
            Exception: Any error raised by edge_tts is logged and re-raised
                unchanged.
        """
        try:
            logger.debug(f"Fetching supported voices for language: {language}")
            voices = await edge_tts.list_voices()

            # Computed once instead of once per voice.
            primary = language.split("-")[0]

            filtered_voices = [
                {
                    "name": voice.get("ShortName", ""),
                    "voice_id": voice.get("ShortName", ""),
                    "locale": voice.get("Locale", ""),
                    # Prefer "DisplayName" but fall back to "FriendlyName",
                    # the human-readable key edge_tts voice entries carry.
                    "display_name": voice.get("DisplayName")
                    or voice.get("FriendlyName", ""),
                    "gender": voice.get("Gender", ""),
                }
                for voice in voices
                if not primary or voice.get("Locale", "").split("-")[0] == primary
            ]

            logger.debug(f"Found {len(filtered_voices)} voices for language {language}")
            return filtered_voices
        except Exception as e:
            logger.error(f"Error fetching supported voices: {str(e)}")
            raise

    def get_engine_name(self) -> str:
        """Return the engine name stored on the instance."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version stored on the instance."""
        return self.engine_version

    @staticmethod
    def _get_default_voice(language: str) -> str:
        """Pick the default voice ID for ``language``.

        Lookup order: exact locale match, then the first mapped locale that
        shares the primary language subtag (``en-AU`` -> the ``en-US``
        voice), then the global fallback voice.

        Args:
            language: Language code.

        Returns:
            The default voice ID.
        """
        default_voices = EdgeTTSEngine._DEFAULT_VOICES
        if language in default_voices:
            return default_voices[language]

        # A same-language voice is a better guess than the global fallback.
        primary = language.split("-")[0] if language else ""
        if primary:
            for locale, voice in default_voices.items():
                if locale.split("-")[0] == primary:
                    return voice

        return EdgeTTSEngine._FALLBACK_VOICE

118
tts/examples.py Normal file
View File

@ -0,0 +1,118 @@
"""
TTS 模块使用示例
演示如何使用 TTS 引擎和服务。
"""
import asyncio
from io import BytesIO
# Example 1: use the Edge-TTS engine directly
async def example_direct_engine():
    """Drive ``EdgeTTSEngine`` without the factory or the service layer."""
    from tts.edge_tts_engine import EdgeTTSEngine

    tts = EdgeTTSEngine()
    print(f"Engine: {tts.get_engine_name()} v{tts.get_engine_version()}")

    # Synthesize one sentence and report the size of the result.
    clip = await tts.synthesize("你好,我是语音合成助手。", language="zh-CN")
    print(f"Audio synthesized: {clip.getbuffer().nbytes} bytes")

    # Fetch the voice catalogue and show the first three entries.
    catalogue = await tts.get_supported_voices("zh-CN")
    print(f"Supported voices: {len(catalogue)} found")
    preview = catalogue[:3]
    for entry in preview:
        print(f" - {entry['display_name']} ({entry['name']})")
# Example 2: obtain the engine through the factory
async def example_factory():
    """Create an engine via ``TTSEngineFactory`` and synthesize several languages."""
    from tts.factory import TTSEngineFactory

    tts = TTSEngineFactory.create("edge-tts")
    print(f"\nUsing {tts.get_engine_name()} engine")

    # One sample sentence per language, processed in this order.
    samples = (
        ("zh-CN", "你好,世界!"),
        ("en-US", "Hello, World!"),
        ("ja-JP", "こんにちは、世界!"),
    )
    for locale, sentence in samples:
        clip = await tts.synthesize(sentence, language=locale)
        print(f"Synthesized {locale}: {clip.getbuffer().nbytes} bytes")
# Example 3: use the high-level service
async def example_service():
    """Exercise the ``TTSService`` convenience interface."""
    from tts.service import TTSService

    # Engine name, version and the configured defaults.
    print(f"\nTTS Service Info: {TTSService.get_engine_info()}")

    # Synthesis using only the configured defaults.
    default_clip = await TTSService.synthesize("使用服务默认配置合成语音。")
    print(f"Synthesized with defaults: {default_clip.getbuffer().nbytes} bytes")

    # Synthesis with the speaking rate overridden.
    fast_clip = await TTSService.synthesize("这是一个更快的语音示例。", rate=1.2)
    print(f"Synthesized with rate=1.2: {fast_clip.getbuffer().nbytes} bytes")

    # Voice catalogue for the configured language.
    catalogue = await TTSService.get_supported_voices()
    print(f"Available voices: {len(catalogue)} found")
# Example 4: write the synthesized audio to a file
async def example_save_audio():
    """Synthesize a sentence and store the audio bytes on disk."""
    from tts.service import TTSService

    clip = await TTSService.synthesize("这是一个保存到文件的语音示例。")

    # Dump the whole buffer into an MP3 file in the working directory.
    target = "output_audio.mp3"
    payload = clip.getvalue()
    with open(target, "wb") as sink:
        sink.write(payload)

    print(f"\nAudio saved to {target}")
async def main():
    """Run all examples in order; the first failure is printed and ends the run."""
    banner = "=" * 50
    divider = "-" * 50

    print(banner)
    print("TTS Module Examples")
    print(banner)

    # Heading text paired with the example coroutine it introduces.
    stages = (
        ("\n1. Direct Engine Usage", example_direct_engine),
        ("\n2. Factory Pattern", example_factory),
        ("\n3. Service Interface", example_service),
        ("\n4. Save Audio to File", example_save_audio),
    )

    try:
        for heading, run_example in stages:
            print(heading)
            print(divider)
            await run_example()
    except Exception as e:
        # Demo script boundary: report the problem instead of a traceback.
        print(f"Error: {e}")


if __name__ == "__main__":
    asyncio.run(main())

112
tts/factory.py Normal file
View File

@ -0,0 +1,112 @@
"""
TTS 引擎工厂类
"""
from enum import Enum
from typing import Optional
from .base import TTSEngine
from .edge_tts_engine import EdgeTTSEngine
from utils.logger import logger
class TTSEngineType(Enum):
    """Identifiers of the TTS engine types the package knows about."""

    EDGE_TTS = "edge-tts"

    # Additional engine types can be declared here, for example:
    # GOOGLE_TTS = "google-tts"
    # BAIDU_TTS = "baidu-tts"
    # AZURE_TTS = "azure-tts"
class TTSEngineFactory:
    """Create and cache TTS engine instances.

    Engine classes are looked up in a class-level registry keyed by
    ``TTSEngineType``; at most one instance per type is created and reused
    until ``clear_instances`` is called or the type is re-registered.
    """

    # Registry of engine classes; extended through ``register_engine``.
    _engines: dict[TTSEngineType, type[TTSEngine]] = {
        TTSEngineType.EDGE_TTS: EdgeTTSEngine,
    }

    # Cache holding one engine instance per engine type.
    _instances: dict[TTSEngineType, TTSEngine] = {}

    @classmethod
    def create(cls, engine_type: str | TTSEngineType) -> TTSEngine:
        """Return the cached engine for ``engine_type``, creating it on first use.

        Args:
            engine_type: Engine identifier, either the enum member or its
                string value (e.g. ``"edge-tts"``).

        Returns:
            The ``TTSEngine`` instance for that type.

        Raises:
            ValueError: If the string is not a known engine type, or the
                type has no engine class registered.
        """
        if isinstance(engine_type, str):
            try:
                engine_type = TTSEngineType(engine_type)
            except ValueError as err:
                # Chain the enum's error so the original cause stays visible.
                raise ValueError(
                    f"Unsupported TTS engine type: {engine_type}. "
                    f"Supported types: {[e.value for e in TTSEngineType]}"
                ) from err

        if engine_type not in cls._instances:
            if engine_type not in cls._engines:
                raise ValueError(
                    f"TTS engine '{engine_type.value}' is not registered. "
                    f"Available engines: {list(cls._engines.keys())}"
                )

            engine_class = cls._engines[engine_type]
            cls._instances[engine_type] = engine_class()
            logger.info(f"Created TTS engine instance: {engine_type.value}")

        return cls._instances[engine_type]

    @classmethod
    def register_engine(
        cls, engine_type: str | TTSEngineType, engine_class: type[TTSEngine]
    ) -> None:
        """Register (or replace) the engine class used for ``engine_type``.

        Any instance already cached for that type is discarded so the next
        ``create`` call builds one from the newly registered class.

        Args:
            engine_type: Engine identifier; a string must be the value of an
                existing ``TTSEngineType`` member.
            engine_class: Engine class; must be a subclass of ``TTSEngine``.

        Raises:
            TypeError: If ``engine_class`` is not a subclass of ``TTSEngine``.
            ValueError: If ``engine_type`` is a string that does not match
                any ``TTSEngineType`` member.
        """
        # The isinstance guard keeps non-class arguments on the documented
        # TypeError instead of issubclass()'s own less helpful message.
        if not (isinstance(engine_class, type) and issubclass(engine_class, TTSEngine)):
            raise TypeError(f"{engine_class} must be a subclass of TTSEngine")

        if isinstance(engine_type, str):
            try:
                engine_type = TTSEngineType(engine_type)
            except ValueError as err:
                raise ValueError(
                    f"Unknown TTS engine type: {engine_type}. "
                    f"Add it to TTSEngineType before registering. "
                    f"Known types: {[e.value for e in TTSEngineType]}"
                ) from err

        cls._engines[engine_type] = engine_class
        # Drop a stale instance built from the previously registered class.
        cls._instances.pop(engine_type, None)
        logger.info(f"Registered TTS engine: {engine_type.value}")

    @classmethod
    def get_supported_engines(cls) -> list[str]:
        """Return the string identifiers of all registered engines.

        Only types with a registered engine class are listed, so every
        returned value can be passed to ``create``.

        Returns:
            List of engine type values.
        """
        return [engine_type.value for engine_type in cls._engines]

    @classmethod
    def clear_instances(cls) -> None:
        """Discard every cached engine instance."""
        cls._instances.clear()
        logger.debug("Cleared TTS engine instances cache")

119
tts/service.py Normal file
View File

@ -0,0 +1,119 @@
"""
TTS 服务集成模块
提供高层 TTS 服务接口,直接使用配置文件中的 TTS 设置。
"""
from io import BytesIO
from typing import Optional
from config.settings import settings
from tts.factory import TTSEngineFactory
from tts.base import TTSEngine
from utils.logger import logger
class TTSService:
    """High-level TTS entry point driven by the application settings.

    All methods are classmethods. The engine named by ``settings.TTS_ENGINE``
    is created on first use and reused afterwards; arguments the caller
    omits are filled in from the ``settings.TTS_*`` values.
    """

    # Lazily created engine shared by all calls; None until first use.
    _engine: Optional[TTSEngine] = None

    @classmethod
    def _get_engine(cls) -> TTSEngine:
        """Return the shared engine, creating it from the settings if needed.

        Returns:
            The ``TTSEngine`` instance used by the service.
        """
        if cls._engine is None:
            cls._engine = TTSEngineFactory.create(settings.TTS_ENGINE)
            logger.info(
                f"TTS Service initialized with engine: {settings.TTS_ENGINE}"
            )
        return cls._engine

    @classmethod
    async def synthesize(
        cls,
        text: str,
        language: Optional[str] = None,
        voice: Optional[str] = None,
        rate: Optional[float] = None,
        pitch: Optional[float] = None,
    ) -> BytesIO:
        """Synthesize ``text`` to speech with the configured engine.

        Args:
            text: The text to speak.
            language: Language code; empty/``None`` uses ``settings.TTS_LANGUAGE``.
            voice: Voice ID; empty/``None`` uses ``settings.TTS_VOICE``, and
                if that is empty too the engine chooses its default voice.
            rate: Speaking rate; ``None`` uses ``settings.TTS_RATE``.
            pitch: Pitch; ``None`` uses ``settings.TTS_PITCH``.

        Returns:
            A ``BytesIO`` containing the synthesized audio data.
        """
        engine = cls._get_engine()

        # Empty strings carry no information, so they fall back like None.
        language = language or settings.TTS_LANGUAGE
        voice = voice or settings.TTS_VOICE or None

        # Numeric arguments fall back only when omitted; using ``or`` here
        # would silently discard an explicit falsy value such as 0.
        if rate is None:
            rate = settings.TTS_RATE
        if pitch is None:
            pitch = settings.TTS_PITCH

        logger.debug(f"Synthesizing text with TTS Service: {text[:50]}...")

        return await engine.synthesize(
            text=text,
            language=language,
            voice=voice,
            rate=rate,
            pitch=pitch,
        )

    @classmethod
    async def get_supported_voices(cls, language: Optional[str] = None) -> list[dict]:
        """List the voices the configured engine offers for a language.

        Args:
            language: Language code; empty/``None`` uses ``settings.TTS_LANGUAGE``.

        Returns:
            The voice list returned by the engine.
        """
        engine = cls._get_engine()
        language = language or settings.TTS_LANGUAGE
        return await engine.get_supported_voices(language)

    @classmethod
    def get_engine_info(cls) -> dict:
        """Describe the active engine and the configured defaults.

        Returns:
            A dictionary with ``engine_name``, ``engine_version`` and a
            ``config`` sub-dictionary (``language``, ``voice``, ``rate``,
            ``pitch``); an empty configured voice is reported as
            ``"default"``.
        """
        engine = cls._get_engine()
        return {
            "engine_name": engine.get_engine_name(),
            "engine_version": engine.get_engine_version(),
            "config": {
                "language": settings.TTS_LANGUAGE,
                "voice": settings.TTS_VOICE or "default",
                "rate": settings.TTS_RATE,
                "pitch": settings.TTS_PITCH,
            },
        }

    @classmethod
    def reset_engine(cls) -> None:
        """Forget the shared engine and clear the factory's instance cache.

        Only needed when switching engines; the next call re-creates the
        engine from ``settings.TTS_ENGINE``.
        """
        cls._engine = None
        TTSEngineFactory.clear_instances()
        logger.info("TTS Service engine reset")