add edge tts
This commit is contained in:
342
tts/README.md
Normal file
342
tts/README.md
Normal file
@ -0,0 +1,342 @@
|
||||
> **TTS 模块文档**
>
> 本模块提供文本转语音(Text-to-Speech)的统一接口,支持多引擎扩展架构。
|
||||
|
||||
# TTS 模块使用指南
|
||||
|
||||
## 模块结构
|
||||
|
||||
```
|
||||
tts/
|
||||
├── __init__.py # 模块入口
|
||||
├── base.py # TTS 引擎基类(抽象接口)
|
||||
├── edge_tts_engine.py # Edge-TTS 引擎实现
|
||||
├── factory.py # TTS 引擎工厂类
|
||||
├── service.py # 高级 TTS 服务接口
|
||||
├── examples.py # 使用示例
|
||||
└── README.md # 本文档
|
||||
```
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
pip install edge-tts
|
||||
```
|
||||
|
||||
### 2. 配置 TTS 引擎
|
||||
|
||||
在 `.env` 文件中配置:
|
||||
|
||||
```env
|
||||
# TTS 引擎配置
|
||||
TTS_ENGINE=edge-tts # 使用的 TTS 引擎
|
||||
TTS_LANGUAGE=zh-CN # 默认语言
|
||||
TTS_VOICE= # 默认声音(为空使用引擎默认)
|
||||
TTS_RATE=1.0 # 语速(1.0 为正常)
|
||||
TTS_PITCH=1.0 # 音调(1.0 为正常)
|
||||
```
|
||||
|
||||
### 3. 基本使用
|
||||
|
||||
#### 方法一:使用高级服务(推荐)
|
||||
|
||||
```python
|
||||
from tts.service import TTSService
|
||||
import asyncio
|
||||
|
||||
async def main():
|
||||
# 使用默认配置合成语音
|
||||
audio = await TTSService.synthesize("你好,世界!")
|
||||
|
||||
# 自定义参数
|
||||
audio = await TTSService.synthesize(
|
||||
"Hello, World!",
|
||||
language="en-US",
|
||||
rate=1.2 # 快速
|
||||
)
|
||||
|
||||
# 获取支持的声音
|
||||
voices = await TTSService.get_supported_voices()
|
||||
|
||||
# 获取引擎信息
|
||||
info = TTSService.get_engine_info()
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
#### 方法二:直接使用引擎工厂
|
||||
|
||||
```python
|
||||
from tts.factory import TTSEngineFactory
|
||||
import asyncio
|
||||
|
||||
async def main():
|
||||
# 创建引擎实例
|
||||
engine = TTSEngineFactory.create("edge-tts")
|
||||
|
||||
# 合成语音
|
||||
audio = await engine.synthesize(
|
||||
"你好,世界!",
|
||||
language="zh-CN"
|
||||
)
|
||||
|
||||
# 获取支持的声音
|
||||
voices = await engine.get_supported_voices("zh-CN")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
#### 方法三:直接使用引擎
|
||||
|
||||
```python
|
||||
from tts.edge_tts_engine import EdgeTTSEngine
|
||||
import asyncio
|
||||
|
||||
async def main():
|
||||
engine = EdgeTTSEngine()
|
||||
|
||||
audio = await engine.synthesize(
|
||||
"你好,世界!",
|
||||
voice="zh-CN-XiaoxiaoNeural",
|
||||
language="zh-CN"
|
||||
)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
## API 文档
|
||||
|
||||
### TTSService(推荐使用)
|
||||
|
||||
高级服务接口,自动使用配置文件中的设置。
|
||||
|
||||
```python
|
||||
async def synthesize(
|
||||
text: str,
|
||||
language: Optional[str] = None,
|
||||
voice: Optional[str] = None,
|
||||
rate: Optional[float] = None,
|
||||
pitch: Optional[float] = None,
|
||||
) -> BytesIO:
|
||||
"""将文本合成为语音"""
|
||||
|
||||
async def get_supported_voices(language: Optional[str] = None) -> list[dict]:
|
||||
"""获取支持的声音列表"""
|
||||
|
||||
def get_engine_info() -> dict:
|
||||
"""获取引擎信息"""
|
||||
|
||||
def reset_engine() -> None:
|
||||
"""重置引擎(仅在切换引擎时需要)"""
|
||||
```
|
||||
|
||||
### TTSEngineFactory
|
||||
|
||||
引擎工厂类,管理引擎的创建和生命周期。
|
||||
|
||||
```python
|
||||
@classmethod
|
||||
def create(engine_type: str | TTSEngineType) -> TTSEngine:
|
||||
"""创建引擎实例(单例模式)"""
|
||||
|
||||
@classmethod
|
||||
def register_engine(engine_type: str, engine_class: type[TTSEngine]) -> None:
|
||||
"""注册新的引擎类型"""
|
||||
|
||||
@classmethod
|
||||
def get_supported_engines() -> list[str]:
|
||||
"""获取所有支持的引擎"""
|
||||
```
|
||||
|
||||
### TTSEngine(基类)
|
||||
|
||||
所有引擎必须实现的接口。
|
||||
|
||||
```python
|
||||
async def synthesize(
|
||||
text: str,
|
||||
language: str = "zh-CN",
|
||||
voice: Optional[str] = None,
|
||||
rate: float = 1.0,
|
||||
pitch: float = 1.0,
|
||||
) -> BytesIO:
|
||||
"""将文本合成为语音"""
|
||||
|
||||
async def get_supported_voices(language: str = "zh-CN") -> list[dict]:
|
||||
"""获取支持的声音"""
|
||||
|
||||
def get_engine_name() -> str:
|
||||
"""获取引擎名称"""
|
||||
|
||||
def get_engine_version() -> str:
|
||||
"""获取引擎版本"""
|
||||
```
|
||||
|
||||
## 支持的语言和声音
|
||||
|
||||
### Edge-TTS 支持的主要语言
|
||||
|
||||
- **中文(简体)**: zh-CN - 晓晓 (zh-CN-XiaoxiaoNeural)
|
||||
- **中文(繁体)**: zh-TW
|
||||
- **英文(美国)**: en-US - Aria (en-US-AriaNeural)
|
||||
- **英文(英国)**: en-GB - Sonia (en-GB-SoniaNeural)
|
||||
- **日语**: ja-JP
|
||||
- **韩语**: ko-KR
|
||||
- **法语**: fr-FR
|
||||
- **德语**: de-DE
|
||||
- **西班牙语**: es-ES
|
||||
- **俄语**: ru-RU
|
||||
|
||||
### 获取完整的声音列表
|
||||
|
||||
```python
|
||||
from tts.service import TTSService
|
||||
import asyncio
|
||||
|
||||
async def main():
|
||||
voices = await TTSService.get_supported_voices("zh-CN")
|
||||
for voice in voices:
|
||||
print(f"{voice['display_name']}: {voice['name']}")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
## 扩展新的 TTS 引擎
|
||||
|
||||
### 步骤 1:创建引擎类
|
||||
|
||||
创建新文件 `tts/new_engine.py`:
|
||||
|
||||
```python
|
||||
from .base import TTSEngine
|
||||
from typing import Optional
|
||||
from io import BytesIO
|
||||
|
||||
class NewTTSEngine(TTSEngine):
|
||||
"""新的 TTS 引擎实现"""
|
||||
|
||||
async def synthesize(
|
||||
self,
|
||||
text: str,
|
||||
language: str = "zh-CN",
|
||||
voice: Optional[str] = None,
|
||||
rate: float = 1.0,
|
||||
pitch: float = 1.0,
|
||||
) -> BytesIO:
|
||||
# 实现合成逻辑
|
||||
pass
|
||||
|
||||
async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
|
||||
# 实现获取声音列表
|
||||
pass
|
||||
|
||||
def get_engine_name(self) -> str:
|
||||
return "new-engine"
|
||||
|
||||
def get_engine_version(self) -> str:
|
||||
return "1.0.0"
|
||||
```
|
||||
|
||||
### 步骤 2:在工厂中注册
|
||||
|
||||
编辑 `tts/factory.py`:
|
||||
|
||||
```python
|
||||
from .new_engine import NewTTSEngine
|
||||
|
||||
class TTSEngineType(Enum):
|
||||
EDGE_TTS = "edge-tts"
|
||||
NEW_ENGINE = "new-engine" # 添加新引擎
|
||||
|
||||
class TTSEngineFactory:
|
||||
_engines = {
|
||||
TTSEngineType.EDGE_TTS: EdgeTTSEngine,
|
||||
TTSEngineType.NEW_ENGINE: NewTTSEngine, # 注册引擎类
|
||||
}
|
||||
```
|
||||
|
||||
### 步骤 3:更新配置
|
||||
|
||||
在 `.env` 中配置使用新引擎:
|
||||
|
||||
```env
|
||||
TTS_ENGINE=new-engine
|
||||
```
|
||||
|
||||
### 步骤 4:使用新引擎
|
||||
|
||||
```python
|
||||
import asyncio

from tts.service import TTSService

async def main():
    # TTSService 会自动使用配置中的引擎
    audio = await TTSService.synthesize("Hello, World!")

asyncio.run(main())
|
||||
```
|
||||
|
||||
## REST API 端点
|
||||
|
||||
### 1. 合成语音
|
||||
|
||||
```http
|
||||
POST /api/v1/tts/synthesize
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"text": "你好,世界!",
|
||||
"language": "zh-CN",
|
||||
"voice": null,
|
||||
"rate": 1.0,
|
||||
"pitch": 1.0
|
||||
}
|
||||
```
|
||||
|
||||
### 2. 获取声音列表
|
||||
|
||||
```http
|
||||
GET /api/v1/tts/voices?language=zh-CN
|
||||
```
|
||||
|
||||
### 3. 获取支持的引擎
|
||||
|
||||
```http
|
||||
GET /api/v1/tts/engines
|
||||
```
|
||||
|
||||
### 4. 获取引擎信息
|
||||
|
||||
```http
|
||||
GET /api/v1/tts/engine-info
|
||||
```
|
||||
|
||||
## 性能优化
|
||||
|
||||
1. **引擎缓存**:TTSEngineFactory 使用单例模式缓存引擎实例
|
||||
2. **异步处理**:所有 IO 操作都是异步的,支持高并发
|
||||
3. **配置缓存**:从配置文件读取的设置只在初始化时加载一次
|
||||
|
||||
## 错误处理
|
||||
|
||||
```python
|
||||
import asyncio

from tts.service import TTSService

async def main():
    try:
        audio = await TTSService.synthesize("文本")
    except Exception as e:
        print(f"TTS 合成失败: {e}")

asyncio.run(main())
|
||||
```
|
||||
|
||||
## 许可证
|
||||
|
||||
参考主项目许可证
|
||||
|
||||
## 更新日志
|
||||
|
||||
### v1.0.0 (初始版本)
|
||||
- ✅ Edge-TTS 引擎实现
|
||||
- ✅ 工厂模式支持引擎扩展
|
||||
- ✅ 高级服务接口
|
||||
- ✅ REST API 支持
|
||||
- ✅ 多语言支持
|
||||
19
tts/__init__.py
Normal file
19
tts/__init__.py
Normal file
@ -0,0 +1,19 @@
|
||||
"""
|
||||
TTS (Text-to-Speech) 模块
|
||||
|
||||
提供统一的 TTS 引擎接口,支持多个 TTS 引擎的扩展。
|
||||
当前支持: Edge-TTS
|
||||
"""
|
||||
|
||||
from .base import TTSEngine
|
||||
from .edge_tts_engine import EdgeTTSEngine
|
||||
from .factory import TTSEngineFactory, TTSEngineType
|
||||
from .service import TTSService
|
||||
|
||||
__all__ = [
|
||||
"TTSEngine",
|
||||
"EdgeTTSEngine",
|
||||
"TTSEngineFactory",
|
||||
"TTSEngineType",
|
||||
"TTSService",
|
||||
]
|
||||
71
tts/base.py
Normal file
71
tts/base.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""
|
||||
TTS 引擎基础接口定义
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class TTSEngine(ABC):
    """Abstract interface shared by every TTS engine.

    Concrete engines inherit from this class and must override all four
    abstract methods below before they can be instantiated.
    """

    @abstractmethod
    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` into speech.

        Args:
            text: Text to synthesize.
            language: Language code; defaults to ``zh-CN`` (Chinese).
            voice: Voice / speaker ID. ``None`` means "use the default voice".
            rate: Speaking rate; 1.0 is normal speed, usually within 0.5-2.0.
            pitch: Pitch; 1.0 is the normal pitch, usually within 0.5-2.0.

        Returns:
            A ``BytesIO`` object holding the synthesized audio data.
        """

    @abstractmethod
    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """List the voices available for ``language``.

        Args:
            language: Language code.

        Returns:
            A list of dictionaries, each carrying fields such as ``name``
            and ``voice_id`` for one voice.
        """

    @abstractmethod
    def get_engine_name(self) -> str:
        """Return the engine's name."""

    @abstractmethod
    def get_engine_version(self) -> str:
        """Return the engine's version string."""
|
||||
150
tts/edge_tts_engine.py
Normal file
150
tts/edge_tts_engine.py
Normal file
@ -0,0 +1,150 @@
|
||||
"""
|
||||
Edge-TTS 引擎实现
|
||||
"""
|
||||
import edge_tts
|
||||
from typing import Optional
|
||||
from io import BytesIO
|
||||
from .base import TTSEngine
|
||||
from utils.logger import logger
|
||||
|
||||
|
||||
class EdgeTTSEngine(TTSEngine):
    """TTS engine backed by the ``edge_tts`` package (Microsoft Edge voices).

    Audio chunks streamed by ``edge_tts.Communicate`` are collected into an
    in-memory buffer and returned to the caller.
    """

    # Voice used when the requested language has no entry in _DEFAULT_VOICES.
    _FALLBACK_VOICE = "zh-CN-XiaoxiaoNeural"

    # Default voice per language code. Every ID must be a valid Edge voice
    # short name, otherwise synthesis fails for that language.
    _DEFAULT_VOICES = {
        "zh-CN": "zh-CN-XiaoxiaoNeural",   # Chinese (Simplified) - Xiaoxiao
        "zh-TW": "zh-TW-HsiaoChenNeural",  # Chinese (Traditional)
        "en-US": "en-US-AriaNeural",       # English (United States)
        "en-GB": "en-GB-SoniaNeural",      # English (United Kingdom)
        "ja-JP": "ja-JP-NanamiNeural",     # Japanese
        "ko-KR": "ko-KR-SunHiNeural",      # Korean
        "fr-FR": "fr-FR-DeniseNeural",     # French
        "de-DE": "de-DE-ConradNeural",     # German
        "es-ES": "es-ES-AlvaroNeural",     # Spanish
        "ru-RU": "ru-RU-DmitryNeural",     # Russian
    }

    def __init__(self):
        """Set the engine name/version attributes and log the initialization."""
        self.engine_name = "edge-tts"
        self.engine_version = "1.0.0"
        logger.info(f"Initialized {self.engine_name} engine")

    async def synthesize(
        self,
        text: str,
        language: str = "zh-CN",
        voice: Optional[str] = None,
        rate: float = 1.0,
        pitch: float = 1.0,
    ) -> BytesIO:
        """Synthesize ``text`` with Edge TTS.

        Args:
            text: Text to synthesize.
            language: Language code; only used to pick a default voice when
                ``voice`` is ``None``.
            voice: Voice ID. ``None`` selects the default voice for
                ``language``.
            rate: Speaking rate multiplier; 1.0 is normal speed. Sent to
                edge-tts as a signed percentage, e.g. 1.2 -> ``+20%``.
            pitch: Pitch factor; 1.0 is normal. Sent to edge-tts as a signed
                Hz offset computed as ``(pitch - 1) * 100``, e.g. 1.2 ->
                ``+20Hz``.

        Returns:
            A ``BytesIO`` positioned at offset 0 that contains the streamed
            audio data (the original documentation describes it as MP3).

        Raises:
            Exception: Any error raised by edge-tts is logged and re-raised
                unchanged.
        """
        try:
            if voice is None:
                voice = self._get_default_voice(language)

            logger.debug(
                f"Synthesizing text with Edge TTS - "
                f"language={language}, voice={voice}, rate={rate}, pitch={pitch}"
            )

            # edge-tts expects signed offsets relative to the voice default.
            rate_str = f"{(rate - 1) * 100:+.0f}%"
            pitch_str = f"{(pitch - 1) * 100:+.0f}Hz"

            communicate = edge_tts.Communicate(
                text=text,
                voice=voice,
                rate=rate_str,
                pitch=pitch_str,
            )

            # The stream interleaves audio chunks with other chunk types;
            # only the audio payloads are kept.
            audio_data = BytesIO()
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data.write(chunk["data"])

            audio_data.seek(0)
            logger.debug(
                f"Successfully synthesized text. Audio size: {audio_data.getbuffer().nbytes} bytes"
            )
            return audio_data

        except Exception as e:
            logger.error(f"Error synthesizing text with Edge TTS: {str(e)}")
            raise

    async def get_supported_voices(self, language: str = "zh-CN") -> list[dict]:
        """List the Edge voices whose primary language matches ``language``.

        Matching is done on the primary language subtag only, so ``zh-CN``
        also returns ``zh-TW`` / ``zh-HK`` voices. The subtag is compared for
        equality (not by prefix) so that e.g. ``fi`` does not match
        ``fil-PH``. An empty ``language`` returns every voice.

        Args:
            language: Language code such as ``zh-CN`` or ``en-US``.

        Returns:
            A list of dictionaries with the keys ``name``, ``voice_id``,
            ``locale``, ``display_name`` and ``gender``.

        Raises:
            Exception: Any error raised by edge-tts is logged and re-raised
                unchanged.
        """
        try:
            logger.debug(f"Fetching supported voices for language: {language}")
            voices = await edge_tts.list_voices()

            # Computed once instead of once per voice.
            primary = language.split("-")[0]

            filtered_voices = [
                {
                    "name": voice.get("ShortName", ""),
                    "voice_id": voice.get("ShortName", ""),
                    "locale": voice.get("Locale", ""),
                    # edge-tts exposes the readable name as "FriendlyName";
                    # "DisplayName" is still honoured if a record carries it.
                    "display_name": voice.get("DisplayName")
                    or voice.get("FriendlyName", ""),
                    "gender": voice.get("Gender", ""),
                }
                for voice in voices
                if not primary or voice.get("Locale", "").split("-")[0] == primary
            ]

            logger.debug(f"Found {len(filtered_voices)} voices for language {language}")
            return filtered_voices

        except Exception as e:
            logger.error(f"Error fetching supported voices: {str(e)}")
            raise

    def get_engine_name(self) -> str:
        """Return the engine name stored on the instance."""
        return self.engine_name

    def get_engine_version(self) -> str:
        """Return the engine version stored on the instance."""
        return self.engine_version

    @staticmethod
    def _get_default_voice(language: str) -> str:
        """Return the default voice ID for ``language``.

        Args:
            language: Language code.

        Returns:
            The mapped voice ID, or ``zh-CN-XiaoxiaoNeural`` when the
            language has no entry in the table.
        """
        return EdgeTTSEngine._DEFAULT_VOICES.get(
            language, EdgeTTSEngine._FALLBACK_VOICE
        )
|
||||
118
tts/examples.py
Normal file
118
tts/examples.py
Normal file
@ -0,0 +1,118 @@
|
||||
"""
|
||||
TTS 模块使用示例
|
||||
|
||||
演示如何使用 TTS 引擎和服务。
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from io import BytesIO
|
||||
|
||||
# 示例 1: 直接使用 Edge-TTS 引擎
|
||||
async def example_direct_engine():
    """Example 1: drive ``EdgeTTSEngine`` directly, bypassing factory and service."""
    from tts.edge_tts_engine import EdgeTTSEngine

    tts = EdgeTTSEngine()
    name, version = tts.get_engine_name(), tts.get_engine_version()
    print(f"Engine: {name} v{version}")

    # Synthesize a short Chinese sentence and report the audio size.
    speech = await tts.synthesize("你好,我是语音合成助手。", language="zh-CN")
    print(f"Audio synthesized: {speech.getbuffer().nbytes} bytes")

    # Show how many voices exist and print the first three of them.
    available = await tts.get_supported_voices("zh-CN")
    print(f"Supported voices: {len(available)} found")
    for entry in available[:3]:
        print(f" - {entry['display_name']} ({entry['name']})")
|
||||
|
||||
|
||||
# 示例 2: 使用工厂模式创建引擎
|
||||
async def example_factory():
    """Example 2: obtain the engine through ``TTSEngineFactory``."""
    from tts.factory import TTSEngineFactory

    tts = TTSEngineFactory.create("edge-tts")
    print(f"\nUsing {tts.get_engine_name()} engine")

    # One sample sentence per language, synthesized in insertion order.
    samples = {
        "zh-CN": "你好,世界!",
        "en-US": "Hello, World!",
        "ja-JP": "こんにちは、世界!",
    }
    for lang_code, sentence in samples.items():
        speech = await tts.synthesize(sentence, language=lang_code)
        print(f"Synthesized {lang_code}: {speech.getbuffer().nbytes} bytes")
|
||||
|
||||
|
||||
# 示例 3: 使用高级服务
|
||||
async def example_service():
    """Example 3: use the high-level ``TTSService`` interface."""
    from tts.service import TTSService

    engine_info = TTSService.get_engine_info()
    print(f"\nTTS Service Info: {engine_info}")

    # Synthesis that relies entirely on the configured defaults.
    default_audio = await TTSService.synthesize("使用服务默认配置合成语音。")
    print(f"Synthesized with defaults: {default_audio.getbuffer().nbytes} bytes")

    # Synthesis that overrides the speaking rate.
    fast_audio = await TTSService.synthesize("这是一个更快的语音示例。", rate=1.2)
    print(f"Synthesized with rate=1.2: {fast_audio.getbuffer().nbytes} bytes")

    voice_list = await TTSService.get_supported_voices()
    print(f"Available voices: {len(voice_list)} found")
|
||||
|
||||
|
||||
# 示例 4: 保存合成的音频到文件
|
||||
async def example_save_audio():
    """Example 4: synthesize speech and write the audio to a file."""
    from tts.service import TTSService

    output_file = "output_audio.mp3"
    speech = await TTSService.synthesize("这是一个保存到文件的语音示例。")

    # Dump the whole in-memory buffer to disk in binary mode.
    with open(output_file, "wb") as target:
        target.write(speech.getvalue())

    print(f"\nAudio saved to {output_file}")
|
||||
|
||||
|
||||
async def main():
    """Run all examples in order; the first failure is printed and stops the run."""
    banner = "=" * 50
    print(banner)
    print("TTS Module Examples")
    print(banner)

    sections = (
        ("\n1. Direct Engine Usage", example_direct_engine),
        ("\n2. Factory Pattern", example_factory),
        ("\n3. Service Interface", example_service),
        ("\n4. Save Audio to File", example_save_audio),
    )

    try:
        for heading, run_example in sections:
            print(heading)
            print("-" * 50)
            await run_example()
    except Exception as e:
        print(f"Error: {e}")


# Script entry point: only runs the examples when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
112
tts/factory.py
Normal file
112
tts/factory.py
Normal file
@ -0,0 +1,112 @@
|
||||
"""
|
||||
TTS 引擎工厂类
|
||||
"""
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from .base import TTSEngine
|
||||
from .edge_tts_engine import EdgeTTSEngine
|
||||
from utils.logger import logger
|
||||
|
||||
|
||||
class TTSEngineType(Enum):
    """Identifiers of the TTS engines the factory knows about."""

    EDGE_TTS = "edge-tts"

    # Additional engine identifiers can be declared here, for example:
    #   GOOGLE_TTS = "google-tts"
    #   BAIDU_TTS = "baidu-tts"
    #   AZURE_TTS = "azure-tts"
|
||||
|
||||
|
||||
class TTSEngineFactory:
    """Factory that creates and caches TTS engine instances.

    Engine classes are looked up in a class-level registry keyed by
    ``TTSEngineType``; each type is instantiated at most once and the
    instance is reused by later ``create`` calls.
    """

    # Registry of engine classes; extended through register_engine().
    _engines = {
        TTSEngineType.EDGE_TTS: EdgeTTSEngine,
    }

    # Engine instances created so far, one per engine type.
    _instances: dict[TTSEngineType, TTSEngine] = {}

    @classmethod
    def _resolve_type(cls, engine_type: str | TTSEngineType) -> TTSEngineType:
        """Convert a string identifier into its ``TTSEngineType`` member.

        Raises:
            ValueError: If ``engine_type`` is not a known engine identifier.
        """
        if isinstance(engine_type, TTSEngineType):
            return engine_type
        try:
            return TTSEngineType(engine_type)
        except ValueError:
            # "from None" hides the enum's own, less helpful ValueError.
            raise ValueError(
                f"Unsupported TTS engine type: {engine_type}. "
                f"Supported types: {[e.value for e in TTSEngineType]}"
            ) from None

    @classmethod
    def create(cls, engine_type: str | TTSEngineType) -> TTSEngine:
        """Return the engine instance for ``engine_type``, creating it on first use.

        Args:
            engine_type: Engine identifier, either a string such as
                ``"edge-tts"`` or a ``TTSEngineType`` member.

        Returns:
            The cached ``TTSEngine`` instance for that type.

        Raises:
            ValueError: If the type is unknown or no class is registered
                for it.
        """
        engine_type = cls._resolve_type(engine_type)

        if engine_type not in cls._instances:
            if engine_type not in cls._engines:
                raise ValueError(
                    f"TTS engine '{engine_type.value}' is not registered. "
                    f"Available engines: {list(cls._engines.keys())}"
                )

            engine_class = cls._engines[engine_type]
            cls._instances[engine_type] = engine_class()
            logger.info(f"Created TTS engine instance: {engine_type.value}")

        return cls._instances[engine_type]

    @classmethod
    def register_engine(
        cls, engine_type: str | TTSEngineType, engine_class: type[TTSEngine]
    ) -> None:
        """Register (or replace) the engine class used for ``engine_type``.

        Args:
            engine_type: Engine identifier; a string must correspond to an
                existing ``TTSEngineType`` member.
            engine_class: Engine class; must be a subclass of ``TTSEngine``.

        Raises:
            TypeError: If ``engine_class`` is not a subclass of ``TTSEngine``.
            ValueError: If ``engine_type`` is not a known engine identifier.
        """
        if not issubclass(engine_class, TTSEngine):
            raise TypeError(f"{engine_class} must be a subclass of TTSEngine")

        engine_type = cls._resolve_type(engine_type)

        cls._engines[engine_type] = engine_class
        # Drop any instance built from the previously registered class so
        # the next create() call actually uses the new class.
        cls._instances.pop(engine_type, None)
        logger.info(f"Registered TTS engine: {engine_type.value}")

    @classmethod
    def get_supported_engines(cls) -> list[str]:
        """Return the identifiers of all engines that can be created.

        Only types with a registered class are listed, so every returned
        value is accepted by ``create``.
        """
        return [engine_type.value for engine_type in cls._engines]

    @classmethod
    def clear_instances(cls) -> None:
        """Discard every cached engine instance."""
        cls._instances.clear()
        logger.debug("Cleared TTS engine instances cache")
|
||||
119
tts/service.py
Normal file
119
tts/service.py
Normal file
@ -0,0 +1,119 @@
|
||||
"""
|
||||
TTS 服务集成模块
|
||||
|
||||
提供高层 TTS 服务接口,直接使用配置文件中的 TTS 设置。
|
||||
"""
|
||||
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
from config.settings import settings
|
||||
from tts.factory import TTSEngineFactory
|
||||
from tts.base import TTSEngine
|
||||
from utils.logger import logger
|
||||
|
||||
|
||||
class TTSService:
    """High-level TTS entry point driven by the application settings.

    All methods are classmethods; the engine named by ``settings.TTS_ENGINE``
    is created on first use and reused afterwards.
    """

    # Engine instance shared by all calls; None until first use.
    _engine: Optional[TTSEngine] = None

    @classmethod
    def _get_engine(cls) -> TTSEngine:
        """Return the shared engine, creating it from the settings on first use."""
        if cls._engine is None:
            cls._engine = TTSEngineFactory.create(settings.TTS_ENGINE)
            logger.info(
                f"TTS Service initialized with engine: {settings.TTS_ENGINE}"
            )
        return cls._engine

    @classmethod
    async def synthesize(
        cls,
        text: str,
        language: Optional[str] = None,
        voice: Optional[str] = None,
        rate: Optional[float] = None,
        pitch: Optional[float] = None,
    ) -> BytesIO:
        """Synthesize ``text`` into speech.

        Args:
            text: Text to synthesize.
            language: Language code; falls back to ``settings.TTS_LANGUAGE``
                when omitted or empty.
            voice: Voice ID; falls back to ``settings.TTS_VOICE``. If that is
                empty too, ``None`` is passed so the engine picks a default.
            rate: Speaking rate; falls back to ``settings.TTS_RATE`` only
                when ``None``.
            pitch: Pitch; falls back to ``settings.TTS_PITCH`` only when
                ``None``.

        Returns:
            A ``BytesIO`` object holding the synthesized audio data.
        """
        engine = cls._get_engine()

        language = language or settings.TTS_LANGUAGE
        voice = voice or settings.TTS_VOICE or None
        # Numeric arguments are compared against None so that an explicit
        # falsy value is not silently replaced by the configured default.
        if rate is None:
            rate = settings.TTS_RATE
        if pitch is None:
            pitch = settings.TTS_PITCH

        logger.debug(f"Synthesizing text with TTS Service: {text[:50]}...")

        return await engine.synthesize(
            text=text,
            language=language,
            voice=voice,
            rate=rate,
            pitch=pitch,
        )

    @classmethod
    async def get_supported_voices(cls, language: Optional[str] = None) -> list[dict]:
        """Return the voices the engine reports for ``language``.

        Args:
            language: Language code; falls back to ``settings.TTS_LANGUAGE``
                when omitted or empty.

        Returns:
            The list of voice dictionaries returned by the engine.
        """
        engine = cls._get_engine()
        language = language or settings.TTS_LANGUAGE
        return await engine.get_supported_voices(language)

    @classmethod
    def get_engine_info(cls) -> dict:
        """Describe the active engine and the configured defaults.

        Returns:
            A dictionary with ``engine_name``, ``engine_version`` and a
            ``config`` sub-dictionary (``language``, ``voice``, ``rate``,
            ``pitch``). An empty configured voice is reported as
            ``"default"``.
        """
        engine = cls._get_engine()
        return {
            "engine_name": engine.get_engine_name(),
            "engine_version": engine.get_engine_version(),
            "config": {
                "language": settings.TTS_LANGUAGE,
                "voice": settings.TTS_VOICE or "default",
                "rate": settings.TTS_RATE,
                "pitch": settings.TTS_PITCH,
            },
        }

    @classmethod
    def reset_engine(cls) -> None:
        """Forget the shared engine and clear the factory's instance cache.

        Only needed when switching engines; the next call creates a fresh
        engine from the current settings.
        """
        cls._engine = None
        TTSEngineFactory.clear_instances()
        logger.info("TTS Service engine reset")
|
||||
Reference in New Issue
Block a user