This commit is contained in:
344
tts/CONFIG_TEMPLATE.md
Normal file
344
tts/CONFIG_TEMPLATE.md
Normal file
@ -0,0 +1,344 @@
|
||||
# CosyVoice 配置模板
|
||||
|
||||
## .env 文件配置示例
|
||||
|
||||
将以下内容添加到项目的 `.env` 文件中:
|
||||
|
||||
```env
|
||||
# CosyVoice API 配置
|
||||
COSYVOICE_API_URL=http://192.168.1.200:8000/tts/zero_shot
|
||||
COSYVOICE_TIMEOUT=30
|
||||
|
||||
# TTS 引擎选择 (可选)
|
||||
# 可选值:cosyvoice 或 edge-tts
TTS_ENGINE=cosyvoice
|
||||
```
|
||||
|
||||
## config/app.py 配置示例
|
||||
|
||||
添加以下代码到配置文件中:
|
||||
|
||||
```python
|
||||
from pydantic import AliasChoices, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional


class CosyVoiceSettings(BaseSettings):
    """Connection settings for the CosyVoice HTTP API.

    Values come from ``COSYVOICE_``-prefixed variables (``COSYVOICE_API_URL``,
    ``COSYVOICE_TIMEOUT``) found in the process environment or in ``.env``.
    """

    # env_file is declared here as well: a BaseSettings subclass only reads
    # the dotenv files named in its own configuration.
    # extra="ignore" lets this class share one .env with unrelated keys.
    model_config = SettingsConfigDict(
        env_prefix="COSYVOICE_",
        env_file=".env",
        extra="ignore",
    )

    api_url: str = "http://192.168.1.200:8000/tts/zero_shot"
    timeout: float = 30.0


class Settings(BaseSettings):
    """Application settings."""

    # extra="ignore": the .env file also carries the COSYVOICE_* keys that
    # are consumed by CosyVoiceSettings, not by a field of this class.
    # populate_by_name keeps Settings(default_tts_engine=...) working even
    # though the field is read from the environment through an alias.
    model_config = SettingsConfigDict(
        env_file=".env",
        extra="ignore",
        populate_by_name=True,
    )

    # ... other settings ...

    # TTS settings
    # Read from TTS_ENGINE (the name used in the .env example) or from
    # DEFAULT_TTS_ENGINE; falls back to cosyvoice.
    default_tts_engine: str = Field(
        default="cosyvoice",
        validation_alias=AliasChoices("TTS_ENGINE", "DEFAULT_TTS_ENGINE"),
    )
    # default_factory builds the nested settings when Settings is created
    # instead of once at class-definition (import) time.
    cosyvoice: CosyVoiceSettings = Field(default_factory=CosyVoiceSettings)


# Shared instance imported elsewhere as `from config.app import settings`.
settings = Settings()
|
||||
```
|
||||
|
||||
## 应用程序初始化示例
|
||||
|
||||
在 `main.py` 中初始化 CosyVoice:
|
||||
|
||||
```python
|
||||
from fastapi import FastAPI
from tts.factory import TTSEngineFactory
from config.app import settings
from utils.logger import logger

app = FastAPI()


@app.on_event("startup")
async def startup():
    """Warm up the default TTS engine when the application starts.

    A failure to create the engine is logged and swallowed on purpose so the
    application can still start.
    """
    logger.info("Initializing TTS engines...")

    # Preload the engine selected by settings.default_tts_engine.
    try:
        engine = TTSEngineFactory.create(settings.default_tts_engine)
        logger.info(f"TTS engine initialized: {engine.get_engine_name()}")
    except Exception as e:
        logger.error(f"Failed to initialize TTS engine: {e}")
        # A fallback engine can be set up here.


@app.on_event("shutdown")
async def shutdown():
    """Release TTS resources when the application shuts down."""
    logger.info("Cleaning up TTS engines...")

    # Clear the engine cache held by the factory.
    TTSEngineFactory.clear_instances()
|
||||
```
|
||||
|
||||
## FastAPI 路由配置示例
|
||||
|
||||
创建 `api/v1/tts_cosyvoice_routes.py`:
|
||||
|
||||
```python
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from tts.factory import TTSEngineFactory
|
||||
from tts.cosyvoice_engine import CosyVoiceEngine
|
||||
from utils.logger import logger
|
||||
from fastapi.responses import StreamingResponse
|
||||
import io
|
||||
|
||||
# Routes registered on this router are served under the /api/v1/tts prefix
# and grouped under the "tts" tag.
router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
|
||||
|
||||
class SynthesizeRequest(BaseModel):
    """Request body for a speech synthesis call."""

    # Text to synthesize.
    text: str
    # Speaker ID (the zero_shot_spk_id).
    speaker_id: str
    # Language code.
    language: str = "zh-CN"
|
||||
|
||||
class SynthesizeResponse(BaseModel):
    """Response body describing the outcome of a speech synthesis call."""

    # Outcome marker; the synthesize endpoint sets it to "success".
    status: str
    # Size of the generated audio in bytes.
    size: int
    # Optional human-readable note.
    message: str = ""
|
||||
|
||||
@router.post("/cosyvoice/synthesize", response_model=SynthesizeResponse)
async def synthesize_with_cosyvoice(request: SynthesizeRequest):
    """Synthesize speech with CosyVoice and report the size of the audio.

    Args:
        request: Body carrying ``text`` (the text to synthesize),
            ``speaker_id`` (the zero_shot_spk_id) and ``language``
            (language code, ``zh-CN`` by default).

    Returns:
        A ``SynthesizeResponse`` holding the audio size in bytes.

    Raises:
        HTTPException: 400 when the input is rejected or a ``ValueError`` is
            raised during synthesis; 500 for any other failure.
    """
    try:
        # Whitespace-only text has nothing to synthesize either.
        if not request.text or not request.text.strip():
            raise ValueError("text cannot be empty")

        if not request.speaker_id:
            raise ValueError("speaker_id is required")

        logger.debug(f"Synthesizing: {request.text[:50]}...")

        engine = TTSEngineFactory.create("cosyvoice")
        audio = await engine.synthesize(
            text=request.text,
            voice=request.speaker_id,
            language=request.language,
        )

        # Measure once and reuse; presumably `audio` is a BytesIO, whose
        # getvalue() copies the whole buffer on every call.
        audio_size = len(audio.getvalue())
        logger.info(f"Synthesis successful: {audio_size} bytes")

        return SynthesizeResponse(
            status="success",
            size=audio_size,
            message="Synthesis completed successfully",
        )
    except ValueError as e:
        logger.warning(f"Validation error: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Synthesis error: {e}")
        # The detail stays generic so internal errors are not exposed.
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e
|
||||
|
||||
|
||||
@router.post("/cosyvoice/synthesize-audio")
async def synthesize_and_download(request: SynthesizeRequest):
    """Synthesize speech with CosyVoice and return it as an audio file.

    Args:
        request: Body carrying ``text`` (the text to synthesize),
            ``speaker_id`` (the speaker ID) and ``language`` (language code).

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav``, sent as the
        attachment ``synthesis.wav``.

    Raises:
        HTTPException: 400 when the input is rejected or a ``ValueError`` is
            raised during synthesis; 500 for any other failure.
    """
    try:
        # Same input checks as the /cosyvoice/synthesize endpoint, so bad
        # requests are rejected before the engine is called.
        if not request.text or not request.text.strip():
            raise ValueError("text cannot be empty")

        if not request.speaker_id:
            raise ValueError("speaker_id is required")

        engine = TTSEngineFactory.create("cosyvoice")
        audio = await engine.synthesize(
            text=request.text,
            voice=request.speaker_id,
            language=request.language,
        )

        # Stream from a fresh buffer positioned at offset 0.
        return StreamingResponse(
            io.BytesIO(audio.getvalue()),
            media_type="audio/wav",
            headers={"Content-Disposition": "attachment; filename=synthesis.wav"},
        )
    except ValueError as e:
        logger.warning(f"Validation error: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Synthesis error: {e}")
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e
|
||||
|
||||
|
||||
@router.get("/cosyvoice/info")
async def get_cosyvoice_info():
    """Return descriptive information about the CosyVoice engine.

    Returns:
        A dict with the engine name and version reported by the engine,
        plus static fields describing the CosyVoice integration.

    Raises:
        HTTPException: 500 when the engine cannot be created or queried.
    """
    try:
        engine = TTSEngineFactory.create("cosyvoice")

        return {
            "name": engine.get_engine_name(),
            "version": engine.get_engine_version(),
            "type": "cosyvoice",
            # NOTE(review): hard-coded; it does not follow COSYVOICE_API_URL.
            "api_url": "http://192.168.1.200:8000/tts/zero_shot",
            "requires_speaker_id": True,
            "supported_languages": ["zh-CN"],
        }
    except Exception as e:
        # Record the cause; the client only receives a generic message.
        logger.error(f"Failed to get engine info: {e}")
        raise HTTPException(status_code=500, detail="Failed to get engine info") from e
|
||||
|
||||
|
||||
@router.get("/supported-engines")
async def get_supported_engines():
    """List the TTS engines supported by the factory.

    Returns:
        A dict with ``supported_engines`` (the value returned by
        ``TTSEngineFactory.get_supported_engines()``) and ``count`` (its
        length).
    """
    # TTSEngineFactory is already imported at module level, so the
    # function-scope import was redundant.
    supported = TTSEngineFactory.get_supported_engines()
    return {
        "supported_engines": supported,
        "count": len(supported),
    }
|
||||
```
|
||||
|
||||
## 在现有路由中添加 CosyVoice 支持
|
||||
|
||||
如果已有 `api/v1/tts_routes.py`,可以添加 CosyVoice 端点:
|
||||
|
||||
```python
|
||||
# 在现有路由中添加
|
||||
from tts.factory import TTSEngineFactory
|
||||
|
||||
@router.post("/synthesize")
async def synthesize(text: str, engine: str = "edge-tts", voice: str | None = None):
    """Synthesize speech with the selected engine.

    Args:
        text: Text to synthesize.
        engine: Engine type (``edge-tts`` or ``cosyvoice``).
        voice: Voice / speaker ID; required for the cosyvoice engine.

    Returns:
        A dict with ``status``, the ``engine`` used and the audio ``size``
        in bytes.

    Raises:
        HTTPException: 400 when ``voice`` is missing for cosyvoice or a
            ``ValueError`` is raised; 500 for any other failure.
    """
    # Check the cheap precondition before creating an engine.
    if engine == "cosyvoice" and not voice:
        raise HTTPException(
            status_code=400,
            detail="voice parameter is required for cosyvoice engine",
        )

    try:
        tts_engine = TTSEngineFactory.create(engine)
        audio = await tts_engine.synthesize(
            text=text,
            voice=voice,
        )
    except ValueError as e:
        # NOTE(review): assumes bad input (e.g. an unknown engine name)
        # surfaces as ValueError - confirm against TTSEngineFactory.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Internal failures are server errors and keep a generic detail.
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e

    return {
        "status": "success",
        "engine": engine,
        "size": len(audio.getvalue()),
    }
|
||||
```
|
||||
|
||||
## Docker 环境配置
|
||||
|
||||
如果使用 Docker,在 `Dockerfile` 中确保已安装 httpx:
|
||||
|
||||
```dockerfile
|
||||
FROM python:3.10-slim

WORKDIR /app

# Copy requirements.txt and install the dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Make sure httpx is installed. The requirement is quoted because RUN uses
# the shell form, where an unquoted ">" is an output redirection.
RUN pip install --no-cache-dir "httpx>=0.24.0"

COPY . .

CMD ["python", "main.py"]
|
||||
```
|
||||
|
||||
## 发音人 ID 配置
|
||||
|
||||
创建 `config/speakers.py` 管理发音人列表:
|
||||
|
||||
```python
|
||||
"""
|
||||
发音人 ID 配置
|
||||
|
||||
根据实际部署的 CosyVoice 服务配置发音人列表
|
||||
"""
|
||||
|
||||
# Registry of named speakers. Each key is a short name used by callers and
# each value describes one speaker; "id" is the value get_speaker_id returns.
COSYVOICE_SPEAKERS: dict[str, dict[str, str]] = {
    "female_standard": {
        "id": "female_standard_speaker_001",
        "name": "女性标准发音",
        "description": "CosyVoice 女性标准发音人",
        "gender": "female",
        "language": "zh-CN",
    },
    "female_gentle": {
        "id": "female_gentle_speaker_001",
        "name": "女性温柔发音",
        "description": "CosyVoice 女性温柔发音人",
        "gender": "female",
        "language": "zh-CN",
    },
    "male_standard": {
        "id": "male_standard_speaker_001",
        "name": "男性标准发音",
        "description": "CosyVoice 男性标准发音人",
        "gender": "male",
        "language": "zh-CN",
    },
    # Add more speakers here to match the actual deployment.
}


def get_speaker_id(speaker_key: str) -> str:
    """Return the speaker ID registered under *speaker_key*.

    Args:
        speaker_key: A key of ``COSYVOICE_SPEAKERS``.

    Returns:
        The ``"id"`` value of the matching entry.

    Raises:
        ValueError: If *speaker_key* has no entry, or its entry is empty.
    """
    speaker = COSYVOICE_SPEAKERS.get(speaker_key)
    if not speaker:
        raise ValueError(f"Unknown speaker: {speaker_key}")
    return speaker["id"]


def get_all_speakers() -> dict[str, dict[str, str]]:
    """Return the full speaker registry.

    The module-level ``COSYVOICE_SPEAKERS`` dict itself is returned, not a
    copy, so changes made by the caller are visible module-wide.
    """
    return COSYVOICE_SPEAKERS
|
||||
```
|
||||
|
||||
在路由中使用:
|
||||
|
||||
```python
|
||||
from config.speakers import get_speaker_id
|
||||
|
||||
@router.post("/tts/synthesize")
async def synthesize(text: str, speaker: str = "female_standard"):
    """Synthesize speech with CosyVoice using a named speaker.

    Args:
        text: Text to synthesize.
        speaker: Speaker name, resolved to a speaker ID by
            ``get_speaker_id``.

    Returns:
        A dict with ``status`` and the audio ``size`` in bytes.

    Raises:
        HTTPException: 400 when the speaker is unknown or a ``ValueError``
            is raised during synthesis; 500 for any other failure.
    """
    try:
        speaker_id = get_speaker_id(speaker)
        engine = TTSEngineFactory.create("cosyvoice")
        audio = await engine.synthesize(text=text, voice=speaker_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e

    # Report the audio size so the synthesized result is not silently
    # discarded; "status" is unchanged for existing clients.
    return {"status": "success", "size": len(audio.getvalue())}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
选择适合您项目的配置方式,并根据实际情况调整参数。
|
||||
Reference in New Issue
Block a user