Files
meme/tts/CONFIG_TEMPLATE.md
konjacpotato 6772699cfe
Some checks failed
Gitea Actions Demo / deploy (push) Failing after 2s
commit code
2025-12-29 19:34:39 +08:00

345 lines
9.0 KiB
Markdown
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# CosyVoice 配置模板
## .env 文件配置示例
将以下内容添加到项目的 `.env` 文件中:
```env
# CosyVoice API 配置
COSYVOICE_API_URL=http://192.168.1.200:8000/tts/zero_shot
COSYVOICE_TIMEOUT=30
# TTS 引擎选择 (可选)
TTS_ENGINE=cosyvoice # 或 edge-tts
```
## config/app.py 配置示例
添加以下代码到配置文件中:
```python
from pydantic_settings import BaseSettings
from typing import Optional
class CosyVoiceSettings(BaseSettings):
    """CosyVoice connection settings.

    Fields are populated from ``COSYVOICE_``-prefixed variables
    (``COSYVOICE_API_URL``, ``COSYVOICE_TIMEOUT``), taken from the process
    environment or from the ``.env`` file.
    """

    # URL of the CosyVoice zero-shot synthesis endpoint.
    api_url: str = "http://192.168.1.200:8000/tts/zero_shot"
    # Request timeout; presumably in seconds — confirm against the HTTP client.
    timeout: float = 30.0

    class Config:
        env_prefix = "COSYVOICE_"
        # Without env_file this class only sees real environment variables,
        # so the values written to .env would never be applied.
        env_file = ".env"
        # The dotenv file also carries keys that are not fields of this
        # class; ignore them instead of failing validation.
        extra = "ignore"
class Settings(BaseSettings):
    """Application settings."""

    # ... other settings ...

    # TTS settings
    # Default engine name ("cosyvoice").
    # NOTE(review): with no env_prefix this field is read from
    # DEFAULT_TTS_ENGINE; the TTS_ENGINE key in the sample .env does not map
    # to it — confirm which name is intended.
    default_tts_engine: str = "cosyvoice"
    cosyvoice: CosyVoiceSettings = CosyVoiceSettings()

    class Config:
        env_file = ".env"
        # The .env file holds keys that are not fields of this class
        # (COSYVOICE_*, TTS_ENGINE). pydantic-settings passes every dotenv key
        # to the model and forbids extras by default, so ignore them here.
        extra = "ignore"
```
## 应用程序初始化示例
在 `main.py` 中初始化 CosyVoice:
```python
from fastapi import FastAPI
from config.app import settings
from tts.factory import TTSEngineFactory
from utils.logger import logger
app = FastAPI()
@app.on_event("startup")
async def startup():
    """Initialize the default TTS engine when the application starts."""
    logger.info("Initializing TTS engines...")
    # Pre-load the engine named by settings.default_tts_engine.
    try:
        tts_engine = TTSEngineFactory.create(settings.default_tts_engine)
        engine_name = tts_engine.get_engine_name()
        logger.info(f"TTS engine initialized: {engine_name}")
    except Exception as exc:
        # Startup continues even when the engine cannot be created.
        logger.error(f"Failed to initialize TTS engine: {exc}")
        # A fallback engine could be set up here.
@app.on_event("shutdown")
async def shutdown():
    """Clean up TTS resources when the application shuts down."""
    logger.info("Cleaning up TTS engines...")
    # Clear the engine cache held by the factory.
    TTSEngineFactory.clear_instances()
```
## FastAPI 路由配置示例
创建 `api/v1/tts_cosyvoice_routes.py`:
```python
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from tts.factory import TTSEngineFactory
from tts.cosyvoice_engine import CosyVoiceEngine
from utils.logger import logger
from fastapi.responses import StreamingResponse
import io
router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
class SynthesizeRequest(BaseModel):
    """Request body for a speech-synthesis call."""

    # Text to synthesize.
    text: str
    # Speaker ID; required, no default.
    speaker_id: str
    # Language code; "zh-CN" when omitted.
    language: str = "zh-CN"
class SynthesizeResponse(BaseModel):
    """Response body describing the outcome of a synthesis call."""

    # Outcome marker; the synthesize endpoint sets it to "success".
    status: str
    # Size of the synthesized audio, in bytes.
    size: int
    # Optional human-readable message; empty by default.
    message: str = ""
@router.post("/cosyvoice/synthesize", response_model=SynthesizeResponse)
async def synthesize_with_cosyvoice(request: SynthesizeRequest):
    """
    Synthesize speech with CosyVoice and report the size of the result.

    Args:
        request: Request body with ``text`` (text to synthesize),
            ``speaker_id`` (speaker ID, zero_shot_spk_id) and ``language``
            (language code, default zh-CN).

    Returns:
        A SynthesizeResponse containing the audio size in bytes.

    Raises:
        HTTPException: 400 when the input is invalid, 500 when synthesis fails.
    """
    try:
        # Whitespace-only text has nothing to synthesize; reject it like "".
        if not request.text.strip():
            raise ValueError("text cannot be empty")
        if not request.speaker_id:
            raise ValueError("speaker_id is required")

        logger.debug(f"Synthesizing: {request.text[:50]}...")

        # Create the CosyVoice engine.
        engine = TTSEngineFactory.create("cosyvoice")

        # Synthesize the speech.
        audio = await engine.synthesize(
            text=request.text,
            voice=request.speaker_id,
            language=request.language,
        )

        # Call getvalue() once and reuse the size for the log and the response.
        audio_size = len(audio.getvalue())
        logger.info(f"Synthesis successful: {audio_size} bytes")

        return SynthesizeResponse(
            status="success",
            size=audio_size,
            message="Synthesis completed successfully",
        )
    except ValueError as e:
        logger.warning(f"Validation error: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # The client gets a generic message; the cause goes to the log only.
        logger.error(f"Synthesis error: {e}")
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e
@router.post("/cosyvoice/synthesize-audio")
async def synthesize_and_download(request: SynthesizeRequest):
    """
    Synthesize speech with CosyVoice and return it as an audio file.

    Args:
        request: Request body with ``text`` (text to synthesize),
            ``speaker_id`` (speaker ID) and ``language`` (language code).

    Returns:
        A streaming response carrying the audio as ``synthesis.wav``.

    Raises:
        HTTPException: 400 when the input is invalid, 500 when synthesis fails.
    """
    try:
        # Reject empty input before contacting the engine so it is reported
        # as a 400 rather than as a synthesis failure.
        if not request.text.strip():
            raise ValueError("text cannot be empty")
        if not request.speaker_id:
            raise ValueError("speaker_id is required")

        engine = TTSEngineFactory.create("cosyvoice")
        audio = await engine.synthesize(
            text=request.text,
            voice=request.speaker_id,
            language=request.language,
        )

        return StreamingResponse(
            io.BytesIO(audio.getvalue()),
            media_type="audio/wav",
            headers={"Content-Disposition": "attachment; filename=synthesis.wav"},
        )
    except ValueError as e:
        logger.warning(f"Validation error: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # The client gets a generic message; the cause goes to the log only.
        logger.error(f"Synthesis error: {e}")
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e
@router.get("/cosyvoice/info")
async def get_cosyvoice_info():
    """Return descriptive information about the CosyVoice engine.

    Returns:
        A dict with the engine name, version, type, API URL, whether a
        speaker ID is required, and the supported languages.

    Raises:
        HTTPException: 500 when the engine cannot be created or queried.
    """
    try:
        engine = TTSEngineFactory.create("cosyvoice")
        return {
            "name": engine.get_engine_name(),
            "version": engine.get_engine_version(),
            "type": "cosyvoice",
            # NOTE(review): hard-coded; this will drift from COSYVOICE_API_URL
            # if the configured endpoint changes — consider reading it from
            # the application settings.
            "api_url": "http://192.168.1.200:8000/tts/zero_shot",
            "requires_speaker_id": True,
            "supported_languages": ["zh-CN"],
        }
    except Exception as e:
        # Log the cause; the client only receives a generic 500.
        logger.error(f"Failed to get engine info: {e}")
        raise HTTPException(status_code=500, detail="Failed to get engine info") from e
@router.get("/supported-engines")
async def get_supported_engines():
    """List every TTS engine the factory supports.

    Returns:
        A dict with the engines under ``supported_engines`` and their number
        under ``count``.
    """
    # TTSEngineFactory is already imported at module level in this routes
    # file, so no function-scope import is needed.
    engines = TTSEngineFactory.get_supported_engines()
    return {
        "supported_engines": engines,
        "count": len(engines),
    }
```
## 在现有路由中添加 CosyVoice 支持
如果已有 `api/v1/tts_routes.py`,可以添加 CosyVoice 端点:
```python
# 在现有路由中添加
from tts.factory import TTSEngineFactory
@router.post("/synthesize")
async def synthesize(text: str, engine: str = "edge-tts", voice: str = None):
    """
    Synthesize speech with the selected engine.

    Args:
        text: Text to synthesize.
        engine: Engine type (edge-tts or cosyvoice).
        voice: Voice / speaker ID (required for cosyvoice).

    Returns:
        A dict with ``status``, the ``engine`` used and the audio ``size``
        in bytes.

    Raises:
        HTTPException: 400 for invalid input (ValueError), 500 for any other
            failure.
    """
    try:
        # Validate first so an invalid request never creates an engine.
        if engine == "cosyvoice" and not voice:
            raise ValueError("voice parameter is required for cosyvoice engine")

        tts_engine = TTSEngineFactory.create(engine)
        audio = await tts_engine.synthesize(
            text=text,
            voice=voice,
        )
        return {
            "status": "success",
            "engine": engine,
            "size": len(audio.getvalue()),
        }
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Server-side failures are not the client's fault: answer 500 and do
        # not echo internal error text back to the caller.
        # NOTE(review): if TTSEngineFactory.create signals an unknown engine
        # with something other than ValueError, map that exception to 400 too.
        raise HTTPException(status_code=500, detail="TTS synthesis failed") from e
```
## Docker 环境配置
如果使用 Docker,在 `Dockerfile` 中确保已安装 httpx:
```dockerfile
FROM python:3.10-slim
WORKDIR /app
# Copy requirements.txt and install the dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Make sure httpx is installed. The specifier is quoted because shell-form RUN
# would otherwise parse ">" as an output redirection and drop the version constraint.
RUN pip install --no-cache-dir "httpx>=0.24.0"
COPY . .
CMD ["python", "main.py"]
```
## 发音人 ID 配置
创建 `config/speakers.py` 管理发音人列表:
```python
"""
发音人 ID 配置
根据实际部署的 CosyVoice 服务配置发音人列表
"""
# Registry of CosyVoice speakers, keyed by a short alias. Each entry holds the
# speaker "id" plus display metadata (name, description, gender, language).
COSYVOICE_SPEAKERS = {
    "female_standard": {
        "id": "female_standard_speaker_001",
        "name": "女性标准发音",
        "description": "CosyVoice 女性标准发音人",
        "gender": "female",
        "language": "zh-CN",
    },
    "female_gentle": {
        "id": "female_gentle_speaker_001",
        "name": "女性温柔发音",
        "description": "CosyVoice 女性温柔发音人",
        "gender": "female",
        "language": "zh-CN",
    },
    "male_standard": {
        "id": "male_standard_speaker_001",
        "name": "男性标准发音",
        "description": "CosyVoice 男性标准发音人",
        "gender": "male",
        "language": "zh-CN",
    },
    # Add more speakers here to match the actual deployment.
}
def get_speaker_id(speaker_key: str) -> str:
    """Return the speaker ID registered under *speaker_key*.

    Raises:
        ValueError: If *speaker_key* has no entry in COSYVOICE_SPEAKERS.
    """
    entry = COSYVOICE_SPEAKERS.get(speaker_key)
    if entry:
        return entry["id"]
    raise ValueError(f"Unknown speaker: {speaker_key}")
def get_all_speakers():
    """Return the full speaker registry.

    The COSYVOICE_SPEAKERS mapping itself is returned, not a copy.
    """
    return COSYVOICE_SPEAKERS
```
在路由中使用:
```python
from config.speakers import get_speaker_id
@router.post("/tts/synthesize")
async def synthesize(text: str, speaker: str = "female_standard"):
    """Synthesize speech using a named speaker.

    Args:
        text: Text to synthesize.
        speaker: Key of the speaker, resolved to a speaker ID through
            get_speaker_id; defaults to "female_standard".

    Returns:
        ``{"status": "success"}`` once synthesis has completed.

    Raises:
        HTTPException: 400 when a ValueError is raised, e.g. for an unknown
            speaker key.
    """
    try:
        speaker_id = get_speaker_id(speaker)
        engine = TTSEngineFactory.create("cosyvoice")
        # The audio is not returned by this endpoint, so the result is
        # awaited for completion only and not bound to a name.
        await engine.synthesize(text=text, voice=speaker_id)
        return {"status": "success"}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
```
---
选择适合您项目的配置方式,并根据实际情况调整参数。