commit code

2025-12-29 19:34:39 +08:00
parent 87160c5265
commit 6772699cfe
22 changed files with 2268 additions and 70 deletions
--- a/tts/CONFIG_TEMPLATE.md
+++ b/tts/CONFIG_TEMPLATE.md
@ -0,0 +1,344 @@
+# CosyVoice 配置模板
+
+## .env 文件配置示例
+
+将以下内容添加到项目的 `.env` 文件中：
+
+```env
+# CosyVoice API 配置
+COSYVOICE_API_URL=http://192.168.1.200:8000/tts/zero_shot
+COSYVOICE_TIMEOUT=30
+
+# TTS 引擎选择 (可选值: cosyvoice 或 edge-tts)；对应 Settings.default_tts_engine 字段
+DEFAULT_TTS_ENGINE=cosyvoice
+```
+
+## config/app.py 配置示例
+
+添加以下代码到配置文件中：
+
+```python
+from pydantic_settings import BaseSettings
+from typing import Optional
+
+class CosyVoiceSettings(BaseSettings):
+    """CosyVoice 配置"""
+    api_url: str = "http://192.168.1.200:8000/tts/zero_shot"
+    timeout: float = 30.0
+    
+    class Config:
+        env_prefix = "COSYVOICE_"
+
+
+class Settings(BaseSettings):
+    """应用程序设置"""
+    # ... 其他设置 ...
+    
+    # TTS 设置
+    default_tts_engine: str = "cosyvoice"  # 默认使用 cosyvoice
+    cosyvoice: CosyVoiceSettings = CosyVoiceSettings()
+    
+    class Config:
+        env_file = ".env"
+```
+
+## 应用程序初始化示例
+
+在 `main.py` 中初始化 CosyVoice：
+
+```python
+from fastapi import FastAPI
+from tts.factory import TTSEngineFactory
+from config.app import settings
+
+app = FastAPI()
+
+@app.on_event("startup")
+async def startup():
+    """应用启动时初始化 TTS 引擎"""
+    logger.info("Initializing TTS engines...")
+    
+    # 预加载 CosyVoice 引擎
+    try:
+        engine = TTSEngineFactory.create(settings.default_tts_engine)
+        logger.info(f"TTS engine initialized: {engine.get_engine_name()}")
+    except Exception as e:
+        logger.error(f"Failed to initialize TTS engine: {e}")
+        # 可以在这里设置备用引擎
+
+@app.on_event("shutdown")
+async def shutdown():
+    """应用关闭时清理资源"""
+    logger.info("Cleaning up TTS engines...")
+    
+    # 清空引擎缓存
+    TTSEngineFactory.clear_instances()
+```
+
+## FastAPI 路由配置示例
+
+创建 `api/v1/tts_cosyvoice_routes.py`：
+
+```python
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+from tts.factory import TTSEngineFactory
+from tts.cosyvoice_engine import CosyVoiceEngine
+from utils.logger import logger
+from fastapi.responses import StreamingResponse
+import io
+
+router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
+
+class SynthesizeRequest(BaseModel):
+    """Request body for a speech-synthesis call."""
+    # Text to synthesize.
+    text: str
+    # Speaker ID; the routes pass it to the engine as ``voice``.
+    speaker_id: str
+    # Language code; defaults to "zh-CN".
+    language: str = "zh-CN"
+
+class SynthesizeResponse(BaseModel):
+    """Response body describing the outcome of a speech-synthesis call."""
+    # Outcome marker; the synthesize route sets it to "success".
+    status: str
+    # Length of the synthesized audio in bytes.
+    size: int
+    # Optional human-readable message; empty by default.
+    message: str = ""
+
+@router.post("/cosyvoice/synthesize", response_model=SynthesizeResponse)
+async def synthesize_with_cosyvoice(request: SynthesizeRequest):
+    """
+    使用 CosyVoice 合成语音
+    
+    Args:
+        text: 要合成的文本
+        speaker_id: 发音人 ID (zero_shot_spk_id)
+        language: 语言代码，默认 zh-CN
+    
+    Returns:
+        包含音频大小的响应
+    """
+    try:
+        if not request.text:
+            raise ValueError("text cannot be empty")
+        
+        if not request.speaker_id:
+            raise ValueError("speaker_id is required")
+        
+        logger.debug(f"Synthesizing: {request.text[:50]}...")
+        
+        # 创建 CosyVoice 引擎
+        engine = TTSEngineFactory.create("cosyvoice")
+        
+        # 合成语音
+        audio = await engine.synthesize(
+            text=request.text,
+            voice=request.speaker_id,
+            language=request.language
+        )
+        
+        logger.info(f"Synthesis successful: {len(audio.getvalue())} bytes")
+        
+        return SynthesizeResponse(
+            status="success",
+            size=len(audio.getvalue()),
+            message="Synthesis completed successfully"
+        )
+    
+    except ValueError as e:
+        logger.warning(f"Validation error: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logger.error(f"Synthesis error: {e}")
+        raise HTTPException(status_code=500, detail="TTS synthesis failed")
+
+
+@router.post("/cosyvoice/synthesize-audio")
+async def synthesize_and_download(request: SynthesizeRequest):
+    """
+    使用 CosyVoice 合成语音并返回音频文件
+    
+    Args:
+        text: 要合成的文本
+        speaker_id: 发音人 ID
+        language: 语言代码
+    
+    Returns:
+        音频文件流
+    """
+    try:
+        engine = TTSEngineFactory.create("cosyvoice")
+        
+        audio = await engine.synthesize(
+            text=request.text,
+            voice=request.speaker_id,
+            language=request.language
+        )
+        
+        return StreamingResponse(
+            io.BytesIO(audio.getvalue()),
+            media_type="audio/wav",
+            headers={"Content-Disposition": "attachment; filename=synthesis.wav"}
+        )
+    
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logger.error(f"Synthesis error: {e}")
+        raise HTTPException(status_code=500, detail="TTS synthesis failed")
+
+
+@router.get("/cosyvoice/info")
+async def get_cosyvoice_info():
+    """获取 CosyVoice 引擎信息"""
+    try:
+        engine = TTSEngineFactory.create("cosyvoice")
+        
+        return {
+            "name": engine.get_engine_name(),
+            "version": engine.get_engine_version(),
+            "type": "cosyvoice",
+            "api_url": "http://192.168.1.200:8000/tts/zero_shot",
+            "requires_speaker_id": True,
+            "supported_languages": ["zh-CN"]
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail="Failed to get engine info")
+
+
+@router.get("/supported-engines")
+async def get_supported_engines():
+    """获取所有支持的 TTS 引擎"""
+    from tts.factory import TTSEngineFactory
+    
+    engines = TTSEngineFactory.get_supported_engines()
+    return {
+        "supported_engines": engines,
+        "count": len(engines)
+    }
+```
+
+## 在现有路由中添加 CosyVoice 支持
+
+如果已有 `api/v1/tts_routes.py`，可以添加 CosyVoice 端点：
+
+```python
+# 在现有路由中添加
+from tts.factory import TTSEngineFactory
+
+@router.post("/synthesize")
+async def synthesize(text: str, engine: str = "edge-tts", voice: str = None):
+    """
+    使用指定引擎合成语音
+    
+    Args:
+        text: 要合成的文本
+        engine: 引擎类型 (edge-tts 或 cosyvoice)
+        voice: 声音/发音人 ID (对于 cosyvoice 必需)
+    """
+    try:
+        tts_engine = TTSEngineFactory.create(engine)
+        
+        if engine == "cosyvoice" and not voice:
+            raise ValueError("voice parameter is required for cosyvoice engine")
+        
+        audio = await tts_engine.synthesize(
+            text=text,
+            voice=voice
+        )
+        
+        return {
+            "status": "success",
+            "engine": engine,
+            "size": len(audio.getvalue())
+        }
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
+```
+
+## Docker 环境配置
+
+如果使用 Docker，在 `Dockerfile` 中确保已安装 httpx：
+
+```dockerfile
+FROM python:3.10-slim
+
+WORKDIR /app
+
+# Copy requirements.txt first and install the dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make sure httpx is installed. The requirement must be quoted: in shell-form
+# RUN an unquoted ">=0.24.0" is parsed as a redirect to a file named "=0.24.0",
+# which silently drops the version constraint.
+RUN pip install --no-cache-dir "httpx>=0.24.0"
+
+COPY . .
+
+CMD ["python", "main.py"]
+```
+
+## 发音人 ID 配置
+
+创建 `config/speakers.py` 管理发音人列表：
+
+```python
+"""
+发音人 ID 配置
+
+根据实际部署的 CosyVoice 服务配置发音人列表
+"""
+
+# Speaker table keyed by a short speaker key. Each entry holds the speaker
+# "id" plus display metadata (name, description, gender, language).
+COSYVOICE_SPEAKERS = {
+    "female_standard": {
+        "id": "female_standard_speaker_001",
+        "name": "女性标准发音",
+        "description": "CosyVoice 女性标准发音人",
+        "gender": "female",
+        "language": "zh-CN"
+    },
+    "female_gentle": {
+        "id": "female_gentle_speaker_001",
+        "name": "女性温柔发音",
+        "description": "CosyVoice 女性温柔发音人",
+        "gender": "female",
+        "language": "zh-CN"
+    },
+    "male_standard": {
+        "id": "male_standard_speaker_001",
+        "name": "男性标准发音",
+        "description": "CosyVoice 男性标准发音人",
+        "gender": "male",
+        "language": "zh-CN"
+    },
+    # Add more speakers here to match the actual deployment.
+}
+
+def get_speaker_id(speaker_key: str) -> str:
+    """获取发音人 ID"""
+    speaker = COSYVOICE_SPEAKERS.get(speaker_key)
+    if not speaker:
+        raise ValueError(f"Unknown speaker: {speaker_key}")
+    return speaker["id"]
+
+def get_all_speakers() -> dict[str, dict[str, str]]:
+    """Return the full speaker table, keyed by speaker key.
+
+    The module-level ``COSYVOICE_SPEAKERS`` dict itself is returned, not a copy.
+    """
+    return COSYVOICE_SPEAKERS
+```
+
+在路由中使用：
+
+```python
+from config.speakers import get_speaker_id
+
+@router.post("/tts/synthesize")
+async def synthesize(text: str, speaker: str = "female_standard"):
+    """使用命名发音人合成语音"""
+    try:
+        speaker_id = get_speaker_id(speaker)
+        engine = TTSEngineFactory.create("cosyvoice")
+        audio = await engine.synthesize(text=text, voice=speaker_id)
+        return {"status": "success"}
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+```
+
+---
+
+选择适合您项目的配置方式，并根据实际情况调整参数。