add edge tts

This commit is contained in:
2025-11-28 20:27:10 +08:00
parent f796a3833b
commit 87160c5265
20 changed files with 3589 additions and 3 deletions

View File

@ -3,6 +3,8 @@ from llm import prompt
from utils.logger import logger
import datetime
from llm.generate_podcast import generate_topics
import os
import asyncio
from models.script import Script
from config.database import SessionLocal
@ -152,8 +154,110 @@ def job_generate_script():
db.rollback()
logger.error(f"Failed to generate/save full script: {e}")
def _safe_filename_part(value):
    """Return *value* as a string that is safe to embed in a file name.

    Whitespace runs collapse to a single underscore (the job's historical
    behaviour). Path separators, characters reserved on common filesystems
    and control characters are replaced with underscores, and leading or
    trailing dots are dropped, so that script-supplied text can neither
    redirect the output path nor make the file write fail.
    """
    import re  # local import: the module-level import block is left untouched

    collapsed = "_".join(str(value).split())
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", collapsed).strip(".")
    return cleaned or "untitled"


async def _synthesize_segments(script_items, role_voice_map, synthesize, language):
    """Synthesize every non-empty script item, one after another.

    Args:
        script_items: Iterable of script entries; each is expected to be a
            dict with optional ``"role"`` and ``"text"`` keys.
        role_voice_map: Mapping from lower-cased role name to voice name.
            Must contain a ``"default"`` entry used for unknown/missing roles.
        synthesize: Awaitable callable accepting ``text``, ``voice`` and
            ``language`` keyword arguments and returning the audio buffer.
        language: Language value forwarded unchanged to ``synthesize``.

    Returns:
        A list of ``(index, role_label, audio)`` tuples in script order.
        Entries that are malformed, have no text, or whose synthesis raises
        are skipped (failures are logged), so the list may be shorter than
        ``script_items``.
    """
    default_voice = role_voice_map["default"]
    segments = []
    for idx, item in enumerate(script_items):
        if not isinstance(item, dict):
            logger.warning(f"Skipping segment {idx}: expected an object, got {type(item).__name__}")
            continue
        # ``or ""`` also covers keys that are present but explicitly null.
        role = str(item.get("role") or "").lower()
        text = str(item.get("text") or "").strip()
        if not text:
            continue
        voice = role_voice_map.get(role, default_voice)
        try:
            logger.debug(f"Synthesizing segment {idx} role={role} voice={voice} text='{text[:30]}...'")
            seg_audio = await synthesize(text=text, voice=voice, language=language)
            segments.append((idx, role or "segment", seg_audio))
            logger.debug(f"Synthesized segment {idx} role={role} size={seg_audio.getbuffer().nbytes}")
        except Exception as e:
            # Best effort: one failed segment must not abort the whole episode.
            logger.error(f"Failed to synthesize segment {idx} (role={role}): {e}")
    return segments


def job_synthesize_podcast_audio():
    """Synthesize the latest full script into podcast audio and save it to disk.

    Loads the most recent ``Script`` row for the project, parses its JSON
    ``content``, sends each script item to ``TTSService.synthesize`` with a
    voice chosen by role, and writes the result under ``output/podcasts``.
    The segments are merged into one MP3 through pydub when that works;
    otherwise every segment is written as its own MP3 file.

    Returns:
        None. All outcomes are reported through the logger; exceptions are
        caught and logged rather than propagated. The database session is
        always closed.
    """
    logger.info("Starting job_synthesize_podcast_audio")
    db = SessionLocal()
    try:
        script = (
            db.query(Script)
            .filter_by(project="梗文化研究所")
            .order_by(Script.create_time.desc())
            .first()
        )
        if not script or not script.content:
            logger.warning("No script found for synthesizing podcast audio.")
            return

        data = json.loads(script.content)
        full_script = data.get("script") or {}
        if not full_script:
            logger.warning("No 'script' section found in latest Script record.")
            return

        title = full_script.get("title") or f"podcast_{script.id}"
        script_items = full_script.get("script", [])
        if not script_items:
            logger.warning("Empty script items, nothing to synthesize.")
            return

        # Synthesize paragraph by paragraph (TTSService takes text/voice),
        # then concatenate the resulting audio.
        from config.settings import settings
        from tts.service import TTSService

        # Role -> voice mapping (extend as needed or move into configuration).
        default_voice = settings.TTS_VOICE or "zh-CN-XiaoxiaoNeural"
        role_voice_map = {
            "host": default_voice,
            "guest": "zh-CN-YunxiNeural",
            # fallback for other roles
            "default": default_voice,
        }

        # A single event loop for the whole episode instead of one
        # asyncio.run() (loop creation + teardown) per segment.
        segments = asyncio.run(
            _synthesize_segments(
                script_items,
                role_voice_map,
                TTSService.synthesize,
                settings.TTS_LANGUAGE,
            )
        )
        if not segments:
            logger.warning("No audio segments synthesized; aborting podcast save.")
            return

        # Save or merge: prefer pydub (ffmpeg); otherwise keep per-segment files.
        out_dir = os.path.join("output", "podcasts")
        os.makedirs(out_dir, exist_ok=True)
        # Title and subject come from stored/generated text, so sanitize them
        # before they become part of a path.
        base_name = f"{_safe_filename_part(title)}_{_safe_filename_part(script.subject)}_{script.id}"
        final_path = os.path.join(out_dir, f"{base_name}.mp3")

        try:
            from pydub import AudioSegment

            combined = None
            for _idx, _role, seg in segments:
                seg.seek(0)
                audio_seg = AudioSegment.from_file(seg, format="mp3")
                combined = audio_seg if combined is None else combined + audio_seg
            if combined is not None:
                combined.export(final_path, format="mp3")
                logger.info(f"Saved combined podcast audio to {final_path}")
                return
        except Exception as e:
            # Deliberately broad: a missing pydub/ffmpeg or any decode/export
            # failure should degrade to per-segment files, not lose the audio.
            logger.warning(f"pydub/ffmpeg not available or merge failed: {e}; falling back to per-segment files")

        # Fallback: write each segment to its own file and record the paths.
        segment_paths = []
        for idx, role, seg in segments:
            seg_filename = f"{base_name}_seg{idx}_{_safe_filename_part(role)}.mp3"
            seg_path = os.path.join(out_dir, seg_filename)
            with open(seg_path, "wb") as fw:
                fw.write(seg.getvalue())
            segment_paths.append(seg_path)
        logger.info(f"Saved {len(segment_paths)} segment files to {out_dir}; combined file not created")
    except Exception as e:
        logger.error(f"Failed to synthesize/save podcast audio: {e}")
    finally:
        db.close()
# For manual testing
# Running this module directly invokes the uncommented job calls below, in
# order; the commented-out calls are kept as switches for other jobs.
# NOTE(review): this listing appears to come from a diff view with its +/-
# markers stripped, so the uncommented job_generate_script() call may be a
# removed line rather than live code — confirm against the actual file.
if __name__ == "__main__":
# job_generate_topics()
# job_generate_bits()
job_generate_script()
# job_generate_script()
job_synthesize_podcast_audio()