This commit is contained in:
80
scheduler/job_story_portal.py
Normal file
80
scheduler/job_story_portal.py
Normal file
@ -0,0 +1,80 @@
|
||||
import json
|
||||
from utils.logger import logger
|
||||
import datetime
|
||||
import os
|
||||
import asyncio
|
||||
from models.script import Script
|
||||
from config.database import SessionLocal
|
||||
from llm.generate_daily_article import generate_daily_article
|
||||
|
||||
project_name = "故事任意门"
|
||||
|
||||
# for daily article generation
|
||||
def _safe_filename_part(text):
    """Return *text* made safe for use as a single file-name component.

    Whitespace runs are collapsed to ``_`` (as before); characters that are
    path separators or reserved on common file systems are replaced with
    ``_`` so an LLM-produced title cannot break ``open()`` or escape the
    output directory.
    """
    joined = "_".join(str(text).split())
    return "".join("_" if ch in '\\/:*?"<>|' else ch for ch in joined)


def job_generate_daily_article():
    """Scheduled job: generate the daily article, persist it, and synthesize audio.

    Steps:
      1. Call the LLM via ``generate_daily_article()``.
      2. Upsert the JSON-serialized result into ``Script`` keyed by
         (``project_name``, today's date as ``YYYY-MM-DD``).
      3. Synthesize the article body with ``TTSService`` and write it as a
         ``.wav`` file under ``output/<project_name>/``.

    Failures in any step are logged; the function never raises for them and
    always returns ``None``. A database failure does not prevent the audio
    step from running.
    """
    # 1. Ask the LLM for today's article.
    content = generate_daily_article()
    if not content:
        logger.warning("No daily article generated.")
        return

    # The title is needed for logging and for the audio file name; a payload
    # without it is unusable, so report it instead of letting the lookup
    # raise out of the scheduled job.
    try:
        article_title = content["阶段4_今日文章"]["文章标题"]
    except (KeyError, TypeError) as e:
        logger.error(f"Daily article payload has unexpected structure: {e!r}")
        return

    # 2. Persist to the database. The subject is today's date (YYYY-MM-DD).
    today_str = datetime.datetime.now().strftime("%Y-%m-%d")
    serialized = json.dumps(content, ensure_ascii=False, separators=(",", ":"))

    db = SessionLocal()
    try:
        # (project, subject) identifies the record: update it if present,
        # otherwise insert a new one.
        script = db.query(Script).filter_by(project=project_name, subject=today_str).first()
        if script:
            script.content = serialized
            action = "Updated"
        else:
            db.add(Script(project=project_name, subject=today_str, content=serialized))
            action = "Saved"
        db.commit()
        logger.info(f"{action} script for {today_str} with {article_title}.")
    except Exception as e:
        db.rollback()
        logger.error(f"Failed to save/update script for {today_str}: {e}")
    finally:
        # Always release the session; the original code leaked it on every run.
        db.close()

    # 3. Synthesize audio for the article body.
    try:
        # Imported lazily, as in the original, so the TTS stack is only
        # loaded when this step actually runs.
        from tts.service import TTSService

        article_text = content["阶段4_今日文章"]["文章正文"]
        logger.debug(f"Synthesizing daily article audio for '{article_title}'")
        article_audio = asyncio.run(TTSService.synthesize(
            text=article_text,
            voice="yanglan",
            language="zh-CN",
        ))

        if not article_audio:
            logger.warning("No audio synthesized for daily article.")
            return

        # Write the audio file.
        out_dir = os.path.join("output", project_name)
        os.makedirs(out_dir, exist_ok=True)
        audio_filename = f"{_safe_filename_part(article_title)}_{today_str}.wav"
        audio_path = os.path.join(out_dir, audio_filename)
        with open(audio_path, "wb") as fw:
            # assumes synthesize() returns a BytesIO-like object exposing
            # getvalue() — TODO confirm against TTSService
            fw.write(article_audio.getvalue())
        logger.info(f"Saved daily article audio to {audio_path}")
    except Exception as e:
        logger.error(f"Failed to synthesize/save daily article audio: {e}")
|
||||
|
||||
# Manual test hook: running this module directly triggers one
# daily-article generation run.
if __name__ == "__main__":
    job_generate_daily_article()
|
||||
@ -31,7 +31,7 @@ def job_generate_topics():
|
||||
script = db.query(Script).filter_by(project="梗文化研究所", subject=today_str).first()
|
||||
if script:
|
||||
# 存在则更新内容
|
||||
script.content = json.dumps(content, ensure_ascii=False, indent=2)
|
||||
script.content = json.dumps(content, ensure_ascii=False, separators=(",", ":"))
|
||||
db.commit()
|
||||
logger.info(f"Updated script for {today_str} with {len(topics)} topics.")
|
||||
else:
|
||||
@ -39,7 +39,7 @@ def job_generate_topics():
|
||||
script = Script(
|
||||
project="梗文化研究所",
|
||||
subject=today_str,
|
||||
content=json.dumps(content, ensure_ascii=False, indent=2)
|
||||
content=json.dumps(content, ensure_ascii=False, separators=(",", ":"))
|
||||
)
|
||||
db.add(script)
|
||||
db.commit()
|
||||
@ -96,7 +96,7 @@ def job_generate_bits():
|
||||
logger.debug(f"Generated bits for meme '{meme_name}': {bit}")
|
||||
bits.append(bit)
|
||||
content = {"topics": topics, "bits": bits}
|
||||
script.content = json.dumps(content, ensure_ascii=False, indent=2)
|
||||
script.content = json.dumps(content, ensure_ascii=False, separators=(",", ":"))
|
||||
db.commit()
|
||||
logger.info(f"Saved bits for meme '{meme_name}' with {len(bits)} segments.")
|
||||
except Exception as e:
|
||||
@ -141,13 +141,13 @@ def job_generate_script():
|
||||
parts.append("角度:" + "; ".join(top.get("angles", [])))
|
||||
|
||||
research_text = "\n".join(parts)
|
||||
materials_text = research_text + "\n\n" + json.dumps(bits, ensure_ascii=False, indent=2)
|
||||
materials_text = research_text + "\n\n" + json.dumps(bits, ensure_ascii=False, separators=(",", ":"))
|
||||
|
||||
# 调用 LLM 生成完整脚本
|
||||
from llm.generate_podcast import generate_script
|
||||
full_script = generate_script(meme_name, materials_text)
|
||||
content = {"topics": topics, "bits": bits, "script": full_script}
|
||||
script.content = json.dumps(content, ensure_ascii=False, indent=2)
|
||||
script.content = json.dumps(content, ensure_ascii=False, separators=(",", ":"))
|
||||
db.commit()
|
||||
logger.info(f"Saved full script for meme '{meme_name}'.")
|
||||
except Exception as e:
|
||||
@ -183,10 +183,12 @@ def job_synthesize_podcast_audio():
|
||||
|
||||
# 角色到声音的映射(可按需扩展或放到配置中)
|
||||
role_voice_map = {
|
||||
"host": settings.TTS_VOICE or "zh-CN-XiaoxiaoNeural",
|
||||
"guest": "zh-CN-YunxiNeural",
|
||||
"host": settings.TTS_VOICE or "yanglan",
|
||||
"guest_a": "zhisheng",
|
||||
"guest_b": "trump",
|
||||
"guest_c": "tangseng",
|
||||
# fallback for other roles
|
||||
"default": settings.TTS_VOICE or "zh-CN-XiaoxiaoNeural",
|
||||
"default": settings.TTS_VOICE or "yanglan",
|
||||
}
|
||||
|
||||
segment_audio_bytes = []
|
||||
@ -210,46 +212,66 @@ def job_synthesize_podcast_audio():
|
||||
if not segment_audio_bytes:
|
||||
logger.warning("No audio segments synthesized; aborting podcast save.")
|
||||
return
|
||||
|
||||
# 保存每个分段为独立文件,并记录它们
|
||||
segment_out_dir = os.path.join("output", "segments")
|
||||
if os.path.exists(segment_out_dir):
|
||||
# 删除旧文件
|
||||
for f in os.listdir(segment_out_dir):
|
||||
os.remove(os.path.join(segment_out_dir, f))
|
||||
else:
|
||||
os.makedirs(segment_out_dir, exist_ok=True)
|
||||
segment_paths = []
|
||||
safe_title = "_".join(title.split())
|
||||
for idx, role, seg in sorted(segment_audio_bytes, key=lambda x: x[0]):
|
||||
seg.seek(0)
|
||||
seg_filename = f"{safe_title}_{script.subject}_{script.id}_seg{idx}_{role}.wav"
|
||||
seg_path = os.path.join(segment_out_dir, seg_filename)
|
||||
with open(seg_path, "wb") as fw:
|
||||
fw.write(seg.getvalue())
|
||||
segment_paths.append(seg_path)
|
||||
|
||||
logger.info(f"Saved {len(segment_paths)} segment files to {segment_out_dir}; combined file not created")
|
||||
|
||||
# 从segment_out_dir读取音频并合并
|
||||
segment_audio_to_combined_bytes = []
|
||||
for seg_path in segment_paths:
|
||||
with open(seg_path, "rb") as fr:
|
||||
audio_data = fr.read()
|
||||
from io import BytesIO
|
||||
segment_audio_to_combined_bytes.append((0, "segment", BytesIO(audio_data)))
|
||||
# seg = segment_audio_to_combined_bytes[0][2]
|
||||
# data = seg.getvalue()
|
||||
# print("LEN:", len(data))
|
||||
# print("HEAD (hex):", data[:32].hex())
|
||||
# print("HEAD (ascii):", data[:32])
|
||||
|
||||
# 保存或合并音频:优先使用 pydub (ffmpeg),否则保存为独立段文件
|
||||
out_dir = os.path.join("output", "podcasts")
|
||||
os.makedirs(out_dir, exist_ok=True)
|
||||
safe_title = "_".join(title.split())
|
||||
final_filename = f"{safe_title}_{script.subject}_{script.id}.mp3"
|
||||
final_filename = f"{safe_title}_{script.subject}_{script.id}.wav"
|
||||
final_path = os.path.join(out_dir, final_filename)
|
||||
|
||||
try:
|
||||
from pydub import AudioSegment
|
||||
|
||||
combined = None
|
||||
for idx, role, seg in sorted(segment_audio_bytes, key=lambda x: x[0]):
|
||||
for idx, role, seg in sorted(segment_audio_to_combined_bytes, key=lambda x: x[0]):
|
||||
seg.seek(0)
|
||||
audio_seg = AudioSegment.from_file(seg, format="mp3")
|
||||
audio_seg = AudioSegment.from_file(seg, format="wav")
|
||||
if combined is None:
|
||||
combined = audio_seg
|
||||
else:
|
||||
combined = combined + audio_seg
|
||||
|
||||
if combined is not None:
|
||||
combined.export(final_path, format="mp3")
|
||||
combined.export(final_path, format="wav")
|
||||
logger.info(f"Saved combined podcast audio to {final_path}")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"pydub/ffmpeg not available or merge failed: {e}; falling back to per-segment files")
|
||||
|
||||
# 回退:保存每个分段为独立文件,并记录它们
|
||||
segment_paths = []
|
||||
for idx, role, seg in sorted(segment_audio_bytes, key=lambda x: x[0]):
|
||||
seg.seek(0)
|
||||
seg_filename = f"{safe_title}_{script.subject}_{script.id}_seg{idx}_{role}.mp3"
|
||||
seg_path = os.path.join(out_dir, seg_filename)
|
||||
with open(seg_path, "wb") as fw:
|
||||
fw.write(seg.getvalue())
|
||||
segment_paths.append(seg_path)
|
||||
|
||||
logger.info(f"Saved {len(segment_paths)} segment files to {out_dir}; combined file not created")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to synthesize/save podcast audio: {e}")
|
||||
finally:
|
||||
@ -257,7 +279,14 @@ def job_synthesize_podcast_audio():
|
||||
|
||||
# For manual testing
|
||||
if __name__ == "__main__":
|
||||
# 选题策划和背景素材搜集
|
||||
# job_generate_topics()
|
||||
|
||||
# 脱口秀段子创作
|
||||
# job_generate_bits()
|
||||
|
||||
# 完整播客脚本生成
|
||||
# job_generate_script()
|
||||
job_synthesize_podcast_audio()
|
||||
|
||||
# 播客音频合成
|
||||
# job_synthesize_podcast_audio()
|
||||
Reference in New Issue
Block a user