Files
edward/task/hot_topic/real_estate_story.py
konjacpotato aab9708fde
All checks were successful
Gitea Actions Demo / deploy (push) Successful in 26s
task: add short story logic
2026-02-15 21:21:48 +08:00

86 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from datetime import datetime, timedelta
import json
from task.manager_task import execute_task
from config.database import SessionLocal
from models import SourceContent, Article
from utils import logger
from llm import LLMThinkingEngine
def _extract_json_text(raw):
    """Return the outermost ``{...}`` span of *raw*, or None if there is none.

    The LLM sometimes surrounds its JSON answer with extra text, so only the
    part from the first ``{`` through the last ``}`` is kept for parsing.
    """
    start = raw.find("{")
    end = raw.rfind("}")
    # ``end < start`` also covers a missing "}" (rfind returns -1) and a "}"
    # that only appears before the first "{".
    if start == -1 or end < start:
        return None
    return raw[start:end + 1]


def _join_paragraphs(body):
    """Turn the ``body`` value of a parsed story into the article text.

    A list/tuple is joined with newlines (items are stringified so a stray
    number cannot raise), a plain string is used as-is instead of being split
    into characters, and any other value falls back to the placeholder text.
    """
    if isinstance(body, str):
        return body
    if isinstance(body, (list, tuple)):
        return "\n".join(str(paragraph) for paragraph in body)
    return "无内容"


def _generate_articles(db, contents, system_prompt_file):
    """Ask the LLM for one story per content row and save each as an Article.

    Args:
        db: open database session used to add and commit the articles.
        contents: SourceContent rows whose ``content`` is the story material.
        system_prompt_file: prompt file name passed to ``LLMThinkingEngine``.

    A reply that contains no JSON object, or whose JSON cannot be parsed, is
    logged and skipped so that one bad reply does not abort the whole batch.
    """
    llm_engine = LLMThinkingEngine(system_prompt_file=system_prompt_file)
    for content in contents:
        logger.info(f"story_edit_task content id: {content.id}, title: {content.link}, platform: {content.platform}")
        story = llm_engine.think(f"故事素材:{content.content}")
        logger.info(f"story_edit_task content id: {content.id} story: {story}")

        json_text = _extract_json_text(story)
        if json_text is None:
            logger.warning(f"story_edit_task content id: {content.id} llm生成的结果不是有效的json格式无法提取故事内容")
            continue

        # Only the parse itself can raise JSONDecodeError, so keep the try minimal.
        try:
            json_story = json.loads(json_text)
        except json.JSONDecodeError:
            logger.warning(f"story_edit_task content id: {content.id} llm生成的结果不是有效的json格式无法解析故事内容")
            continue

        # Persist the generated story in the Article table; commit per article
        # so earlier stories are kept even if a later one fails.
        article = Article(
            title=json_story.get("title", "无标题"),
            keywords=None,
            content=_join_paragraphs(json_story.get("body", ["无内容"])),
            used=False,
        )
        db.add(article)
        db.commit()


def story_edit_task():
    """Generate story articles from the most recent source contents.

    Loads up to 10 of the newest ``SourceContent`` rows created since 00:00 of
    the previous day (naive local time), then runs them through the LLM twice:
    first with the regular story prompt, then with the short-story prompt.
    Every successfully parsed reply is stored as an unused ``Article``.
    Returns None; does nothing beyond logging when no rows are found.
    """
    # Start of yesterday: today's midnight minus one day.
    since = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=1)
    with SessionLocal() as db:
        # Latest 10 posts from yesterday until now.
        today_contents = (
            db.query(SourceContent)
            .filter(SourceContent.create_time >= since)
            .order_by(SourceContent.create_time.desc())
            .limit(10)
            .all()
        )
        if not today_contents:
            logger.info("story_edit_task finish, content size 0")
            return
        logger.info(f"story_edit_task get {len(today_contents)} contents")

        # Same pipeline for both prompts; the full pass finishes before the
        # short-story pass starts, matching the original processing order.
        for prompt_file in (
            "real_estate_story_system_prompt.txt",
            "real_estate_story_short_system_prompt.txt",
        ):
            _generate_articles(db, today_contents, prompt_file)
if __name__ == "__main__":
    # When run directly as a script, hand the job function to execute_task.
    execute_task(story_edit_task)