86 lines
4.3 KiB
Python
86 lines
4.3 KiB
Python
from datetime import datetime, timedelta
|
||
import json
|
||
from task.manager_task import execute_task
|
||
from config.database import SessionLocal
|
||
from models import SourceContent, Article
|
||
from utils import logger
|
||
from llm import LLMThinkingEngine
|
||
|
||
|
||
def _slice_json_object(text):
    """Return the span of *text* from its first "{" to its last "}".

    The LLM sometimes surrounds its JSON answer with extra prose, so only the
    outermost brace-delimited span is kept. Returns None when *text* is not a
    string or does not contain a "{" followed (anywhere later) by a "}".
    """
    if not isinstance(text, str):
        return None
    start = text.find("{")
    end = text.rfind("}")
    # rfind() yields -1 when there is no "}", so compare the raw indices
    # before turning `end` into an exclusive slice bound.
    if start == -1 or end < start:
        return None
    return text[start:end + 1]


def _join_paragraphs(body):
    """Turn the "body" value of a generated story into one text blob.

    A list of paragraphs is joined with newlines; a plain string is used
    as-is (joining it would interleave newlines between its characters);
    JSON null falls back to the same placeholder used for a missing key.
    """
    if body is None:
        return "无内容"
    if isinstance(body, str):
        return body
    if isinstance(body, (list, tuple)):
        return "\n".join(str(paragraph) for paragraph in body)
    return str(body)


def _generate_articles(db, contents, system_prompt_file):
    """Generate one Article per content item using the given system prompt.

    Items whose LLM reply contains no JSON object, or whose JSON cannot be
    parsed, are logged with a warning and skipped so that one bad reply does
    not abort the rest of the batch. Each article is committed individually.
    """
    llm_engine = LLMThinkingEngine(system_prompt_file=system_prompt_file)
    for content in contents:
        logger.info(f"story_edit_task content id: {content.id}, title: {content.link}, platform: {content.platform}")
        story = llm_engine.think(f"故事素材:{content.content}")
        logger.info(f"story_edit_task content id: {content.id} story: {story}")

        # The reply is not always pure JSON; extract the JSON part first.
        json_text = _slice_json_object(story)
        if json_text is None:
            logger.warning(f"story_edit_task content id: {content.id} llm生成的结果不是有效的json格式,无法提取故事内容")
            continue
        try:
            json_story = json.loads(json_text)
        except json.JSONDecodeError:
            logger.warning(f"story_edit_task content id: {content.id} llm生成的结果不是有效的json格式,无法解析故事内容")
            continue

        # Persist the generated story in the Article table.
        article = Article(
            title=json_story.get("title", "无标题"),
            keywords=None,
            content=_join_paragraphs(json_story.get("body", ["无内容"])),
            used=False,
        )
        db.add(article)
        db.commit()


def story_edit_task():
    """Turn recent source contents into stories and store them as Articles.

    Loads up to 10 of the newest SourceContent rows created since 00:00 of
    the previous day, then runs every row through the LLM twice: first with
    the regular story system prompt, then with the short-story system prompt.
    Each successfully parsed reply is saved as an unused Article. Returns
    None; logs and returns early when there is nothing to process.
    """
    window_start = datetime.today().replace(
        hour=0, minute=0, second=0, microsecond=0
    ) - timedelta(days=1)

    with SessionLocal() as db:
        # Fetch the 10 most recent posts created since the start of yesterday.
        today_contents = (
            db.query(SourceContent)
            .filter(SourceContent.create_time >= window_start)
            .order_by(SourceContent.create_time.desc())
            .limit(10)
            .all()
        )
        if not today_contents:
            logger.info("story_edit_task finish, content size 0")
            return
        logger.info(f"story_edit_task get {len(today_contents)} contents")

        # Same order as before: the full batch with the regular prompt,
        # then the full batch again with the short-story prompt.
        for system_prompt_file in (
            "real_estate_story_system_prompt.txt",
            "real_estate_story_short_system_prompt.txt",
        ):
            _generate_articles(db, today_contents, system_prompt_file)
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: hand the task function to the shared task runner.
    execute_task(story_edit_task)