task: add short story logic

2026-02-15 21:21:48 +08:00
parent 9b41c3c75d
commit aab9708fde
2 changed files with 76 additions and 4 deletions
--- a/llm/prompts/real_estate_story_short_system_prompt.txt
+++ b/llm/prompts/real_estate_story_short_system_prompt.txt
@ -0,0 +1,40 @@
+你是一个收集并解读楼市众生相的观察者。每天从全国各地的购房故事里，抽取出“当下楼市最真实的情绪信号”，分享给你的粉丝。你不唱多不唱空，只是让故事本身说话。
+
+请根据用户提供的购房故事素材，按照以下步骤生成一篇微头条（控制在200字左右），并以JSON格式输出。
+
+## 第一步：素材筛选
+根据提供的素材，分析其是否符合发布标准：
+- 是否有普遍共鸣？
+- 是否有情绪张力？
+- 是否有信息增量？
+在输出JSON的 material_analysis 字段中，分别针对以上三点简要说明理由。
+
+## 第二步：撰写标题
+从以下三个标题模板中选择最合适的一个（也可微调），并说明选择理由：
+1. “[情绪钩子] + [具体信息] + [留白/反问]” 示例：“买完房三天，同小区冒出套更便宜更好的”：这位女孩的遭遇，评论区炸了。
+2. “刚买房就亏13万，是什么体验？这个广东女生的帖子，看得人又笑又想哭。”
+3. “我好像被贝壳耍了”：一个深圳女孩的买房后悔日记。
+在JSON中输出所选标题。
+
+## 第三步：构建正文
+以故事为核心，语言生动真实，控制在200字左右。结构可灵活调整，但需包含：
+- 开头：用一句话抓住注意力（可以是故事的高潮或矛盾点）。
+- 主体：简洁讲述故事，保留原帖的语气和细节。
+- 结尾：用一句理性洞察收尾，并自然引发互动（如提问）。
+避免说教，不要以“观察者”身份开头。
+在JSON中输出正文，可分段列出。
+
+## 第四步：人设检查
+在生成内容后，检查是否符合以下人设要求，并在JSON中输出布尔值：
+- 故事是否保留了原帖的真实感？
+- 结尾是否有理性洞察（但不煽动）？
+- 是否引导了互动？
+
+## 输出格式要求
+请将最终结果以JSON格式输出，包含以下字段：
+- material_analysis（对象）：包含universal_resonance（字符串）、emotional_tension（字符串）、info_increment（字符串）。
+- title（字符串）：所选标题。
+- body（数组）：正文的段落，每个段落为字符串。
+- persona_check（对象）：包含observer_perspective（布尔，是否保持不唱多不唱空的观察者立场）、authenticity（布尔，是否保留了原帖的真实感）、rational_insight（布尔，结尾是否有理性洞察且不煽动）、interaction_guidance（布尔，是否引导了互动）。
+
+确保JSON格式正确，无多余字符。
--- a/task/hot_topic/real_estate_story.py
+++ b/task/hot_topic/real_estate_story.py
@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import datetime, timedelta
 import json
 from task.manager_task import execute_task
 from config.database import SessionLocal
@ -9,10 +9,10 @@ from llm import LLMThinkingEngine

 def story_edit_task():
    with SessionLocal() as db:
-        # 获取今天的所有帖子信息
+        # 获取昨天至今的最新10条帖子信息
        today_contents = db.query(SourceContent).filter(
-            SourceContent.create_time >= datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
-        ).limit(10).all()
+            SourceContent.create_time >= (datetime.today().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=1))
+        ).order_by(SourceContent.create_time.desc()).limit(10).all()
        if len(today_contents) == 0:
            logger.info("story_edit_task finish, content size 0")
            return
@ -50,5 +50,37 @@ def story_edit_task():
            db.commit()
            # break  # 目前先处理一条内容，后续再改成批量处理
        
+        llm_engine = LLMThinkingEngine(system_prompt_file="real_estate_story_short_system_prompt.txt")
+        for content in today_contents:
+            logger.info(f"story_edit_task content id: {content.id}, title: {content.link}, platform: {content.platform}")
+            story = llm_engine.think(f"故事素材：{content.content}")
+            logger.info(f"story_edit_task content id: {content.id} story: {story}")
+            # The LLM sometimes wraps the JSON in extra text, so keep only the outermost {...} span.
+            json_start = story.find("{")
+            json_end = story.rfind("}") + 1
+            # rfind() + 1 is 0 (never -1) when "}" is missing, so compare the two positions instead.
+            if json_start == -1 or json_end <= json_start:
+                logger.warning(f"story_edit_task content id: {content.id} llm生成的结果不是有效的json格式，无法提取故事内容")
+                continue
+            # json.loads is the call that raises JSONDecodeError, so it has to sit inside the try.
+            try:
+                json_story = json.loads(story[json_start:json_end])
+            except json.JSONDecodeError:
+                logger.warning(f"story_edit_task content id: {content.id} llm生成的结果不是有效的json格式，无法解析故事内容")
+                continue
+            # Write the generated story to the Article table.
+            title = json_story.get("title", "无标题")
+            paragraphs = json_story.get("body", ["无内容"])
+            # A bare string body would otherwise be joined character by character.
+            if isinstance(paragraphs, str):
+                paragraphs = [paragraphs]
+            article_content = "\n".join(str(paragraph) for paragraph in paragraphs)
+            article = Article(
+                title=title, keywords=None, content=article_content, used=False
+            )
+            db.add(article)
+            db.commit()
+            # break  # process only one item for now; switch to batch processing later
+
 if __name__ == "__main__":
    execute_task(story_edit_task)