import edward

2025-11-12 21:19:26 +08:00
commit 5267db8a0d
48 changed files with 1848 additions and 0 deletions
--- a/task/default/default_task.py
+++ b/task/default/default_task.py
@ -0,0 +1,8 @@
+import time
+
+from log.log_manager import logger
+
+
+def default_task():
+    logger.info("default_task")
+    logger.info(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
--- a/task/hot_topic/manual_execution_script.py
+++ b/task/hot_topic/manual_execution_script.py
@ -0,0 +1,67 @@
+from database.database import get_session
+from database.thotcontent.crud import get_hot_content_by_topic_id
+from database.thottopic.crud import get_latest_hot_topic, update_hot_topic, get_hot_topic_by_id
+from llm.local.ollama import Ollama
+from log.log_manager import log
+
+if __name__ == '__main__':
+    with get_session() as db:
+        # 1. 获取热点话题
+        latest_hot_topic = get_hot_topic_by_id(db, 265)  # 根据话题ID获取特定的热点话题
+        # latest_hot_topic = get_latest_hot_topic(db)  # 获取最新的热点话题
+        topic = latest_hot_topic.topic
+        print(latest_hot_topic)
+        # 2. 获取话题内容
+        hot_contents = get_hot_content_by_topic_id(db, latest_hot_topic.id)
+        for hot_content in hot_contents:
+            print(hot_content)
+            # 统计hot_content.content的字数
+            print(len(hot_content.content))
+        topic_content = [hot_content.content for hot_content in hot_contents]
+        print(topic_content)
+        print(topic_content[0])
+        print('----------------------------------------------------')
+        print(topic_content[1])
+        print('----------------------------------------------------')
+        print(topic_content[2])
+        print('----------------------------------------------------')
+        input_message = (
+            """
+            你是一个专业的编辑。你的任务是根据提供的话题和素材，生成一片口述稿。
+            要求如下：
+            1 字数控制在200到1000字之间。
+            话题如下：
+            """
+            +
+            topic
+            +
+            """
+            素材是三位网友的见解：
+            """
+            +
+            """
+            第一位网友说：
+            """
+            +
+            topic_content[0]
+            +
+            """
+            第二位网友说：
+            """
+            +
+            topic_content[1]
+            +
+            """
+            第三位网友说：
+            """
+            +
+            topic_content[2]
+        )
+        ollama = Ollama()
+        if not ollama.is_service_running():
+            log("ai_summary_task finish, ollama service not running")
+        else:
+            result = ollama.generate_text(input_message)
+            log(result)
+            latest_hot_topic.ai_script = result
+            update_hot_topic(db, latest_hot_topic)
--- a/task/hot_topic/script_task.py
+++ b/task/hot_topic/script_task.py
@ -0,0 +1,73 @@
+import json
+from json import JSONDecodeError
+
+from database.database import get_session
+from database.tvideoscript.video_script import get_today_video_script, update_video_script
+from llm.local.ollama import Ollama
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def ai_script_task():
+    with get_session() as db:
+        # 1. 获取今日的热点话题列表
+        video_scripts = get_today_video_script(db)
+        if len(video_scripts) == 0:
+            log("ai_script_task finish, task size 0")
+            return
+        log(f"ai script task size {len(video_scripts)}")
+
+        ollama = Ollama()
+        if not ollama.is_service_running():
+            log("ai_script_task finish, ollama service not running")
+            return
+
+        for video_script in video_scripts:
+            topic = video_script.title
+            log(f'generate script for topic: {topic}')
+            # 2. 获取话题内容
+            content = video_script.content
+            input_message = (
+"""
+## 角色
+- 你是一个资深编辑。
+
+## 目标
+- 从输入的素材中选取有用的信息。
+
+## 任务描述
+- 从contents中选取最能吸引人的段落或句子，使读者产生兴趣和共鸣。请确保这些内容具有情感张力、戏剧性、趣味性或引发思考的价值。选取两部分内容，分别使用键“content_one”和“content_two”。每部分不少于100个汉字。两部分内容字数之和不多于600个汉字。
+
+## 要求
+- 严格遵守字数要求。
+- 直接输出内容。
+- 内容为JSON格式。
+
+## 素材如下
+""" + content
+            )
+            # log(input_message)
+            # 3. 调用ollama生成话题脚本
+            llm_result = ollama.generate_text(input_message)
+            log(llm_result)
+            try:
+                llm_result = json.loads(llm_result)
+            except JSONDecodeError as e:
+                log(f"ai_script_task error: {e}. skip topic: {topic}")
+                continue
+            video_script.script = (
+f"""{video_script.title}
+{video_script.description}
+一位网友说：
+{llm_result["content_one"]}
+另一位网友说:
+{llm_result["content_two"]}
+关于这个问题大家有什么看法呢？
+欢迎评论区留言
+"""
+            )
+            # 4. 保存话题脚本
+            update_video_script(db, video_script)
+
+if __name__ == "__main__":
+    execute_task(ai_script_task)
--- a/task/manager_task.py
+++ b/task/manager_task.py
@ -0,0 +1,108 @@
+import importlib
+import time
+from functools import partial
+
+from apscheduler.schedulers.blocking import BlockingScheduler
+
+from config import config
+from database.database import get_session
+from database.tscheduler.crud import get_tasks_by_executor
+from log.log_manager import log
+
+"""
+这是一个特殊的任务，负责管理任务，命名为管理者任务。
+
+工作流程：
+1 检索数据库任务数据表
+2 检查是否已经在任务队列中，如果不在则添加
+
+任务执行时间间隔为600秒。
+
+"""
+
+
+def log_task_execution(task_name: str, start_time: float, end_time: float = None):
+    """辅助函数，记录任务的开始和结束日志"""
+    start_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start_time))
+    end_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))
+    if end_time is None:
+        log(f"{task_name} start execute at {start_time_str}")
+    else:
+        elapsed_time = end_time - start_time
+        log(f"{task_name} end execute at {end_time_str}, use time {elapsed_time:.2f} seconds")
+
+
+def execute_task(task: callable):
+    """执行任务并记录日志"""
+    start_time = time.time()
+    log_task_execution(task.__name__, start_time)  # 先记录开始时间
+    task()
+    end_time = time.time()
+    log_task_execution(task.__name__, start_time, end_time)  # 记录结束时间
+
+# 从数据库加载任务
+def load_tasks(scheduler: BlockingScheduler):
+    with get_session() as db:
+        tasks = get_tasks_by_executor(db, config.scheduler_name)
+        for task in tasks:
+            module_path = task.module_path
+            function_name = task.function_name
+            trigger = task.trigger
+            interval_seconds = task.interval_seconds
+            task_id = task.id
+
+            # 动态导入模块和函数
+            module = importlib.import_module(module_path)
+            task_function = partial(execute_task, getattr(module, function_name))
+
+            # 检查任务是否已存在
+            if not scheduler.get_job(str(task_id)):
+                if trigger == "interval":
+                    scheduler.add_job(
+                        task_function,
+                        "interval",
+                        seconds=interval_seconds,
+                        id=str(task_id),
+                        replace_existing=True
+                    )
+                    log(f"Task {task.task_name} added with interval {interval_seconds} seconds")
+                elif trigger == "cron":
+                    # 解析 cron 表达式的字段
+                    fields = task.cron_expression.split()
+                    # 确保字段长度符合七字段格式
+                    if len(fields) != 7:
+                        raise ValueError("无效的 Quartz cron 表达式")
+                    # 替换 Quartz 风格的 `?` 为 APScheduler 可接受的 `*`
+                    if fields[5] == '?':
+                        fields[5] = '*'  # 替换 `day_of_week` 字段中的 `?`
+                    # 使用 cron 表达式的字段添加任务
+                    scheduler.add_job(
+                        task_function,  # 要执行的任务
+                        'cron',  # 使用 cron 触发器
+                        second=fields[0],  # 秒
+                        minute=fields[1],  # 分钟
+                        hour=fields[2],  # 小时
+                        day=fields[3],  # 日期
+                        month=fields[4],  # 月份
+                        day_of_week=fields[5],  # 星期
+                        year=fields[6],  # 年份
+                        id=str(task_id),
+                        replace_existing=True
+                    )
+                    log(f"Task {task.task_name} added with cron {task.cron_expression}")
+                elif trigger == "date":
+                    scheduler.add_job(
+                        task_function,
+                        "date",
+                        run_date=task["run_date_and_time"],
+                        id=str(task_id),
+                        replace_existing=True
+                    )
+                    log(f"Task {task.task_name} added with date {task.execution_date}")
+                else:
+                    log(f"Invalid trigger type: {trigger}")
+
+
+# Manager task.
+def manager_task(scheduler: BlockingScheduler):
+    """Run one management pass by delegating to ``load_tasks`` for ``scheduler``."""
+    load_tasks(scheduler)
--- a/task/material_distribution.py
+++ b/task/material_distribution.py
@ -0,0 +1,17 @@
+from database.database import get_session
+from database.tmaterial.crud import receive_news
+from database.tnews.crud import get_news_unprocessed, set_news_usage
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def distribution_task():
+    with get_session() as db:
+        news_list = get_news_unprocessed(db)
+        receive_news(db, news_list)
+        set_news_usage(db, news_list)
+        log(f'distributed {len(news_list)} news')
+
+
+if __name__ == '__main__':
+    execute_task(distribution_task)
--- a/task/news/reference_message.py
+++ b/task/news/reference_message.py
@ -0,0 +1,49 @@
+import re
+import time
+
+from database.database import get_session
+from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content
+from database.tcontentdispatch.model import TContentDispatch
+from database.tmaterial.crud import update_material_by_id, \
+    get_materials_for_generate_news
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def generate_news_task():
+    with get_session() as db:
+        # 1. 构建今日新闻文章标题，格式：今日新闻yyyy-MM-dd
+        title = ("今日新闻" +
+                 time.strftime("%Y", time.localtime()) + '年' +
+                 time.strftime("%m", time.localtime()) + '月' +
+                 time.strftime("%d", time.localtime()) + '日')
+        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
+        content_dispatch = get_content_by_title_and_category(db, title, "新鲜事")
+        content = ""
+        if content_dispatch is not None:
+            content = content_dispatch.content
+            # 从最后一条获取并计算开始编号
+            result = re.findall(r'(?<!\.)\d+\. ', content)
+            start_num = int(re.findall(r'\d+', result[-1])[-1]) + 1
+        else:
+            content_dispatch = TContentDispatch(category="新鲜事", title=title, ai_generate=1)
+            start_num = 1
+        # 3. 从新闻素材数据表获取房产类的新闻列表
+        news_list = get_materials_for_generate_news(db)
+        # 4. 拼接成文章正文content
+        for i, news in enumerate(news_list, start=start_num):  # Using enumerate to control the index starting from 1
+            content += f"{i}. {news.ai_summary}\n"
+        # 5. 把content写入数据库
+        if content is not None and content != "" and news_list is not None:
+            content_dispatch.content = content
+            content_dispatch.is_sent = False
+            create_or_update_content(db, content_dispatch)
+        # 6. 把news_list更新入数据库更新字段is_usage为True
+        for news in news_list:
+            news.is_usage = True
+            update_material_by_id(db, news)
+        log(f"generate_news_task finish, news count {start_num + len(news_list)}")
+
+
+if __name__ == "__main__":
+    execute_task(generate_news_task)
--- a/task/news/revisal.py
+++ b/task/news/revisal.py
@ -0,0 +1,54 @@
+import time
+
+from database.database import get_session
+from database.tcontentdispatch.curd import get_content_by_title_and_category, update
+from llm.local.ollama import Ollama
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def ai_edit(input_message: str) -> str:
+    log(f"ai_edit start execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
+    ollama = Ollama()
+    response = ollama.generate_text(input_message)
+    log(f"ai_edit end execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
+    return response
+
+def revisal_task():
+    with get_session() as db:
+        # 1. 构建今日新闻文章标题，格式：今日新闻yyyy-MM-dd
+        title = ("今日新闻" +
+                 time.strftime("%Y", time.localtime()) + '年' +
+                 time.strftime("%m", time.localtime()) + '月' +
+                 time.strftime("%d", time.localtime()) + '日')
+        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
+        content_dispatch = get_content_by_title_and_category(db, title, "新鲜事")
+        ai_content = ""
+        if content_dispatch and content_dispatch.content:
+            # 3. 执行AI编辑
+            input_message = (('按照规则编辑提供的内容。规则如下：\n'
+                             '1 去除重复内容\n'
+                             '2 不要故意删除内容\n'
+                             '3 重新编号\n'
+                             '4 不要出现空行\n'
+                             '5 不要出现类似"以下是根据您提供的规则编辑后的内容"等提示信息，直接输出编辑后的内容\n'
+                             '内容如下：\n')
+                             + content_dispatch.content)
+            ai_content = ai_edit(input_message)
+            print(content_dispatch.content)
+            print("-----------------------------------------------------------")
+            print(ai_content)
+        # 4. 去掉ai_content中的空行
+        ai_content = "\n".join([line for line in ai_content.split("\n") if line.strip()])
+        # 5. 把content写入数据库
+        if ai_content:
+            content_dispatch.ai_content = ai_content
+            content_dispatch.is_sent = False
+            update(db)
+        # 获取ai_content的行数
+        lines = ai_content.strip().split("\n")
+        log(f"revisal news task finish, news count: {len(lines)}, news words: {len(ai_content)}")
+
+
+if __name__ == "__main__":
+    execute_task(revisal_task)
--- a/task/real_estate/reference_message.py
+++ b/task/real_estate/reference_message.py
@ -0,0 +1,47 @@
+import re
+import time
+
+from database.database import get_session
+from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content
+from database.tcontentdispatch.model import TContentDispatch
+from database.tmaterial.crud import update_material_by_id, \
+    get_materials_for_generate_real_estate_reference_message
+from task.manager_task import execute_task
+
+
+def generate_real_estate_reference_message_task():
+    with get_session() as db:
+        # 1. 构建楼市参考消息文章标题，格式：楼市参考消息yyyy-MM-dd
+        title = ("楼市参考消息" +
+                 time.strftime("%Y", time.localtime()) + '年' +
+                 time.strftime("%m", time.localtime()) + '月' +
+                 time.strftime("%d", time.localtime()) + '日')
+        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
+        content_dispatch = get_content_by_title_and_category(db, title, "房地产")
+        content = ""
+        if content_dispatch is not None:
+            content = content_dispatch.content
+            # 从最后一条获取并计算开始编号
+            result = re.findall(r'(?<!\.)\d+\. ', content)
+            start_num = int(re.findall(r'\d+', result[-1])[-1]) + 1
+        else:
+            content_dispatch = TContentDispatch(category="房地产", title=title, ai_generate=1)
+            start_num = 1
+        # 3. 从新闻素材数据表获取房产类的新闻列表
+        news_list = get_materials_for_generate_real_estate_reference_message(db)
+        # 4. 拼接成文章正文content
+        for i, news in enumerate(news_list, start=start_num):  # Using enumerate to control the index starting from 1
+            content += f"{i}. {news.ai_summary}\n"
+        # 5. 把content写入数据库
+        if content is not None and news_list is not None:
+            content_dispatch.content = content
+            content_dispatch.is_sent = False
+            create_or_update_content(db, content_dispatch)
+        # 6. 把news_list更新入数据库更新字段is_usage为True
+        for news in news_list:
+            news.is_usage = True
+            update_material_by_id(db, news)
+
+
+if __name__ == "__main__":
+    execute_task(generate_real_estate_reference_message_task)
--- a/task/real_estate/revisal.py
+++ b/task/real_estate/revisal.py
@ -0,0 +1,50 @@
+import time
+
+from database.database import get_session
+from database.tcontentdispatch.curd import get_content_by_title_and_category, update
+from llm.local.ollama import Ollama
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def ai_edit_with_ollama(input_message: str) -> str:
+    log(f"ai_edit_with_ollama start execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
+    ollama = Ollama()
+    response = ollama.generate_text(input_message)
+    log(f"ai_edit_with_ollama end execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
+    return response
+
+def revisal_task():
+    with get_session() as db:
+        # 1. 构建楼市参考消息文章标题，格式：楼市参考消息yyyy-MM-dd
+        title = ("楼市参考消息" +
+                 time.strftime("%Y", time.localtime()) + '年' +
+                 time.strftime("%m", time.localtime()) + '月' +
+                 time.strftime("%d", time.localtime()) + '日')
+        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
+        content_dispatch = get_content_by_title_and_category(db, title, "房地产")
+        ai_content = ""
+        if content_dispatch and content_dispatch.content:
+            print(content_dispatch.content)
+            # 3. 执行AI编辑
+            input_message = (('按照规则编辑提供的内容。规则如下：\n'
+                             '1 以每行内容作为一个处理单元，去掉与房地产、楼市无关的内容。如果整行内容都无关则全部去掉，否则全部保留。\n'
+                             '2 去除重复内容\n'
+                             '3 不要故意删除内容\n'
+                             '5 不要出现空行\n'
+                             '6 重新编号\n'
+                             '7 不要出现类似"以下是根据您提供的规则编辑后的内容"等提示信息，直接输出编辑后的内容\n'
+                             '内容如下：\n')
+                             + content_dispatch.content)
+            ai_content = ai_edit_with_ollama(input_message)
+            print("-----------------------------------------------------------")
+            print(ai_content)
+        # 4. 把content写入数据库
+        if ai_content:
+            content_dispatch.ai_content = ai_content
+            content_dispatch.is_sent = False
+            update(db)
+
+
+if __name__ == "__main__":
+    execute_task(revisal_task)
--- a/task/summary/ai_summary.py
+++ b/task/summary/ai_summary.py
@ -0,0 +1,38 @@
+from database.database import get_session
+from database.tmaterial.crud import get_material_need_summary, update_material
+from llm.kimi import Kimi
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def ai_summary(input_message: str, instance) -> str:
+    response = instance.generate(input_message)
+    log(response)
+    return response
+
+def ai_summary_task():
+    with get_session() as db:
+        news_list = get_material_need_summary(db)
+        if len(news_list) == 0:
+            log("ai_summary_task finish, task size 0")
+            return
+        kimi = Kimi()
+        for news in news_list:
+            # input_message = news.url + "\n阅读并生成百字以内的摘要"
+            # input_message = "请将以下文章内容用一句话进行概括，提炼出最核心的观点或主题，简洁明了，不超过 100 字：\n" + news.url
+            log(news.url)
+            input_message = (
+                    "请将以下新闻内容提炼成一句话，简洁明了地传达事件的核心信息，包括谁、什么、何时、何地和为何，字数不超过 100 字："
+                    + news.url
+            )
+            summary = ai_summary(input_message, kimi)
+            updates = {"ai_summary": summary}
+            update_material(db, news.id, updates)
+        kimi.quit()
+        log(f"ai_summary_task finish, task size {len(news_list)}")
+
+
+if __name__ == '__main__':
+    # input_message_ = "https://www.ofweek.com/ai/2024-12/ART-201700-8500-30653318.html\n阅读并生成200字以内的摘要"
+    # logger.info(ai_summary(input_message_))
+    execute_task(ai_summary_task)
--- a/task/summary/ai_summary_ollama.py
+++ b/task/summary/ai_summary_ollama.py
@ -0,0 +1,53 @@
+from database.database import get_session
+from database.tmaterial.crud import get_material_need_summary, update_material
+from llm.local.ollama import Ollama
+from log.log_manager import log
+from task.manager_task import execute_task
+
+
+def ai_summary(input_message: str, instance) -> str:
+    response = instance.generate(input_message)
+    log(response)
+    return response
+
+def ai_summary_task():
+    with get_session() as db:
+        news_list = get_material_need_summary(db)
+        if len(news_list) == 0:
+            log("ai_summary_task finish, task size 0")
+            return
+        log(f"ai summary task size {len(news_list)}")
+        ollama = Ollama()
+        if not ollama.is_service_running():
+            log("ai_summary_task finish, ollama service not running")
+            return
+        for news in news_list:
+            input_message = (
+"""
+请为以下新闻生成严格单段落的中文摘要，要求：
+1. 保持段落连贯性，不使用任何分段符号（包括空行、缩进或序号）
+2. 核心要素按此顺序呈现：
+   [时间]>[地点]>[主体机构]>[关键事件]>[量化影响]
+3. 采用"总-分"结构：
+   - 首句陈述核心事实（包含最关键的时间地点主体）
+   - 中间展开关键细节（使用衔接词：同时/此外/值得注意的是）
+   - 结尾说明当前状态/后续影响
+4. 字数严格控制在100个字符以内
+5. 禁止使用项目符号、引文格式等非连贯文本元素
+
+新闻原文：
+"""
+ news.content
+            )
+            summary = ollama.generate_text(input_message)
+            log(f'{news.url} {summary}')
+            # 判断summary是否是一段话
+            if '\n' in summary:
+                summary = 'summary formate error'
+            updates = {"ai_summary": summary}
+            update_material(db, news.id, updates)
+        log(f"ai_summary_task finish, task size {len(news_list)}")
+
+
+if __name__ == '__main__':
+    execute_task(ai_summary_task)
--- a/task/tech/reference_message.py
+++ b/task/tech/reference_message.py
@ -0,0 +1,47 @@
+import re
+import time
+
+from database.database import get_session
+from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content
+from database.tcontentdispatch.model import TContentDispatch
+from database.tmaterial.crud import update_material_by_id, \
+    get_materials_for_generate_tech_reference_message
+from task.manager_task import execute_task
+
+
+def generate_reference_message_task():
+    with get_session() as db:
+        # 1. 构建AI参考消息文章标题，格式：楼市参考消息yyyy-MM-dd
+        title = ("科技参考消息" +
+                 time.strftime("%Y", time.localtime()) + '年' +
+                 time.strftime("%m", time.localtime()) + '月' +
+                 time.strftime("%d", time.localtime()) + '日')
+        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
+        content_dispatch = get_content_by_title_and_category(db, title, "科技")
+        content = ""
+        if content_dispatch is not None:
+            content = content_dispatch.content
+            # 从最后一条获取并计算开始编号
+            result = re.findall(r'(?<!\.)\d+\. ', content)
+            start_num = int(re.findall(r'\d+', result[-1])[-1]) + 1
+        else:
+            content_dispatch = TContentDispatch(category="科技", title=title, ai_generate=1)
+            start_num = 1
+        # 3. 从新闻素材数据表获取AI类的新闻列表
+        news_list = get_materials_for_generate_tech_reference_message(db)
+        # 4. 拼接成文章正文content
+        for i, news in enumerate(news_list, start=start_num):  # Using enumerate to control the index starting from 1
+            content += f"{i}. {news.ai_summary}\n"
+        # 5. 把content写入数据库
+        if content is not None and news_list is not None:
+            content_dispatch.content = content
+            content_dispatch.is_sent = False
+            create_or_update_content(db, content_dispatch)
+        # 6. 把news_list更新入数据库更新字段is_usage为True
+        for news in news_list:
+            news.is_usage = True
+            update_material_by_id(db, news)
+
+
+if __name__ == "__main__":
+    execute_task(generate_reference_message_task)