import edward

2025-11-12 21:19:26 +08:00
commit 5267db8a0d
48 changed files with 1848 additions and 0 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,6 @@
 .git
 .gitea
 .gitignore
 Readme.md
 Dockerfile
 docker-compose.yml
--- a/.gitea/workflows/deploy-workflow.yml
+++ b/.gitea/workflows/deploy-workflow.yml
@ -0,0 +1,30 @@
 name: Gitea Actions Demo
 run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀
 on: [push]
 jobs:
  deploy:
    runs-on: ubuntu-latest
    container: 
      image: gitea/runner-images:ubuntu-latest
    steps:
      - name: clone project code
        run: git clone ${{ gitea.server_url }}/${{ gitea.repository }} .
      - name: List files
        run: ls -la
      - name: Stop running containers
        run: |
          docker compose down || true
      - name: Remove old image
        run: |
          IMAGE_NAME=$(basename "$PWD")
          echo "Removing old image: $IMAGE_NAME"
          docker images | grep "$IMAGE_NAME" && docker rmi -f $(docker images "$IMAGE_NAME" -q) || echo "No old image found."
      - name: Build new image
        run: |
          docker build -t $(basename "$PWD"):latest .
      - name: Start containers
        run: |
          docker compose up -d
      - name: Show container status
        run: docker ps
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 __pycache__
--- a/19
+++ b/19
@ -0,0 +1,19 @@
 # 使用官方轻量级 Python 基础镜像
 FROM python:3.12-slim
 # 设置工作目录（容器内路径）
 WORKDIR /app
 # 将项目文件复制到容器中
 COPY . /app
 # （可选）如果你有 requirements.txt，则先复制并安装依赖
 RUN if [ -f requirements.txt ]; then \
        pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/; \
    fi
 # 设置环境变量（防止 Python 缓存文件）
 ENV PYTHONUNBUFFERED=1
 # 启动命令
 CMD ["python", "edward.py"]
--- a/Readme.md
+++ b/Readme.md
@ -0,0 +1,9 @@
 # Edward（缩写为Ed）
 许多编辑职位上的名字为 Ed（也与“编辑”Editor的缩写一致）
 ## environment
 - conda create -n edward python=3.12
 - conda activate edward
 - pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
--- a/config/config.py
+++ b/config/config.py
@ -0,0 +1,4 @@
 # scheduler name
 scheduler_name = 'edward'
 # scheduler interval in seconds
 scheduler_interval = 3600
--- a/database/database.py
+++ b/database/database.py
@ -0,0 +1,37 @@
 from contextlib import contextmanager
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker, declarative_base
 from log.log_manager import logger
 Base = declarative_base()
 DATABASE_URL = 'postgresql+psycopg://postgres:K8u3fg0o@47.119.128.161:60001/squirrel'
 engine = create_engine(
    DATABASE_URL,
    pool_size=10,
    max_overflow=20,
    pool_timeout=30,
    pool_recycle=1800,  # 防止数据库端连接过期
    connect_args={
        'connect_timeout': 15,
        'keepalives_idle': 60,
        'keepalives_interval': 10,
        'keepalives_count': 5
    }
 )
 Base.metadata.create_all(engine)
@contextmanager
 def get_session():
    session = sessionmaker(bind=engine)()
    try:
        yield session
        session.commit()  # 自动提交成功的事务
    except Exception as e:
        session.rollback()  # 异常时回滚
        logger.error(f"Database operation failed: {str(e)}")
        raise  # 重新抛出异常
    finally:
        session.close()  # 确保会话关闭
--- a/database/tcontentdispatch/curd.py
+++ b/database/tcontentdispatch/curd.py
@ -0,0 +1,50 @@
 from database.tcontentdispatch.model import TContentDispatch
 def create_content(db, content: TContentDispatch):
    db.add(content)
    db.commit()
    db.refresh(content)
    return content
 def create_or_update_content(db, content: TContentDispatch):
    content_in_db = db.query(TContentDispatch).filter(TContentDispatch.id == content.id).first()
    if content_in_db:
        # Update existing content
        db.commit()  # Save changes to the database
        db.refresh(content)  # Refresh the object with the updated values
    else:
        # Create new content if not found in DB
        db.add(content)
        db.commit()  # Save new content to the database
        db.refresh(content)  # Refresh to get the content's updated state (e.g., ID if it's auto-generated)
 def get_content_by_id(db, content_id: int):
    return db.query(TContentDispatch).filter(TContentDispatch.id == content_id).first()
 def get_content_by_title_and_category(db, title: str, category: str):
    return db.query(TContentDispatch).filter(
        TContentDispatch.title == title,
        TContentDispatch.category == category
    ).first()
 def update_content(db, content_id: int, updates: dict):
    content = db.query(TContentDispatch).filter(TContentDispatch.id == content_id).first()
    if content:
        for key, value in updates.items():
            setattr(content, key, value)
        db.commit()
        db.refresh(content)
    return content
 def update(db):
    db.commit()
 def delete_content(db, content_id: int):
    content = db.query(TContentDispatch).filter(TContentDispatch.id == content_id).first()
    if content:
        db.delete(content)
        db.commit()
    return content
--- a/database/tcontentdispatch/model.py
+++ b/database/tcontentdispatch/model.py
@ -0,0 +1,27 @@
 from sqlalchemy import Column, Integer, String, Text, Boolean, TIMESTAMP, func
 from sqlalchemy.dialects.postgresql import BIGINT
 from dataclasses import dataclass
 from database.database import Base
@dataclass
 class TContentDispatch(Base):
    __tablename__ = 't_content_dispatch'
    id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='自动递增的唯一任务ID')
    creation_date: str = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='记录数据条目的创建时间')
    is_sent: bool = Column(Boolean, default=False, nullable=False, comment='表示数据条目是否已被发送')
    category: str = Column(String(255), nullable=False, comment='类别')
    title: str = Column(String(255), nullable=False, comment='标题')
    cover_image: str = Column(Text, nullable=True, comment='封面')
    poster_image: str = Column(Text, nullable=True, comment='海报')
    opening_text: str = Column(Text, nullable=True, comment='开头语')
    content: str = Column(Text, nullable=False, comment='内容')
    ai_content: str = Column(Text, nullable=True, comment='AI编辑的内容')
    closing_text: str = Column(Text, nullable=True, comment='结束语')
    is_scheduled: bool = Column(Boolean, default=False, nullable=False, comment='是否设置为定时发送')
    schedule_time: str = Column(TIMESTAMP, nullable=True, comment='定时发送的具体时间')
    format: str = Column(String(50), nullable=True, comment='数据条目的格式')
    ai_generate: int = Column(Integer, default=0, nullable=False, comment='是否AI生成。0否，1部分参与，2是。')
    def __repr__(self):
        return f"<TContentDispatch(id={self.id}, title={self.title}, category={self.category})>"
--- a/database/thotcontent/crud.py
+++ b/database/thotcontent/crud.py
@ -0,0 +1,77 @@
 from database.thotcontent.model import THotContent
 from log.log_manager import logger
 def create_hot_content(db, hot_content: THotContent):
    db.add(hot_content)
    db.commit()
    db.refresh(hot_content)
    return hot_content
 # 插入数据库之前判断数据库中是否已经存在，根据news.url 判断
 def create_content_if_url_not_exists(db, hot_content: THotContent):
    # 检查是否已经存在具有相同 URL 的记录
    existing_content = db.query(THotContent).filter(THotContent.url == hot_content.url).first()
    if existing_content:
        # 如果记录已存在，直接返回已有的记录
        return existing_content
    # 如果记录不存在，插入新的记录
    db.add(hot_content)
    db.commit()
    db.refresh(hot_content)
    return hot_content
 def create_contents_top3_if_url_not_exists(db, contents: list[THotContent]):
    logger.info(f"采集到内容数量：{len(contents)}，存入数据库前三")
    # 按照 THotContent.content_upvote_count 对contents进行排序
    contents.sort(key=lambda x: x.content_upvote_count, reverse=True)
    # 保留 contents 的前3条
    contents = contents[:3]
    inserted_contents = []  # 用于保存实际插入的新闻记录
    for content in contents:
        # 检查是否已经存在具有相同 URL 的记录
        existing_content = db.query(THotContent).filter(THotContent.url == content.url).first()
        if not existing_content:
            # 如果记录不存在，插入新的记录
            db.add(content)
            inserted_contents.append(content)
    # 批量提交所有插入的记录
    db.commit()
    # 刷新所有新插入的记录
    for content in inserted_contents:
        db.refresh(content)
    return inserted_contents
 def get_hot_content_by_id(db, hot_content_id: int):
    return db.query(THotContent).filter(THotContent.id == hot_content_id).first()
 def get_hot_content_by_topic_id(db, topic_id: int):
    return db.query(THotContent).filter(THotContent.topic_id == topic_id).all()
 def get_hot_contents(db, skip: int = 0, limit: int = 100):
    return db.query(THotContent).offset(skip).limit(limit).all()
 def update_hot_content(db, hot_content_id: int, updates: dict):
    hot_content = db.query(THotContent).filter(THotContent.id == hot_content_id).first()
    if hot_content:
        for key, value in updates.items():
            setattr(hot_content, key, value)
        db.commit()
        db.refresh(hot_content)
    return hot_content
 def delete_hot_content(db, hot_content_id: int):
    hot_content = db.query(THotContent).filter(THotContent.id == hot_content_id).first()
    if hot_content:
        db.delete(hot_content)
        db.commit()
--- a/database/thotcontent/model.py
+++ b/database/thotcontent/model.py
@ -0,0 +1,23 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import Column, Integer, String, BIGINT, TIMESTAMP, func
 from sqlalchemy.dialects.postgresql import BIGINT
 from database.database import Base
@dataclass
 class THotContent(Base):
    __tablename__ = 't_hot_content'
    id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='序号')
    topic_id: int = Column(BIGINT, nullable=False, comment='关联话题ID')
    url: Optional[str] = Column(String, nullable=True, comment='内容链接')
    content: Optional[str] = Column(String, nullable=True, comment='内容详情')
    content_upvote_count: Optional[int] = Column(BIGINT, nullable=True, comment='内容点赞数量')
    content_comment_count: Optional[int] = Column(Integer, nullable=True, comment='内容评论数量')
    create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
    def __repr__(self):
        return f"<THotContent(id={self.id}, topic_id={self.topic_id}, url={self.url}, content_upvote_count={self.content_upvote_count})>"
--- a/database/thottopic/crud.py
+++ b/database/thottopic/crud.py
@ -0,0 +1,87 @@
 from datetime import datetime
 from sqlalchemy import func
 from database.thottopic.model import THotTopic
 def create_hot_topic(db, hot_topic: THotTopic):
    db.add(hot_topic)
    db.commit()
    db.refresh(hot_topic)
    return hot_topic
 # 插入数据库之前判断数据库中是否已经存在，根据news.url 判断
 def create_topic_if_url_not_exists(db, hot_topic: THotTopic):
    # 检查是否已经存在具有相同 URL 的记录
    existing_topic = db.query(THotTopic).filter(THotTopic.url == hot_topic.url).first()
    if existing_topic:
        # 如果记录已存在，直接返回已有的记录
        return existing_topic
    # 如果记录不存在，插入新的记录
    db.add(hot_topic)
    db.commit()
    db.refresh(hot_topic)
    return hot_topic
 def create_topics_if_url_not_exists(db, topics: list[THotTopic]):
    inserted_topics = []  # 用于保存实际插入的新闻记录
    for topic in topics:
        # 检查是否已经存在具有相同 URL 的记录
        existing_topic = db.query(THotTopic).filter(THotTopic.url == topic.url).first()
        if not existing_topic:
            # 如果记录不存在，插入新的记录
            db.add(topic)
            inserted_topics.append(topic)
    # 批量提交所有插入的记录
    db.commit()
    # 刷新所有新插入的记录
    for topic in inserted_topics:
        db.refresh(topic)
    return inserted_topics
 def get_hot_topic_by_id(db, hot_topic_id: int):
    return db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first()
 def get_hot_topics(db, skip: int = 0, limit: int = 100):
    return db.query(THotTopic).offset(skip).limit(limit).all()
 # 根据THotTopic.update_time排序，获取最新的THotTopic
 def get_latest_hot_topic(db):
    return db.query(THotTopic).order_by(THotTopic.update_time.desc()).first()
 # 根据THotTopic.create_time获取今日的所有THotTopic
 def get_today_hot_topic(db):
    today = datetime.now().date()
    return db.query(THotTopic).filter(func.date(THotTopic.create_time) == today).all()
 def update_hot_topic(db, hot_topic: THotTopic):
    db.merge(hot_topic)
    db.commit()
    db.refresh(hot_topic)
    return hot_topic
 # def update_hot_topic(db, hot_topic_id: int, updates: dict):
 #     db.query(THotTopic).filter(THotTopic.id == hot_topic_id).update(updates)
 #     db.commit()
 #     return db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first()
 def delete_hot_topic(db, hot_topic_id: int):
    hot_topic = db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first()
    if hot_topic:
        db.delete(hot_topic)
        db.commit()
    return hot_topic
--- a/database/thottopic/model.py
+++ b/database/thottopic/model.py
@ -0,0 +1,34 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import Column, String, Integer, TIMESTAMP, func
 from sqlalchemy.dialects.postgresql import BIGINT
 from database.database import Base
@dataclass
 class THotTopic(Base):
    __tablename__ = 't_hot_topic'
    id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='序号')
    topic: str = Column(String, nullable=False, comment='话题')
    topic_description: Optional[str] = Column(String, nullable=True, comment='话题描述')
    url: Optional[str] = Column(String, nullable=True, comment='话题链接')
    source: Optional[str] = Column(String, nullable=True, comment='话题来源')
    keywords: Optional[str] = Column(String, nullable=True, comment='话题关键词')
    content_count: int = Column(Integer, default=0, nullable=False, comment='话题内容数量')
    comment_count: int = Column(Integer, default=0, nullable=False, comment='话题评论数量')
    follower_count: int = Column(Integer, default=0, nullable=False, comment='话题关注者数量')
    date_created: Optional[datetime] = Column(TIMESTAMP(timezone=True), nullable=True, comment='话题创建时间')
    date_modified: Optional[datetime] = Column(TIMESTAMP(timezone=True), nullable=True, comment='话题修改时间')
    top_content_url: Optional[str] = Column(String, nullable=True, comment='热内内容链接')
    top_content_upvote_count: Optional[int] = Column(BIGINT, nullable=True, comment='热门内容点赞数量')
    top_content_comment_count: Optional[int] = Column(Integer, nullable=True, comment='热门内容评论数量')
    create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
    update_time: Optional[datetime] = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='更新时间')
    ai_script: Optional[str] = Column(String, nullable=True, comment='内容脚本')
    def __repr__(self):
        return f"<THotTopic(topic={self.topic}, url={self.url}, id={self.id}, source={self.source}, content_count={self.content_count})>"
--- a/database/tmaterial/crud.py
+++ b/database/tmaterial/crud.py
@ -0,0 +1,129 @@
 from database.tmaterial.model import TMaterial
 from database.tnews.model import TNews
 def create_material(db, material: TMaterial):
    db.add(material)
    db.commit()
    db.refresh(material)
    return material
 def receive_news(db, news_list: list[TNews]):
    material_list = []
    for news in news_list:
        material = TMaterial()
        material.title = news.title
        material.summary = news.summary
        material.url = news.url
        material.content = news.content
        material.occurrence_date = news.occurrence_date
        material.source = news.source
        material.primary_category = news.primary_category
        material.secondary_category = news.secondary_category
        material.tertiary_category = news.tertiary_category
        material.label = news.label
        material_list.append(material)
    create_materials_if_url_not_exists(db, material_list)
 # 插入数据库之前判断数据库中是否已经存在，根据TMaterial.url 判断
 def create_material_if_url_not_exists(db, material: TMaterial):
    # 检查是否已经存在具有相同 URL 的记录
    existing_material = db.query(TMaterial).filter(TMaterial.url == material.url).first()
    if existing_material:
        # 如果记录已存在，直接返回已有的记录
        return existing_material
    # 如果记录不存在，插入新的记录
    db.add(material)
    db.commit()
    db.refresh(material)
    return material
 def create_materials_if_url_not_exists(db, material_list: list[TMaterial]):
    inserted_materials = []  # 用于保存实际插入的新闻记录
    for material in material_list:
        # 检查是否已经存在具有相同 URL 的记录
        existing_materials = db.query(TMaterial).filter(TMaterial.url == material.url).first()
        if not existing_materials:
            # 如果记录不存在，插入新的记录
            db.add(material)
            inserted_materials.append(material)
    # 批量提交所有插入的记录
    db.commit()
    # 刷新所有新插入的记录
    for material in inserted_materials:
        db.refresh(material)
    return inserted_materials
 def get_material_by_id(db, material_id: int):
    return db.query(TMaterial).filter(TMaterial.id == material_id).first()
 def get_material_need_summary(db):
    return db.query(TMaterial).filter(TMaterial.ai_summary == None).all()
 def get_materials_for_generate_reference_message(db, news_type: str) -> list[TMaterial]:
    return db.query(TMaterial).filter(
        TMaterial.type == news_type,
        TMaterial.ai_summary != None,
        TMaterial.is_usage == False
    ).order_by(TMaterial.occurrence_date.desc()).all()
 def get_materials_for_generate_real_estate_reference_message(db) -> list[TMaterial]:
    return db.query(TMaterial).filter(
        TMaterial.primary_category == '新闻类',
        TMaterial.secondary_category == '经济类',
        TMaterial.tertiary_category == '房地产',
        TMaterial.ai_summary != None,
        TMaterial.ai_summary != 'summary formate error',
        TMaterial.is_usage == False
    ).order_by(TMaterial.occurrence_date.desc()).all()
 def get_materials_for_generate_news(db) -> list[TMaterial]:
    return db.query(TMaterial).filter(
        TMaterial.label == '资讯',
        TMaterial.ai_summary != None,
        TMaterial.ai_summary != 'summary formate error',
        TMaterial.is_usage == False
    ).order_by(TMaterial.occurrence_date.desc()).all()
 def get_materials_for_generate_tech_reference_message(db) -> list[TMaterial]:
    return db.query(TMaterial).filter(
        TMaterial.primary_category == '新闻类',
        TMaterial.secondary_category == '科技类',
        TMaterial.ai_summary != None,
        TMaterial.ai_summary != 'summary formate error',
        TMaterial.is_usage == False
    ).order_by(TMaterial.occurrence_date.desc()).all()
 def update_material_by_id(db, news: TMaterial):
    db.merge(news)
    db.commit()
 def update_material(db, material_id: int, updates: dict):
    material = db.query(TMaterial).filter(TMaterial.id == material_id).first()
    if material:
        for key, value in updates.items():
            setattr(material, key, value)
        db.commit()
        db.refresh(material)
    return material
 def delete_material(db, material_id: int):
    material = db.query(TMaterial).filter(TMaterial.id == material_id).first()
    if material:
        db.delete(material)
        db.commit()
    return material
--- a/database/tmaterial/model.py
+++ b/database/tmaterial/model.py
@ -0,0 +1,29 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import Column, String, Boolean, DateTime, BigInteger, text, INT
 from database.database import Base
@dataclass
 class TMaterial(Base):
    __tablename__ = 't_material'
    id: int = Column(BigInteger, primary_key=True, autoincrement=True, comment='编号')
    title: Optional[str] = Column(String, nullable=True, comment='标题')
    summary: Optional[str] = Column(String, nullable=True, comment='摘要')
    url: Optional[str] = Column(String, nullable=True, comment='链接')
    content: Optional[str] = Column(String, nullable=True, comment='内容/正文')
    occurrence_date: Optional[datetime] = Column(DateTime(timezone=True), nullable=True, comment='发布日期')
    source: Optional[str] = Column(String, nullable=True, comment='来源')
    primary_category: str = Column(String, nullable=True, comment='一级类别')
    secondary_category: str = Column(String, nullable=True, comment='二级类别')
    tertiary_category: str = Column(String, nullable=True, comment='三级类别')
    label: str = Column(String, nullable=True, comment='标签')
    lang: str = Column(String, nullable=False, default='zh', comment='语言')
    quality_score: int = Column(INT, nullable=True, comment='质量评分')
    ai_summary: Optional[str] = Column(String, nullable=True, comment='AI摘要')
    ai_title: Optional[str] = Column(String, nullable=True, comment='AI标题')
    ai_content: Optional[str] = Column(String, nullable=True, comment='AI正文')
    is_usage: bool = Column(Boolean, nullable=False, default=False, server_default=text('false'), comment='是否已用')
    create_time: datetime = Column(DateTime(timezone=True), nullable=False, server_default=text('now()'), comment='创建日期')
--- a/database/tnews/crud.py
+++ b/database/tnews/crud.py
@ -0,0 +1,15 @@
 from database.tnews.model import TNews
 def get_news_unprocessed(db):
    return db.query(TNews).filter(
        TNews.is_usage == False,
        TNews.content != None,
        TNews.content != 'not found element'
    ).all()
 def set_news_usage(db, news_list : list[TNews]):
    for news in news_list:
        news.is_usage = True
    db.commit()
--- a/database/tnews/model.py
+++ b/database/tnews/model.py
@ -0,0 +1,25 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import Column, String, Boolean, DateTime, BigInteger, text, INT
 from database.database import Base
@dataclass
 class TNews(Base):
    __tablename__ = 't_news'
    id: int = Column(BigInteger, primary_key=True, autoincrement=True, comment='编号')
    title: Optional[str] = Column(String, nullable=True, comment='标题')
    summary: Optional[str] = Column(String, nullable=True, comment='摘要')
    url: Optional[str] = Column(String, nullable=True, comment='链接')
    content: Optional[str] = Column(String, nullable=True, comment='内容/正文')
    occurrence_date: Optional[datetime] = Column(DateTime(timezone=True), nullable=True, comment='发布日期')
    source: Optional[str] = Column(String, nullable=True, comment='来源')
    primary_category: str = Column(String, nullable=True, comment='一级类别')
    secondary_category: str = Column(String, nullable=True, comment='二级类别')
    tertiary_category: str = Column(String, nullable=True, comment='三级类别')
    label: str = Column(String, nullable=True, comment='标签')
    lang: str = Column(String, nullable=False, default='zh', comment='语言')
    is_usage: bool = Column(Boolean, nullable=False, default=False, server_default=text('false'), comment='是否已用')
    create_time: datetime = Column(DateTime(timezone=True), nullable=False, server_default=text('now()'), comment='创建日期')
--- a/database/tscheduler/crud.py
+++ b/database/tscheduler/crud.py
@ -0,0 +1,35 @@
 from database.tscheduler.model import TScheduler
 def create_task(db, task: TScheduler):
    db.add(task)
    db.commit()
    db.refresh(task)
    return task
 def get_task_by_id(db, task_id: int):
    return db.query(TScheduler).filter(TScheduler.id == task_id).first()
 def get_active_tasks(db):
    return db.query(TScheduler).filter(TScheduler.active == True).all()
 def get_tasks_by_executor(db, executor: str):
    return db.query(TScheduler).filter(
        TScheduler.executor == executor,
        TScheduler.active == True
    ).all()
 def update_task(db, task_id: int, updates: dict):
    task = db.query(TScheduler).filter(TScheduler.id == task_id).first()
    if task:
        for key, value in updates.items():
            setattr(task, key, value)
        db.commit()
        db.refresh(task)
    return task
 def delete_task(db, task_id: int):
    task = db.query(TScheduler).filter(TScheduler.id == task_id).first()
    if task:
        db.delete(task)
        db.commit()
    return task
--- a/database/tscheduler/model.py
+++ b/database/tscheduler/model.py
@ -0,0 +1,26 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import Column, Integer, String, Boolean, Text, DateTime
 from database.database import Base
@dataclass
 class TScheduler(Base):
    __tablename__ = 't_scheduler'
    id: int = Column(Integer, primary_key=True, autoincrement=True, comment='自动递增的唯一任务ID')
    task_name: str = Column(String(64), nullable=False, comment='任务名称')
    trigger: str = Column(String(10), nullable=False, comment='调度方式，interval、cron、date')
    interval_seconds: Optional[int] = Column(Integer, nullable=True, comment='固定时间间隔（秒），用于 interval 类型')
    cron_expression: Optional[str] = Column(String(255), nullable=True, comment='CRON 表达式，用于 cron 类型')
    execution_date: Optional[datetime] = Column(DateTime, nullable=True, comment='执行时间，用于 date 类型')
    task_payload: Optional[str] = Column(Text, nullable=True, comment='任务相关的参数或数据')
    active: Optional[bool] = Column(Boolean, default=False, nullable=True, comment='任务状态，是否启用')
    executor: Optional[str] = Column(String(32), nullable=True, comment='任务执行者')
    handler: Optional[str] = Column(String(32), nullable=True, comment='任务执行程序')
    last_run: Optional[datetime] = Column(DateTime, nullable=True, comment='上一次执行时间')
    next_run: Optional[datetime] = Column(DateTime, nullable=True, comment='下一次执行时间')
    create_time: datetime = Column(DateTime, default=datetime.utcnow, nullable=True, comment='创建时间')
    update_time: datetime = Column(DateTime, default=datetime.utcnow, nullable=True, comment='更新时间')
    module_path: Optional[str] = Column(String(255), nullable=True, comment='任务逻辑所在模块名称')
    function_name: Optional[str] = Column(String(256), nullable=True, comment='任务逻辑的函数名称')
--- a/database/tvideoscript/video_script.py
+++ b/database/tvideoscript/video_script.py
@ -0,0 +1,57 @@
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import Column, String, TIMESTAMP, func
 from database.database import Base, get_session
 from utils import utils
@dataclass
 class VideoScript(Base):
    __tablename__ = 't_video_script'
    id: str = Column(String, primary_key=True, comment='唯一标识')
    title: str = Column(String, nullable=False, comment='标题')
    description: Optional[str] = Column(String, nullable=True, comment='描述')
    keywords: Optional[str] = Column(String, nullable=True, comment='话题关键词')
    url: str = Column(String, nullable=False, comment='话题链接')
    script: str = Column(String, nullable=True, comment='视频脚本')
    content: str = Column(String, nullable=True, comment='话题内容')
    create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
    def __repr__(self):
        return f"<VideoScript(topic={self.topic}, url={self.url}, id={self.id}, description={self.description}, keywords={self.keywords})>"
 def create_video_script(video_script: VideoScript):
    if video_script.id is None:
        video_script.id = utils.get_md5(video_script.url)
    with get_session() as db:
        db.add(video_script)
        db.commit()
        db.refresh(video_script)
        return video_script
 def video_script_not_exists(url_list: list):
    """
    url_list如果在数据库中已经存在，则去除掉
    :param url_list:
    :return:
    """
    with get_session() as db:
        video_scripts = db.query(VideoScript).filter(VideoScript.url.in_(url_list)).all()
        for video_script in video_scripts:
            url_list.remove(video_script.url)
        return url_list
 def get_today_video_script(db):
    today = datetime.now().date()
    return db.query(VideoScript).filter(func.date(VideoScript.create_time) == today).all()
 def update_video_script(db, video_script: VideoScript):
    db.merge(video_script)
    db.commit()
    db.refresh(video_script)
    return video_script
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,7 @@
 version: "3.8"
 services:
  app:
    image: edward:latest
    container_name: edward
    restart: always
--- a/edward.py
+++ b/edward.py
@ -0,0 +1,35 @@
 import datetime
 from functools import partial
 from apscheduler.events import EVENT_JOB_ERROR
 from apscheduler.schedulers.blocking import BlockingScheduler
 from config import config
 from log.log_manager import log, logger
 from task.manager_task import manager_task
 def job_error_listener(event):
    if event.exception:
        logger.error(f"Job {event.job_id} crashed: {str(event.exception)}")
        # 可添加邮件/钉钉告警逻辑
 if __name__ == '__main__':
    scheduler = BlockingScheduler()
    # 每隔config.scheduler_interval秒执行一次任务，同时设定第一次执行在程序启动后10秒后执行
    scheduler.add_job(
        partial(manager_task, scheduler),
        'interval',
        seconds=config.scheduler_interval,
        jitter=30,  # 添加随机抖动避免任务雪崩
        next_run_time=datetime.datetime.now() + datetime.timedelta(seconds=10)  # 替代 date 触发器
    )
    # 添加任务错误监听器
    scheduler.add_listener(job_error_listener, EVENT_JOB_ERROR)
    try:
        log("started successfully.")
        scheduler.start()  # 阻塞运行
    except (KeyboardInterrupt, SystemExit):
        log("Shutting down ...")
--- a/llm/kimi.py
+++ b/llm/kimi.py
@ -0,0 +1,55 @@
 from DrissionPage import Chromium, ChromiumOptions
 from DrissionPage.errors import ElementNotFoundError
 class Kimi:
    def __init__(self, base_timeout=30):
        self.base_timeout = base_timeout
        """Initialize the Chromium browser with specified timeout settings."""
        # co = ChromiumOptions().auto_port()
        co = ChromiumOptions()
        co.set_timeouts(base=self.base_timeout)
        self.browser = Chromium(addr_or_opts=co)
        self.tab = self.browser.latest_tab
        self.tab.get('https://kimi.moonshot.cn/')
        self.first_time = True
    def chat(self, input_message, wait_time=10):
        """Send a message through the chat interface and retrieve the response."""
        try:
            chat_input = self.tab.ele('.editorContentEditable___FZJd9')
            chat_input.wait.enabled()
            chat_input.input(input_message)
            self.tab.wait(3)
            self.tab.ele('#send-button').wait.enabled()
            self.tab.ele('#send-button').click()
            self.tab.wait(wait_time)
            chat_input.input(' ')
            self.tab.ele('#send-button').wait.enabled()
            output = self.tab.ele('#^chat-markdown').text
        except ElementNotFoundError:
            output = ''
        return output
    def generate(self, input_message, wait_time=10):
        if not self.first_time:
            new_session_btn = self.tab.ele('.myAgentTool___Y1_mC')
            new_session_btn.wait.enabled()
            new_session_btn.click()
            self.tab.wait(3)
        self.first_time = False
        return self.chat(input_message, wait_time)
    def search(self, input_message, wait_time=10):
        return self.chat(input_message, wait_time)
    def quit(self):
        self.browser.quit()
 if __name__ == "__main__":
    kimi = Kimi()
    message = '今日新闻'
    response = kimi.search(message)
    kimi.quit()
    print(response)
--- a/llm/local/ollama.py
+++ b/llm/local/ollama.py
@ -0,0 +1,56 @@
 import requests
 class Ollama:
    def __init__(self, model="qwen2.5:14b", server_url="http://192.168.1.200:11434"):
        self.model = model
        self.server_url = server_url
    def is_service_running(self):
        """检查 Ollama 服务是否正在运行"""
        try:
            url = f"{self.server_url}/api/generate"
            headers = {"Content-Type": "application/json"}
            payload = {
                "model": self.model,
                "prompt": "ping",  # 使用轻量级的 prompt 测试
                "stream": False
            }
            output = requests.post(url, headers=headers, json=payload, timeout=60)
            if output.status_code == 200:
                return True
            else:
                return False
        except requests.exceptions.RequestException:
            return False
    def generate_text(self, input_message: str):
        url = f"{self.server_url}/api/generate"
        headers = {"Content-Type": "application/json"}
        payload = {
            "model": self.model,
            "prompt": input_message,
            "stream": False,  # 关闭流式传输
            "options": {
                "temperature": 0.7,
                "top_p": 0.9,
                "num_ctx": 20480
            }
        }
        try:
            output = requests.post(url, headers=headers, json=payload)
            output.raise_for_status()  # 检查HTTP请求是否成功
            result = output.json()
            return result.get("response", "No response from model.")
        except requests.exceptions.RequestException as e:
            return f"Error calling Ollama API: {e}"
 if __name__ == "__main__":
    # 示例提示文本
    ollama = Ollama()
    print("Ollama 服务是否正在运行：", ollama.is_service_running())
    prompt = "请用中文描述机器学习的基本概念。"
    response = ollama.generate_text(prompt)
    print("模型生成的文本：")
    print(response)
--- a/llm/tongyi.py
+++ b/llm/tongyi.py
@ -0,0 +1,41 @@
 from DrissionPage import Chromium, ChromiumOptions
 class TongYi:
    def __init__(self, base_timeout=30):
        self.base_timeout = base_timeout
        """Initialize the Chromium browser with specified timeout settings."""
        # co = ChromiumOptions().auto_port()
        co = ChromiumOptions()
        co.set_timeouts(base=self.base_timeout)
        self.browser = Chromium(addr_or_opts=co)
        self.tab = self.browser.latest_tab
        self.tab.get('https://tongyi.aliyun.com/')
    def chat(self, input_message, wait_time=10):
        """Send a message through the chat interface and retrieve the response."""
        chat_input = self.tab.ele('@placeholder=千事不决问通义')
        chat_input.input(input_message)
        self.tab.ele('.operateBtn--zFx6rSR0').click()
        self.tab.wait(wait_time)
        chat_input.input('over')
        self.tab.ele('.operateBtn--zFx6rSR0').wait.enabled()
        output = self.tab.ele('.containerWrap--lFLVsVCe').text
        return output
    def generate(self, input_message, wait_time=10):
        return self.chat(input_message, wait_time)
    def search(self, input_message, wait_time=10):
        return self.chat(input_message, wait_time)
    def quit(self):
        self.browser.quit()
 if __name__ == "__main__":
    tong_yi = TongYi()
    message = '今日新闻'
    response = tong_yi.search(message)
    tong_yi.quit()
    print(response)
--- a/log/log_manager.py
+++ b/log/log_manager.py
@ -0,0 +1,70 @@
 import logging.config
 import sys
 import config.config
 """
 Usage:
 1 code
 from log.log_manager import logger
 logger.info("Starting Jarvas")
 2 app start
 python demo.py --logconfig=log_prod.config
 当前目录下的log_prod.config是一份参考配置
 """
 # default logging config for development
 LOG_DEV_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "loggers": {
        "root": {
            "level": "INFO",
            "handlers": ["consoleHandler"]
        }
    },
    "handlers": {
        "consoleHandler": {
            "class": "logging.StreamHandler",
            "level": "INFO",
            "formatter": "verbose",
            "stream": sys.stdout
        }
    },
    "formatters": {
        "verbose": {
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S"
        }
    }
 }
 log_config_message = ""
 # 获取命令行参数
 args = sys.argv
 # 查找包含 'logconfig' 的参数
 logconfig_param = next((arg for arg in args if '--logconfig' in arg), None)
 logconfig_value = None
 if logconfig_param:
    # 如果找到了 logconfig 参数，提取其值
    _, logconfig_value = logconfig_param.split('=')  # 以 '=' 分割
    log_config_message = f"--logconfig value: {logconfig_value}"
 else:
    log_config_message = "没有找到 --logconfig 参数，使用默认log配置"
 if logconfig_value:
    # 使用入参日志配置文件
    logging.config.fileConfig(logconfig_value)
 else:
    # 使用默认日志配置
    logging.config.dictConfig(LOG_DEV_CONFIG)
 logger = logging.getLogger('root')
 # 打印日志配置信息
 logger.info(log_config_message)
 def log(message: str):
    logger.info(f'{config.config.scheduler_name} {message}')
--- a/log/log_prod.config
+++ b/log/log_prod.config
@ -0,0 +1,22 @@
 [loggers]
 keys=root
 [handlers]
 keys=fileHandler
 [formatters]
 keys=verbose
 [logger_root]
 level=INFO
 handlers=fileHandler
 [handler_fileHandler]
 class=FileHandler
 level=INFO
 formatter=verbose
 args=('x:/log/edward/app.log', 'a')
 [formatter_verbose]
 format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
 datefmt=%Y-%m-%d %H:%M:%S
--- a/log/log_test.config
+++ b/log/log_test.config
@ -0,0 +1,27 @@
 [loggers]
 keys=root
 [handlers]
 keys=fileHandler,consoleHandler
 [formatters]
 keys=verbose
 [logger_root]
 level=INFO
 handlers=fileHandler,consoleHandler
 [handler_fileHandler]
 class=FileHandler
 level=INFO
 formatter=verbose
 args=('x:/log/edward/app.log', 'a', 'utf-8')
 [handler_consoleHandler]
 class=logging.StreamHandler
 level=INFO
 formatter=verbose
 [formatter_verbose]
 format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
 datefmt=%Y-%m-%d %H:%M:%S
--- a/requirements.txt
+++ b/requirements.txt
--- a/run_ai_summary_with_local_ollama.py
+++ b/run_ai_summary_with_local_ollama.py
@ -0,0 +1,5 @@
 from task.manager_task import execute_task
 from task.summary.ai_summary_ollama import ai_summary_task
 if __name__ == '__main__':
    execute_task(ai_summary_task)
--- a/search/search_from_doubao.py
+++ b/search/search_from_doubao.py
@ -0,0 +1,31 @@
 from DrissionPage import Chromium, ChromiumOptions
 class DouBao:
    def __init__(self, base_timeout=300):
        self.base_timeout = base_timeout
        """Initialize the Chromium browser with specified timeout settings."""
        co = ChromiumOptions().auto_port()
        co.set_timeouts(base=self.base_timeout)
        self.browser = Chromium(addr_or_opts=co)
        self.tab = self.browser.latest_tab
    def search(self, input_message, wait_time=10):
        """Send a message through the chat interface and retrieve the response."""
        self.tab.get('https://www.doubao.com/')
        chat_input = self.tab.ele('@data-testid=chat_input_input')
        chat_input.input(input_message)
        self.tab.ele('#flow-end-msg-send').click()
        self.tab.wait(wait_time)
        self.tab.ele('@data-testid=chat_input_local_break_button').wait.hidden()
        output = self.tab.eles('@data-testid=message_text_content')[1].text
        return output
    def quit(self):
        self.browser.quit()
 if __name__ == "__main__":
    dou_bao = DouBao()
    message = '今日新闻'
    response = dou_bao.search(message)
    print(response)
--- a/search/search_from_kimi.py
+++ b/search/search_from_kimi.py
@ -0,0 +1,35 @@
 from DrissionPage import Chromium, ChromiumOptions
 class Kimi:
    def __init__(self, base_timeout=300):
        self.base_timeout = base_timeout
        """Initialize the Chromium browser with specified timeout settings."""
        co = ChromiumOptions().auto_port()
        co.set_timeouts(base=self.base_timeout)
        self.browser = Chromium(addr_or_opts=co)
        self.tab = self.browser.latest_tab
    def search(self, input_message, wait_time=10):
        """Send a message through the chat interface and retrieve the response."""
        self.tab.get('https://kimi.moonshot.cn/')
        chat_input = self.tab.ele('.editorContentEditable___FZJd9')
        chat_input.wait.enabled()
        chat_input.input(input_message)
        self.tab.wait(10)
        self.tab.ele('#send-button').click()
        self.tab.wait(wait_time)
        chat_input.input(' ')
        self.tab.ele('#send-button').wait.enabled()
        output = self.tab.ele('#^chat-markdown').text
        return output
    def quit(self):
        self.browser.quit()
 if __name__ == "__main__":
    kimi = Kimi()
    message = '今日新闻'
    response = kimi.search(message)
    kimi.quit()
    print(response)
--- a/search/search_from_tongyi.py
+++ b/search/search_from_tongyi.py
@ -0,0 +1,33 @@
 from DrissionPage import Chromium, ChromiumOptions
 class TongYi:
    def __init__(self, base_timeout=300):
        self.base_timeout = base_timeout
        """Initialize the Chromium browser with specified timeout settings."""
        co = ChromiumOptions().auto_port()
        co.set_timeouts(base=self.base_timeout)
        self.browser = Chromium(addr_or_opts=co)
        self.tab = self.browser.latest_tab
    def search(self, input_message, wait_time=10):
        """Send a message through the chat interface and retrieve the response."""
        self.tab.get('https://tongyi.aliyun.com/')
        chat_input = self.tab.ele('@placeholder=千事不决问通义')
        chat_input.input(input_message)
        self.tab.ele('.operateBtn--zFx6rSR0').click()
        self.tab.wait(wait_time)
        chat_input.input('over')
        self.tab.ele('.operateBtn--zFx6rSR0').wait.enabled()
        output = self.tab.ele('.containerWrap--lFLVsVCe').text
        return output
    def quit(self):
        self.browser.quit()
 if __name__ == "__main__":
    tong_yi = TongYi()
    message = '今日新闻'
    response = tong_yi.search(message)
    print(response)
--- a/search/search_manager.py
+++ b/search/search_manager.py
@ -0,0 +1,44 @@
 from database.database_manager import DatabaseManager
 from search.search_from_doubao import DouBao
 from search.search_from_kimi import Kimi
 from search.search_from_tongyi import TongYi
 def send_dispatch_content_inner(content: str, category: str, title: str):
    """send dispatch content"""
    db_manager = DatabaseManager()
    db_manager.send_dispatch_content(category=category, title=title, content=content, ai_generate=1)
 def search(input_text: str):
    """search messages"""
    kimi = Kimi()
    kimi_search_result = kimi.search(input_text)
    kimi.quit()
    tong_yi = TongYi()
    tong_yi_search_result = tong_yi.search(input_text)
    tong_yi.quit()
    dou_bao = DouBao()
    dou_bao_search_result = dou_bao.search(input_text)
    dou_bao.quit()
    return f"{kimi_search_result}\n{tong_yi_search_result}\n{dou_bao_search_result}"
 def summarize(text: str) -> str:
    """Summarizes the given text."""
    prompt = ("请根据提供的草稿，生成一篇完整的文章。\n"
              "要求去重和格式化。\n"
              "整篇文章全部采用并且只使用简洁的有序列表格式，不需要小标题，例如：\n1、今天天气不错\n2、张三捉住一只猫\n"
              "\n以下是草稿内容：\n{}").format(text)
    kimi = Kimi()
    kimi_inference_result = kimi.search(prompt)
    kimi.quit()
    print(kimi_inference_result)
    return kimi_inference_result
 def editor(subject: str, category: str, title: str):
    """这是一个编辑工具，根据提供的主题，进行素材检索和文章生成"""
    draft = search(subject)
    kimi_inference_result = summarize(draft)
    send_dispatch_content_inner(kimi_inference_result, category, title)
--- a/task/default/default_task.py
+++ b/task/default/default_task.py
@ -0,0 +1,8 @@
 import time
 from log.log_manager import logger
 def default_task():
    logger.info("default_task")
    logger.info(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
--- a/task/hot_topic/manual_execution_script.py
+++ b/task/hot_topic/manual_execution_script.py
@ -0,0 +1,67 @@
 from database.database import get_session
 from database.thotcontent.crud import get_hot_content_by_topic_id
 from database.thottopic.crud import get_latest_hot_topic, update_hot_topic, get_hot_topic_by_id
 from llm.local.ollama import Ollama
 from log.log_manager import log
 if __name__ == '__main__':
    with get_session() as db:
        # 1. 获取热点话题
        latest_hot_topic = get_hot_topic_by_id(db, 265)  # 根据话题ID获取特定的热点话题
        # latest_hot_topic = get_latest_hot_topic(db)  # 获取最新的热点话题
        topic = latest_hot_topic.topic
        print(latest_hot_topic)
        # 2. 获取话题内容
        hot_contents = get_hot_content_by_topic_id(db, latest_hot_topic.id)
        for hot_content in hot_contents:
            print(hot_content)
            # 统计hot_content.content的字数
            print(len(hot_content.content))
        topic_content = [hot_content.content for hot_content in hot_contents]
        print(topic_content)
        print(topic_content[0])
        print('----------------------------------------------------')
        print(topic_content[1])
        print('----------------------------------------------------')
        print(topic_content[2])
        print('----------------------------------------------------')
        input_message = (
            """
            你是一个专业的编辑。你的任务是根据提供的话题和素材，生成一片口述稿。
            要求如下：
            1 字数控制在200到1000字之间。
            话题如下：
            """
            +
            topic
            +
            """
            素材是三位网友的见解：
            """
            +
            """
            第一位网友说：
            """
            +
            topic_content[0]
            +
            """
            第二位网友说：
            """
            +
            topic_content[1]
            +
            """
            第三位网友说：
            """
            +
            topic_content[2]
        )
        ollama = Ollama()
        if not ollama.is_service_running():
            log("ai_summary_task finish, ollama service not running")
        else:
            result = ollama.generate_text(input_message)
            log(result)
            latest_hot_topic.ai_script = result
            update_hot_topic(db, latest_hot_topic)
--- a/task/hot_topic/script_task.py
+++ b/task/hot_topic/script_task.py
@ -0,0 +1,73 @@
 import json
 from json import JSONDecodeError
 from database.database import get_session
 from database.tvideoscript.video_script import get_today_video_script, update_video_script
 from llm.local.ollama import Ollama
 from log.log_manager import log
 from task.manager_task import execute_task
 def ai_script_task():
    with get_session() as db:
        # 1. 获取今日的热点话题列表
        video_scripts = get_today_video_script(db)
        if len(video_scripts) == 0:
            log("ai_script_task finish, task size 0")
            return
        log(f"ai script task size {len(video_scripts)}")
        ollama = Ollama()
        if not ollama.is_service_running():
            log("ai_script_task finish, ollama service not running")
            return
        for video_script in video_scripts:
            topic = video_script.title
            log(f'generate script for topic: {topic}')
            # 2. 获取话题内容
            content = video_script.content
            input_message = (
 """
 ## 角色
 - 你是一个资深编辑。
 ## 目标
 - 从输入的素材中选取有用的信息。
 ## 任务描述
 - 从contents中选取最能吸引人的段落或句子，使读者产生兴趣和共鸣。请确保这些内容具有情感张力、戏剧性、趣味性或引发思考的价值。选取两部分内容，分别使用键“content_one”和“content_two”。每部分不少于100个汉字。两部分内容字数之和不多于600个汉字。
 ## 要求
 - 严格遵守字数要求。
 - 直接输出内容。
 - 内容为JSON格式。
 ## 素材如下
 """ + content
            )
            # log(input_message)
            # 3. 调用ollama生成话题脚本
            llm_result = ollama.generate_text(input_message)
            log(llm_result)
            try:
                llm_result = json.loads(llm_result)
            except JSONDecodeError as e:
                log(f"ai_script_task error: {e}. skip topic: {topic}")
                continue
            video_script.script = (
 f"""{video_script.title}
 {video_script.description}
 一位网友说：
 {llm_result["content_one"]}
 另一位网友说:
 {llm_result["content_two"]}
 关于这个问题大家有什么看法呢？
 欢迎评论区留言
 """
            )
            # 4. 保存话题脚本
            update_video_script(db, video_script)
 if __name__ == "__main__":
    execute_task(ai_script_task)
--- a/task/manager_task.py
+++ b/task/manager_task.py
@ -0,0 +1,108 @@
 import importlib
 import time
 from functools import partial
 from apscheduler.schedulers.blocking import BlockingScheduler
 from config import config
 from database.database import get_session
 from database.tscheduler.crud import get_tasks_by_executor
 from log.log_manager import log
 """
 这是一个特殊的任务，负责管理任务，命名为管理者任务。
 工作流程：
 1 检索数据库任务数据表
 2 检查是否已经在任务队列中，如果不在则添加
 任务执行时间间隔为600秒。
 """
 def log_task_execution(task_name: str, start_time: float, end_time: float = None):
    """辅助函数，记录任务的开始和结束日志"""
    start_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start_time))
    end_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))
    if end_time is None:
        log(f"{task_name} start execute at {start_time_str}")
    else:
        elapsed_time = end_time - start_time
        log(f"{task_name} end execute at {end_time_str}, use time {elapsed_time:.2f} seconds")
 def execute_task(task: callable):
    """执行任务并记录日志"""
    start_time = time.time()
    log_task_execution(task.__name__, start_time)  # 先记录开始时间
    task()
    end_time = time.time()
    log_task_execution(task.__name__, start_time, end_time)  # 记录结束时间
 # 从数据库加载任务
 def load_tasks(scheduler: BlockingScheduler):
    with get_session() as db:
        tasks = get_tasks_by_executor(db, config.scheduler_name)
        for task in tasks:
            module_path = task.module_path
            function_name = task.function_name
            trigger = task.trigger
            interval_seconds = task.interval_seconds
            task_id = task.id
            # 动态导入模块和函数
            module = importlib.import_module(module_path)
            task_function = partial(execute_task, getattr(module, function_name))
            # 检查任务是否已存在
            if not scheduler.get_job(str(task_id)):
                if trigger == "interval":
                    scheduler.add_job(
                        task_function,
                        "interval",
                        seconds=interval_seconds,
                        id=str(task_id),
                        replace_existing=True
                    )
                    log(f"Task {task.task_name} added with interval {interval_seconds} seconds")
                elif trigger == "cron":
                    # 解析 cron 表达式的字段
                    fields = task.cron_expression.split()
                    # 确保字段长度符合七字段格式
                    if len(fields) != 7:
                        raise ValueError("无效的 Quartz cron 表达式")
                    # 替换 Quartz 风格的 `?` 为 APScheduler 可接受的 `*`
                    if fields[5] == '?':
                        fields[5] = '*'  # 替换 `day_of_week` 字段中的 `?`
                    # 使用 cron 表达式的字段添加任务
                    scheduler.add_job(
                        task_function,  # 要执行的任务
                        'cron',  # 使用 cron 触发器
                        second=fields[0],  # 秒
                        minute=fields[1],  # 分钟
                        hour=fields[2],  # 小时
                        day=fields[3],  # 日期
                        month=fields[4],  # 月份
                        day_of_week=fields[5],  # 星期
                        year=fields[6],  # 年份
                        id=str(task_id),
                        replace_existing=True
                    )
                    log(f"Task {task.task_name} added with cron {task.cron_expression}")
                elif trigger == "date":
                    scheduler.add_job(
                        task_function,
                        "date",
                        run_date=task["run_date_and_time"],
                        id=str(task_id),
                        replace_existing=True
                    )
                    log(f"Task {task.task_name} added with date {task.execution_date}")
                else:
                    log(f"Invalid trigger type: {trigger}")
 # 管理者任务
 def manager_task(scheduler: BlockingScheduler):
    load_tasks(scheduler)
--- a/task/material_distribution.py
+++ b/task/material_distribution.py
@ -0,0 +1,17 @@
 from database.database import get_session
 from database.tmaterial.crud import receive_news
 from database.tnews.crud import get_news_unprocessed, set_news_usage
 from log.log_manager import log
 from task.manager_task import execute_task
 def distribution_task():
    with get_session() as db:
        news_list = get_news_unprocessed(db)
        receive_news(db, news_list)
        set_news_usage(db, news_list)
        log(f'distributed {len(news_list)} news')
 if __name__ == '__main__':
    execute_task(distribution_task)
--- a/task/news/reference_message.py
+++ b/task/news/reference_message.py
@ -0,0 +1,49 @@
 import re
 import time
 from database.database import get_session
 from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content
 from database.tcontentdispatch.model import TContentDispatch
 from database.tmaterial.crud import update_material_by_id, \
    get_materials_for_generate_news
 from log.log_manager import log
 from task.manager_task import execute_task
 def generate_news_task():
    with get_session() as db:
        # 1. 构建今日新闻文章标题，格式：今日新闻yyyy-MM-dd
        title = ("今日新闻" +
                 time.strftime("%Y", time.localtime()) + '年' +
                 time.strftime("%m", time.localtime()) + '月' +
                 time.strftime("%d", time.localtime()) + '日')
        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
        content_dispatch = get_content_by_title_and_category(db, title, "新鲜事")
        content = ""
        if content_dispatch is not None:
            content = content_dispatch.content
            # 从最后一条获取并计算开始编号
            result = re.findall(r'(?<!\.)\d+\. ', content)
            start_num = int(re.findall(r'\d+', result[-1])[-1]) + 1
        else:
            content_dispatch = TContentDispatch(category="新鲜事", title=title, ai_generate=1)
            start_num = 1
        # 3. 从新闻素材数据表获取房产类的新闻列表
        news_list = get_materials_for_generate_news(db)
        # 4. 拼接成文章正文content
        for i, news in enumerate(news_list, start=start_num):  # Using enumerate to control the index starting from 1
            content += f"{i}. {news.ai_summary}\n"
        # 5. 把content写入数据库
        if content is not None and content != "" and news_list is not None:
            content_dispatch.content = content
            content_dispatch.is_sent = False
            create_or_update_content(db, content_dispatch)
        # 6. 把news_list更新入数据库更新字段is_usage为True
        for news in news_list:
            news.is_usage = True
            update_material_by_id(db, news)
        log(f"generate_news_task finish, news count {start_num + len(news_list)}")
 if __name__ == "__main__":
    execute_task(generate_news_task)
--- a/task/news/revisal.py
+++ b/task/news/revisal.py
@ -0,0 +1,54 @@
 import time
 from database.database import get_session
 from database.tcontentdispatch.curd import get_content_by_title_and_category, update
 from llm.local.ollama import Ollama
 from log.log_manager import log
 from task.manager_task import execute_task
 def ai_edit(input_message: str) -> str:
    log(f"ai_edit start execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
    ollama = Ollama()
    response = ollama.generate_text(input_message)
    log(f"ai_edit end execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
    return response
 def revisal_task():
    with get_session() as db:
        # 1. 构建今日新闻文章标题，格式：今日新闻yyyy-MM-dd
        title = ("今日新闻" +
                 time.strftime("%Y", time.localtime()) + '年' +
                 time.strftime("%m", time.localtime()) + '月' +
                 time.strftime("%d", time.localtime()) + '日')
        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
        content_dispatch = get_content_by_title_and_category(db, title, "新鲜事")
        ai_content = ""
        if content_dispatch and content_dispatch.content:
            # 3. 执行AI编辑
            input_message = (('按照规则编辑提供的内容。规则如下：\n'
                             '1 去除重复内容\n'
                             '2 不要故意删除内容\n'
                             '3 重新编号\n'
                             '4 不要出现空行\n'
                             '5 不要出现类似"以下是根据您提供的规则编辑后的内容"等提示信息，直接输出编辑后的内容\n'
                             '内容如下：\n')
                             + content_dispatch.content)
            ai_content = ai_edit(input_message)
            print(content_dispatch.content)
            print("-----------------------------------------------------------")
            print(ai_content)
        # 4. 去掉ai_content中的空行
        ai_content = "\n".join([line for line in ai_content.split("\n") if line.strip()])
        # 5. 把content写入数据库
        if ai_content:
            content_dispatch.ai_content = ai_content
            content_dispatch.is_sent = False
            update(db)
        # 获取ai_content的行数
        lines = ai_content.strip().split("\n")
        log(f"revisal news task finish, news count: {len(lines)}, news words: {len(ai_content)}")
 if __name__ == "__main__":
    execute_task(revisal_task)
--- a/task/real_estate/reference_message.py
+++ b/task/real_estate/reference_message.py
@ -0,0 +1,47 @@
 import re
 import time
 from database.database import get_session
 from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content
 from database.tcontentdispatch.model import TContentDispatch
 from database.tmaterial.crud import update_material_by_id, \
    get_materials_for_generate_real_estate_reference_message
 from task.manager_task import execute_task
 def generate_real_estate_reference_message_task():
    with get_session() as db:
        # 1. 构建楼市参考消息文章标题，格式：楼市参考消息yyyy-MM-dd
        title = ("楼市参考消息" +
                 time.strftime("%Y", time.localtime()) + '年' +
                 time.strftime("%m", time.localtime()) + '月' +
                 time.strftime("%d", time.localtime()) + '日')
        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
        content_dispatch = get_content_by_title_and_category(db, title, "房地产")
        content = ""
        if content_dispatch is not None:
            content = content_dispatch.content
            # 从最后一条获取并计算开始编号
            result = re.findall(r'(?<!\.)\d+\. ', content)
            start_num = int(re.findall(r'\d+', result[-1])[-1]) + 1
        else:
            content_dispatch = TContentDispatch(category="房地产", title=title, ai_generate=1)
            start_num = 1
        # 3. 从新闻素材数据表获取房产类的新闻列表
        news_list = get_materials_for_generate_real_estate_reference_message(db)
        # 4. 拼接成文章正文content
        for i, news in enumerate(news_list, start=start_num):  # Using enumerate to control the index starting from 1
            content += f"{i}. {news.ai_summary}\n"
        # 5. 把content写入数据库
        if content is not None and news_list is not None:
            content_dispatch.content = content
            content_dispatch.is_sent = False
            create_or_update_content(db, content_dispatch)
        # 6. 把news_list更新入数据库更新字段is_usage为True
        for news in news_list:
            news.is_usage = True
            update_material_by_id(db, news)
 if __name__ == "__main__":
    execute_task(generate_real_estate_reference_message_task)
--- a/task/real_estate/revisal.py
+++ b/task/real_estate/revisal.py
@ -0,0 +1,50 @@
 import time
 from database.database import get_session
 from database.tcontentdispatch.curd import get_content_by_title_and_category, update
 from llm.local.ollama import Ollama
 from log.log_manager import log
 from task.manager_task import execute_task
 def ai_edit_with_ollama(input_message: str) -> str:
    log(f"ai_edit_with_ollama start execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
    ollama = Ollama()
    response = ollama.generate_text(input_message)
    log(f"ai_edit_with_ollama end execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}")
    return response
 def revisal_task():
    with get_session() as db:
        # 1. 构建楼市参考消息文章标题，格式：楼市参考消息yyyy-MM-dd
        title = ("楼市参考消息" +
                 time.strftime("%Y", time.localtime()) + '年' +
                 time.strftime("%m", time.localtime()) + '月' +
                 time.strftime("%d", time.localtime()) + '日')
        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
        content_dispatch = get_content_by_title_and_category(db, title, "房地产")
        ai_content = ""
        if content_dispatch and content_dispatch.content:
            print(content_dispatch.content)
            # 3. 执行AI编辑
            input_message = (('按照规则编辑提供的内容。规则如下：\n'
                             '1 以每行内容作为一个处理单元，去掉与房地产、楼市无关的内容。如果整行内容都无关则全部去掉，否则全部保留。\n'
                             '2 去除重复内容\n'
                             '3 不要故意删除内容\n'
                             '5 不要出现空行\n'
                             '6 重新编号\n'
                             '7 不要出现类似"以下是根据您提供的规则编辑后的内容"等提示信息，直接输出编辑后的内容\n'
                             '内容如下：\n')
                             + content_dispatch.content)
            ai_content = ai_edit_with_ollama(input_message)
            print("-----------------------------------------------------------")
            print(ai_content)
        # 4. 把content写入数据库
        if ai_content:
            content_dispatch.ai_content = ai_content
            content_dispatch.is_sent = False
            update(db)
 if __name__ == "__main__":
    execute_task(revisal_task)
--- a/task/summary/ai_summary.py
+++ b/task/summary/ai_summary.py
@ -0,0 +1,38 @@
 from database.database import get_session
 from database.tmaterial.crud import get_material_need_summary, update_material
 from llm.kimi import Kimi
 from log.log_manager import log
 from task.manager_task import execute_task
 def ai_summary(input_message: str, instance) -> str:
    response = instance.generate(input_message)
    log(response)
    return response
 def ai_summary_task():
    with get_session() as db:
        news_list = get_material_need_summary(db)
        if len(news_list) == 0:
            log("ai_summary_task finish, task size 0")
            return
        kimi = Kimi()
        for news in news_list:
            # input_message = news.url + "\n阅读并生成百字以内的摘要"
            # input_message = "请将以下文章内容用一句话进行概括，提炼出最核心的观点或主题，简洁明了，不超过 100 字：\n" + news.url
            log(news.url)
            input_message = (
                    "请将以下新闻内容提炼成一句话，简洁明了地传达事件的核心信息，包括谁、什么、何时、何地和为何，字数不超过 100 字："
                    + news.url
            )
            summary = ai_summary(input_message, kimi)
            updates = {"ai_summary": summary}
            update_material(db, news.id, updates)
        kimi.quit()
        log(f"ai_summary_task finish, task size {len(news_list)}")
 if __name__ == '__main__':
    # input_message_ = "https://www.ofweek.com/ai/2024-12/ART-201700-8500-30653318.html\n阅读并生成200字以内的摘要"
    # logger.info(ai_summary(input_message_))
    execute_task(ai_summary_task)
--- a/task/summary/ai_summary_ollama.py
+++ b/task/summary/ai_summary_ollama.py
@ -0,0 +1,53 @@
 from database.database import get_session
 from database.tmaterial.crud import get_material_need_summary, update_material
 from llm.local.ollama import Ollama
 from log.log_manager import log
 from task.manager_task import execute_task
 def ai_summary(input_message: str, instance) -> str:
    response = instance.generate(input_message)
    log(response)
    return response
 def ai_summary_task():
    with get_session() as db:
        news_list = get_material_need_summary(db)
        if len(news_list) == 0:
            log("ai_summary_task finish, task size 0")
            return
        log(f"ai summary task size {len(news_list)}")
        ollama = Ollama()
        if not ollama.is_service_running():
            log("ai_summary_task finish, ollama service not running")
            return
        for news in news_list:
            input_message = (
 """
 请为以下新闻生成严格单段落的中文摘要，要求：
 1. 保持段落连贯性，不使用任何分段符号（包括空行、缩进或序号）
 2. 核心要素按此顺序呈现：
   [时间]>[地点]>[主体机构]>[关键事件]>[量化影响]
 3. 采用"总-分"结构：
   - 首句陈述核心事实（包含最关键的时间地点主体）
   - 中间展开关键细节（使用衔接词：同时/此外/值得注意的是）
   - 结尾说明当前状态/后续影响
 4. 字数严格控制在100个字符以内
 5. 禁止使用项目符号、引文格式等非连贯文本元素
 新闻原文：
 """
 + news.content
            )
            summary = ollama.generate_text(input_message)
            log(f'{news.url} {summary}')
            # 判断summary是否是一段话
            if '\n' in summary:
                summary = 'summary formate error'
            updates = {"ai_summary": summary}
            update_material(db, news.id, updates)
        log(f"ai_summary_task finish, task size {len(news_list)}")
 if __name__ == '__main__':
    execute_task(ai_summary_task)
--- a/task/tech/reference_message.py
+++ b/task/tech/reference_message.py
@ -0,0 +1,47 @@
 import re
 import time
 from database.database import get_session
 from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content
 from database.tcontentdispatch.model import TContentDispatch
 from database.tmaterial.crud import update_material_by_id, \
    get_materials_for_generate_tech_reference_message
 from task.manager_task import execute_task
 def generate_reference_message_task():
    with get_session() as db:
        # 1. 构建AI参考消息文章标题，格式：楼市参考消息yyyy-MM-dd
        title = ("科技参考消息" +
                 time.strftime("%Y", time.localtime()) + '年' +
                 time.strftime("%m", time.localtime()) + '月' +
                 time.strftime("%d", time.localtime()) + '日')
        # 2. 从内容分发数据表获取当前标题和分类的文章是否存在
        content_dispatch = get_content_by_title_and_category(db, title, "科技")
        content = ""
        if content_dispatch is not None:
            content = content_dispatch.content
            # 从最后一条获取并计算开始编号
            result = re.findall(r'(?<!\.)\d+\. ', content)
            start_num = int(re.findall(r'\d+', result[-1])[-1]) + 1
        else:
            content_dispatch = TContentDispatch(category="科技", title=title, ai_generate=1)
            start_num = 1
        # 3. 从新闻素材数据表获取AI类的新闻列表
        news_list = get_materials_for_generate_tech_reference_message(db)
        # 4. 拼接成文章正文content
        for i, news in enumerate(news_list, start=start_num):  # Using enumerate to control the index starting from 1
            content += f"{i}. {news.ai_summary}\n"
        # 5. 把content写入数据库
        if content is not None and news_list is not None:
            content_dispatch.content = content
            content_dispatch.is_sent = False
            create_or_update_content(db, content_dispatch)
        # 6. 把news_list更新入数据库更新字段is_usage为True
        for news in news_list:
            news.is_usage = True
            update_material_by_id(db, news)
 if __name__ == "__main__":
    execute_task(generate_reference_message_task)
--- a/utils/time_utils.py
+++ b/utils/time_utils.py
@ -0,0 +1,50 @@
 import datetime
 import re
 from log.log_manager import logger
 def process_time(time_str):
    """Processes and converts a time string into a datetime object."""
    current_time = datetime.datetime.now(datetime.timezone.utc)
    if '分钟前' in time_str:
        minutes = int(time_str.split('分钟前')[0])
        occurrence_time = current_time - datetime.timedelta(minutes=minutes)
    elif '小时前' in time_str:
        hours = int(time_str.split('小时前')[0])
        occurrence_time = current_time - datetime.timedelta(hours=hours)
    elif '昨天' in time_str:
        occurrence_time = current_time - datetime.timedelta(days=1)
    elif '天前' in time_str:
        occurrence_time = current_time - datetime.timedelta(days=int(time_str.split('天前')[0]))
    elif '昨天' in time_str:
        # time_str = '昨天HH:mm'
        time_part = time_str.split('昨天')[-1].strip()
        occurrence_time = (current_time - datetime.timedelta(days=1)).replace(
            hour=int(time_part.split(':')[0]),
            minute=int(time_part.split(':')[1]),
            second=0
        )
    elif '前天' in time_str:
        # time_str = '前天HH:mm'
        time_part = time_str.split('前天')[-1].strip()
        occurrence_time = (current_time - datetime.timedelta(days=2)).replace(
            hour=int(time_part.split(':')[0]),
            minute=int(time_part.split(':')[1]),
            second=0
        )
    elif '年' in time_str and '月' in time_str and '日' in time_str:
        time_pattern = r"(\d{4}年\d{1,2}月\d{1,2}日 \d{1,2}:\d{2})"
        match = re.search(time_pattern, time_str)
        time_str = match.group(1)
        occurrence_time = datetime.datetime.strptime(time_str, "%Y年%m月%d日 %H:%M")
    elif '/' in time_str:
        occurrence_time = datetime.datetime.strptime(time_str, "%Y/%m/%d %H:%M:%S")
    else:
        try:
            occurrence_time = datetime.datetime.strptime(time_str, '%Y-%m-%d')
        except ValueError:
            logger.error(f"Unable to parse date: {time_str}")
            occurrence_time = current_time
    return occurrence_time
--- a/utils/utils.py
+++ b/utils/utils.py
@ -0,0 +1,6 @@
 def get_md5(url) -> str:
    import hashlib
    m = hashlib.md5()
    m.update(url.encode('utf-8'))
    return m.hexdigest()