From 5267db8a0d2746a0f4dc1f49a80a3007a01f72f9 Mon Sep 17 00:00:00 2001 From: konjacpotato Date: Wed, 12 Nov 2025 21:19:26 +0800 Subject: [PATCH] import edward --- .dockerignore | 6 + .gitea/workflows/deploy-workflow.yml | 30 +++++ .gitignore | 1 + Dockerfile | 19 ++++ Readme.md | 9 ++ config/config.py | 4 + database/database.py | 37 +++++++ database/tcontentdispatch/curd.py | 50 +++++++++ database/tcontentdispatch/model.py | 27 +++++ database/thotcontent/crud.py | 77 +++++++++++++ database/thotcontent/model.py | 23 ++++ database/thottopic/crud.py | 87 +++++++++++++++ database/thottopic/model.py | 34 ++++++ database/tmaterial/crud.py | 129 ++++++++++++++++++++++ database/tmaterial/model.py | 29 +++++ database/tnews/crud.py | 15 +++ database/tnews/model.py | 25 +++++ database/tscheduler/crud.py | 35 ++++++ database/tscheduler/model.py | 26 +++++ database/tvideoscript/video_script.py | 57 ++++++++++ docker-compose.yml | 7 ++ edward.py | 35 ++++++ llm/kimi.py | 55 +++++++++ llm/local/ollama.py | 56 ++++++++++ llm/tongyi.py | 41 +++++++ log/log_manager.py | 70 ++++++++++++ log/log_prod.config | 22 ++++ log/log_test.config | 27 +++++ requirements.txt | Bin 0 -> 1164 bytes run_ai_summary_with_local_ollama.py | 5 + search/search_from_doubao.py | 31 ++++++ search/search_from_kimi.py | 35 ++++++ search/search_from_tongyi.py | 33 ++++++ search/search_manager.py | 44 ++++++++ task/default/default_task.py | 8 ++ task/hot_topic/manual_execution_script.py | 67 +++++++++++ task/hot_topic/script_task.py | 73 ++++++++++++ task/manager_task.py | 108 ++++++++++++++++++ task/material_distribution.py | 17 +++ task/news/reference_message.py | 49 ++++++++ task/news/revisal.py | 54 +++++++++ task/real_estate/reference_message.py | 47 ++++++++ task/real_estate/revisal.py | 50 +++++++++ task/summary/ai_summary.py | 38 +++++++ task/summary/ai_summary_ollama.py | 53 +++++++++ task/tech/reference_message.py | 47 ++++++++ utils/time_utils.py | 50 +++++++++ utils/utils.py | 6 + 48 files changed, 1848 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitea/workflows/deploy-workflow.yml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 Readme.md create mode 100644 config/config.py create mode 100644 database/database.py create mode 100644 database/tcontentdispatch/curd.py create mode 100644 database/tcontentdispatch/model.py create mode 100644 database/thotcontent/crud.py create mode 100644 database/thotcontent/model.py create mode 100644 database/thottopic/crud.py create mode 100644 database/thottopic/model.py create mode 100644 database/tmaterial/crud.py create mode 100644 database/tmaterial/model.py create mode 100644 database/tnews/crud.py create mode 100644 database/tnews/model.py create mode 100644 database/tscheduler/crud.py create mode 100644 database/tscheduler/model.py create mode 100644 database/tvideoscript/video_script.py create mode 100644 docker-compose.yml create mode 100644 edward.py create mode 100644 llm/kimi.py create mode 100644 llm/local/ollama.py create mode 100644 llm/tongyi.py create mode 100644 log/log_manager.py create mode 100644 log/log_prod.config create mode 100644 log/log_test.config create mode 100644 requirements.txt create mode 100644 run_ai_summary_with_local_ollama.py create mode 100644 search/search_from_doubao.py create mode 100644 search/search_from_kimi.py create mode 100644 search/search_from_tongyi.py create mode 100644 search/search_manager.py create mode 100644 task/default/default_task.py create mode 100644 task/hot_topic/manual_execution_script.py create mode 100644 task/hot_topic/script_task.py create mode 100644 task/manager_task.py create mode 100644 task/material_distribution.py create mode 100644 task/news/reference_message.py create mode 100644 task/news/revisal.py create mode 100644 task/real_estate/reference_message.py create mode 100644 task/real_estate/revisal.py create mode 100644 task/summary/ai_summary.py create mode 100644 task/summary/ai_summary_ollama.py create mode 100644 task/tech/reference_message.py create mode 100644 utils/time_utils.py create mode 100644 utils/utils.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..28db922 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.git +.gitea +.gitignore +Readme.md +Dockerfile +docker-compose.yml \ No newline at end of file diff --git a/.gitea/workflows/deploy-workflow.yml b/.gitea/workflows/deploy-workflow.yml new file mode 100644 index 0000000..d4730b4 --- /dev/null +++ b/.gitea/workflows/deploy-workflow.yml @@ -0,0 +1,30 @@ +name: Gitea Actions Demo +run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀 +on: [push] + +jobs: + deploy: + runs-on: ubuntu-latest + container: + image: gitea/runner-images:ubuntu-latest + steps: + - name: clone project code + run: git clone ${{ gitea.server_url }}/${{ gitea.repository }} . + - name: List files + run: ls -la + - name: Stop running containers + run: | + docker compose down || true + - name: Remove old image + run: | + IMAGE_NAME=$(basename "$PWD") + echo "Removing old image: $IMAGE_NAME" + docker images | grep "$IMAGE_NAME" && docker rmi -f $(docker images "$IMAGE_NAME" -q) || echo "No old image found." + - name: Build new image + run: | + docker build -t $(basename "$PWD"):latest . + - name: Start containers + run: | + docker compose up -d + - name: Show container status + run: docker ps \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ed8ebf5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..edba11a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +# 使用官方轻量级 Python 基础镜像 +FROM python:3.12-slim + +# 设置工作目录(容器内路径) +WORKDIR /app + +# 将项目文件复制到容器中 +COPY . /app + +# (可选)如果你有 requirements.txt,则先复制并安装依赖 +RUN if [ -f requirements.txt ]; then \ + pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/; \ + fi + +# 设置环境变量(防止 Python 缓存文件) +ENV PYTHONUNBUFFERED=1 + +# 启动命令 +CMD ["python", "edward.py"] diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..4aaca88 --- /dev/null +++ b/Readme.md @@ -0,0 +1,9 @@ +# Edward(缩写为Ed) + +许多编辑职位上的名字为 Ed(也与“编辑”Editor的缩写一致) + +## environment + +- conda create -n edward python=3.12 +- conda activate edward +- pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple \ No newline at end of file diff --git a/config/config.py b/config/config.py new file mode 100644 index 0000000..a8ac3aa --- /dev/null +++ b/config/config.py @@ -0,0 +1,4 @@ +# scheduler name +scheduler_name = 'edward' +# scheduler interval in seconds +scheduler_interval = 3600 \ No newline at end of file diff --git a/database/database.py b/database/database.py new file mode 100644 index 0000000..c80c549 --- /dev/null +++ b/database/database.py @@ -0,0 +1,37 @@ +from contextlib import contextmanager + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, declarative_base + +from log.log_manager import logger + +Base = declarative_base() + +DATABASE_URL = 'postgresql+psycopg://postgres:K8u3fg0o@47.119.128.161:60001/squirrel' +engine = create_engine( + DATABASE_URL, + pool_size=10, + max_overflow=20, + pool_timeout=30, + pool_recycle=1800, # 防止数据库端连接过期 + connect_args={ + 'connect_timeout': 15, + 'keepalives_idle': 60, + 'keepalives_interval': 10, + 'keepalives_count': 5 + } +) +Base.metadata.create_all(engine) + +@contextmanager +def get_session(): + session = sessionmaker(bind=engine)() + try: + yield session + session.commit() # 自动提交成功的事务 + except Exception as e: + session.rollback() # 异常时回滚 + logger.error(f"Database operation failed: {str(e)}") + raise # 重新抛出异常 + finally: + session.close() # 确保会话关闭 \ No newline at end of file diff --git a/database/tcontentdispatch/curd.py b/database/tcontentdispatch/curd.py new file mode 100644 index 0000000..b453250 --- /dev/null +++ b/database/tcontentdispatch/curd.py @@ -0,0 +1,50 @@ +from database.tcontentdispatch.model import TContentDispatch + + +def create_content(db, content: TContentDispatch): + db.add(content) + db.commit() + db.refresh(content) + return content + + +def create_or_update_content(db, content: TContentDispatch): + content_in_db = db.query(TContentDispatch).filter(TContentDispatch.id == content.id).first() + + if content_in_db: + # Update existing content + db.commit() # Save changes to the database + db.refresh(content) # Refresh the object with the updated values + else: + # Create new content if not found in DB + db.add(content) + db.commit() # Save new content to the database + db.refresh(content) # Refresh to get the content's updated state (e.g., ID if it's auto-generated) + +def get_content_by_id(db, content_id: int): + return db.query(TContentDispatch).filter(TContentDispatch.id == content_id).first() + +def get_content_by_title_and_category(db, title: str, category: str): + return db.query(TContentDispatch).filter( + TContentDispatch.title == title, + TContentDispatch.category == category + ).first() + +def update_content(db, content_id: int, updates: dict): + content = db.query(TContentDispatch).filter(TContentDispatch.id == content_id).first() + if content: + for key, value in updates.items(): + setattr(content, key, value) + db.commit() + db.refresh(content) + return content + +def update(db): + db.commit() + +def delete_content(db, content_id: int): + content = db.query(TContentDispatch).filter(TContentDispatch.id == content_id).first() + if content: + db.delete(content) + db.commit() + return content diff --git a/database/tcontentdispatch/model.py b/database/tcontentdispatch/model.py new file mode 100644 index 0000000..0620818 --- /dev/null +++ b/database/tcontentdispatch/model.py @@ -0,0 +1,27 @@ +from sqlalchemy import Column, Integer, String, Text, Boolean, TIMESTAMP, func +from sqlalchemy.dialects.postgresql import BIGINT +from dataclasses import dataclass +from database.database import Base + +@dataclass +class TContentDispatch(Base): + __tablename__ = 't_content_dispatch' + + id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='自动递增的唯一任务ID') + creation_date: str = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='记录数据条目的创建时间') + is_sent: bool = Column(Boolean, default=False, nullable=False, comment='表示数据条目是否已被发送') + category: str = Column(String(255), nullable=False, comment='类别') + title: str = Column(String(255), nullable=False, comment='标题') + cover_image: str = Column(Text, nullable=True, comment='封面') + poster_image: str = Column(Text, nullable=True, comment='海报') + opening_text: str = Column(Text, nullable=True, comment='开头语') + content: str = Column(Text, nullable=False, comment='内容') + ai_content: str = Column(Text, nullable=True, comment='AI编辑的内容') + closing_text: str = Column(Text, nullable=True, comment='结束语') + is_scheduled: bool = Column(Boolean, default=False, nullable=False, comment='是否设置为定时发送') + schedule_time: str = Column(TIMESTAMP, nullable=True, comment='定时发送的具体时间') + format: str = Column(String(50), nullable=True, comment='数据条目的格式') + ai_generate: int = Column(Integer, default=0, nullable=False, comment='是否AI生成。0否,1部分参与,2是。') + + def __repr__(self): + return f"" diff --git a/database/thotcontent/crud.py b/database/thotcontent/crud.py new file mode 100644 index 0000000..3ebd192 --- /dev/null +++ b/database/thotcontent/crud.py @@ -0,0 +1,77 @@ +from database.thotcontent.model import THotContent +from log.log_manager import logger + + +def create_hot_content(db, hot_content: THotContent): + db.add(hot_content) + db.commit() + db.refresh(hot_content) + return hot_content + +# 插入数据库之前判断数据库中是否已经存在,根据news.url 判断 +def create_content_if_url_not_exists(db, hot_content: THotContent): + # 检查是否已经存在具有相同 URL 的记录 + existing_content = db.query(THotContent).filter(THotContent.url == hot_content.url).first() + + if existing_content: + # 如果记录已存在,直接返回已有的记录 + return existing_content + + # 如果记录不存在,插入新的记录 + db.add(hot_content) + db.commit() + db.refresh(hot_content) + return hot_content + + +def create_contents_top3_if_url_not_exists(db, contents: list[THotContent]): + logger.info(f"采集到内容数量:{len(contents)},存入数据库前三") + # 按照 THotContent.content_upvote_count 对contents进行排序 + contents.sort(key=lambda x: x.content_upvote_count, reverse=True) + + # 保留 contents 的前3条 + contents = contents[:3] + + inserted_contents = [] # 用于保存实际插入的新闻记录 + + for content in contents: + # 检查是否已经存在具有相同 URL 的记录 + existing_content = db.query(THotContent).filter(THotContent.url == content.url).first() + + if not existing_content: + # 如果记录不存在,插入新的记录 + db.add(content) + inserted_contents.append(content) + + # 批量提交所有插入的记录 + db.commit() + + # 刷新所有新插入的记录 + for content in inserted_contents: + db.refresh(content) + + return inserted_contents + +def get_hot_content_by_id(db, hot_content_id: int): + return db.query(THotContent).filter(THotContent.id == hot_content_id).first() + +def get_hot_content_by_topic_id(db, topic_id: int): + return db.query(THotContent).filter(THotContent.topic_id == topic_id).all() + +def get_hot_contents(db, skip: int = 0, limit: int = 100): + return db.query(THotContent).offset(skip).limit(limit).all() + +def update_hot_content(db, hot_content_id: int, updates: dict): + hot_content = db.query(THotContent).filter(THotContent.id == hot_content_id).first() + if hot_content: + for key, value in updates.items(): + setattr(hot_content, key, value) + db.commit() + db.refresh(hot_content) + return hot_content + +def delete_hot_content(db, hot_content_id: int): + hot_content = db.query(THotContent).filter(THotContent.id == hot_content_id).first() + if hot_content: + db.delete(hot_content) + db.commit() \ No newline at end of file diff --git a/database/thotcontent/model.py b/database/thotcontent/model.py new file mode 100644 index 0000000..49470e6 --- /dev/null +++ b/database/thotcontent/model.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from sqlalchemy import Column, Integer, String, BIGINT, TIMESTAMP, func +from sqlalchemy.dialects.postgresql import BIGINT + +from database.database import Base + +@dataclass +class THotContent(Base): + __tablename__ = 't_hot_content' + + id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='序号') + topic_id: int = Column(BIGINT, nullable=False, comment='关联话题ID') + url: Optional[str] = Column(String, nullable=True, comment='内容链接') + content: Optional[str] = Column(String, nullable=True, comment='内容详情') + content_upvote_count: Optional[int] = Column(BIGINT, nullable=True, comment='内容点赞数量') + content_comment_count: Optional[int] = Column(Integer, nullable=True, comment='内容评论数量') + create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间') + + def __repr__(self): + return f"" \ No newline at end of file diff --git a/database/thottopic/crud.py b/database/thottopic/crud.py new file mode 100644 index 0000000..ae3672c --- /dev/null +++ b/database/thottopic/crud.py @@ -0,0 +1,87 @@ +from datetime import datetime + +from sqlalchemy import func + +from database.thottopic.model import THotTopic + + +def create_hot_topic(db, hot_topic: THotTopic): + db.add(hot_topic) + db.commit() + db.refresh(hot_topic) + return hot_topic + + +# 插入数据库之前判断数据库中是否已经存在,根据news.url 判断 +def create_topic_if_url_not_exists(db, hot_topic: THotTopic): + # 检查是否已经存在具有相同 URL 的记录 + existing_topic = db.query(THotTopic).filter(THotTopic.url == hot_topic.url).first() + + if existing_topic: + # 如果记录已存在,直接返回已有的记录 + return existing_topic + + # 如果记录不存在,插入新的记录 + db.add(hot_topic) + db.commit() + db.refresh(hot_topic) + return hot_topic + + +def create_topics_if_url_not_exists(db, topics: list[THotTopic]): + inserted_topics = [] # 用于保存实际插入的新闻记录 + + for topic in topics: + # 检查是否已经存在具有相同 URL 的记录 + existing_topic = db.query(THotTopic).filter(THotTopic.url == topic.url).first() + + if not existing_topic: + # 如果记录不存在,插入新的记录 + db.add(topic) + inserted_topics.append(topic) + + # 批量提交所有插入的记录 + db.commit() + + # 刷新所有新插入的记录 + for topic in inserted_topics: + db.refresh(topic) + + return inserted_topics + + +def get_hot_topic_by_id(db, hot_topic_id: int): + return db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first() + + +def get_hot_topics(db, skip: int = 0, limit: int = 100): + return db.query(THotTopic).offset(skip).limit(limit).all() + +# 根据THotTopic.update_time排序,获取最新的THotTopic +def get_latest_hot_topic(db): + return db.query(THotTopic).order_by(THotTopic.update_time.desc()).first() + +# 根据THotTopic.create_time获取今日的所有THotTopic +def get_today_hot_topic(db): + today = datetime.now().date() + return db.query(THotTopic).filter(func.date(THotTopic.create_time) == today).all() + +def update_hot_topic(db, hot_topic: THotTopic): + db.merge(hot_topic) + db.commit() + db.refresh(hot_topic) + return hot_topic + + +# def update_hot_topic(db, hot_topic_id: int, updates: dict): +# db.query(THotTopic).filter(THotTopic.id == hot_topic_id).update(updates) +# db.commit() +# return db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first() + + +def delete_hot_topic(db, hot_topic_id: int): + hot_topic = db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first() + if hot_topic: + db.delete(hot_topic) + db.commit() + return hot_topic diff --git a/database/thottopic/model.py b/database/thottopic/model.py new file mode 100644 index 0000000..a2dbf51 --- /dev/null +++ b/database/thottopic/model.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from sqlalchemy import Column, String, Integer, TIMESTAMP, func +from sqlalchemy.dialects.postgresql import BIGINT + +from database.database import Base + + +@dataclass +class THotTopic(Base): + __tablename__ = 't_hot_topic' + + id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='序号') + topic: str = Column(String, nullable=False, comment='话题') + topic_description: Optional[str] = Column(String, nullable=True, comment='话题描述') + url: Optional[str] = Column(String, nullable=True, comment='话题链接') + source: Optional[str] = Column(String, nullable=True, comment='话题来源') + keywords: Optional[str] = Column(String, nullable=True, comment='话题关键词') + content_count: int = Column(Integer, default=0, nullable=False, comment='话题内容数量') + comment_count: int = Column(Integer, default=0, nullable=False, comment='话题评论数量') + follower_count: int = Column(Integer, default=0, nullable=False, comment='话题关注者数量') + date_created: Optional[datetime] = Column(TIMESTAMP(timezone=True), nullable=True, comment='话题创建时间') + date_modified: Optional[datetime] = Column(TIMESTAMP(timezone=True), nullable=True, comment='话题修改时间') + top_content_url: Optional[str] = Column(String, nullable=True, comment='热内内容链接') + top_content_upvote_count: Optional[int] = Column(BIGINT, nullable=True, comment='热门内容点赞数量') + top_content_comment_count: Optional[int] = Column(Integer, nullable=True, comment='热门内容评论数量') + create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间') + update_time: Optional[datetime] = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='更新时间') + ai_script: Optional[str] = Column(String, nullable=True, comment='内容脚本') + + def __repr__(self): + return f"" \ No newline at end of file diff --git a/database/tmaterial/crud.py b/database/tmaterial/crud.py new file mode 100644 index 0000000..be68f98 --- /dev/null +++ b/database/tmaterial/crud.py @@ -0,0 +1,129 @@ +from database.tmaterial.model import TMaterial +from database.tnews.model import TNews + + +def create_material(db, material: TMaterial): + db.add(material) + db.commit() + db.refresh(material) + return material + +def receive_news(db, news_list: list[TNews]): + material_list = [] + for news in news_list: + material = TMaterial() + material.title = news.title + material.summary = news.summary + material.url = news.url + material.content = news.content + material.occurrence_date = news.occurrence_date + material.source = news.source + material.primary_category = news.primary_category + material.secondary_category = news.secondary_category + material.tertiary_category = news.tertiary_category + material.label = news.label + material_list.append(material) + create_materials_if_url_not_exists(db, material_list) + + +# 插入数据库之前判断数据库中是否已经存在,根据TMaterial.url 判断 +def create_material_if_url_not_exists(db, material: TMaterial): + # 检查是否已经存在具有相同 URL 的记录 + existing_material = db.query(TMaterial).filter(TMaterial.url == material.url).first() + + if existing_material: + # 如果记录已存在,直接返回已有的记录 + return existing_material + + # 如果记录不存在,插入新的记录 + db.add(material) + db.commit() + db.refresh(material) + return material + + +def create_materials_if_url_not_exists(db, material_list: list[TMaterial]): + inserted_materials = [] # 用于保存实际插入的新闻记录 + + for material in material_list: + # 检查是否已经存在具有相同 URL 的记录 + existing_materials = db.query(TMaterial).filter(TMaterial.url == material.url).first() + + if not existing_materials: + # 如果记录不存在,插入新的记录 + db.add(material) + inserted_materials.append(material) + + # 批量提交所有插入的记录 + db.commit() + + # 刷新所有新插入的记录 + for material in inserted_materials: + db.refresh(material) + + return inserted_materials + + +def get_material_by_id(db, material_id: int): + return db.query(TMaterial).filter(TMaterial.id == material_id).first() + + +def get_material_need_summary(db): + return db.query(TMaterial).filter(TMaterial.ai_summary == None).all() + + +def get_materials_for_generate_reference_message(db, news_type: str) -> list[TMaterial]: + return db.query(TMaterial).filter( + TMaterial.type == news_type, + TMaterial.ai_summary != None, + TMaterial.is_usage == False + ).order_by(TMaterial.occurrence_date.desc()).all() + +def get_materials_for_generate_real_estate_reference_message(db) -> list[TMaterial]: + return db.query(TMaterial).filter( + TMaterial.primary_category == '新闻类', + TMaterial.secondary_category == '经济类', + TMaterial.tertiary_category == '房地产', + TMaterial.ai_summary != None, + TMaterial.ai_summary != 'summary formate error', + TMaterial.is_usage == False + ).order_by(TMaterial.occurrence_date.desc()).all() + +def get_materials_for_generate_news(db) -> list[TMaterial]: + return db.query(TMaterial).filter( + TMaterial.label == '资讯', + TMaterial.ai_summary != None, + TMaterial.ai_summary != 'summary formate error', + TMaterial.is_usage == False + ).order_by(TMaterial.occurrence_date.desc()).all() + +def get_materials_for_generate_tech_reference_message(db) -> list[TMaterial]: + return db.query(TMaterial).filter( + TMaterial.primary_category == '新闻类', + TMaterial.secondary_category == '科技类', + TMaterial.ai_summary != None, + TMaterial.ai_summary != 'summary formate error', + TMaterial.is_usage == False + ).order_by(TMaterial.occurrence_date.desc()).all() + +def update_material_by_id(db, news: TMaterial): + db.merge(news) + db.commit() + + +def update_material(db, material_id: int, updates: dict): + material = db.query(TMaterial).filter(TMaterial.id == material_id).first() + if material: + for key, value in updates.items(): + setattr(material, key, value) + db.commit() + db.refresh(material) + return material + + +def delete_material(db, material_id: int): + material = db.query(TMaterial).filter(TMaterial.id == material_id).first() + if material: + db.delete(material) + db.commit() + return material diff --git a/database/tmaterial/model.py b/database/tmaterial/model.py new file mode 100644 index 0000000..60f5d96 --- /dev/null +++ b/database/tmaterial/model.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from sqlalchemy import Column, String, Boolean, DateTime, BigInteger, text, INT +from database.database import Base + +@dataclass +class TMaterial(Base): + __tablename__ = 't_material' + + id: int = Column(BigInteger, primary_key=True, autoincrement=True, comment='编号') + title: Optional[str] = Column(String, nullable=True, comment='标题') + summary: Optional[str] = Column(String, nullable=True, comment='摘要') + url: Optional[str] = Column(String, nullable=True, comment='链接') + content: Optional[str] = Column(String, nullable=True, comment='内容/正文') + occurrence_date: Optional[datetime] = Column(DateTime(timezone=True), nullable=True, comment='发布日期') + source: Optional[str] = Column(String, nullable=True, comment='来源') + primary_category: str = Column(String, nullable=True, comment='一级类别') + secondary_category: str = Column(String, nullable=True, comment='二级类别') + tertiary_category: str = Column(String, nullable=True, comment='三级类别') + label: str = Column(String, nullable=True, comment='标签') + lang: str = Column(String, nullable=False, default='zh', comment='语言') + quality_score: int = Column(INT, nullable=True, comment='质量评分') + ai_summary: Optional[str] = Column(String, nullable=True, comment='AI摘要') + ai_title: Optional[str] = Column(String, nullable=True, comment='AI标题') + ai_content: Optional[str] = Column(String, nullable=True, comment='AI正文') + is_usage: bool = Column(Boolean, nullable=False, default=False, server_default=text('false'), comment='是否已用') + create_time: datetime = Column(DateTime(timezone=True), nullable=False, server_default=text('now()'), comment='创建日期') \ No newline at end of file diff --git a/database/tnews/crud.py b/database/tnews/crud.py new file mode 100644 index 0000000..261751c --- /dev/null +++ b/database/tnews/crud.py @@ -0,0 +1,15 @@ +from database.tnews.model import TNews + + +def get_news_unprocessed(db): + return db.query(TNews).filter( + TNews.is_usage == False, + TNews.content != None, + TNews.content != 'not found element' + ).all() + +def set_news_usage(db, news_list : list[TNews]): + for news in news_list: + news.is_usage = True + db.commit() + diff --git a/database/tnews/model.py b/database/tnews/model.py new file mode 100644 index 0000000..7c683d6 --- /dev/null +++ b/database/tnews/model.py @@ -0,0 +1,25 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from sqlalchemy import Column, String, Boolean, DateTime, BigInteger, text, INT +from database.database import Base + +@dataclass +class TNews(Base): + __tablename__ = 't_news' + + id: int = Column(BigInteger, primary_key=True, autoincrement=True, comment='编号') + title: Optional[str] = Column(String, nullable=True, comment='标题') + summary: Optional[str] = Column(String, nullable=True, comment='摘要') + url: Optional[str] = Column(String, nullable=True, comment='链接') + content: Optional[str] = Column(String, nullable=True, comment='内容/正文') + occurrence_date: Optional[datetime] = Column(DateTime(timezone=True), nullable=True, comment='发布日期') + source: Optional[str] = Column(String, nullable=True, comment='来源') + primary_category: str = Column(String, nullable=True, comment='一级类别') + secondary_category: str = Column(String, nullable=True, comment='二级类别') + tertiary_category: str = Column(String, nullable=True, comment='三级类别') + label: str = Column(String, nullable=True, comment='标签') + lang: str = Column(String, nullable=False, default='zh', comment='语言') + is_usage: bool = Column(Boolean, nullable=False, default=False, server_default=text('false'), comment='是否已用') + create_time: datetime = Column(DateTime(timezone=True), nullable=False, server_default=text('now()'), comment='创建日期') \ No newline at end of file diff --git a/database/tscheduler/crud.py b/database/tscheduler/crud.py new file mode 100644 index 0000000..6f11fce --- /dev/null +++ b/database/tscheduler/crud.py @@ -0,0 +1,35 @@ +from database.tscheduler.model import TScheduler + +def create_task(db, task: TScheduler): + db.add(task) + db.commit() + db.refresh(task) + return task + +def get_task_by_id(db, task_id: int): + return db.query(TScheduler).filter(TScheduler.id == task_id).first() + +def get_active_tasks(db): + return db.query(TScheduler).filter(TScheduler.active == True).all() + +def get_tasks_by_executor(db, executor: str): + return db.query(TScheduler).filter( + TScheduler.executor == executor, + TScheduler.active == True + ).all() + +def update_task(db, task_id: int, updates: dict): + task = db.query(TScheduler).filter(TScheduler.id == task_id).first() + if task: + for key, value in updates.items(): + setattr(task, key, value) + db.commit() + db.refresh(task) + return task + +def delete_task(db, task_id: int): + task = db.query(TScheduler).filter(TScheduler.id == task_id).first() + if task: + db.delete(task) + db.commit() + return task \ No newline at end of file diff --git a/database/tscheduler/model.py b/database/tscheduler/model.py new file mode 100644 index 0000000..9cc29fc --- /dev/null +++ b/database/tscheduler/model.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional +from sqlalchemy import Column, Integer, String, Boolean, Text, DateTime +from database.database import Base + +@dataclass +class TScheduler(Base): + __tablename__ = 't_scheduler' + + id: int = Column(Integer, primary_key=True, autoincrement=True, comment='自动递增的唯一任务ID') + task_name: str = Column(String(64), nullable=False, comment='任务名称') + trigger: str = Column(String(10), nullable=False, comment='调度方式,interval、cron、date') + interval_seconds: Optional[int] = Column(Integer, nullable=True, comment='固定时间间隔(秒),用于 interval 类型') + cron_expression: Optional[str] = Column(String(255), nullable=True, comment='CRON 表达式,用于 cron 类型') + execution_date: Optional[datetime] = Column(DateTime, nullable=True, comment='执行时间,用于 date 类型') + task_payload: Optional[str] = Column(Text, nullable=True, comment='任务相关的参数或数据') + active: Optional[bool] = Column(Boolean, default=False, nullable=True, comment='任务状态,是否启用') + executor: Optional[str] = Column(String(32), nullable=True, comment='任务执行者') + handler: Optional[str] = Column(String(32), nullable=True, comment='任务执行程序') + last_run: Optional[datetime] = Column(DateTime, nullable=True, comment='上一次执行时间') + next_run: Optional[datetime] = Column(DateTime, nullable=True, comment='下一次执行时间') + create_time: datetime = Column(DateTime, default=datetime.utcnow, nullable=True, comment='创建时间') + update_time: datetime = Column(DateTime, default=datetime.utcnow, nullable=True, comment='更新时间') + module_path: Optional[str] = Column(String(255), nullable=True, comment='任务逻辑所在模块名称') + function_name: Optional[str] = Column(String(256), nullable=True, comment='任务逻辑的函数名称') \ No newline at end of file diff --git a/database/tvideoscript/video_script.py b/database/tvideoscript/video_script.py new file mode 100644 index 0000000..5dc58d9 --- /dev/null +++ b/database/tvideoscript/video_script.py @@ -0,0 +1,57 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from sqlalchemy import Column, String, TIMESTAMP, func + +from database.database import Base, get_session +from utils import utils + + +@dataclass +class VideoScript(Base): + __tablename__ = 't_video_script' + + id: str = Column(String, primary_key=True, comment='唯一标识') + title: str = Column(String, nullable=False, comment='标题') + description: Optional[str] = Column(String, nullable=True, comment='描述') + keywords: Optional[str] = Column(String, nullable=True, comment='话题关键词') + url: str = Column(String, nullable=False, comment='话题链接') + script: str = Column(String, nullable=True, comment='视频脚本') + content: str = Column(String, nullable=True, comment='话题内容') + create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间') + + def __repr__(self): + return f"" + +def create_video_script(video_script: VideoScript): + if video_script.id is None: + video_script.id = utils.get_md5(video_script.url) + + with get_session() as db: + db.add(video_script) + db.commit() + db.refresh(video_script) + return video_script + +def video_script_not_exists(url_list: list): + """ + url_list如果在数据库中已经存在,则去除掉 + :param url_list: + :return: + """ + with get_session() as db: + video_scripts = db.query(VideoScript).filter(VideoScript.url.in_(url_list)).all() + for video_script in video_scripts: + url_list.remove(video_script.url) + return url_list + +def get_today_video_script(db): + today = datetime.now().date() + return db.query(VideoScript).filter(func.date(VideoScript.create_time) == today).all() + +def update_video_script(db, video_script: VideoScript): + db.merge(video_script) + db.commit() + db.refresh(video_script) + return video_script \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1992692 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,7 @@ +version: "3.8" + +services: + app: + image: edward:latest + container_name: edward + restart: always diff --git a/edward.py b/edward.py new file mode 100644 index 0000000..706bf84 --- /dev/null +++ b/edward.py @@ -0,0 +1,35 @@ +import datetime +from functools import partial + +from apscheduler.events import EVENT_JOB_ERROR +from apscheduler.schedulers.blocking import BlockingScheduler + +from config import config +from log.log_manager import log, logger +from task.manager_task import manager_task + + +def job_error_listener(event): + if event.exception: + logger.error(f"Job {event.job_id} crashed: {str(event.exception)}") + # 可添加邮件/钉钉告警逻辑 + +if __name__ == '__main__': + scheduler = BlockingScheduler() + # 每隔config.scheduler_interval秒执行一次任务,同时设定第一次执行在程序启动后10秒后执行 + scheduler.add_job( + partial(manager_task, scheduler), + 'interval', + seconds=config.scheduler_interval, + jitter=30, # 添加随机抖动避免任务雪崩 + next_run_time=datetime.datetime.now() + datetime.timedelta(seconds=10) # 替代 date 触发器 + ) + + # 添加任务错误监听器 + scheduler.add_listener(job_error_listener, EVENT_JOB_ERROR) + + try: + log("started successfully.") + scheduler.start() # 阻塞运行 + except (KeyboardInterrupt, SystemExit): + log("Shutting down ...") diff --git a/llm/kimi.py b/llm/kimi.py new file mode 100644 index 0000000..b88c42a --- /dev/null +++ b/llm/kimi.py @@ -0,0 +1,55 @@ +from DrissionPage import Chromium, ChromiumOptions +from DrissionPage.errors import ElementNotFoundError + + +class Kimi: + def __init__(self, base_timeout=30): + self.base_timeout = base_timeout + """Initialize the Chromium browser with specified timeout settings.""" + # co = ChromiumOptions().auto_port() + co = ChromiumOptions() + co.set_timeouts(base=self.base_timeout) + self.browser = Chromium(addr_or_opts=co) + self.tab = self.browser.latest_tab + self.tab.get('https://kimi.moonshot.cn/') + self.first_time = True + + def chat(self, input_message, wait_time=10): + """Send a message through the chat interface and retrieve the response.""" + try: + chat_input = self.tab.ele('.editorContentEditable___FZJd9') + chat_input.wait.enabled() + chat_input.input(input_message) + self.tab.wait(3) + self.tab.ele('#send-button').wait.enabled() + self.tab.ele('#send-button').click() + self.tab.wait(wait_time) + chat_input.input(' ') + self.tab.ele('#send-button').wait.enabled() + output = self.tab.ele('#^chat-markdown').text + except ElementNotFoundError: + output = '' + return output + + def generate(self, input_message, wait_time=10): + if not self.first_time: + new_session_btn = self.tab.ele('.myAgentTool___Y1_mC') + new_session_btn.wait.enabled() + new_session_btn.click() + self.tab.wait(3) + self.first_time = False + return self.chat(input_message, wait_time) + + def search(self, input_message, wait_time=10): + return self.chat(input_message, wait_time) + + def quit(self): + self.browser.quit() + +if __name__ == "__main__": + kimi = Kimi() + message = '今日新闻' + response = kimi.search(message) + kimi.quit() + + print(response) diff --git a/llm/local/ollama.py b/llm/local/ollama.py new file mode 100644 index 0000000..434d772 --- /dev/null +++ b/llm/local/ollama.py @@ -0,0 +1,56 @@ +import requests + + +class Ollama: + def __init__(self, model="qwen2.5:14b", server_url="http://192.168.1.200:11434"): + self.model = model + self.server_url = server_url + + def is_service_running(self): + """检查 Ollama 服务是否正在运行""" + try: + url = f"{self.server_url}/api/generate" + headers = {"Content-Type": "application/json"} + payload = { + "model": self.model, + "prompt": "ping", # 使用轻量级的 prompt 测试 + "stream": False + } + output = requests.post(url, headers=headers, json=payload, timeout=60) + if output.status_code == 200: + return True + else: + return False + except requests.exceptions.RequestException: + return False + + def generate_text(self, input_message: str): + url = f"{self.server_url}/api/generate" + headers = {"Content-Type": "application/json"} + payload = { + "model": self.model, + "prompt": input_message, + "stream": False, # 关闭流式传输 + "options": { + "temperature": 0.7, + "top_p": 0.9, + "num_ctx": 20480 + } + } + try: + output = requests.post(url, headers=headers, json=payload) + output.raise_for_status() # 检查HTTP请求是否成功 + result = output.json() + return result.get("response", "No response from model.") + except requests.exceptions.RequestException as e: + return f"Error calling Ollama API: {e}" + + +if __name__ == "__main__": + # 示例提示文本 + ollama = Ollama() + print("Ollama 服务是否正在运行:", ollama.is_service_running()) + prompt = "请用中文描述机器学习的基本概念。" + response = ollama.generate_text(prompt) + print("模型生成的文本:") + print(response) diff --git a/llm/tongyi.py b/llm/tongyi.py new file mode 100644 index 0000000..943d7ff --- /dev/null +++ b/llm/tongyi.py @@ -0,0 +1,41 @@ +from DrissionPage import Chromium, ChromiumOptions + +class TongYi: + def __init__(self, base_timeout=30): + self.base_timeout = base_timeout + """Initialize the Chromium browser with specified timeout settings.""" + # co = ChromiumOptions().auto_port() + co = ChromiumOptions() + co.set_timeouts(base=self.base_timeout) + self.browser = Chromium(addr_or_opts=co) + self.tab = self.browser.latest_tab + self.tab.get('https://tongyi.aliyun.com/') + + def chat(self, input_message, wait_time=10): + """Send a message through the chat interface and retrieve the response.""" + chat_input = self.tab.ele('@placeholder=千事不决问通义') + chat_input.input(input_message) + self.tab.ele('.operateBtn--zFx6rSR0').click() + self.tab.wait(wait_time) + chat_input.input('over') + self.tab.ele('.operateBtn--zFx6rSR0').wait.enabled() + output = self.tab.ele('.containerWrap--lFLVsVCe').text + return output + + def generate(self, input_message, wait_time=10): + return self.chat(input_message, wait_time) + + def search(self, input_message, wait_time=10): + return self.chat(input_message, wait_time) + + def quit(self): + self.browser.quit() + + +if __name__ == "__main__": + tong_yi = TongYi() + message = '今日新闻' + response = tong_yi.search(message) + tong_yi.quit() + + print(response) diff --git a/log/log_manager.py b/log/log_manager.py new file mode 100644 index 0000000..d7b5dd5 --- /dev/null +++ b/log/log_manager.py @@ -0,0 +1,70 @@ +import logging.config +import sys + +import config.config + +""" +Usage: +1 code +from log.log_manager import logger +logger.info("Starting Jarvas") +2 app start +python demo.py --logconfig=log_prod.config + +当前目录下的log_prod.config是一份参考配置 +""" + +# default logging config for development +LOG_DEV_CONFIG = { + "version": 1, + "disable_existing_loggers": False, + "loggers": { + "root": { + "level": "INFO", + "handlers": ["consoleHandler"] + } + }, + "handlers": { + "consoleHandler": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "verbose", + "stream": sys.stdout + } + }, + "formatters": { + "verbose": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + } + } +} + +log_config_message = "" +# 获取命令行参数 +args = sys.argv + +# 查找包含 'logconfig' 的参数 +logconfig_param = next((arg for arg in args if '--logconfig' in arg), None) +logconfig_value = None +if logconfig_param: + # 如果找到了 logconfig 参数,提取其值 + _, logconfig_value = logconfig_param.split('=') # 以 '=' 分割 + log_config_message = f"--logconfig value: {logconfig_value}" +else: + log_config_message = "没有找到 --logconfig 参数,使用默认log配置" + +if logconfig_value: + # 使用入参日志配置文件 + logging.config.fileConfig(logconfig_value) +else: + # 使用默认日志配置 + logging.config.dictConfig(LOG_DEV_CONFIG) + +logger = logging.getLogger('root') +# 打印日志配置信息 +logger.info(log_config_message) + + +def log(message: str): + logger.info(f'{config.config.scheduler_name} {message}') diff --git a/log/log_prod.config b/log/log_prod.config new file mode 100644 index 0000000..0a1ffc2 --- /dev/null +++ b/log/log_prod.config @@ -0,0 +1,22 @@ +[loggers] +keys=root + +[handlers] +keys=fileHandler + +[formatters] +keys=verbose + +[logger_root] +level=INFO +handlers=fileHandler + +[handler_fileHandler] +class=FileHandler +level=INFO +formatter=verbose +args=('x:/log/edward/app.log', 'a') + +[formatter_verbose] +format=%(asctime)s - %(name)s - %(levelname)s - %(message)s +datefmt=%Y-%m-%d %H:%M:%S diff --git a/log/log_test.config b/log/log_test.config new file mode 100644 index 0000000..2523030 --- /dev/null +++ b/log/log_test.config @@ -0,0 +1,27 @@ +[loggers] +keys=root + +[handlers] +keys=fileHandler,consoleHandler + +[formatters] +keys=verbose + +[logger_root] +level=INFO +handlers=fileHandler,consoleHandler + +[handler_fileHandler] +class=FileHandler +level=INFO +formatter=verbose +args=('x:/log/edward/app.log', 'a', 'utf-8') + +[handler_consoleHandler] +class=logging.StreamHandler +level=INFO +formatter=verbose + +[formatter_verbose] +format=%(asctime)s - %(name)s - %(levelname)s - %(message)s +datefmt=%Y-%m-%d %H:%M:%S diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d887ae8b6177ddae3fb23a8234478c5edb7ca18 GIT binary patch literal 1164 zcmZWp+fKqz5S(WdKLvu6+lvnv-n>NR0}K?z#sU@E0DinWv)iRnb8-T7XJ=-&yL^9* z@rWr>ykUt2D&+j!;hHBw8&^DSba06a++fDY5)Zs7@17?^$*b?m=n&iG9T{GjoyA{? zQM^k;7d=C?$?v?<7W1C?5-n`7<*zlL<;?pumhhw8$Wk=LyD^XWTuz6z@|L+`nHf)b zQ}&=n$h{O*Z%SQZ#328!FkyDd`)mlYEq-|!7+Rf~Q0qY7(wq@>OJ~mLY~gD{j?8p3 zA~)>(3@dUEjNyQ1=vM;^s07RLl4 zqwfv_}Dj&e=`8g*kcTeaEcu-=J0b2h}vCC2#mmtdEH?{>8NZ^}DIg=d_}?k`6U#&N;OH zXcE0YQ#|1gW5#{E*0sBMdr3WuB@vpeBl$G3F^%fBRNZ08?853?aV8wC_o*G!EV&1t zA}MsOu9Ew+AgAWo>y8a<;yyRpHX)?yv5IbTo{arX&Rr9?J37$CoVq@BzPqX6<*V7K UrXvUYm(k8iN&gl-)AJeq0SrQ_e*gdg literal 0 HcmV?d00001 diff --git a/run_ai_summary_with_local_ollama.py b/run_ai_summary_with_local_ollama.py new file mode 100644 index 0000000..9e4d0c2 --- /dev/null +++ b/run_ai_summary_with_local_ollama.py @@ -0,0 +1,5 @@ +from task.manager_task import execute_task +from task.summary.ai_summary_ollama import ai_summary_task + +if __name__ == '__main__': + execute_task(ai_summary_task) \ No newline at end of file diff --git a/search/search_from_doubao.py b/search/search_from_doubao.py new file mode 100644 index 0000000..01933f8 --- /dev/null +++ b/search/search_from_doubao.py @@ -0,0 +1,31 @@ +from DrissionPage import Chromium, ChromiumOptions + +class DouBao: + def __init__(self, base_timeout=300): + self.base_timeout = base_timeout + """Initialize the Chromium browser with specified timeout settings.""" + co = ChromiumOptions().auto_port() + co.set_timeouts(base=self.base_timeout) + self.browser = Chromium(addr_or_opts=co) + self.tab = self.browser.latest_tab + + def search(self, input_message, wait_time=10): + """Send a message through the chat interface and retrieve the response.""" + self.tab.get('https://www.doubao.com/') + chat_input = self.tab.ele('@data-testid=chat_input_input') + chat_input.input(input_message) + self.tab.ele('#flow-end-msg-send').click() + self.tab.wait(wait_time) + self.tab.ele('@data-testid=chat_input_local_break_button').wait.hidden() + output = self.tab.eles('@data-testid=message_text_content')[1].text + return output + + def quit(self): + self.browser.quit() + +if __name__ == "__main__": + dou_bao = DouBao() + message = '今日新闻' + response = dou_bao.search(message) + + print(response) diff --git a/search/search_from_kimi.py b/search/search_from_kimi.py new file mode 100644 index 0000000..26c02dc --- /dev/null +++ b/search/search_from_kimi.py @@ -0,0 +1,35 @@ +from DrissionPage import Chromium, ChromiumOptions + +class Kimi: + def __init__(self, base_timeout=300): + self.base_timeout = base_timeout + """Initialize the Chromium browser with specified timeout settings.""" + co = ChromiumOptions().auto_port() + co.set_timeouts(base=self.base_timeout) + self.browser = Chromium(addr_or_opts=co) + self.tab = self.browser.latest_tab + + def search(self, input_message, wait_time=10): + """Send a message through the chat interface and retrieve the response.""" + self.tab.get('https://kimi.moonshot.cn/') + chat_input = self.tab.ele('.editorContentEditable___FZJd9') + chat_input.wait.enabled() + chat_input.input(input_message) + self.tab.wait(10) + self.tab.ele('#send-button').click() + self.tab.wait(wait_time) + chat_input.input(' ') + self.tab.ele('#send-button').wait.enabled() + output = self.tab.ele('#^chat-markdown').text + return output + + def quit(self): + self.browser.quit() + +if __name__ == "__main__": + kimi = Kimi() + message = '今日新闻' + response = kimi.search(message) + kimi.quit() + + print(response) diff --git a/search/search_from_tongyi.py b/search/search_from_tongyi.py new file mode 100644 index 0000000..1425f4d --- /dev/null +++ b/search/search_from_tongyi.py @@ -0,0 +1,33 @@ +from DrissionPage import Chromium, ChromiumOptions + +class TongYi: + def __init__(self, base_timeout=300): + self.base_timeout = base_timeout + """Initialize the Chromium browser with specified timeout settings.""" + co = ChromiumOptions().auto_port() + co.set_timeouts(base=self.base_timeout) + self.browser = Chromium(addr_or_opts=co) + self.tab = self.browser.latest_tab + + def search(self, input_message, wait_time=10): + """Send a message through the chat interface and retrieve the response.""" + self.tab.get('https://tongyi.aliyun.com/') + chat_input = self.tab.ele('@placeholder=千事不决问通义') + chat_input.input(input_message) + self.tab.ele('.operateBtn--zFx6rSR0').click() + self.tab.wait(wait_time) + chat_input.input('over') + self.tab.ele('.operateBtn--zFx6rSR0').wait.enabled() + output = self.tab.ele('.containerWrap--lFLVsVCe').text + return output + + def quit(self): + self.browser.quit() + + +if __name__ == "__main__": + tong_yi = TongYi() + message = '今日新闻' + response = tong_yi.search(message) + + print(response) diff --git a/search/search_manager.py b/search/search_manager.py new file mode 100644 index 0000000..e1325b8 --- /dev/null +++ b/search/search_manager.py @@ -0,0 +1,44 @@ +from database.database_manager import DatabaseManager +from search.search_from_doubao import DouBao +from search.search_from_kimi import Kimi +from search.search_from_tongyi import TongYi + + +def send_dispatch_content_inner(content: str, category: str, title: str): + """send dispatch content""" + db_manager = DatabaseManager() + db_manager.send_dispatch_content(category=category, title=title, content=content, ai_generate=1) + + +def search(input_text: str): + """search messages""" + kimi = Kimi() + kimi_search_result = kimi.search(input_text) + kimi.quit() + tong_yi = TongYi() + tong_yi_search_result = tong_yi.search(input_text) + tong_yi.quit() + dou_bao = DouBao() + dou_bao_search_result = dou_bao.search(input_text) + dou_bao.quit() + return f"{kimi_search_result}\n{tong_yi_search_result}\n{dou_bao_search_result}" + + +def summarize(text: str) -> str: + """Summarizes the given text.""" + prompt = ("请根据提供的草稿,生成一篇完整的文章。\n" + "要求去重和格式化。\n" + "整篇文章全部采用并且只使用简洁的有序列表格式,不需要小标题,例如:\n1、今天天气不错\n2、张三捉住一只猫\n" + "\n以下是草稿内容:\n{}").format(text) + kimi = Kimi() + kimi_inference_result = kimi.search(prompt) + kimi.quit() + print(kimi_inference_result) + return kimi_inference_result + + +def editor(subject: str, category: str, title: str): + """这是一个编辑工具,根据提供的主题,进行素材检索和文章生成""" + draft = search(subject) + kimi_inference_result = summarize(draft) + send_dispatch_content_inner(kimi_inference_result, category, title) diff --git a/task/default/default_task.py b/task/default/default_task.py new file mode 100644 index 0000000..ecaf21b --- /dev/null +++ b/task/default/default_task.py @@ -0,0 +1,8 @@ +import time + +from log.log_manager import logger + + +def default_task(): + logger.info("default_task") + logger.info(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) \ No newline at end of file diff --git a/task/hot_topic/manual_execution_script.py b/task/hot_topic/manual_execution_script.py new file mode 100644 index 0000000..526ff86 --- /dev/null +++ b/task/hot_topic/manual_execution_script.py @@ -0,0 +1,67 @@ +from database.database import get_session +from database.thotcontent.crud import get_hot_content_by_topic_id +from database.thottopic.crud import get_latest_hot_topic, update_hot_topic, get_hot_topic_by_id +from llm.local.ollama import Ollama +from log.log_manager import log + +if __name__ == '__main__': + with get_session() as db: + # 1. 获取热点话题 + latest_hot_topic = get_hot_topic_by_id(db, 265) # 根据话题ID获取特定的热点话题 + # latest_hot_topic = get_latest_hot_topic(db) # 获取最新的热点话题 + topic = latest_hot_topic.topic + print(latest_hot_topic) + # 2. 获取话题内容 + hot_contents = get_hot_content_by_topic_id(db, latest_hot_topic.id) + for hot_content in hot_contents: + print(hot_content) + # 统计hot_content.content的字数 + print(len(hot_content.content)) + topic_content = [hot_content.content for hot_content in hot_contents] + print(topic_content) + print(topic_content[0]) + print('----------------------------------------------------') + print(topic_content[1]) + print('----------------------------------------------------') + print(topic_content[2]) + print('----------------------------------------------------') + input_message = ( + """ + 你是一个专业的编辑。你的任务是根据提供的话题和素材,生成一片口述稿。 + 要求如下: + 1 字数控制在200到1000字之间。 + 话题如下: + """ + + + topic + + + """ + 素材是三位网友的见解: + """ + + + """ + 第一位网友说: + """ + + + topic_content[0] + + + """ + 第二位网友说: + """ + + + topic_content[1] + + + """ + 第三位网友说: + """ + + + topic_content[2] + ) + ollama = Ollama() + if not ollama.is_service_running(): + log("ai_summary_task finish, ollama service not running") + else: + result = ollama.generate_text(input_message) + log(result) + latest_hot_topic.ai_script = result + update_hot_topic(db, latest_hot_topic) diff --git a/task/hot_topic/script_task.py b/task/hot_topic/script_task.py new file mode 100644 index 0000000..e927769 --- /dev/null +++ b/task/hot_topic/script_task.py @@ -0,0 +1,73 @@ +import json +from json import JSONDecodeError + +from database.database import get_session +from database.tvideoscript.video_script import get_today_video_script, update_video_script +from llm.local.ollama import Ollama +from log.log_manager import log +from task.manager_task import execute_task + + +def ai_script_task(): + with get_session() as db: + # 1. 获取今日的热点话题列表 + video_scripts = get_today_video_script(db) + if len(video_scripts) == 0: + log("ai_script_task finish, task size 0") + return + log(f"ai script task size {len(video_scripts)}") + + ollama = Ollama() + if not ollama.is_service_running(): + log("ai_script_task finish, ollama service not running") + return + + for video_script in video_scripts: + topic = video_script.title + log(f'generate script for topic: {topic}') + # 2. 获取话题内容 + content = video_script.content + input_message = ( +""" +## 角色 +- 你是一个资深编辑。 + +## 目标 +- 从输入的素材中选取有用的信息。 + +## 任务描述 +- 从contents中选取最能吸引人的段落或句子,使读者产生兴趣和共鸣。请确保这些内容具有情感张力、戏剧性、趣味性或引发思考的价值。选取两部分内容,分别使用键“content_one”和“content_two”。每部分不少于100个汉字。两部分内容字数之和不多于600个汉字。 + +## 要求 +- 严格遵守字数要求。 +- 直接输出内容。 +- 内容为JSON格式。 + +## 素材如下 +""" + content + ) + # log(input_message) + # 3. 调用ollama生成话题脚本 + llm_result = ollama.generate_text(input_message) + log(llm_result) + try: + llm_result = json.loads(llm_result) + except JSONDecodeError as e: + log(f"ai_script_task error: {e}. skip topic: {topic}") + continue + video_script.script = ( +f"""{video_script.title} +{video_script.description} +一位网友说: +{llm_result["content_one"]} +另一位网友说: +{llm_result["content_two"]} +关于这个问题大家有什么看法呢? +欢迎评论区留言 +""" + ) + # 4. 保存话题脚本 + update_video_script(db, video_script) + +if __name__ == "__main__": + execute_task(ai_script_task) \ No newline at end of file diff --git a/task/manager_task.py b/task/manager_task.py new file mode 100644 index 0000000..0905821 --- /dev/null +++ b/task/manager_task.py @@ -0,0 +1,108 @@ +import importlib +import time +from functools import partial + +from apscheduler.schedulers.blocking import BlockingScheduler + +from config import config +from database.database import get_session +from database.tscheduler.crud import get_tasks_by_executor +from log.log_manager import log + +""" +这是一个特殊的任务,负责管理任务,命名为管理者任务。 + +工作流程: +1 检索数据库任务数据表 +2 检查是否已经在任务队列中,如果不在则添加 + +任务执行时间间隔为600秒。 + +""" + + +def log_task_execution(task_name: str, start_time: float, end_time: float = None): + """辅助函数,记录任务的开始和结束日志""" + start_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start_time)) + end_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time)) + if end_time is None: + log(f"{task_name} start execute at {start_time_str}") + else: + elapsed_time = end_time - start_time + log(f"{task_name} end execute at {end_time_str}, use time {elapsed_time:.2f} seconds") + + +def execute_task(task: callable): + """执行任务并记录日志""" + start_time = time.time() + log_task_execution(task.__name__, start_time) # 先记录开始时间 + task() + end_time = time.time() + log_task_execution(task.__name__, start_time, end_time) # 记录结束时间 + +# 从数据库加载任务 +def load_tasks(scheduler: BlockingScheduler): + with get_session() as db: + tasks = get_tasks_by_executor(db, config.scheduler_name) + for task in tasks: + module_path = task.module_path + function_name = task.function_name + trigger = task.trigger + interval_seconds = task.interval_seconds + task_id = task.id + + # 动态导入模块和函数 + module = importlib.import_module(module_path) + task_function = partial(execute_task, getattr(module, function_name)) + + # 检查任务是否已存在 + if not scheduler.get_job(str(task_id)): + if trigger == "interval": + scheduler.add_job( + task_function, + "interval", + seconds=interval_seconds, + id=str(task_id), + replace_existing=True + ) + log(f"Task {task.task_name} added with interval {interval_seconds} seconds") + elif trigger == "cron": + # 解析 cron 表达式的字段 + fields = task.cron_expression.split() + # 确保字段长度符合七字段格式 + if len(fields) != 7: + raise ValueError("无效的 Quartz cron 表达式") + # 替换 Quartz 风格的 `?` 为 APScheduler 可接受的 `*` + if fields[5] == '?': + fields[5] = '*' # 替换 `day_of_week` 字段中的 `?` + # 使用 cron 表达式的字段添加任务 + scheduler.add_job( + task_function, # 要执行的任务 + 'cron', # 使用 cron 触发器 + second=fields[0], # 秒 + minute=fields[1], # 分钟 + hour=fields[2], # 小时 + day=fields[3], # 日期 + month=fields[4], # 月份 + day_of_week=fields[5], # 星期 + year=fields[6], # 年份 + id=str(task_id), + replace_existing=True + ) + log(f"Task {task.task_name} added with cron {task.cron_expression}") + elif trigger == "date": + scheduler.add_job( + task_function, + "date", + run_date=task["run_date_and_time"], + id=str(task_id), + replace_existing=True + ) + log(f"Task {task.task_name} added with date {task.execution_date}") + else: + log(f"Invalid trigger type: {trigger}") + + +# 管理者任务 +def manager_task(scheduler: BlockingScheduler): + load_tasks(scheduler) diff --git a/task/material_distribution.py b/task/material_distribution.py new file mode 100644 index 0000000..944f343 --- /dev/null +++ b/task/material_distribution.py @@ -0,0 +1,17 @@ +from database.database import get_session +from database.tmaterial.crud import receive_news +from database.tnews.crud import get_news_unprocessed, set_news_usage +from log.log_manager import log +from task.manager_task import execute_task + + +def distribution_task(): + with get_session() as db: + news_list = get_news_unprocessed(db) + receive_news(db, news_list) + set_news_usage(db, news_list) + log(f'distributed {len(news_list)} news') + + +if __name__ == '__main__': + execute_task(distribution_task) \ No newline at end of file diff --git a/task/news/reference_message.py b/task/news/reference_message.py new file mode 100644 index 0000000..f267eac --- /dev/null +++ b/task/news/reference_message.py @@ -0,0 +1,49 @@ +import re +import time + +from database.database import get_session +from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content +from database.tcontentdispatch.model import TContentDispatch +from database.tmaterial.crud import update_material_by_id, \ + get_materials_for_generate_news +from log.log_manager import log +from task.manager_task import execute_task + + +def generate_news_task(): + with get_session() as db: + # 1. 构建今日新闻文章标题,格式:今日新闻yyyy-MM-dd + title = ("今日新闻" + + time.strftime("%Y", time.localtime()) + '年' + + time.strftime("%m", time.localtime()) + '月' + + time.strftime("%d", time.localtime()) + '日') + # 2. 从内容分发数据表获取当前标题和分类的文章是否存在 + content_dispatch = get_content_by_title_and_category(db, title, "新鲜事") + content = "" + if content_dispatch is not None: + content = content_dispatch.content + # 从最后一条获取并计算开始编号 + result = re.findall(r'(? str: + log(f"ai_edit start execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}") + ollama = Ollama() + response = ollama.generate_text(input_message) + log(f"ai_edit end execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}") + return response + +def revisal_task(): + with get_session() as db: + # 1. 构建今日新闻文章标题,格式:今日新闻yyyy-MM-dd + title = ("今日新闻" + + time.strftime("%Y", time.localtime()) + '年' + + time.strftime("%m", time.localtime()) + '月' + + time.strftime("%d", time.localtime()) + '日') + # 2. 从内容分发数据表获取当前标题和分类的文章是否存在 + content_dispatch = get_content_by_title_and_category(db, title, "新鲜事") + ai_content = "" + if content_dispatch and content_dispatch.content: + # 3. 执行AI编辑 + input_message = (('按照规则编辑提供的内容。规则如下:\n' + '1 去除重复内容\n' + '2 不要故意删除内容\n' + '3 重新编号\n' + '4 不要出现空行\n' + '5 不要出现类似"以下是根据您提供的规则编辑后的内容"等提示信息,直接输出编辑后的内容\n' + '内容如下:\n') + + content_dispatch.content) + ai_content = ai_edit(input_message) + print(content_dispatch.content) + print("-----------------------------------------------------------") + print(ai_content) + # 4. 去掉ai_content中的空行 + ai_content = "\n".join([line for line in ai_content.split("\n") if line.strip()]) + # 5. 把content写入数据库 + if ai_content: + content_dispatch.ai_content = ai_content + content_dispatch.is_sent = False + update(db) + # 获取ai_content的行数 + lines = ai_content.strip().split("\n") + log(f"revisal news task finish, news count: {len(lines)}, news words: {len(ai_content)}") + + +if __name__ == "__main__": + execute_task(revisal_task) diff --git a/task/real_estate/reference_message.py b/task/real_estate/reference_message.py new file mode 100644 index 0000000..810971f --- /dev/null +++ b/task/real_estate/reference_message.py @@ -0,0 +1,47 @@ +import re +import time + +from database.database import get_session +from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content +from database.tcontentdispatch.model import TContentDispatch +from database.tmaterial.crud import update_material_by_id, \ + get_materials_for_generate_real_estate_reference_message +from task.manager_task import execute_task + + +def generate_real_estate_reference_message_task(): + with get_session() as db: + # 1. 构建楼市参考消息文章标题,格式:楼市参考消息yyyy-MM-dd + title = ("楼市参考消息" + + time.strftime("%Y", time.localtime()) + '年' + + time.strftime("%m", time.localtime()) + '月' + + time.strftime("%d", time.localtime()) + '日') + # 2. 从内容分发数据表获取当前标题和分类的文章是否存在 + content_dispatch = get_content_by_title_and_category(db, title, "房地产") + content = "" + if content_dispatch is not None: + content = content_dispatch.content + # 从最后一条获取并计算开始编号 + result = re.findall(r'(? str: + log(f"ai_edit_with_ollama start execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}") + ollama = Ollama() + response = ollama.generate_text(input_message) + log(f"ai_edit_with_ollama end execute at {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}") + return response + +def revisal_task(): + with get_session() as db: + # 1. 构建楼市参考消息文章标题,格式:楼市参考消息yyyy-MM-dd + title = ("楼市参考消息" + + time.strftime("%Y", time.localtime()) + '年' + + time.strftime("%m", time.localtime()) + '月' + + time.strftime("%d", time.localtime()) + '日') + # 2. 从内容分发数据表获取当前标题和分类的文章是否存在 + content_dispatch = get_content_by_title_and_category(db, title, "房地产") + ai_content = "" + if content_dispatch and content_dispatch.content: + print(content_dispatch.content) + # 3. 执行AI编辑 + input_message = (('按照规则编辑提供的内容。规则如下:\n' + '1 以每行内容作为一个处理单元,去掉与房地产、楼市无关的内容。如果整行内容都无关则全部去掉,否则全部保留。\n' + '2 去除重复内容\n' + '3 不要故意删除内容\n' + '5 不要出现空行\n' + '6 重新编号\n' + '7 不要出现类似"以下是根据您提供的规则编辑后的内容"等提示信息,直接输出编辑后的内容\n' + '内容如下:\n') + + content_dispatch.content) + ai_content = ai_edit_with_ollama(input_message) + print("-----------------------------------------------------------") + print(ai_content) + # 4. 把content写入数据库 + if ai_content: + content_dispatch.ai_content = ai_content + content_dispatch.is_sent = False + update(db) + + +if __name__ == "__main__": + execute_task(revisal_task) diff --git a/task/summary/ai_summary.py b/task/summary/ai_summary.py new file mode 100644 index 0000000..6f62776 --- /dev/null +++ b/task/summary/ai_summary.py @@ -0,0 +1,38 @@ +from database.database import get_session +from database.tmaterial.crud import get_material_need_summary, update_material +from llm.kimi import Kimi +from log.log_manager import log +from task.manager_task import execute_task + + +def ai_summary(input_message: str, instance) -> str: + response = instance.generate(input_message) + log(response) + return response + +def ai_summary_task(): + with get_session() as db: + news_list = get_material_need_summary(db) + if len(news_list) == 0: + log("ai_summary_task finish, task size 0") + return + kimi = Kimi() + for news in news_list: + # input_message = news.url + "\n阅读并生成百字以内的摘要" + # input_message = "请将以下文章内容用一句话进行概括,提炼出最核心的观点或主题,简洁明了,不超过 100 字:\n" + news.url + log(news.url) + input_message = ( + "请将以下新闻内容提炼成一句话,简洁明了地传达事件的核心信息,包括谁、什么、何时、何地和为何,字数不超过 100 字:" + + news.url + ) + summary = ai_summary(input_message, kimi) + updates = {"ai_summary": summary} + update_material(db, news.id, updates) + kimi.quit() + log(f"ai_summary_task finish, task size {len(news_list)}") + + +if __name__ == '__main__': + # input_message_ = "https://www.ofweek.com/ai/2024-12/ART-201700-8500-30653318.html\n阅读并生成200字以内的摘要" + # logger.info(ai_summary(input_message_)) + execute_task(ai_summary_task) \ No newline at end of file diff --git a/task/summary/ai_summary_ollama.py b/task/summary/ai_summary_ollama.py new file mode 100644 index 0000000..b27f55e --- /dev/null +++ b/task/summary/ai_summary_ollama.py @@ -0,0 +1,53 @@ +from database.database import get_session +from database.tmaterial.crud import get_material_need_summary, update_material +from llm.local.ollama import Ollama +from log.log_manager import log +from task.manager_task import execute_task + + +def ai_summary(input_message: str, instance) -> str: + response = instance.generate(input_message) + log(response) + return response + +def ai_summary_task(): + with get_session() as db: + news_list = get_material_need_summary(db) + if len(news_list) == 0: + log("ai_summary_task finish, task size 0") + return + log(f"ai summary task size {len(news_list)}") + ollama = Ollama() + if not ollama.is_service_running(): + log("ai_summary_task finish, ollama service not running") + return + for news in news_list: + input_message = ( +""" +请为以下新闻生成严格单段落的中文摘要,要求: +1. 保持段落连贯性,不使用任何分段符号(包括空行、缩进或序号) +2. 核心要素按此顺序呈现: + [时间]>[地点]>[主体机构]>[关键事件]>[量化影响] +3. 采用"总-分"结构: + - 首句陈述核心事实(包含最关键的时间地点主体) + - 中间展开关键细节(使用衔接词:同时/此外/值得注意的是) + - 结尾说明当前状态/后续影响 +4. 字数严格控制在100个字符以内 +5. 禁止使用项目符号、引文格式等非连贯文本元素 + +新闻原文: +""" ++ news.content + ) + summary = ollama.generate_text(input_message) + log(f'{news.url} {summary}') + # 判断summary是否是一段话 + if '\n' in summary: + summary = 'summary formate error' + updates = {"ai_summary": summary} + update_material(db, news.id, updates) + log(f"ai_summary_task finish, task size {len(news_list)}") + + +if __name__ == '__main__': + execute_task(ai_summary_task) \ No newline at end of file diff --git a/task/tech/reference_message.py b/task/tech/reference_message.py new file mode 100644 index 0000000..62a703a --- /dev/null +++ b/task/tech/reference_message.py @@ -0,0 +1,47 @@ +import re +import time + +from database.database import get_session +from database.tcontentdispatch.curd import get_content_by_title_and_category, create_or_update_content +from database.tcontentdispatch.model import TContentDispatch +from database.tmaterial.crud import update_material_by_id, \ + get_materials_for_generate_tech_reference_message +from task.manager_task import execute_task + + +def generate_reference_message_task(): + with get_session() as db: + # 1. 构建AI参考消息文章标题,格式:楼市参考消息yyyy-MM-dd + title = ("科技参考消息" + + time.strftime("%Y", time.localtime()) + '年' + + time.strftime("%m", time.localtime()) + '月' + + time.strftime("%d", time.localtime()) + '日') + # 2. 从内容分发数据表获取当前标题和分类的文章是否存在 + content_dispatch = get_content_by_title_and_category(db, title, "科技") + content = "" + if content_dispatch is not None: + content = content_dispatch.content + # 从最后一条获取并计算开始编号 + result = re.findall(r'(? str: + import hashlib + m = hashlib.md5() + m.update(url.encode('utf-8')) + return m.hexdigest() \ No newline at end of file