import peter

This commit is contained in:
konjacpotato
2025-11-12 20:42:16 +08:00
commit 8c1a740f0b
147 changed files with 2763 additions and 0 deletions

5
database/Readme.md Normal file
View File

@ -0,0 +1,5 @@
```
t_top_topic table DDL
上面是top_topic数据表的DDL根据DDL信息在database模块下按照项目结构创建model.py和crud.py
```

0
database/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

37
database/database.py Normal file
View File

@ -0,0 +1,37 @@
from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from log.log_manager import logger
Base = declarative_base()
DATABASE_URL = 'postgresql+psycopg://postgres:K8u3fg0o@47.119.128.161:60001/squirrel'
engine = create_engine(
DATABASE_URL,
pool_size=10,
max_overflow=20,
pool_timeout=30,
pool_recycle=1800, # 防止数据库端连接过期
connect_args={
'connect_timeout': 15,
'keepalives_idle': 60,
'keepalives_interval': 10,
'keepalives_count': 5
}
)
Base.metadata.create_all(engine)
@contextmanager
def get_session():
session = sessionmaker(bind=engine)()
try:
yield session
session.commit() # 自动提交成功的事务
except Exception as e:
session.rollback() # 异常时回滚
logger.error(f"Database operation failed: {str(e)}")
raise # 重新抛出异常
finally:
session.close() # 确保会话关闭

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,77 @@
from database.thotcontent.model import THotContent
from log.log_manager import logger
def create_hot_content(db, hot_content: THotContent):
db.add(hot_content)
db.commit()
db.refresh(hot_content)
return hot_content
# 插入数据库之前判断数据库中是否已经存在根据news.url 判断
def create_content_if_url_not_exists(db, hot_content: THotContent):
# 检查是否已经存在具有相同 URL 的记录
existing_content = db.query(THotContent).filter(THotContent.url == hot_content.url).first()
if existing_content:
# 如果记录已存在,直接返回已有的记录
return existing_content
# 如果记录不存在,插入新的记录
db.add(hot_content)
db.commit()
db.refresh(hot_content)
return hot_content
def create_contents_top3_if_url_not_exists(db, contents: list[THotContent]):
logger.info(f"采集到内容数量:{len(contents)},存入数据库前三")
# 按照 THotContent.content_upvote_count 对contents进行排序
contents.sort(key=lambda x: x.content_upvote_count, reverse=True)
# 保留 contents 的前3条
contents = contents[:3]
inserted_contents = [] # 用于保存实际插入的新闻记录
for content in contents:
# 检查是否已经存在具有相同 URL 的记录
existing_content = db.query(THotContent).filter(THotContent.url == content.url).first()
if not existing_content:
# 如果记录不存在,插入新的记录
db.add(content)
inserted_contents.append(content)
# 批量提交所有插入的记录
db.commit()
# 刷新所有新插入的记录
for content in inserted_contents:
db.refresh(content)
return inserted_contents
def get_hot_content_by_id(db, hot_content_id: int):
return db.query(THotContent).filter(THotContent.id == hot_content_id).first()
def get_hot_content_by_topic_id(db, topic_id: int):
return db.query(THotContent).filter(THotContent.topic_id == topic_id).all()
def get_hot_contents(db, skip: int = 0, limit: int = 100):
return db.query(THotContent).offset(skip).limit(limit).all()
def update_hot_content(db, hot_content_id: int, updates: dict):
hot_content = db.query(THotContent).filter(THotContent.id == hot_content_id).first()
if hot_content:
for key, value in updates.items():
setattr(hot_content, key, value)
db.commit()
db.refresh(hot_content)
return hot_content
def delete_hot_content(db, hot_content_id: int):
hot_content = db.query(THotContent).filter(THotContent.id == hot_content_id).first()
if hot_content:
db.delete(hot_content)
db.commit()

View File

@ -0,0 +1,23 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from sqlalchemy import Column, Integer, String, BIGINT, TIMESTAMP, func
from sqlalchemy.dialects.postgresql import BIGINT
from database.database import Base
@dataclass
class THotContent(Base):
__tablename__ = 't_hot_content'
id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='序号')
topic_id: int = Column(BIGINT, nullable=False, comment='关联话题ID')
url: Optional[str] = Column(String, nullable=True, comment='内容链接')
content: Optional[str] = Column(String, nullable=True, comment='内容详情')
content_upvote_count: Optional[int] = Column(BIGINT, nullable=True, comment='内容点赞数量')
content_comment_count: Optional[int] = Column(Integer, nullable=True, comment='内容评论数量')
create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
def __repr__(self):
return f"<THotContent(id={self.id}, topic_id={self.topic_id}, url={self.url}, content_upvote_count={self.content_upvote_count})>"

View File

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,90 @@
from database.thottopic.model import THotTopic
def create_hot_topic(db, hot_topic: THotTopic):
db.add(hot_topic)
db.commit()
db.refresh(hot_topic)
return hot_topic
# 插入数据库之前判断数据库中是否已经存在根据news.url 判断
def create_topic_if_url_not_exists(db, hot_topic: THotTopic):
# 检查是否已经存在具有相同 URL 的记录
existing_topic = db.query(THotTopic).filter(THotTopic.url == hot_topic.url).first()
if existing_topic:
# 如果记录已存在,直接返回已有的记录
return existing_topic
# 如果记录不存在,插入新的记录
db.add(hot_topic)
db.commit()
db.refresh(hot_topic)
return hot_topic
def create_topics_if_url_not_exists(db, topics: list[THotTopic]):
inserted_topics = [] # 用于保存实际插入的新闻记录
for topic in topics:
# 检查是否已经存在具有相同 URL 的记录
existing_topic = db.query(THotTopic).filter(THotTopic.url == topic.url).first()
if not existing_topic:
# 如果记录不存在,插入新的记录
db.add(topic)
inserted_topics.append(topic)
# 批量提交所有插入的记录
db.commit()
# 刷新所有新插入的记录
for topic in inserted_topics:
db.refresh(topic)
return inserted_topics
def hot_topic_not_exists(db, url_list: list) -> list:
"""
url如果在数据库中已经存在则去除掉
:param db:
:param url_list:
:return:
"""
hot_topics = db.query(THotTopic).filter(THotTopic.url.in_(url_list)).all()
for hot_topic in hot_topics:
url_list.remove(hot_topic.url)
return url_list
def get_hot_topic_by_id(db, hot_topic_id: int):
return db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first()
def get_hot_topics(db, skip: int = 0, limit: int = 100):
return db.query(THotTopic).offset(skip).limit(limit).all()
# 根据THotTopic.update_time排序获取最新的THotTopic
def get_latest_hot_topic(db):
return db.query(THotTopic).order_by(THotTopic.update_time.desc()).first()
def update_hot_topic(db, hot_topic: THotTopic):
db.merge(hot_topic)
db.commit()
db.refresh(hot_topic)
return hot_topic
# def update_hot_topic(db, hot_topic_id: int, updates: dict):
# db.query(THotTopic).filter(THotTopic.id == hot_topic_id).update(updates)
# db.commit()
# return db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first()
def delete_hot_topic(db, hot_topic_id: int):
hot_topic = db.query(THotTopic).filter(THotTopic.id == hot_topic_id).first()
if hot_topic:
db.delete(hot_topic)
db.commit()
return hot_topic

View File

@ -0,0 +1,34 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from sqlalchemy import Column, String, Integer, TIMESTAMP, func
from sqlalchemy.dialects.postgresql import BIGINT
from database.database import Base
@dataclass
class THotTopic(Base):
__tablename__ = 't_hot_topic'
id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='序号')
topic: str = Column(String, nullable=False, comment='话题')
topic_description: Optional[str] = Column(String, nullable=True, comment='话题描述')
url: Optional[str] = Column(String, nullable=True, comment='话题链接')
source: Optional[str] = Column(String, nullable=True, comment='话题来源')
keywords: Optional[str] = Column(String, nullable=True, comment='话题关键词')
content_count: int = Column(Integer, default=0, nullable=False, comment='话题内容数量')
comment_count: int = Column(Integer, default=0, nullable=False, comment='话题评论数量')
follower_count: int = Column(Integer, default=0, nullable=False, comment='话题关注者数量')
date_created: Optional[datetime] = Column(TIMESTAMP(timezone=True), nullable=True, comment='话题创建时间')
date_modified: Optional[datetime] = Column(TIMESTAMP(timezone=True), nullable=True, comment='话题修改时间')
top_content_url: Optional[str] = Column(String, nullable=True, comment='热内内容链接')
top_content_upvote_count: Optional[int] = Column(BIGINT, nullable=True, comment='热门内容点赞数量')
top_content_comment_count: Optional[int] = Column(Integer, nullable=True, comment='热门内容评论数量')
create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
update_time: Optional[datetime] = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='更新时间')
ai_script: Optional[str] = Column(String, nullable=True, comment='内容脚本')
def __repr__(self):
return f"<THotTopic(topic={self.topic}, url={self.url}, id={self.id}, source={self.source}, content_count={self.content_count})>"

View File

View File

@ -0,0 +1,31 @@
from database.tinformationsource.model import TInformationSource
def create_information_source(db, information_source: TInformationSource):
db.add(information_source)
db.commit()
db.refresh(information_source)
return information_source
def get_information_source_by_id(db, information_source_id: int):
return db.query(TInformationSource).filter(TInformationSource.id == information_source_id).first()
def get_active_information_sources(db) -> list:
return db.query(TInformationSource).filter(TInformationSource.active == True).all()
def update_information_source(db, information_source_id: int, updates: dict):
update_information = db.query(TInformationSource).filter(TInformationSource.id == information_source_id).first()
if update_information:
for key, value in updates.items():
setattr(update_information, key, value)
db.commit()
db.refresh(update_information)
return update_information
def delete_update_information(db, information_source_id: int):
update_information = db.query(TInformationSource).filter(TInformationSource.id == information_source_id).first()
if update_information:
db.delete(update_information)
db.commit()
return update_information

View File

@ -0,0 +1,34 @@
from dataclasses import dataclass
from sqlalchemy import Column, String, Boolean, TIMESTAMP, func, INT
from sqlalchemy.dialects.postgresql import BIGINT
from database.database import Base
@dataclass
class TInformationSource(Base):
__tablename__ = 't_information_source'
id: int = Column(BIGINT, primary_key=True, autoincrement=True, comment='编号')
title: str = Column(String, nullable=False, comment='标题')
description: str = Column(String, nullable=True, comment='描述')
keywords: str = Column(String, nullable=True, comment='关键字')
url: str = Column(String, nullable=True, comment='网站链接')
rss: str = Column(String, nullable=True, comment='RSS链接')
api: str = Column(String, nullable=True, comment='API')
primary_category: str = Column(String, nullable=True, comment='一级类别')
secondary_category: str = Column(String, nullable=True, comment='二级类别')
tertiary_category: str = Column(String, nullable=True, comment='三级类别')
label: str = Column(String, nullable=True, comment='标签')
lang: str = Column(String, nullable=False, default='zh', comment='语言')
priority: int = Column(INT, nullable=False, default=100, comment='优先级')
active: bool = Column(Boolean, default=False, nullable=False, comment='是否启用false未启用true启用')
module: str = Column(String, nullable=True, comment='任务逻辑所在模块名称')
method: str = Column(String, nullable=True, comment='任务逻辑的函数名称')
create_time: str = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
update_time: str = Column(TIMESTAMP(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False, comment='更新时间')
is_static: bool = Column(Boolean, default=True, nullable=False, comment='是否是静态网站false动态true静态')
def __repr__(self):
return f"<TInformationSource(id={self.id}, title={self.title}, category={self.category}, active={self.active})>"

View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

87
database/tnews/crud.py Normal file
View File

@ -0,0 +1,87 @@
from database.tnews.model import TNews
def create_news(db, news: TNews):
db.add(news)
db.commit()
db.refresh(news)
return news
# 插入数据库之前判断数据库中是否已经存在根据news.url 判断
def create_news_if_url_not_exists(db, news: TNews):
# 检查是否已经存在具有相同 URL 的记录
existing_news = db.query(TNews).filter(TNews.url == news.url).first()
if existing_news:
# 如果记录已存在,直接返回已有的记录
return existing_news
# 如果记录不存在,插入新的记录
db.add(news)
db.commit()
db.refresh(news)
return news
def create_news_list_if_url_not_exists(db, news_list: list[TNews]):
inserted_news = [] # 用于保存实际插入的新闻记录
for news in news_list:
# 检查是否已经存在具有相同 URL 的记录
existing_news = db.query(TNews).filter(TNews.url == news.url).first()
if not existing_news:
# 如果记录不存在,插入新的记录
db.add(news)
inserted_news.append(news)
# 批量提交所有插入的记录
db.commit()
# 刷新所有新插入的记录
for news in inserted_news:
db.refresh(news)
return inserted_news
def get_news_by_id(db, news_id: int):
return db.query(TNews).filter(TNews.id == news_id).first()
def get_news_need_content(db):
return db.query(TNews).filter(TNews.content == None).all()
def get_news_need_summary(db):
return db.query(TNews).filter(TNews.ai_summary == None).all()
def get_news_for_generate_reference_message(db, news_type: str) -> list[TNews]:
return db.query(TNews).filter(
TNews.type == news_type,
TNews.ai_summary != None,
TNews.is_usage == False
).order_by(TNews.occurrence_date.desc()).all()
def update_news_by_id(db, news: TNews):
db.merge(news)
db.commit()
def update_news(db, news_id: int, updates: dict):
news = db.query(TNews).filter(TNews.id == news_id).first()
if news:
for key, value in updates.items():
setattr(news, key, value)
db.commit()
db.refresh(news)
return news
def delete_news(db, news_id: int):
news = db.query(TNews).filter(TNews.id == news_id).first()
if news:
db.delete(news)
db.commit()
return news

25
database/tnews/model.py Normal file
View File

@ -0,0 +1,25 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from sqlalchemy import Column, String, Boolean, DateTime, BigInteger, text, INT
from database.database import Base
@dataclass
class TNews(Base):
__tablename__ = 't_news'
id: int = Column(BigInteger, primary_key=True, autoincrement=True, comment='编号')
title: Optional[str] = Column(String, nullable=True, comment='标题')
summary: Optional[str] = Column(String, nullable=True, comment='摘要')
url: Optional[str] = Column(String, nullable=True, comment='链接')
content: Optional[str] = Column(String, nullable=True, comment='内容/正文')
occurrence_date: Optional[datetime] = Column(DateTime(timezone=True), nullable=True, comment='发布日期')
source: Optional[str] = Column(String, nullable=True, comment='来源')
primary_category: str = Column(String, nullable=True, comment='一级类别')
secondary_category: str = Column(String, nullable=True, comment='二级类别')
tertiary_category: str = Column(String, nullable=True, comment='三级类别')
label: str = Column(String, nullable=True, comment='标签')
lang: str = Column(String, nullable=False, default='zh', comment='语言')
is_usage: bool = Column(Boolean, nullable=False, default=False, server_default=text('false'), comment='是否已用')
create_time: datetime = Column(DateTime(timezone=True), nullable=False, server_default=text('now()'), comment='创建日期')

View File

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,35 @@
from database.tscheduler.model import TScheduler
def create_task(db, task: TScheduler):
db.add(task)
db.commit()
db.refresh(task)
return task
def get_task_by_id(db, task_id: int):
return db.query(TScheduler).filter(TScheduler.id == task_id).first()
def get_active_tasks(db):
return db.query(TScheduler).filter(TScheduler.active == True).all()
def get_tasks_by_executor(db, executor: str):
return db.query(TScheduler).filter(
TScheduler.executor == executor,
TScheduler.active == True
).all()
def update_task(db, task_id: int, updates: dict):
task = db.query(TScheduler).filter(TScheduler.id == task_id).first()
if task:
for key, value in updates.items():
setattr(task, key, value)
db.commit()
db.refresh(task)
return task
def delete_task(db, task_id: int):
task = db.query(TScheduler).filter(TScheduler.id == task_id).first()
if task:
db.delete(task)
db.commit()
return task

View File

@ -0,0 +1,26 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from sqlalchemy import Column, Integer, String, Boolean, Text, DateTime
from database.database import Base
@dataclass
class TScheduler(Base):
__tablename__ = 't_scheduler'
id: int = Column(Integer, primary_key=True, autoincrement=True, comment='自动递增的唯一任务ID')
task_name: str = Column(String(64), nullable=False, comment='任务名称')
trigger: str = Column(String(10), nullable=False, comment='调度方式interval、cron、date')
interval_seconds: Optional[int] = Column(Integer, nullable=True, comment='固定时间间隔(秒),用于 interval 类型')
cron_expression: Optional[str] = Column(String(255), nullable=True, comment='CRON 表达式,用于 cron 类型')
execution_date: Optional[datetime] = Column(DateTime, nullable=True, comment='执行时间,用于 date 类型')
task_payload: Optional[str] = Column(Text, nullable=True, comment='任务相关的参数或数据')
active: Optional[bool] = Column(Boolean, default=False, nullable=True, comment='任务状态,是否启用')
executor: Optional[str] = Column(String(32), nullable=True, comment='任务执行者')
handler: Optional[str] = Column(String(32), nullable=True, comment='任务执行程序')
last_run: Optional[datetime] = Column(DateTime, nullable=True, comment='上一次执行时间')
next_run: Optional[datetime] = Column(DateTime, nullable=True, comment='下一次执行时间')
create_time: datetime = Column(DateTime, default=datetime.utcnow, nullable=True, comment='创建时间')
update_time: datetime = Column(DateTime, default=datetime.utcnow, nullable=True, comment='更新时间')
module_path: Optional[str] = Column(String(255), nullable=True, comment='任务逻辑所在模块名称')
function_name: Optional[str] = Column(String(256), nullable=True, comment='任务逻辑的函数名称')

View File

View File

@ -0,0 +1,47 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from sqlalchemy import Column, String, TIMESTAMP, func
from database.database import Base, get_session
from utils import utils
@dataclass
class VideoScript(Base):
__tablename__ = 't_video_script'
id: str = Column(String, primary_key=True, comment='唯一标识')
title: str = Column(String, nullable=False, comment='标题')
description: Optional[str] = Column(String, nullable=True, comment='描述')
keywords: Optional[str] = Column(String, nullable=True, comment='话题关键词')
url: str = Column(String, nullable=False, comment='话题链接')
script: str = Column(String, nullable=True, comment='视频脚本')
content: str = Column(String, nullable=True, comment='话题内容')
create_time: datetime = Column(TIMESTAMP(timezone=True), server_default=func.now(), nullable=False, comment='创建时间')
def __repr__(self):
return f"<THotTopic(topic={self.topic}, url={self.url}, id={self.id}, description={self.description}, keywords={self.keywords})>"
def create_video_script(video_script: VideoScript):
if video_script.id is None:
video_script.id = utils.get_md5(video_script.url)
with get_session() as db:
db.add(video_script)
db.commit()
db.refresh(video_script)
return video_script
def video_script_not_exists(url_list: list):
"""
url_list如果在数据库中已经存在则去除掉
:param url_list:
:return:
"""
with get_session() as db:
video_scripts = db.query(VideoScript).filter(VideoScript.url.in_(url_list)).all()
for video_script in video_scripts:
url_list.remove(video_script.url)
return url_list