From 491061acc7548e0ef2fa5093616d717482b33192 Mon Sep 17 00:00:00 2001
From: konjacpotato
Date: Fri, 27 Feb 2026 10:29:29 +0800
Subject: [PATCH] =?UTF-8?q?add=20=E6=95=B0=E6=8D=AE=E5=BA=93=E8=B6=85?=
 =?UTF-8?q?=E6=97=B6=E9=87=8D=E8=AF=95=E6=9C=BA=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 seek/douban_com/douban_group_seek.py | 48 +++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/seek/douban_com/douban_group_seek.py b/seek/douban_com/douban_group_seek.py
index 41c67c3..01bcc56 100644
--- a/seek/douban_com/douban_group_seek.py
+++ b/seek/douban_com/douban_group_seek.py
@@ -1,6 +1,7 @@
 import json
 import time
 from DrissionPage import Chromium, ChromiumOptions
+from psycopg import OperationalError
 from config.database import SessionLocal
 from models.source_content import SourceContent
 from utils import logger
@@ -12,6 +13,50 @@ class DoubanGroupSeek:
         self.browser = Chromium(addr_or_opts=co)
         self.group_id = group_id
 
+    def fetch_with_retry(self, session, model, link, max_retries=3, base_delay=1):
+        """
+        Check whether a row with the given link exists, retrying transient DB errors.
+
+        Runs ``session.query(model.id).filter(model.link == link).first()``. When the
+        failure is an ``OperationalError`` the session is rolled back and the query
+        is retried with exponential backoff (``base_delay``, then doubled each time).
+
+        :param session: SQLAlchemy session used to run the query
+        :param model: model class; must expose ``id`` and ``link`` attributes
+        :param link: link value to look up
+        :param max_retries: maximum number of attempts; values below 1 count as 1
+        :param base_delay: seconds to wait before the second attempt
+        :return: first matching row, or None when no row matches
+        :raises Exception: the last error once all attempts fail; any other
+            (non-operational) error is re-raised immediately without retrying
+        """
+        # Always make at least one attempt so a bad max_retries cannot be
+        # mistaken for "link not found".
+        attempts = max(1, max_retries)
+        for attempt in range(1, attempts + 1):
+            try:
+                return session.query(model.id).filter(model.link == link).first()
+            except Exception as e:
+                # SQLAlchemy wraps driver errors in its own exception classes and
+                # keeps the driver-level exception on ``.orig``; check both so the
+                # psycopg OperationalError is recognised either way.
+                transient = isinstance(e, OperationalError) or isinstance(
+                    getattr(e, "orig", None), OperationalError
+                )
+                if not transient:
+                    logger.error(f"数据库错误: {e}")
+                    # Roll back so the failed transaction does not block later queries.
+                    session.rollback()
+                    raise
+                logger.error(f"数据库查询失败 (尝试 {attempt}/{attempts}): {e}")
+                session.rollback()
+                if attempt == attempts:
+                    logger.info(f"已达到最大重试次数,放弃查询链接: {link}")
+                    raise
+                # Exponential backoff: base_delay, 2*base_delay, 4*base_delay, ...
+                time.sleep(base_delay * (2 ** (attempt - 1)))
+        return None  # unreachable: the loop either returns or raises
+
     def seek(self):
         db = SessionLocal()
 
@@ -42,7 +87,8 @@ class DoubanGroupSeek:
         results = []
         for topic_title, topic_url, update_time in topics:
             # 检索数据库,根据topic_url查询是否已存在
-            existing_content = db.query(SourceContent.id).filter(SourceContent.link == topic_url).first()
+            # existing_content = db.query(SourceContent.id).filter(SourceContent.link == topic_url).first()
+            existing_content = self.fetch_with_retry(db, SourceContent, topic_url)
             if existing_content:
                 # logger.info(f"Topic already exists in database, skipping: {topic_title}:{topic_url}")
                 continue