import peter

This commit is contained in:
konjacpotato
2025-11-12 20:42:16 +08:00
commit 8c1a740f0b
147 changed files with 2763 additions and 0 deletions

57
seek/seek_base.py Normal file
View File

@ -0,0 +1,57 @@
from abc import ABC, abstractmethod
from DrissionPage import Chromium, SessionPage, ChromiumOptions
from database.database import get_session
from database.tinformationsource.model import TInformationSource
from database.tnews.crud import create_news_list_if_url_not_exists
from log.log_manager import log
class SeekBase(ABC):
    """Base class for news seekers bound to a single information source.

    On construction the source URL is opened either with a ``SessionPage``
    (when ``information_source.is_static`` is truthy) or in a new tab of a
    ``Chromium`` browser. Subclasses implement :meth:`get_news`;
    :meth:`do_seek_task` fills missing fields from the source and stores the
    items; :meth:`finish` closes the page handles.
    """

    # News attributes that fall back to the information source's value
    # when the news item leaves them as None.
    _INHERITED_FIELDS = (
        'primary_category',
        'secondary_category',
        'tertiary_category',
        'label',
        'lang',
    )

    def __init__(self, information_source: "TInformationSource"):
        """Open ``information_source.url`` with the matching page type.

        Args:
            information_source: Source record; this class reads its
                ``is_static``, ``url``, ``title`` and the attributes listed
                in ``_INHERITED_FIELDS``.

        Raises:
            Exception: Whatever the page objects raise while opening the
                URL; handles opened so far are closed before re-raising.
        """
        self.information_source = information_source
        # All handles start as None so finish() can tell which were opened.
        self.session = None
        self.browser = None
        self.tab = None
        try:
            if information_source.is_static:
                self.session = SessionPage()
                self.session.get(information_source.url)
            else:
                self.browser = Chromium()
                self.tab = self.browser.new_tab()
                self.tab.get(information_source.url)
        except Exception:
            # The caller never receives the instance when construction
            # fails, so release whatever was opened before propagating.
            self.finish()
            raise

    @abstractmethod
    def get_news(self):
        """Fetch news from this source.

        Implementations return an iterable of news objects exposing the
        attributes listed in ``_INHERITED_FIELDS``.
        """

    def do_seek_task(self):
        """Fetch news, apply source defaults, and store the new items.

        Any attribute in ``_INHERITED_FIELDS`` that is ``None`` on a news
        item is copied from the information source. The list is then passed
        to ``create_news_list_if_url_not_exists``.

        Returns:
            The value returned by ``create_news_list_if_url_not_exists``.
        """
        fetched = self.get_news()
        # Materialise once: get_news() may return None or a one-shot
        # iterator, and the items are needed again after the loop below.
        news_list = [] if fetched is None else list(fetched)

        source = self.information_source
        for news in news_list:
            for field in self._INHERITED_FIELDS:
                if getattr(news, field) is None:
                    setattr(news, field, getattr(source, field))

        with get_session() as db:
            inserted_news = create_news_list_if_url_not_exists(db, news_list)
            log(f'Inserted {len(inserted_news)} {self.information_source.title} news items into the database.')
        return inserted_news

    def finish(self):
        """Close the tab and the session opened by this seeker.

        Safe to call more than once: each handle is cleared before it is
        closed, so a repeated call does nothing. The session is closed even
        if closing the tab raises.
        """
        tab, self.tab = self.tab, None
        session, self.session = self.session, None
        # NOTE(review): the browser is left running here (its quit() call was
        # disabled in the original code); presumably it is shared between
        # seekers - confirm before re-enabling.
        try:
            if tab is not None:
                tab.close()
        finally:
            if session is not None:
                session.close()