from abc import ABC, abstractmethod

from DrissionPage import Chromium, SessionPage, ChromiumOptions

from database.database import get_session
from database.tinformationsource.model import TInformationSource
from database.tnews.crud import create_news_list_if_url_not_exists
from log.log_manager import log


class SeekBase(ABC):
    """Base class for news seekers bound to one information source.

    On construction the source URL is opened either through a
    ``SessionPage`` (when ``information_source.is_static`` is truthy) or in a
    new tab of a ``Chromium`` browser object (otherwise). Subclasses
    implement :meth:`get_news`; callers run :meth:`do_seek_task` and then
    :meth:`finish` to release the page handle.
    """

    # News attributes that fall back to the information source's value when
    # the scraped item leaves them as ``None``.
    _INHERITED_FIELDS = (
        "primary_category",
        "secondary_category",
        "tertiary_category",
        "label",
        "lang",
    )

    def __init__(self, information_source: TInformationSource):
        """Open ``information_source.url`` with the matching page type.

        Args:
            information_source: Source record; ``is_static`` selects the
                access mode and ``url`` is the page that gets loaded.
        """
        self.information_source = information_source
        # All three handles start as None; only the ones needed for the
        # selected mode are populated below.
        self.session = None
        self.browser = None
        self.tab = None

        if information_source.is_static:
            self.session = SessionPage()
            self.session.get(information_source.url)
        else:
            self.browser = Chromium()
            self.tab = self.browser.new_tab()
            self.tab.get(information_source.url)

    @abstractmethod
    def get_news(self):
        """Fetch news from this specific source.

        Returns:
            An iterable of news objects exposing the attributes listed in
            ``_INHERITED_FIELDS``. It is passed unchanged to
            ``create_news_list_if_url_not_exists``, so it presumably needs to
            be a list of news model instances (confirm against the CRUD
            helper).
        """

    def do_seek_task(self):
        """Fetch news, fill in missing metadata, and store the result.

        Every attribute in ``_INHERITED_FIELDS`` that is ``None`` on a news
        item is replaced by the corresponding value of the information
        source. The completed list is then handed to
        ``create_news_list_if_url_not_exists`` inside a database session.

        Returns:
            Whatever ``create_news_list_if_url_not_exists`` returns; its
            length is logged as the number of inserted items.
        """
        source = self.information_source
        news_list = self.get_news()

        for news in news_list:
            for field in self._INHERITED_FIELDS:
                if getattr(news, field) is None:
                    setattr(news, field, getattr(source, field))

        with get_session() as db:
            inserted_news = create_news_list_if_url_not_exists(db, news_list)

        log(f'Inserted {len(inserted_news)} {self.information_source.title} news items into the database.')
        return inserted_news

    def finish(self):
        """Close the tab and the session, if they are open.

        Safe to call more than once: each handle is cleared after it has
        been closed. The session is closed even when closing the tab raises.
        """
        # NOTE: self.browser is intentionally left running; the original
        # code had its quit() call disabled.
        try:
            if self.tab:
                self.tab.close()
                self.tab = None
        finally:
            if self.session:
                self.session.close()
                self.session = None