57 lines
2.3 KiB
Python
57 lines
2.3 KiB
Python
from abc import ABC, abstractmethod
|
|
|
|
from DrissionPage import Chromium, SessionPage, ChromiumOptions
|
|
|
|
from database.database import get_session
|
|
from database.tinformationsource.model import TInformationSource
|
|
from database.tnews.crud import create_news_list_if_url_not_exists
|
|
from log.log_manager import log
|
|
|
|
|
|
class SeekBase(ABC):
    """Base class for a news seeker bound to a single information source.

    Constructing an instance immediately loads ``information_source.url``:
    with a ``SessionPage`` when ``information_source.is_static`` is truthy,
    otherwise in a new tab of a ``Chromium`` browser.

    Subclasses implement :meth:`get_news`. :meth:`do_seek_task` fills unset
    news fields from the information source and stores the items, and
    :meth:`finish` closes the tab/session opened by the constructor.
    """

    # News attributes that, when None, are copied from the attribute of the
    # same name on the information source.
    _INHERITED_FIELDS = (
        "primary_category",
        "secondary_category",
        "tertiary_category",
        "label",
        "lang",
    )

    def __init__(self, information_source: "TInformationSource"):
        """Open the page for *information_source*.

        Args:
            information_source: Source record; ``is_static`` selects the
                fetch mode and ``url`` is the page that gets loaded.
        """
        self.information_source = information_source
        self.session = None  # SessionPage, set only for static sources
        self.browser = None  # Chromium, set only for non-static sources
        self.tab = None  # browser tab, set only for non-static sources

        if information_source.is_static:
            self.session = SessionPage()
            self.session.get(information_source.url)
        else:
            self.browser = Chromium()
            self.tab = self.browser.new_tab()
            self.tab.get(information_source.url)

    @abstractmethod
    def get_news(self):
        """Fetch news from a specific source.

        Implementations return an iterable of news objects;
        :meth:`do_seek_task` reads and may set the category, label and lang
        attributes on each of them.
        """

    def do_seek_task(self):
        """Save the fetched news to the database if the URL does not already exist.

        Every news attribute listed in ``_INHERITED_FIELDS`` that is ``None``
        is replaced by the information source's value before saving.

        Returns:
            Whatever ``create_news_list_if_url_not_exists`` returns for the
            fetched items (its length is logged as the inserted count).
        """
        # Materialise once: get_news() may return None (treated as "no news")
        # or a one-shot iterable, and the items are traversed twice below.
        news_list = list(self.get_news() or ())

        source = self.information_source
        for news in news_list:
            for field in self._INHERITED_FIELDS:
                if getattr(news, field) is None:
                    setattr(news, field, getattr(source, field))

        with get_session() as db:
            inserted_news = create_news_list_if_url_not_exists(db, news_list)
            log(f'Inserted {len(inserted_news)} {self.information_source.title} news items into the database.')
            return inserted_news

    def finish(self):
        """Close the tab and the session; calling it again is a no-op."""
        # Detach the handles first so a repeated call never closes the same
        # object twice.
        tab, session = self.tab, self.session
        self.tab = None
        self.session = None

        try:
            if tab is not None:
                tab.close()
            # NOTE(review): the browser itself is not quit here -- the
            # `self.browser.quit()` call was disabled in the original code,
            # presumably because the browser is reused; confirm before
            # re-enabling.
        finally:
            if session is not None:
                session.close()