Files
peter/seek/cnn_com/edition.py
konjacpotato 8c1a740f0b import peter
2025-11-12 20:42:16 +08:00

63 lines
2.4 KiB
Python

import datetime
from DrissionPage.errors import ElementNotFoundError
from database.tinformationsource.model import TInformationSource
from database.tnews.model import TNews
from log.log_manager import logger
from seek.seek_base import SeekBase
class Edition(SeekBase):
    """Seeker that harvests headline links from the CNN Edition front page.

    Every anchor found inside the selected front-page zone becomes one
    ``TNews`` entry carrying the anchor text as title, the anchor link as
    URL, and the information source's title as source.
    """

    def __init__(self, information_source: TInformationSource):
        """Initialise the seeker by delegating to ``SeekBase``.

        Args:
            information_source: Source record describing the site to scrape.
        """
        super().__init__(information_source)

    def get_news(self) -> list:
        """Collect one ``TNews`` per anchor in the front-page zone.

        Returns:
            A list of ``TNews`` objects. The list is empty when the zone
            lookup raises ``ElementNotFoundError``; anchors whose
            extraction fails are logged and skipped.
        """
        try:
            # Looked up inside the try so that a missing zone is logged
            # rather than escaping to the caller as an unhandled exception.
            anchors = self.tab.s_ele('.zone zone--t-light zone-2-observer').s_eles('tag:a')
        except ElementNotFoundError as e:
            logger.error(f"ElementNotFoundError: {e} - Failed to find the news zone on the page.")
            return []

        news_result = []
        for anchor in anchors:
            try:
                rs_news = TNews()
                rs_news.title = anchor.text
                rs_news.url = anchor.link
                # NOTE: summary and occurrence_date are not populated here;
                # only title, url and source are taken from the anchor.
                rs_news.source = self.information_source.title
                news_result.append(rs_news)
            except ElementNotFoundError as e:
                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
            except Exception as e:
                # Best-effort per item: one bad anchor must not abort the rest.
                logger.error(f'Unexpected error occurred: {e}')
        return news_result
def get_news(information_source: TInformationSource) -> list:
    """Scrape the given source once and return the collected news entries.

    Args:
        information_source: Source record handed to the ``Edition`` seeker.

    Returns:
        The list produced by ``Edition.get_news()``.
    """
    instance = Edition(information_source)
    try:
        return instance.get_news()
    finally:
        # Always run finish(), so its cleanup is not skipped when
        # scraping raises.
        instance.finish()
def news_task(information_source: TInformationSource):
    """Run the full seek task for the given source, logging start and end.

    Args:
        information_source: Source record handed to the ``Edition`` seeker.

    The end message is only logged when ``do_seek_task()`` completes
    without raising; ``finish()`` is called in either case.
    """
    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
    instance = Edition(information_source)
    try:
        instance.do_seek_task()
    finally:
        # Always run finish(), so its cleanup is not skipped when the
        # seek task raises.
        instance.finish()
    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
if __name__ == '__main__':
    # Manual smoke run: build an ad-hoc source record for the CNN Edition
    # front page, scrape it once, and print every collected entry.
    logger.info('This module is not for direct call!')

    demo_source = TInformationSource()
    demo_source.is_static = False
    demo_source.url = 'https://edition.cnn.com/'
    demo_source.title = 'edition_CNN'

    # Calls get_news() directly; news_task() is the alternative entry point.
    for item in get_news(demo_source):
        print(item)

    logger.info('Done.')