import datetime

from DrissionPage.errors import ElementNotFoundError

from database.tinformationsource.model import TInformationSource
from database.tnews.model import TNews
from log.log_manager import logger
from seek.content_base import ContentBase


class ArticleContent(ContentBase):
    """Extract the body text and the timestamp of a single news article page.

    Page access goes through ``self.session``, which is not assigned in this
    class (presumably ContentBase sets it up -- confirm there).
    """

    def __init__(self, news: TNews):
        """Forward *news* unchanged to the ContentBase initializer."""
        super().__init__(news)

    def get_content(self) -> str:
        """Return the text of the ``#detailContent`` element.

        Returns:
            The element text, or the literal ``'not found element'`` when the
            lookup raises ElementNotFoundError.
        """
        try:
            return self.session.s_ele('#detailContent').text
        except ElementNotFoundError:
            return 'not found element'

    def get_occurrence_date(self):
        """Build the article timestamp from the page's header-time element.

        The result is also echoed to stdout; the module-level ``get_content``
        helper discards the return value, so that echo is the only visible
        output of the call there.

        Returns:
            A string formatted as ``'<year>/<day> <time>'`` (for example
            ``'2023/12/27 08:05:11'``), or ``None`` when any of the required
            elements raises ElementNotFoundError.
        """
        try:
            header_time = self.session.s_ele('.header-time left')
            year = header_time.s_ele('.year').text  # e.g. 2023
            month_day = header_time.s_ele('.day').text  # e.g. 12/27
            clock = header_time.s_ele('.time').text  # e.g. 08:05:11
        except ElementNotFoundError:
            return None

        occurrence_date_ = f'{year}/{month_day} {clock}'
        print(occurrence_date_)
        return occurrence_date_


def get_content(information_source: TInformationSource) -> str:
    """Fetch the body text of one article and release the extractor.

    Args:
        information_source: Object handed straight to ``ArticleContent``.
            NOTE(review): ``ArticleContent.__init__`` is annotated with
            ``TNews`` and the ``__main__`` block below passes a ``TNews``;
            confirm which type is actually intended here.

    Returns:
        Whatever ``ArticleContent.get_content`` returns: the article text, or
        ``'not found element'`` when the content element is missing.
    """
    article_content = ArticleContent(information_source)
    try:
        result = article_content.get_content()
        # Return value intentionally unused; the method prints the timestamp.
        article_content.get_occurrence_date()
    finally:
        # Run the cleanup hook even when extraction raises.
        article_content.finish()
    return result


def content_task(news: TNews):
    """Run the seek task for *news*, logging its start and end times.

    ``finish()`` is called even if ``do_seek_task()`` raises; the exception
    still propagates to the caller and the "end" log line is then skipped.
    """
    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}')
    article_content = ArticleContent(news)
    try:
        article_content.do_seek_task()
    finally:
        article_content.finish()
    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')


if __name__ == '__main__':
    # Manual smoke run against one hard-coded article URL.
    logger.info('This module is not for direct call!')
    news_ = TNews()
    news_.is_static = True
    news_.url = 'https://www.news.cn/politics/leaders/20241227/90e76f85ad4a43ba94802b07c5736e00/c.html'
    content = get_content(news_)
    logger.info(content)
    logger.info('Done.')