import datetime

from DrissionPage.errors import ElementNotFoundError

from database.tinformationsource.model import TInformationSource
from database.tnews.model import TNews
from log.log_manager import logger
from seek.seek_base import SeekBase
from utils.time_utils import process_time


class House(SeekBase):
    """Seeker that extracts news items from a ``.sf_listPage`` listing page.

    Each ``li`` element under the ``.sf_listPage`` container is converted
    into one ``TNews`` record.
    """

    def __init__(self, information_source: TInformationSource):
        """Initialise the seeker for the given information source."""
        super().__init__(information_source)

    def get_news(self) -> list:
        """Collect the news entries found on the current listing page.

        Returns:
            A list of ``TNews`` objects, one per ``li`` item that could be
            parsed completely. Items that fail to parse are logged and
            skipped.
        """
        news_result = []
        # NOTE(review): this container lookup is outside the per-item error
        # handling, so a failure here propagates to the caller - confirm that
        # is the intended contract before changing it.
        _news_list = self.session.s_ele('.sf_listPage').s_eles('tag:li')
        for _news in _news_list:
            try:
                # Look the anchor up once; it supplies both title and URL.
                anchor = _news.s_ele('tag:a')
                rs_news = TNews()
                rs_news.title = anchor.text
                rs_news.url = anchor.link
                rs_news.summary = _news.s_ele('tag:p').text
                rs_news.occurrence_date = process_time(_news.s_ele('.tag').text)
                rs_news.source = self.information_source.title
                # Only fully populated records are kept.
                news_result.append(rs_news)
            except ElementNotFoundError as e:
                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
            except Exception as e:
                # Best effort: one malformed item must not abort the whole page.
                logger.error(f'Unexpected error occurred: {e}')
        return news_result


def get_news(information_source: TInformationSource) -> list:
    """Build a ``House`` seeker, fetch its news list and release the seeker.

    Args:
        information_source: Source description handed to ``House``.

    Returns:
        The list produced by ``House.get_news``.
    """
    instance = House(information_source)
    try:
        return instance.get_news()
    finally:
        # Always release the seeker, even when scraping raises.
        instance.finish()


def news_task(information_source: TInformationSource):
    """Run the seek task for ``information_source`` with start/end logging.

    Args:
        information_source: Source description handed to ``House``.
    """
    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
    instance = House(information_source)
    try:
        # do_seek_task is not defined in this file; presumably inherited
        # from SeekBase.
        instance.do_seek_task()
    finally:
        # Always release the seeker, even when the task raises.
        instance.finish()
    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')


if __name__ == '__main__':
    # Manual smoke run against a sample source.
    logger.info('This module is not for direct call!')
    information_source_ = TInformationSource()
    information_source_.is_static = True
    information_source_.url = 'https://sz.leju.com/news/'
    information_source_.title = '房产_新浪乐居'
    # Alternative entry point: news_task(information_source_)
    news_list_ = get_news(information_source_)
    for news in news_list_:
        print(news)
    logger.info('Done.')