"""News seeker for a real-estate news listing page.

The demo entry point at the bottom of this module targets
https://sz.news.fang.com/.
"""
import datetime

from DrissionPage.errors import ElementNotFoundError

from database.tinformationsource.model import TInformationSource
from database.tnews.model import TNews
from log.log_manager import logger
from seek.seek_base import SeekBase
from utils.time_utils import process_time


class House(SeekBase):
    """Seeker that extracts news entries from the page's ``.news-list`` element."""

    def __init__(self, information_source: TInformationSource):
        """Initialise the seeker for the given information source."""
        super().__init__(information_source)

    def get_news(self) -> list:
        """Collect one ``TNews`` per parsable ``li`` under ``.news-list``.

        For each list item the ``.txt`` child supplies the title and URL
        (from its ``a`` tag), the summary (from its ``p`` tag) and the
        occurrence date (text of its second ``span``, passed through
        ``process_time``).  The source is taken from
        ``self.information_source.title``.

        Items that fail to parse are logged and skipped; they never abort
        the whole run.

        Returns:
            The list of ``TNews`` objects that were parsed successfully.
        """
        news_result = []
        list_items = self.session.s_ele('.news-list').s_eles('tag:li')
        for item in list_items:
            try:
                rs_news = TNews()
                text_box = item.s_ele('.txt')
                # Look the anchor up once; it provides both title and URL.
                anchor = text_box.s_ele('tag:a')
                rs_news.title = anchor.text
                rs_news.url = anchor.link
                rs_news.summary = text_box.s_ele('tag:p').text
                # The second <span> holds the time text handed to process_time.
                rs_news.occurrence_date = process_time(
                    text_box.s_eles('tag:span')[1].text
                )
                rs_news.source = self.information_source.title
                news_result.append(rs_news)
            except ElementNotFoundError as e:
                if item.s_ele('.item'):
                    # This is a video item; skip it.
                    continue
                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
            except Exception as e:
                # Best effort: one malformed entry must not lose the rest.
                logger.error(f'Unexpected error occurred: {e}')
        return news_result


def get_news(information_source: TInformationSource) -> list:
    """Build a ``House`` seeker, return its parsed news, and finish it.

    Args:
        information_source: Source descriptor handed to ``House``.

    Returns:
        The list produced by ``House.get_news``.
    """
    instance = House(information_source)
    try:
        return instance.get_news()
    finally:
        # finish() is presumably the SeekBase cleanup hook (TODO confirm);
        # run it even when scraping raises so nothing is left open.
        instance.finish()


def news_task(information_source: TInformationSource) -> None:
    """Run the full seek task for *information_source*, logging start and end.

    Args:
        information_source: Source descriptor handed to ``House``.
    """
    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
    instance = House(information_source)
    try:
        instance.do_seek_task()
    finally:
        # Always finish the seeker, including when do_seek_task() raises.
        instance.finish()
    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')


if __name__ == '__main__':
    # Manual smoke run against a hard-coded source.
    logger.info('This module is not for direct call!')
    information_source_ = TInformationSource()
    information_source_.is_static = True
    information_source_.url = 'https://sz.news.fang.com/'
    information_source_.title = '房产_房天下'
    news_task(information_source_)
    # Alternative: fetch and print the parsed items instead of running the task.
    # news_list_ = get_news(information_source_)
    # for news in news_list_:
    #     print(news)
    logger.info('Done.')