import importlib

from database.database import get_session
from database.tinformationsource.curd import get_active_information_sources
from database.tnews.crud import get_news_need_content
from log.log_manager import logger
from task.manager_task import execute_task


def content_spider_task():
    """Run the per-source ``content_task`` for every news item needing content.

    For each news item returned by ``get_news_need_content`` the active
    information sources are scanned; every source whose ``title`` equals the
    news item's ``source`` is used to locate a sibling ``content`` module
    (the last dotted component of ``information_source.module`` is replaced
    by ``content``), and that module's ``content_task`` is called with the
    news item.

    Failures for a single source -- whether raised while importing the
    module, resolving ``content_task``, or running it -- are logged and do
    not stop the remaining news items from being processed.
    """
    with get_session() as db:
        # 1. Load the information sources.
        information_sources = get_active_information_sources(db)
        # 2. Load the news items that still need their content.
        news_list = get_news_need_content(db)
        # 3. Walk the news items and dispatch each to its matching source(s).
        for news in news_list:
            for information_source in information_sources:
                if information_source.title != news.source:
                    continue
                # NOTE(review): ``method`` is only checked for None here; it
                # is not otherwise used by this function.
                if information_source.module is None or information_source.method is None:
                    logger.error(f"{information_source.title} module or method is None")
                    continue

                news.is_static = information_source.is_static

                # Dynamically import the handler: swap the last component of
                # the configured module path for ``content``.
                module_path = information_source.module.rsplit('.', 1)[0] + '.content'
                try:
                    # The import and attribute lookup live inside the try so
                    # that one missing or broken content module cannot abort
                    # the whole batch.
                    module = importlib.import_module(module_path)
                    task_function = getattr(module, 'content_task')
                    task_function(news)
                except Exception as e:
                    logger.error(f"{information_source.title} task error: {e}")


if __name__ == '__main__':
    execute_task(content_spider_task)