39 lines
1.6 KiB
Python
39 lines
1.6 KiB
Python
import importlib
|
|
|
|
from database.database import get_session
|
|
from database.tinformationsource.curd import get_active_information_sources
|
|
from database.tnews.crud import get_news_need_content
|
|
from log.log_manager import logger
|
|
from task.manager_task import execute_task
|
|
|
|
|
|
def content_spider_task():
    """Run the content-fetching task for every news item that needs it.

    Loads the sources returned by ``get_active_information_sources`` and
    the news items returned by ``get_news_need_content``.  For each news
    item whose ``source`` equals a source's ``title``, the ``content``
    module that sits next to the source's configured module is imported
    and its ``content_task`` function is called with the news item.

    Any failure while importing the module, resolving ``content_task`` or
    running it is logged, and processing continues with the remaining
    items, so a single misconfigured source cannot abort the whole run.
    """
    with get_session() as db:
        # 1. Load the information sources.
        information_sources = get_active_information_sources(db)
        # 2. Load the news items whose content still has to be fetched.
        news_list = get_news_need_content(db)
        # 3. Dispatch each news item to the task of its matching source.
        for news in news_list:
            for information_source in information_sources:
                if information_source.title != news.source:
                    continue
                if information_source.module is None or information_source.method is None:
                    logger.error(f"{information_source.title} module or method is None")
                    continue
                news.is_static = information_source.is_static
                try:
                    # Dynamic import: replace the last component of the
                    # configured module path with "content".  This lives
                    # inside the try block so that an unimportable module
                    # or a missing ``content_task`` is logged like any
                    # other task failure instead of ending the loop.
                    module_path = information_source.module.rsplit('.', 1)[0] + '.content'
                    module = importlib.import_module(module_path)
                    task_function = getattr(module, 'content_task')
                    task_function(news)
                except Exception as e:
                    logger.error(f"{information_source.title} task error: {e}")
|
|
|
|
|
|
|
|
# Script entry point: hand the spider task to the task manager when this
# file is executed directly rather than imported.
if __name__ == '__main__':
    execute_task(content_spider_task)
|