# Files
# peter/task/hot_topic/zhihu.py
# konjacpotato 8c1a740f0b import peter
# 2025-11-12 20:42:16 +08:00
#
# 36 lines
# 1.4 KiB
# Python

from database.tvideoscript.video_script import video_script_not_exists, VideoScript, create_video_script
from seek.zhihu_com.zhihu_hot import ZhihuHot
from task.manager_task import execute_task
def spider_task(limit: int = 10) -> None:
    """Crawl the Zhihu hot list and store each new topic as a video script.

    The task fetches the hot-topic URLs, passes them through
    ``video_script_not_exists`` to drop topics already in the database,
    keeps the first ``limit`` of the remainder, then fetches each topic's
    content and persists it with ``create_video_script``.

    Args:
        limit: Maximum number of topics to process in one run. Defaults to
            10, the value that used to be hard-coded in the slice.
    """
    zhihu_hot = ZhihuHot()
    # 1. Fetch the hot-list topic URLs.
    hot_topic_url_list = zhihu_hot.get_topic_url_list()
    # 2. Filter out the topics that already exist in the database.
    hot_topic_url_list = video_script_not_exists(hot_topic_url_list)
    # 3. Keep only the first `limit` topics.
    hot_topic_url_list = hot_topic_url_list[:limit]
    # 4. Fetch the content of each remaining topic in turn.
    for hot_topic_url in hot_topic_url_list:
        print(hot_topic_url)
        content = zhihu_hot.get_content(hot_topic_url)
        print(content)
        # Skip the topic when nothing usable came back. Checking `content`
        # itself first means a None/empty result from get_content() skips
        # this one topic instead of raising and aborting the whole run.
        if not content or not content['contents']:
            print(f'skip {hot_topic_url}, no fitch content')
            continue
        # 5. Save the content to the database.
        video_script = VideoScript(
            title=content['title'],
            keywords=content['keywords'],
            description=content['topic_description'],
            content=content['contents'],
            url=content['url'],
        )
        create_video_script(video_script)
# Script entry point: when run directly, pass the spider to the task
# manager's execute_task() rather than calling it here.
if __name__ == "__main__":
    execute_task(spider_task)