import peter
This commit is contained in:
0
task/hot_topic/__init__.py
Normal file
0
task/hot_topic/__init__.py
Normal file
BIN
task/hot_topic/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
task/hot_topic/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
task/hot_topic/__pycache__/zhihu.cpython-312.pyc
Normal file
BIN
task/hot_topic/__pycache__/zhihu.cpython-312.pyc
Normal file
Binary file not shown.
36
task/hot_topic/zhihu.py
Normal file
36
task/hot_topic/zhihu.py
Normal file
@ -0,0 +1,36 @@
|
||||
from database.tvideoscript.video_script import video_script_not_exists, VideoScript, create_video_script
|
||||
from seek.zhihu_com.zhihu_hot import ZhihuHot
|
||||
from task.manager_task import execute_task
|
||||
|
||||
|
||||
def spider_task(max_topics: int = 10):
    """Crawl the Zhihu hot list and store new topics as video scripts.

    The task fetches the hot-list topic URLs, passes them through
    ``video_script_not_exists`` to drop topics already in the database,
    keeps the first ``max_topics`` of what remains, then fetches each
    topic's content and persists it with ``create_video_script``.
    Topics whose fetched ``contents`` entry is ``None`` or empty are
    skipped.

    Args:
        max_topics: Maximum number of topics to process in one run.
            Defaults to 10, the limit that used to be hard-coded.
    """
    zhihu_hot = ZhihuHot()

    # 1. Fetch the hot-list topic URLs.
    hot_topic_url_list = zhihu_hot.get_topic_url_list()

    # 2. Filter out topics that already exist in the database.
    hot_topic_url_list = video_script_not_exists(hot_topic_url_list)

    # 3. Keep only the first `max_topics` topics.
    hot_topic_url_list = hot_topic_url_list[:max_topics]

    # 4. Fetch the content of each remaining topic.
    for hot_topic_url in hot_topic_url_list:
        print(hot_topic_url)
        content = zhihu_hot.get_content(hot_topic_url)
        print(content)

        # Nothing usable was fetched for this topic (None or empty).
        if not content['contents']:
            # NOTE(review): "fitch" looks like a typo for "fetch"; the
            # message is left unchanged so existing log output is stable.
            print(f'skip {hot_topic_url}, no fitch content')
            continue

        # 5. Save the content to the database.
        video_script = VideoScript(
            title=content['title'],
            keywords=content['keywords'],
            description=content['topic_description'],
            content=content['contents'],
            url=content['url'],
        )
        create_video_script(video_script)
|
||||
|
||||
|
||||
# Script entry point: when this file is run directly (not imported),
# hand the crawl function to the task runner.
if __name__ == '__main__':
    execute_task(spider_task)
|
||||
Reference in New Issue
Block a user