import peter
This commit is contained in:
58
seek/cnn_com/content.py
Normal file
58
seek/cnn_com/content.py
Normal file
@ -0,0 +1,58 @@
|
||||
import datetime
|
||||
|
||||
from DrissionPage.errors import ElementNotFoundError
|
||||
|
||||
from database.tinformationsource.model import TInformationSource
|
||||
from database.tnews.model import TNews
|
||||
from log.log_manager import logger
|
||||
from seek.content_base import ContentBase
|
||||
|
||||
|
||||
class ArticleContent(ContentBase):
    """Extract the body text and the publication timestamp of one article page.

    All page access goes through ``self.session``, which is not defined in
    this class; it is presumably provided by ``ContentBase`` (verify there).
    """

    def __init__(self, news: TNews):
        """Initialise the extractor for ``news`` by delegating to ``ContentBase``."""
        super().__init__(news)

    def get_content(self):
        """Return the text of the ``#detailContent`` element.

        Returns:
            The element's ``text``, or the literal string
            ``'not found element'`` when the lookup raises
            ``ElementNotFoundError``.
        """
        try:
            return self.session.s_ele('#detailContent').text
        except ElementNotFoundError:
            # Callers receive this sentinel text rather than an exception.
            return 'not found element'

    def get_occurrence_date(self):
        """Assemble the article timestamp from the page header.

        Reads the ``.year``, ``.day`` and ``.time`` children of the
        ``.header-time left`` element and joins them as
        ``'{year}/{day} {time}'``.

        Returns:
            The combined string, or ``None`` when any of the lookups raises
            ``ElementNotFoundError``.
        """
        try:
            # Only the element lookups can raise ElementNotFoundError, so the
            # try body is limited to them.
            header_time = self.session.s_ele('.header-time left')
            year = header_time.s_ele('.year').text  # e.g. 2023
            day = header_time.s_ele('.day').text  # e.g. 12/27
            time_ = header_time.s_ele('.time').text  # e.g. 08:05:11
        except ElementNotFoundError:
            return None

        occurrence_date_ = f'{year}/{day} {time_}'
        # Diagnostic output goes through the module logger instead of a bare
        # print(), consistent with the rest of this file.
        logger.debug(occurrence_date_)
        return occurrence_date_
|
||||
|
||||
def get_content(information_source: TNews) -> str:
    """Fetch the article text for a single record and release the extractor.

    Args:
        information_source: Passed unchanged to ``ArticleContent``, whose
            constructor is annotated as taking a ``TNews``. The parameter
            name is kept as-is so keyword callers continue to work.

    Returns:
        Whatever ``ArticleContent.get_content`` returns: the article text, or
        its ``'not found element'`` fallback string.
    """
    article_content = ArticleContent(information_source)
    result = article_content.get_content()
    # The return value is not used here; the call only exercises the date
    # extraction alongside the content extraction.
    article_content.get_occurrence_date()
    article_content.finish()
    return result
|
||||
|
||||
|
||||
def content_task(news: TNews):
    """Run the content seek task for ``news``, logging when it starts and ends.

    Builds an ``ArticleContent`` for the record, calls ``do_seek_task()`` and
    then ``finish()`` on it. Both log lines include ``news.title`` and the
    current local time.
    """
    now = datetime.datetime.now  # local alias; called once per log line

    logger.info(f'{news.title} news_task start execute at {now()}')

    seeker = ArticleContent(news)
    seeker.do_seek_task()
    seeker.finish()

    logger.info(f'{news.title} news_task end execute at {now()}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: the module is meant to be imported, but running it
    # directly fetches one hard-coded article and logs its content.
    logger.info('This module is not for direct call!')

    # Minimal record with only the two fields this script sets.
    news_ = TNews()
    news_.is_static = True
    news_.url = 'https://www.news.cn/politics/leaders/20241227/90e76f85ad4a43ba94802b07c5736e00/c.html'

    content = get_content(news_)
    logger.info(content)

    logger.info('Done.')
|
||||
Reference in New Issue
Block a user