import peter

This commit is contained in:
konjacpotato
2025-11-12 20:42:16 +08:00
commit 8c1a740f0b
147 changed files with 2763 additions and 0 deletions

View File

Binary file not shown.

View File

@@ -0,0 +1,58 @@
import datetime
from DrissionPage.errors import ElementNotFoundError
from database.tinformationsource.model import TInformationSource
from database.tnews.model import TNews
from log.log_manager import logger
from seek.content_base import ContentBase
class ArticleContent(ContentBase):
    """Extract the body text and the publication time from an article page.

    The page is read through ``self.session``, which is not created in this
    class (presumably set up by ``ContentBase`` -- confirm there).
    """

    def __init__(self, news: TNews):
        """Initialise the extractor for *news* by delegating to the base class."""
        super().__init__(news)

    def get_content(self) -> str:
        """Return the text of the ``#detailContent`` element.

        Returns:
            The element text, or the literal placeholder
            ``'not found element'`` when the lookup raises
            ``ElementNotFoundError``.
        """
        try:
            return self.session.s_ele('#detailContent').text
        except ElementNotFoundError:
            # The placeholder string is returned as-is to callers, so it is
            # kept unchanged.
            return 'not found element'

    def get_occurrence_date(self):
        """Build the article timestamp from the page header.

        The ``.year``, ``.day`` and ``.time`` elements inside the
        ``.header-time left`` container are read and joined.

        Returns:
            A string shaped like ``'2023/12/27 08:05:11'``, or ``None`` when
            any of the lookups raises ``ElementNotFoundError``.
        """
        try:
            header_time = self.session.s_ele('.header-time left')
            year = header_time.s_ele('.year').text  # e.g. 2023
            month_day = header_time.s_ele('.day').text  # e.g. 12/27
            clock = header_time.s_ele('.time').text  # e.g. 08:05:11
        except ElementNotFoundError:
            return None
        occurrence_date_ = f'{year}/{month_day} {clock}'
        # Routed through the module logger instead of a bare print() so the
        # value follows the same logging configuration as everything else.
        logger.debug(f'occurrence date: {occurrence_date_}')
        return occurrence_date_
def get_content(information_source: TNews) -> str:
    """Fetch the body text of a single article.

    Args:
        information_source: The news record to read. The parameter keeps its
            historical name, but the value is handed straight to
            ``ArticleContent``, whose constructor takes a ``TNews``.

    Returns:
        Whatever ``ArticleContent.get_content`` returns: the article text, or
        that method's placeholder string when the content element is missing.
    """
    article_content = ArticleContent(information_source)
    try:
        result = article_content.get_content()
        # The return value is intentionally unused; the call only exercises
        # the date extraction alongside the content lookup.
        article_content.get_occurrence_date()
    finally:
        # NOTE(review): assumes finish() is cleanup that is safe to run after
        # a failure -- confirm in ContentBase.
        article_content.finish()
    return result
def content_task(news: TNews):
    """Run the content seek task for one news record, logging start and end.

    Args:
        news: The news record whose article page should be processed. Its
            ``title`` is used in the log lines.

    Any exception raised by ``do_seek_task`` propagates to the caller; the
    "end" line is only logged when the task completes.
    """
    # The label previously read "news_task", which is the name of a different
    # task; it now names this function so the log lines can be told apart.
    logger.info(f'{news.title} content_task start execute at {datetime.datetime.now()}')
    article_content = ArticleContent(news)
    try:
        article_content.do_seek_task()
    finally:
        # NOTE(review): assumes finish() is cleanup that is safe to run after
        # a failure -- confirm in ContentBase.
        article_content.finish()
    logger.info(f'{news.title} content_task end execute at {datetime.datetime.now()}')
if __name__ == '__main__':
    # Manual smoke run: fetch one known article and log its extracted text.
    logger.info('This module is not for direct call!')
    sample_news = TNews()
    sample_news.is_static = True
    sample_news.url = (
        'https://www.news.cn/politics/leaders/20241227/90e76f85ad4a43ba94802b07c5736e00/c.html'
    )
    article_text = get_content(sample_news)
    logger.info(article_text)
    logger.info('Done.')

View File

@@ -0,0 +1,59 @@
import datetime
from DrissionPage.errors import ElementNotFoundError
from database.tinformationsource.model import TInformationSource
from database.tnews.model import TNews
from log.log_manager import logger
from seek.seek_base import SeekBase
class Information(SeekBase):
    """Collect headline links from the ``#focusListNews`` list of a page.

    The page is read through ``self.session`` and the source record through
    ``self.information_source``; neither is created in this class (presumably
    both are set up by ``SeekBase`` -- confirm there).
    """

    def __init__(self, information_source: TInformationSource):
        """Initialise the seeker for *information_source* via the base class."""
        super().__init__(information_source)

    def get_news(self):
        """Return one ``TNews`` per ``<li>`` found under ``#focusListNews``.

        For every list item the first ``<a>`` supplies ``title`` (its text)
        and ``url`` (its link); ``source`` is copied from
        ``self.information_source.title``. An item that raises is logged and
        skipped, so one malformed entry does not drop the rest.

        An error raised while locating the list itself is not caught here and
        propagates to the caller.

        Returns:
            A list of the ``TNews`` objects that were built successfully.
        """
        news_result = []
        list_items = self.session.s_ele('#focusListNews').s_eles('tag:li')
        for item in list_items:
            try:
                rs_news = TNews()
                # Look the anchor up once and reuse it for both fields.
                anchor = item.s_ele('tag:a')
                rs_news.title = anchor.text
                rs_news.url = anchor.link
                # NOTE: summary and occurrence_date are not filled in here.
                rs_news.source = self.information_source.title
                news_result.append(rs_news)
            except ElementNotFoundError as e:
                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
            except Exception as e:
                # Deliberate best-effort: log and move on to the next item.
                logger.error(f'Unexpected error occurred: {e}')
        return news_result
def get_news(information_source: TInformationSource) -> list:
    """Scrape the headline list for *information_source* and return it.

    Args:
        information_source: The source record handed to ``Information``.

    Returns:
        The list produced by ``Information.get_news``.
    """
    instance = Information(information_source)
    try:
        return instance.get_news()
    finally:
        # NOTE(review): assumes finish() is cleanup that is safe to run after
        # a failure -- confirm in SeekBase.
        instance.finish()
def news_task(information_source: TInformationSource):
    """Run the news seek task for one information source, logging start and end.

    Args:
        information_source: The source record to process. Its ``title`` is
            used in the log lines.

    Any exception raised by ``do_seek_task`` propagates to the caller; the
    "end" line is only logged when the task completes.
    """
    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
    instance = Information(information_source)
    try:
        instance.do_seek_task()
    finally:
        # NOTE(review): assumes finish() is cleanup that is safe to run after
        # a failure -- confirm in SeekBase.
        instance.finish()
    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
if __name__ == '__main__':
    # Manual smoke run against the Xinhuanet front page.
    logger.info('This module is not for direct call!')
    demo_source = TInformationSource()
    demo_source.is_static = True
    demo_source.url = 'http://www.xinhuanet.com/'
    demo_source.title = '资讯_新华网'
    news_task(demo_source)
    # To inspect the scraped items without running the full task, call
    # get_news(demo_source) instead and print each returned entry.
    logger.info('Done.')