import peter
This commit is contained in:
0
seek/the_paper_com/__init__.py
Normal file
0
seek/the_paper_com/__init__.py
Normal file
BIN
seek/the_paper_com/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
seek/the_paper_com/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
seek/the_paper_com/__pycache__/base.cpython-312.pyc
Normal file
BIN
seek/the_paper_com/__pycache__/base.cpython-312.pyc
Normal file
Binary file not shown.
BIN
seek/the_paper_com/__pycache__/content.cpython-312.pyc
Normal file
BIN
seek/the_paper_com/__pycache__/content.cpython-312.pyc
Normal file
Binary file not shown.
BIN
seek/the_paper_com/__pycache__/international.cpython-312.pyc
Normal file
BIN
seek/the_paper_com/__pycache__/international.cpython-312.pyc
Normal file
Binary file not shown.
BIN
seek/the_paper_com/__pycache__/tech.cpython-312.pyc
Normal file
BIN
seek/the_paper_com/__pycache__/tech.cpython-312.pyc
Normal file
Binary file not shown.
32
seek/the_paper_com/base.py
Normal file
32
seek/the_paper_com/base.py
Normal file
@ -0,0 +1,32 @@
|
||||
from DrissionPage.errors import ElementNotFoundError
|
||||
|
||||
from database.tinformationsource.model import TInformationSource
|
||||
from database.tnews.model import TNews
|
||||
from log.log_manager import logger
|
||||
from seek.seek_base import SeekBase
|
||||
from utils.time_utils import process_time
|
||||
|
||||
|
||||
class Base(SeekBase):
    """List-page scraper shared by the channel modules of this package.

    The channel classes (for example ``International`` and ``Tech``) subclass
    this and inherit :meth:`get_news` unchanged.
    """

    def __init__(self, information_source: TInformationSource):
        """Forward ``information_source`` to ``SeekBase`` unchanged."""
        super().__init__(information_source)

    def get_news(self):
        """Build one ``TNews`` per news card found on the listing page.

        Cards are the ``.ant-col ant-col-6`` elements inside the
        ``.index_cards__AdZtA`` container of ``self.session``
        (``self.session`` is not created in this class; presumably
        ``SeekBase`` provides it -- confirm).

        Returns:
            list: the ``TNews`` objects that were populated successfully.
            A card that raises while being read is logged and skipped, so
            the list may be shorter than the number of cards on the page.

        Note:
            The container lookup happens outside the per-card ``try``, so a
            failure there propagates to the caller.
        """
        news_result = []
        cards = self.session.s_ele('.index_cards__AdZtA').s_eles('.ant-col ant-col-6')

        for card in cards:
            tnews = TNews()
            try:
                # Resolve the anchor once; it supplies both the title and
                # the link (previously it was looked up twice per card).
                anchor = card.s_ele('tag:a')
                tnews.title = anchor.text
                tnews.url = anchor.link
                # The second <span> of the small-text row is used as the
                # time text; a row with fewer spans raises IndexError and
                # is handled by the generic handler below.
                _time = card.s_ele('.small_text__dR01h').s_eles('tag:span')[1].text
                tnews.occurrence_date = process_time(_time)
                tnews.source = self.information_source.title
                news_result.append(tnews)
            except ElementNotFoundError as e:
                # tnews.title is still the TNews() default here when the
                # anchor itself was the missing element.
                logger.error(f"ElementNotFoundError {tnews.title}: {e} - Failed to find element in news item.")
            except Exception as e:
                # Best-effort scraping: one bad card must not abort the run.
                logger.error(f'Unexpected error occurred: {e}')

        return news_result
|
||||
50
seek/the_paper_com/content.py
Normal file
50
seek/the_paper_com/content.py
Normal file
@ -0,0 +1,50 @@
|
||||
import datetime
|
||||
|
||||
from DrissionPage.errors import ElementNotFoundError
|
||||
|
||||
from database.tinformationsource.model import TInformationSource
|
||||
from database.tnews.model import TNews
|
||||
from log.log_manager import logger
|
||||
from seek.content_base import ContentBase
|
||||
|
||||
|
||||
class ThePaperContent(ContentBase):
    """Extracts the body text of a single article page."""

    def __init__(self, news: TNews):
        """Forward ``news`` to ``ContentBase`` unchanged."""
        super().__init__(news)

    def get_content(self):
        """Return the article text, trying each known locator in order.

        The first locator targets the regular article wrapper; the second
        is the fallback used for video pages. If neither element is found
        the literal string ``'not found element'`` is returned instead of
        raising.
        """
        locators = (
            '.^index_cententWrap',  # regular article body
            '.^header_desc',        # video page description
        )
        for locator in locators:
            try:
                return self.session.s_ele(locator).text
            except ElementNotFoundError:
                # Try the next locator, if there is one.
                continue
        return 'not found element'
|
||||
|
||||
|
||||
def get_content(information_source: TNews) -> str:
    """Fetch the article text for one news item and return it.

    Args:
        information_source: the news item to read. Despite the parameter
            name (kept for backward compatibility with keyword callers),
            this is handed straight to ``ThePaperContent``, whose
            constructor takes a ``TNews``.

    Returns:
        Whatever ``ThePaperContent.get_content`` returns: the text of the
        located element, or ``'not found element'`` when no locator matched.
    """
    the_paper_content = ThePaperContent(information_source)
    content = the_paper_content.get_content()
    the_paper_content.finish()
    return content
|
||||
|
||||
|
||||
def content_task(news: TNews):
    """Run the content seek task for one news item, logging start and end.

    Args:
        news: the news item passed to ``ThePaperContent``.

    The end message is only logged when ``do_seek_task`` and ``finish``
    both return without raising.
    """
    # The log label names this function; it previously said "news_task",
    # carried over from the channel modules.
    logger.info(f'{news.title} content_task start execute at {datetime.datetime.now()}')
    content_seeker = ThePaperContent(news)
    content_seeker.do_seek_task()
    content_seeker.finish()
    logger.info(f'{news.title} content_task end execute at {datetime.datetime.now()}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: fetch one hard-coded article and log its text.
    logger.info('This module is not for direct call!')

    news_ = TNews()
    news_.is_static = True
    news_.url = 'https://www.thepaper.cn/newsDetail_forward_29745442'

    content = get_content(news_)
    logger.info(content)

    logger.info('Done.')
|
||||
38
seek/the_paper_com/international.py
Normal file
38
seek/the_paper_com/international.py
Normal file
@ -0,0 +1,38 @@
|
||||
import datetime
|
||||
|
||||
from database.tinformationsource.model import TInformationSource
|
||||
from log.log_manager import logger
|
||||
from seek.the_paper_com.base import Base
|
||||
|
||||
|
||||
class International(Base):
    """Channel scraper that reuses ``Base`` without overriding any behavior."""

    def __init__(self, information_source: TInformationSource):
        """Forward ``information_source`` to ``Base`` unchanged."""
        super().__init__(information_source)
|
||||
|
||||
|
||||
def get_news(information_source: TInformationSource) -> list:
    """Scrape the channel described by ``information_source``.

    Creates an ``International`` scraper, collects its news list, calls
    ``finish()`` on the scraper and returns the collected list.
    """
    scraper = International(information_source)
    collected = scraper.get_news()
    scraper.finish()
    return collected
|
||||
|
||||
|
||||
def news_task(information_source: TInformationSource):
    """Run the seek task for this channel, logging before and after.

    The end message is only reached when ``do_seek_task`` and ``finish``
    both return without raising.
    """
    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')

    scraper = International(information_source)
    scraper.do_seek_task()
    scraper.finish()

    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run against the hard-coded channel below: print every
    # scraped item without going through the full seek task.
    logger.info('This module is not for direct call!')

    information_source_ = TInformationSource()
    information_source_.is_static = True
    information_source_.url = 'https://www.thepaper.cn/channel_122908'
    information_source_.title = '国际_澎湃新闻'

    # Alternative entry point, kept disabled for manual debugging:
    # news_task(information_source_)
    news_list_ = get_news(information_source_)
    for news in news_list_:
        print(news)

    logger.info('Done.')
|
||||
38
seek/the_paper_com/tech.py
Normal file
38
seek/the_paper_com/tech.py
Normal file
@ -0,0 +1,38 @@
|
||||
import datetime
|
||||
|
||||
from database.tinformationsource.model import TInformationSource
|
||||
from log.log_manager import logger
|
||||
from seek.the_paper_com.base import Base
|
||||
|
||||
|
||||
class Tech(Base):
    """Channel scraper that reuses ``Base`` without overriding any behavior."""

    def __init__(self, information_source: TInformationSource):
        """Forward ``information_source`` to ``Base`` unchanged."""
        super().__init__(information_source)
|
||||
|
||||
|
||||
def get_news(information_source: TInformationSource) -> list:
    """Scrape the channel described by ``information_source``.

    Creates a ``Tech`` scraper, collects its news list, calls ``finish()``
    on the scraper and returns the collected list.
    """
    scraper = Tech(information_source)
    collected = scraper.get_news()
    scraper.finish()
    return collected
|
||||
|
||||
|
||||
def news_task(information_source: TInformationSource):
    """Run the seek task for this channel, logging before and after.

    The end message is only reached when ``do_seek_task`` and ``finish``
    both return without raising.
    """
    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')

    scraper = Tech(information_source)
    scraper.do_seek_task()
    scraper.finish()

    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run against the hard-coded channel below: execute the
    # full seek task.
    logger.info('This module is not for direct call!')

    information_source_ = TInformationSource()
    information_source_.is_static = True
    information_source_.url = 'https://www.thepaper.cn/channel_119908'
    information_source_.title = '科技_澎湃新闻'

    news_task(information_source_)
    # Alternative entry point, kept disabled for manual debugging:
    # news_list_ = get_news(information_source_)
    # for news in news_list_:
    #     print(news)

    logger.info('Done.')
|
||||
Reference in New Issue
Block a user