import peter

2025-11-12 20:42:16 +08:00
commit 8c1a740f0b
147 changed files with 2763 additions and 0 deletions
--- a/seek/163_com/__init__.py
+++ b/seek/163_com/__init__.py
--- a/seek/163_com/__pycache__/__init__.cpython-312.pyc
+++ b/seek/163_com/__pycache__/__init__.cpython-312.pyc
--- a/seek/163_com/__pycache__/content.cpython-312.pyc
+++ b/seek/163_com/__pycache__/content.cpython-312.pyc
--- a/seek/163_com/__pycache__/house.cpython-312.pyc
+++ b/seek/163_com/__pycache__/house.cpython-312.pyc
--- a/seek/163_com/content.py
+++ b/seek/163_com/content.py
@ -0,0 +1,45 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        try:
+            content_ = self.session.s_ele('.post_body').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    ofweek_com_ai = ArticleContent(news)
+    ofweek_com_ai.do_seek_task()
+    ofweek_com_ai.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://www.163.com/dy/article/JKC1V4E70519DDQ2.html'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/163_com/house.py
+++ b/seek/163_com/house.py
@ -0,0 +1,59 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+
+
+class House(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        _news_list = self.session.s_ele('.news-first').s_eles('.data_row news_article clearfix2 ')
+        for _news in _news_list:
+            try:
+                rs_news = TNews()
+                rs_news.title = _news.s_ele('.news_title').s_ele('tag:a').text
+                rs_news.url = _news.s_ele('tag:a').link
+                # rs_news.summary = _news.s_ele('tag:p').text
+                # rs_news.occurrence_date = self.process_time(tmp.s_eles('tag:span')[1].text)
+                rs_news.source = self.information_source.title
+                news_result.append(rs_news)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    instance = House(information_source)
+    news_list = instance.get_news()
+    instance.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    instance = House(information_source)
+    instance.do_seek_task()
+    instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://sz.house.163.com/'
+    information_source_.title = '房产_网易'
+    news_task(information_source_)
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/__init__.py
+++ b/seek/__init__.py
--- a/seek/__pycache__/__init__.cpython-312.pyc
+++ b/seek/__pycache__/__init__.cpython-312.pyc
--- a/seek/__pycache__/content_base.cpython-312.pyc
+++ b/seek/__pycache__/content_base.cpython-312.pyc
--- a/seek/__pycache__/seek_base.cpython-312.pyc
+++ b/seek/__pycache__/seek_base.cpython-312.pyc
--- a/seek/anjuke_com/__init__.py
+++ b/seek/anjuke_com/__init__.py
--- a/seek/anjuke_com/__pycache__/__init__.cpython-312.pyc
+++ b/seek/anjuke_com/__pycache__/__init__.cpython-312.pyc
--- a/seek/anjuke_com/__pycache__/content.cpython-312.pyc
+++ b/seek/anjuke_com/__pycache__/content.cpython-312.pyc
--- a/seek/anjuke_com/__pycache__/house.cpython-312.pyc
+++ b/seek/anjuke_com/__pycache__/house.cpython-312.pyc
--- a/seek/anjuke_com/content.py
+++ b/seek/anjuke_com/content.py
@ -0,0 +1,46 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        content_ = ''
+        try:
+            content_ = self.session.s_ele('.^info-content').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    ofweek_com_ai = ArticleContent(news)
+    ofweek_com_ai.do_seek_task()
+    ofweek_com_ai.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://sz.news.anjuke.com/louping-965203-pan528488.html'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/anjuke_com/house.py
+++ b/seek/anjuke_com/house.py
@ -0,0 +1,62 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class House(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        print(self.session.html)
+        _news_list = self.session.s_ele('.main-list').s_eles('.m-list-item clearfix')
+        for _news in _news_list:
+            try:
+                rs_news = TNews()
+                tmp_ = _news.s_ele('.item-col-right')
+                rs_news.title = tmp_.s_ele('tag:h3').text
+                rs_news.url = tmp_.s_ele('tag:a').link
+                rs_news.summary = tmp_.s_eles('tag:a')[1].text
+                rs_news.occurrence_date = process_time(tmp_.s_ele('.info__time').text)
+                rs_news.source = self.information_source.title
+                news_result.append(rs_news)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    instance = House(information_source)
+    news_list = instance.get_news()
+    instance.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    instance = House(information_source)
+    instance.do_seek_task()
+    instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://sz.news.anjuke.com/hot/'
+    information_source_.title = '房产_安居客'
+    news_task(information_source_)
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/cnn_com/__init__.py
+++ b/seek/cnn_com/__init__.py
--- a/seek/cnn_com/content.py
+++ b/seek/cnn_com/content.py
@ -0,0 +1,58 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        content_ = ''
+        try:
+            content_ = self.session.s_ele('#detailContent').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+    def get_occurrence_date(self):
+        try:
+            header_time = self.session.s_ele('.header-time left')
+            year = header_time.s_ele('.year').text # 2023
+            day = header_time.s_ele('.day').text # 12/27
+            time = header_time.s_ele('.time').text # 08:05:11
+            occurrence_date_ = f'{year}/{day} {time}'
+            print(occurrence_date_)
+        except ElementNotFoundError:
+            occurrence_date_ = None
+        return occurrence_date_
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.get_occurrence_date()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    article_content = ArticleContent(news)
+    article_content.do_seek_task()
+    article_content.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://www.news.cn/politics/leaders/20241227/90e76f85ad4a43ba94802b07c5736e00/c.html'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/cnn_com/edition.py
+++ b/seek/cnn_com/edition.py
@ -0,0 +1,62 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+
+
+class Edition(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        # _news_list = self.tab.s_ele('.zone zone--t-light zone-2-observer').s_eles('.stack')
+        # _news_list = self.tab.s_ele('.zone zone--t-light zone-2-observer').s_eles('.stack__items ')
+        _news_list = self.tab.s_ele('.zone zone--t-light zone-2-observer').s_eles('tag:a')
+        for _news in _news_list:
+            print(_news.html)
+            try:
+                rs_news = TNews()
+                rs_news.title = _news.text
+                rs_news.url = _news.link
+                # rs_news.summary = tmp_.s_eles('tag:a')[1].text
+                # rs_news.occurrence_date = self.process_time(tmp_.s_ele('.info__time').text)
+                rs_news.source = self.information_source.title
+                news_result.append(rs_news)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    instance = Edition(information_source)
+    news_list = instance.get_news()
+    instance.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    instance = Edition(information_source)
+    instance.do_seek_task()
+    instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = False
+    information_source_.url = 'https://edition.cnn.com/'
+    information_source_.title = 'edition_CNN'
+    # news_task(information_source_)
+    news_list_ = get_news(information_source_)
+    for news in news_list_:
+        print(news)
+    logger.info('Done.')
--- a/seek/content_base.py
+++ b/seek/content_base.py
@ -0,0 +1,50 @@
+from abc import ABC, abstractmethod
+
+from DrissionPage import Chromium, SessionPage, ChromiumOptions
+
+from database.database import get_session
+from database.tnews.crud import update_news_by_id
+from database.tnews.model import TNews
+from log.log_manager import log
+
+
+class ContentBase(ABC):
+    def __init__(self, news: TNews):
+        self.news = news
+        self.session = None  # 初始化为 None
+        self.browser = None  # 初始化为 None
+        if news.is_static:
+            self.session = SessionPage()
+            self.session.get(news.url)
+        else:
+            co = ChromiumOptions()
+            self.browser = Chromium(addr_or_opts=co)
+            # self.tab = self.browser.latest_tab
+            self.tab = self.browser.new_tab()
+            self.tab.get(news.url)
+
+    @abstractmethod
+    def get_content(self):
+        """Abstract method to fetch news from a specific source."""
+        pass
+
+    def get_occurrence_date(self):
+        return None
+
+    def do_seek_task(self):
+        """Saves the list of news to the database if the URL does not already exist."""
+        self.news.content = self.get_content()
+        if self.news.occurrence_date is None:
+            self.news.occurrence_date = self.get_occurrence_date()
+        with get_session() as db:
+            update_news_by_id(db, self.news)
+        log(f'successful fetch {self.news.title} news content into the database.')
+
+    def finish(self):
+        """Closes the browser and session."""
+        if self.tab:
+            self.tab.close()
+        # if self.browser:
+        #     self.browser.quit()
+        if self.session:
+            self.session.close()
--- a/seek/fang_com/__init__.py
+++ b/seek/fang_com/__init__.py
--- a/seek/fang_com/__pycache__/__init__.cpython-312.pyc
+++ b/seek/fang_com/__pycache__/__init__.cpython-312.pyc
--- a/seek/fang_com/__pycache__/content.cpython-312.pyc
+++ b/seek/fang_com/__pycache__/content.cpython-312.pyc
--- a/seek/fang_com/__pycache__/house.cpython-312.pyc
+++ b/seek/fang_com/__pycache__/house.cpython-312.pyc
--- a/seek/fang_com/content.py
+++ b/seek/fang_com/content.py
@ -0,0 +1,46 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        content_ = ''
+        try:
+            content_ = self.session.s_ele('.^news-text').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    ofweek_com_ai = ArticleContent(news)
+    ofweek_com_ai.do_seek_task()
+    ofweek_com_ai.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://sz.news.fang.com/open/51863596.html'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/fang_com/house.py
+++ b/seek/fang_com/house.py
@ -0,0 +1,64 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class House(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        _news_list = self.session.s_ele('.news-list').s_eles('tag:li')
+        for _news in _news_list:
+            try:
+                rs_news = TNews()
+                tmp = _news.s_ele('.txt')
+                rs_news.title = tmp.s_ele('tag:a').text
+                rs_news.url = tmp.s_ele('tag:a').link
+                rs_news.summary = tmp.s_ele('tag:p').text
+                rs_news.occurrence_date = process_time(tmp.s_eles('tag:span')[1].text)
+                rs_news.source = self.information_source.title
+                news_result.append(rs_news)
+            except ElementNotFoundError as e:
+                if _news.s_ele('.item'):
+                    # 此为视频内容，跳过
+                    continue
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    instance = House(information_source)
+    news_list = instance.get_news()
+    instance.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    instance = House(information_source)
+    instance.do_seek_task()
+    instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://sz.news.fang.com/'
+    information_source_.title = '房产_房天下'
+    news_task(information_source_)
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/focus_cn/__init__.py
+++ b/seek/focus_cn/__init__.py
--- a/seek/focus_cn/__pycache__/__init__.cpython-312.pyc
+++ b/seek/focus_cn/__pycache__/__init__.cpython-312.pyc
--- a/seek/focus_cn/__pycache__/content.cpython-312.pyc
+++ b/seek/focus_cn/__pycache__/content.cpython-312.pyc
--- a/seek/focus_cn/__pycache__/house.cpython-312.pyc
+++ b/seek/focus_cn/__pycache__/house.cpython-312.pyc
--- a/seek/focus_cn/content.py
+++ b/seek/focus_cn/content.py
@ -0,0 +1,46 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        news.is_static = True
+        super().__init__(news)
+
+    def get_content(self):
+        try:
+            content_ = self.session.s_ele('.article').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    ofweek_com_ai = ArticleContent(news)
+    ofweek_com_ai.do_seek_task()
+    ofweek_com_ai.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://www.focus.cn/a/842171870_124752'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/focus_cn/house.py
+++ b/seek/focus_cn/house.py
@ -0,0 +1,62 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class House(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        self.tab.wait.ele_displayed('.FeedList')
+        _news_list = self.tab.s_ele('.cbd-recommend').s_eles('.FeedList')
+        for _news in _news_list:
+            try:
+                rs_news = TNews()
+                rs_news.title = _news.s_ele('.item-text-content-title').text
+                link = _news.s_ele('tag:a').link
+                rs_news.url = link.split('?')[0]
+                rs_news.summary = _news.s_ele('.item-text-content-description').text
+                rs_news.occurrence_date = process_time(_news.s_ele('.extra-info-item').text)
+                rs_news.source = self.information_source.title
+                news_result.append(rs_news)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    instance = House(information_source)
+    news_list = instance.get_news()
+    instance.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    instance = House(information_source)
+    instance.do_seek_task()
+    instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = False
+    information_source_.url = 'https://sz.focus.cn/zixun/'
+    information_source_.title = '房产_搜狐焦点'
+    news_task(information_source_)
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/leju_com/__init__.py
+++ b/seek/leju_com/__init__.py
--- a/seek/leju_com/__pycache__/__init__.cpython-312.pyc
+++ b/seek/leju_com/__pycache__/__init__.cpython-312.pyc
--- a/seek/leju_com/__pycache__/content.cpython-312.pyc
+++ b/seek/leju_com/__pycache__/content.cpython-312.pyc
--- a/seek/leju_com/__pycache__/house.cpython-312.pyc
+++ b/seek/leju_com/__pycache__/house.cpython-312.pyc
--- a/seek/leju_com/content.py
+++ b/seek/leju_com/content.py
@ -0,0 +1,45 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        try:
+            content_ = self.session.s_ele('.^sf_news_contend').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    ofweek_com_ai = ArticleContent(news)
+    ofweek_com_ai.do_seek_task()
+    ofweek_com_ai.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://sz.leju.com/news/2024-12-18/18427272536617796292963.shtml'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/leju_com/house.py
+++ b/seek/leju_com/house.py
@ -0,0 +1,60 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class House(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        _news_list = self.session.s_ele('.sf_listPage').s_eles('tag:li')
+        for _news in _news_list:
+            try:
+                rs_news = TNews()
+                rs_news.title = _news.s_ele('tag:a').text
+                rs_news.url = _news.s_ele('tag:a').link
+                rs_news.summary = _news.s_ele('tag:p').text
+                rs_news.occurrence_date = process_time(_news.s_ele('.tag').text)
+                rs_news.source = self.information_source.title
+                news_result.append(rs_news)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    instance = House(information_source)
+    news_list = instance.get_news()
+    instance.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    instance = House(information_source)
+    instance.do_seek_task()
+    instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://sz.leju.com/news/'
+    information_source_.title = '房产_新浪乐居'
+    # news_task(information_source_)
+    news_list_ = get_news(information_source_)
+    for news in news_list_:
+        print(news)
+    logger.info('Done.')
--- a/seek/mittr_com/__init__.py
+++ b/seek/mittr_com/__init__.py
--- a/seek/mittr_com/__pycache__/__init__.cpython-312.pyc
+++ b/seek/mittr_com/__pycache__/__init__.cpython-312.pyc
--- a/seek/mittr_com/__pycache__/content.cpython-312.pyc
+++ b/seek/mittr_com/__pycache__/content.cpython-312.pyc
--- a/seek/mittr_com/__pycache__/mit_t_r.cpython-312.pyc
+++ b/seek/mittr_com/__pycache__/mit_t_r.cpython-312.pyc
--- a/seek/mittr_com/content.py
+++ b/seek/mittr_com/content.py
@ -0,0 +1,45 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        try:
+            content_ = self.tab.s_ele('.content').text
+        except ElementNotFoundError:
+            content_ = 'not found element'
+        return content_
+
+
+def get_content(information_source: TInformationSource) -> list:
+    article_content = ArticleContent(information_source)
+    result = article_content.get_content()
+    article_content.finish()
+    return result
+
+
+def content_task(news: TNews):
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}', )
+    ofweek_com_ai = ArticleContent(news)
+    ofweek_com_ai.do_seek_task()
+    ofweek_com_ai.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = False
+    news_.url = 'https://www.mittrchina.com/news/detail/14218'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/mittr_com/mit_t_r.py
+++ b/seek/mittr_com/mit_t_r.py
@ -0,0 +1,63 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class MittrChinaCom(SeekBase):
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        news_result = []
+        self.tab.wait.ele_displayed('.last-item')
+        _news_list = self.tab.s_ele('.lastest-list').s_eles('.last-item')
+
+        for _news in _news_list:
+            try:
+                tnews = TNews()
+                tnews.title = _news.s_ele('tag:a').text
+                tnews.url = _news.s_ele('tag:a').link
+                _time = _news.parent().s_ele('.time').text
+                tnews.occurrence_date = process_time(_time)
+                tnews.source = self.information_source.title
+                news_result.append(tnews)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+
+        return news_result
+
+
+def get_news(information_source: TInformationSource) -> list:
+    mittr = MittrChinaCom(information_source)
+    news_list = mittr.get_news()
+    mittr.finish()
+    return news_list
+
+
+def news_task(information_source: TInformationSource):
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}', )
+    mittr = MittrChinaCom(information_source)
+    mittr.do_seek_task()
+    mittr.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = False
+    information_source_.url = 'https://www.mittrchina.com/'
+    information_source_.title = '科技_麻省理工科技评论'
+    news_task(information_source_)
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/ofweek_com/__init__.py
+++ b/seek/ofweek_com/__init__.py
--- a/seek/ofweek_com/__pycache__/__init__.cpython-312.pyc
+++ b/seek/ofweek_com/__pycache__/__init__.cpython-312.pyc
--- a/seek/ofweek_com/__pycache__/ai.cpython-312.pyc
+++ b/seek/ofweek_com/__pycache__/ai.cpython-312.pyc
--- a/seek/ofweek_com/__pycache__/content.cpython-312.pyc
+++ b/seek/ofweek_com/__pycache__/content.cpython-312.pyc
--- a/seek/ofweek_com/ai.py
+++ b/seek/ofweek_com/ai.py
@ -0,0 +1,62 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class OfweekComAi(SeekBase):
+    """Seeker that parses the news list of the ofweek.com AI page."""
+
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        """Turn the entries of the loaded list page into ``TNews`` objects.
+
+        Entries are the elements whose class starts with ``top-title`` inside
+        the ``.main-cont-left w640`` container. An entry that fails to parse
+        is logged and skipped; the remaining entries are still returned.
+        """
+        container = self.session.s_ele('.main-cont-left w640')
+        collected = []
+
+        for entry in container.s_eles('.^top-title'):
+            try:
+                item = TNews()
+                anchor = entry.s_ele('tag:a')
+                item.title = anchor.text
+                item.url = anchor.link
+                # The time text is taken from the fifth <span> under the entry's parent.
+                raw_time = entry.parent().s_eles('tag:span')[4].text
+                item.occurrence_date = process_time(raw_time)
+                item.source = self.information_source.title
+                collected.append(item)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+
+        return collected
+
+
+def get_news(information_source: TInformationSource) -> list:
+    """Scrape ``information_source`` once and return the parsed news items.
+
+    ``finish()`` runs in a ``finally`` clause so the seeker's page is closed
+    even when ``OfweekComAi.get_news()`` raises.
+    """
+    ofweek_com_ai = OfweekComAi(information_source)
+    try:
+        return ofweek_com_ai.get_news()
+    finally:
+        ofweek_com_ai.finish()
+
+
+def news_task(information_source: TInformationSource):
+    """Run the seek task for ``information_source`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so the seeker's page is
+    closed even when ``do_seek_task()`` raises; the exception still
+    propagates to the caller.
+    """
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
+    ofweek_com_ai = OfweekComAi(information_source)
+    try:
+        ofweek_com_ai.do_seek_task()
+    finally:
+        ofweek_com_ai.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this executes news_task() once
+    # against the ofweek.com AI page using an in-memory information source.
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://www.ofweek.com/ai/'
+    information_source_.title = '人工智能_维科网'
+    news_task(information_source_)
+    # Alternative manual check: fetch and print the items without running the task.
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/ofweek_com/content.py
+++ b/seek/ofweek_com/content.py
@ -0,0 +1,46 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    """Reads the article body from a loaded ofweek.com article page."""
+
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        """Return the text of the ``.artical-content`` element.
+
+        When the lookup raises ``ElementNotFoundError`` the literal string
+        ``'not found element'`` is returned instead.
+        """
+        try:
+            return self.session.s_ele('.artical-content').text
+        except ElementNotFoundError:
+            return 'not found element'
+
+
+def get_content(information_source: TNews) -> str:
+    """Fetch the article text for the given news record.
+
+    The argument keeps its historical name, but it is handed straight to
+    ``ArticleContent``, whose constructor is annotated with ``TNews``; the
+    value returned is whatever ``ArticleContent.get_content()`` yields
+    (element text or the ``'not found element'`` marker), not a list.
+    ``finish()`` runs in a ``finally`` clause so it is called even when
+    extraction raises.
+    """
+    article_content = ArticleContent(information_source)
+    try:
+        return article_content.get_content()
+    finally:
+        article_content.finish()
+
+
+def content_task(news: TNews):
+    """Run the content seek task for ``news`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so it runs even when
+    ``do_seek_task()`` raises; the exception still propagates to the caller.
+    """
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}')
+    article_content = ArticleContent(news)
+    try:
+        article_content.do_seek_task()
+    finally:
+        article_content.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this fetches one sample
+    # article and logs the extracted text.
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://www.ofweek.com/ai/2024-12/ART-201721-8120-30654143.html'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/seek_base.py
+++ b/seek/seek_base.py
@ -0,0 +1,57 @@
+from abc import ABC, abstractmethod
+
+from DrissionPage import Chromium, SessionPage, ChromiumOptions
+
+from database.database import get_session
+from database.tinformationsource.model import TInformationSource
+from database.tnews.crud import create_news_list_if_url_not_exists
+from log.log_manager import log
+
+
+class SeekBase(ABC):
+    """Base class for news-list seekers.
+
+    On construction the source URL is loaded either with a ``SessionPage``
+    (when ``information_source.is_static`` is truthy) or in a new Chromium
+    tab. Subclasses implement ``get_news``; ``do_seek_task`` stores the
+    result and ``finish`` releases the page.
+    """
+
+    # News attributes that fall back to the information source's value
+    # when the scraped item leaves them as None.
+    _INHERITED_FIELDS = (
+        'primary_category',
+        'secondary_category',
+        'tertiary_category',
+        'label',
+        'lang',
+    )
+
+    def __init__(self, information_source: TInformationSource):
+        self.information_source = information_source
+        # Only one of session / (browser, tab) is populated, depending on is_static.
+        self.session = None
+        self.browser = None
+        self.tab = None
+        if information_source.is_static:
+            self.session = SessionPage()
+            self.session.get(information_source.url)
+        else:
+            # The previously created ChromiumOptions object was never passed
+            # to Chromium(), so it has been dropped.
+            self.browser = Chromium()
+            self.tab = self.browser.new_tab()
+            self.tab.get(information_source.url)
+
+    @abstractmethod
+    def get_news(self):
+        """Abstract method to fetch news from a specific source."""
+        pass
+
+    def do_seek_task(self):
+        """Fetch the news list, fill in defaults and save the new items.
+
+        Each item inherits the source's categories, label and language for
+        every one of those fields it left as None. The list is then passed to
+        ``create_news_list_if_url_not_exists`` and its result is returned.
+        """
+        news_list = self.get_news()
+        for news in news_list:
+            for field in self._INHERITED_FIELDS:
+                if getattr(news, field) is None:
+                    setattr(news, field, getattr(self.information_source, field))
+        with get_session() as db:
+            inserted_news = create_news_list_if_url_not_exists(db, news_list)
+        log(f'Inserted {len(inserted_news)} {self.information_source.title} news items into the database.')
+        return inserted_news
+
+    def finish(self):
+        """Close the tab and the session, if any.
+
+        The session is closed even when closing the tab raises, and both
+        references are cleared so a second call is a no-op.
+        """
+        # NOTE(review): browser.quit() was already disabled in the original
+        # code; presumably the browser instance is shared - confirm before
+        # enabling it here.
+        try:
+            if self.tab:
+                self.tab.close()
+        finally:
+            self.tab = None
+            if self.session:
+                self.session.close()
+            self.session = None
--- a/seek/the_paper_com/init.py
+++ b/seek/the_paper_com/init.py
--- a/seek/the_paper_com/pycache/init.cpython-312.pyc
+++ b/seek/the_paper_com/pycache/init.cpython-312.pyc
--- a/seek/the_paper_com/pycache/base.cpython-312.pyc
+++ b/seek/the_paper_com/pycache/base.cpython-312.pyc
--- a/seek/the_paper_com/pycache/content.cpython-312.pyc
+++ b/seek/the_paper_com/pycache/content.cpython-312.pyc
--- a/seek/the_paper_com/pycache/international.cpython-312.pyc
+++ b/seek/the_paper_com/pycache/international.cpython-312.pyc
--- a/seek/the_paper_com/pycache/tech.cpython-312.pyc
+++ b/seek/the_paper_com/pycache/tech.cpython-312.pyc
--- a/seek/the_paper_com/base.py
+++ b/seek/the_paper_com/base.py
@ -0,0 +1,32 @@
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+from utils.time_utils import process_time
+
+
+class Base(SeekBase):
+    """List-page parser shared by the seekers in this package."""
+
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        """Turn the cards of the loaded list page into ``TNews`` objects.
+
+        Cards are the ``.ant-col ant-col-6`` elements inside
+        ``.index_cards__AdZtA``. A card that fails to parse is logged and
+        skipped; the remaining cards are still returned.
+        """
+        cards = self.session.s_ele('.index_cards__AdZtA').s_eles('.ant-col ant-col-6')
+        parsed = []
+
+        for card in cards:
+            # Created outside the try so the error log can show the title read so far.
+            tnews = TNews()
+            try:
+                link = card.s_ele('tag:a')
+                tnews.title = link.text
+                tnews.url = link.link
+                # The time text is the second <span> inside the small-text row.
+                spans = card.s_ele('.small_text__dR01h').s_eles('tag:span')
+                tnews.occurrence_date = process_time(spans[1].text)
+                tnews.source = self.information_source.title
+                parsed.append(tnews)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError {tnews.title}: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+
+        return parsed
--- a/seek/the_paper_com/content.py
+++ b/seek/the_paper_com/content.py
@ -0,0 +1,50 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ThePaperContent(ContentBase):
+    """Reads the body text from a loaded thepaper.cn detail page."""
+
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        """Return the page's main text.
+
+        The article wrapper is tried first, then the description element
+        used on video pages. If both lookups raise ``ElementNotFoundError``
+        the literal ``'not found element'`` is returned.
+        """
+        # Order matters: article body first, video description second.
+        for selector in ('.^index_cententWrap', '.^header_desc'):
+            try:
+                return self.session.s_ele(selector).text
+            except ElementNotFoundError:
+                continue
+        return 'not found element'
+
+
+def get_content(information_source: TNews) -> str:
+    """Fetch the page text for the given news record.
+
+    The argument keeps its historical name, but it is handed straight to
+    ``ThePaperContent``, whose constructor is annotated with ``TNews``; the
+    value returned is whatever ``ThePaperContent.get_content()`` yields, not
+    a list. ``finish()`` runs in a ``finally`` clause so it is called even
+    when extraction raises.
+    """
+    the_paper_content = ThePaperContent(information_source)
+    try:
+        return the_paper_content.get_content()
+    finally:
+        the_paper_content.finish()
+
+
+def content_task(news: TNews):
+    """Run the content seek task for ``news`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so it runs even when
+    ``do_seek_task()`` raises; the exception still propagates to the caller.
+    """
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}')
+    the_paper_content = ThePaperContent(news)
+    try:
+        the_paper_content.do_seek_task()
+    finally:
+        the_paper_content.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this fetches one sample
+    # detail page and logs the extracted text.
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://www.thepaper.cn/newsDetail_forward_29745442'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/the_paper_com/international.py
+++ b/seek/the_paper_com/international.py
@ -0,0 +1,38 @@
+import datetime
+
+from database.tinformationsource.model import TInformationSource
+from log.log_manager import logger
+from seek.the_paper_com.base import Base
+
+
+class International(Base):
+    """Named seeker that reuses ``Base`` unchanged.
+
+    Construction and ``get_news`` are both inherited; the constructor takes
+    the same single ``information_source`` argument as ``Base``.
+    """
+
+
+def get_news(information_source: TInformationSource) -> list:
+    """Scrape ``information_source`` once and return the parsed news items.
+
+    ``finish()`` runs in a ``finally`` clause so the seeker's page is closed
+    even when ``get_news()`` raises.
+    """
+    instance = International(information_source)
+    try:
+        return instance.get_news()
+    finally:
+        instance.finish()
+
+
+def news_task(information_source: TInformationSource):
+    """Run the seek task for ``information_source`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so the seeker's page is
+    closed even when ``do_seek_task()`` raises; the exception still
+    propagates to the caller.
+    """
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
+    instance = International(information_source)
+    try:
+        instance.do_seek_task()
+    finally:
+        instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this fetches the channel page
+    # and prints every parsed item; the task call is disabled here.
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://www.thepaper.cn/channel_122908'
+    information_source_.title = '国际_澎湃新闻'
+    # news_task(information_source_)
+    news_list_ = get_news(information_source_)
+    for news in news_list_:
+        print(news)
+    logger.info('Done.')
--- a/seek/the_paper_com/tech.py
+++ b/seek/the_paper_com/tech.py
@ -0,0 +1,38 @@
+import datetime
+
+from database.tinformationsource.model import TInformationSource
+from log.log_manager import logger
+from seek.the_paper_com.base import Base
+
+
+class Tech(Base):
+    """Named seeker that reuses ``Base`` unchanged.
+
+    Construction and ``get_news`` are both inherited; the constructor takes
+    the same single ``information_source`` argument as ``Base``.
+    """
+
+
+def get_news(information_source: TInformationSource) -> list:
+    """Scrape ``information_source`` once and return the parsed news items.
+
+    ``finish()`` runs in a ``finally`` clause so the seeker's page is closed
+    even when ``get_news()`` raises.
+    """
+    instance = Tech(information_source)
+    try:
+        return instance.get_news()
+    finally:
+        instance.finish()
+
+
+def news_task(information_source: TInformationSource):
+    """Run the seek task for ``information_source`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so the seeker's page is
+    closed even when ``do_seek_task()`` raises; the exception still
+    propagates to the caller.
+    """
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
+    instance = Tech(information_source)
+    try:
+        instance.do_seek_task()
+    finally:
+        instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this executes news_task() once
+    # against the channel page using an in-memory information source.
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'https://www.thepaper.cn/channel_119908'
+    information_source_.title = '科技_澎湃新闻'
+    news_task(information_source_)
+    # Alternative manual check: fetch and print the items without running the task.
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/xinhuanet_com/init.py
+++ b/seek/xinhuanet_com/init.py
--- a/seek/xinhuanet_com/pycache/init.cpython-312.pyc
+++ b/seek/xinhuanet_com/pycache/init.cpython-312.pyc
--- a/seek/xinhuanet_com/pycache/content.cpython-312.pyc
+++ b/seek/xinhuanet_com/pycache/content.cpython-312.pyc
--- a/seek/xinhuanet_com/pycache/information.cpython-312.pyc
+++ b/seek/xinhuanet_com/pycache/information.cpython-312.pyc
--- a/seek/xinhuanet_com/content.py
+++ b/seek/xinhuanet_com/content.py
@ -0,0 +1,58 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.content_base import ContentBase
+
+
+class ArticleContent(ContentBase):
+    """Reads body text and publish time from a loaded news.cn article page."""
+
+    def __init__(self, news: TNews):
+        super().__init__(news)
+
+    def get_content(self):
+        """Return the text of ``#detailContent``.
+
+        When the lookup raises ``ElementNotFoundError`` the literal string
+        ``'not found element'`` is returned instead.
+        """
+        try:
+            return self.session.s_ele('#detailContent').text
+        except ElementNotFoundError:
+            return 'not found element'
+
+    def get_occurrence_date(self):
+        """Return the publish time as ``'<year>/<day> <time>'``, or None.
+
+        The three parts are read from the ``.year``, ``.day`` and ``.time``
+        children of the ``.header-time left`` element. None is returned when
+        any lookup raises ``ElementNotFoundError``.
+        """
+        try:
+            header_time = self.session.s_ele('.header-time left')
+            year = header_time.s_ele('.year').text  # e.g. 2023
+            day = header_time.s_ele('.day').text  # e.g. 12/27
+            clock = header_time.s_ele('.time').text  # e.g. 08:05:11
+        except ElementNotFoundError:
+            return None
+        occurrence_date_ = f'{year}/{day} {clock}'
+        # Goes through the project logger instead of a bare print() so the
+        # value lands in the same output as every other message.
+        logger.info(occurrence_date_)
+        return occurrence_date_
+
+def get_content(information_source: TNews) -> str:
+    """Fetch the article text for the given news record.
+
+    The argument keeps its historical name, but it is handed straight to
+    ``ArticleContent``, whose constructor is annotated with ``TNews``; the
+    value returned is whatever ``ArticleContent.get_content()`` yields, not
+    a list. ``get_occurrence_date()`` is still invoked for its diagnostic
+    output and its result is discarded. ``finish()`` runs in a ``finally``
+    clause so it is called even when extraction raises.
+    """
+    article_content = ArticleContent(information_source)
+    try:
+        result = article_content.get_content()
+        article_content.get_occurrence_date()
+        return result
+    finally:
+        article_content.finish()
+
+
+def content_task(news: TNews):
+    """Run the content seek task for ``news`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so it runs even when
+    ``do_seek_task()`` raises; the exception still propagates to the caller.
+    """
+    logger.info(f'{news.title} news_task start execute at {datetime.datetime.now()}')
+    article_content = ArticleContent(news)
+    try:
+        article_content.do_seek_task()
+    finally:
+        article_content.finish()
+    logger.info(f'{news.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this fetches one sample
+    # article and logs the extracted text.
+    logger.info('This module is not for direct call!')
+    news_ = TNews()
+    news_.is_static = True
+    news_.url = 'https://www.news.cn/politics/leaders/20241227/90e76f85ad4a43ba94802b07c5736e00/c.html'
+    content = get_content(news_)
+    logger.info(content)
+    logger.info('Done.')
--- a/seek/xinhuanet_com/information.py
+++ b/seek/xinhuanet_com/information.py
@ -0,0 +1,59 @@
+import datetime
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+
+
+class Information(SeekBase):
+    """Seeker that parses the ``#focusListNews`` list of the loaded page."""
+
+    def __init__(self, information_source: TInformationSource):
+        super().__init__(information_source)
+
+    def get_news(self):
+        """Turn every ``<li>`` of ``#focusListNews`` into a ``TNews`` object.
+
+        Only title, url and source are filled in; no occurrence date is set
+        here. An entry that fails to parse is logged and skipped.
+        """
+        entries = self.session.s_ele('#focusListNews').s_eles('tag:li')
+        collected = []
+        for entry in entries:
+            try:
+                rs_news = TNews()
+                anchor = entry.s_ele('tag:a')
+                rs_news.title = anchor.text
+                rs_news.url = anchor.link
+                # Summary and occurrence-date extraction are disabled:
+                # rs_news.summary = tmp_.s_eles('tag:a')[1].text
+                # rs_news.occurrence_date = self.process_time(tmp_.s_ele('.info__time').text)
+                rs_news.source = self.information_source.title
+                collected.append(rs_news)
+            except ElementNotFoundError as e:
+                logger.error(f"ElementNotFoundError: {e} - Failed to find element in news item.")
+            except Exception as e:
+                logger.error(f'Unexpected error occurred: {e}')
+        return collected
+
+
+def get_news(information_source: TInformationSource) -> list:
+    """Scrape ``information_source`` once and return the parsed news items.
+
+    ``finish()`` runs in a ``finally`` clause so the seeker's page is closed
+    even when ``get_news()`` raises.
+    """
+    instance = Information(information_source)
+    try:
+        return instance.get_news()
+    finally:
+        instance.finish()
+
+
+def news_task(information_source: TInformationSource):
+    """Run the seek task for ``information_source`` and log start/end times.
+
+    ``finish()`` is called in a ``finally`` clause so the seeker's page is
+    closed even when ``do_seek_task()`` raises; the exception still
+    propagates to the caller.
+    """
+    logger.info(f'{information_source.title} news_task start execute at {datetime.datetime.now()}')
+    instance = Information(information_source)
+    try:
+        instance.do_seek_task()
+    finally:
+        instance.finish()
+    logger.info(f'{information_source.title} news_task end execute at {datetime.datetime.now()}')
+
+
+if __name__ == '__main__':
+    # Manual run: despite the log line below, this executes news_task() once
+    # against the xinhuanet.com home page using an in-memory information source.
+    logger.info('This module is not for direct call!')
+    information_source_ = TInformationSource()
+    information_source_.is_static = True
+    information_source_.url = 'http://www.xinhuanet.com/'
+    information_source_.title = '资讯_新华网'
+    news_task(information_source_)
+    # Alternative manual check: fetch and print the items without running the task.
+    # news_list_ = get_news(information_source_)
+    # for news in news_list_:
+    #     print(news)
+    logger.info('Done.')
--- a/seek/zhihu_com/init.py
+++ b/seek/zhihu_com/init.py
--- a/seek/zhihu_com/pycache/init.cpython-312.pyc
+++ b/seek/zhihu_com/pycache/init.cpython-312.pyc
--- a/seek/zhihu_com/pycache/zhihu.cpython-312.pyc
+++ b/seek/zhihu_com/pycache/zhihu.cpython-312.pyc
--- a/seek/zhihu_com/pycache/zhihu_hot.cpython-312.pyc
+++ b/seek/zhihu_com/pycache/zhihu_hot.cpython-312.pyc
--- a/seek/zhihu_com/demo.py
+++ b/seek/zhihu_com/demo.py
@ -0,0 +1,25 @@
+from database.database import get_session
+from database.thotcontent.crud import get_hot_content_by_topic_id
+from database.thottopic.crud import get_latest_hot_topic
+
+if __name__ == '__main__':
+    # Demo: print the latest hot topic and the contents stored for it.
+    with get_session() as db:
+        # 1. Fetch the most recent hot topic.
+        latest_hot_topic = get_latest_hot_topic(db)
+        print(latest_hot_topic)
+        # 2. Fetch the contents stored for that topic.
+        hot_contents = get_hot_content_by_topic_id(db, latest_hot_topic.id)
+        for hot_content in hot_contents:
+            print(hot_content)
+            # Character count of this content.
+            print(len(hot_content.content))
+        topic_content = [hot_content.content for hot_content in hot_contents]
+        print(topic_content)
+        print(len(topic_content))
+        # Show at most the first three contents; slicing avoids the
+        # IndexError that fixed indexes raise when fewer are stored.
+        for content_text in topic_content[:3]:
+            print('---------------------------------------------------------------')
+            print(content_text)
--- a/seek/zhihu_com/demo2.py
+++ b/seek/zhihu_com/demo2.py
@ -0,0 +1,116 @@
+import json
+
+from DrissionPage import Chromium
+from DrissionPage import ChromiumOptions
+from DrissionPage.errors import ElementNotFoundError
+
+from log.log_manager import logger
+
+
+def get_content_from_meta(metas, itemprop):
+    """Return the ``content`` attribute of the meta whose ``itemprop`` matches.
+
+    Every element of ``metas`` is inspected; when several match, the value
+    of the last one is returned. None is returned when nothing matches.
+    """
+    matches = [
+        meta.attr('content')
+        for meta in metas
+        if meta.attr('itemprop') == itemprop
+    ]
+    return matches[-1] if matches else None
+
+
+class Zhihu:
+    """Browser-driven demo scraper for a single Zhihu question page."""
+
+    def __init__(self):
+        # The ChromiumOptions object created here before was never passed to
+        # Chromium(), so it has been dropped.
+        self.browser = Chromium()
+        self.tab = None
+
+    def get_content(self, url):
+        """Scrape a question page and return its data as a JSON string.
+
+        The JSON object holds the question's meta values (title, counts,
+        keywords, dates), its description and a list of answer texts. Answers
+        shorter than 100 or longer than 1000 characters are skipped, and
+        collection stops once more than 5000 characters have been gathered.
+
+        All result fields are plain locals with defaults, so a failure early
+        in the scrape yields a JSON object with null/empty values instead of
+        a NameError when the result is built.
+        """
+        title = keywords = date_created = date_modified = None
+        follower_count = comment_count = answer_count = None
+        topic_description = ""
+        contents_result = []
+        try:
+            self.tab = self.browser.new_tab()
+            # Open the topic/question page.
+            self.tab.get(url)
+
+            for _ in range(10):
+                # Wait for answers to load.
+                self.tab.wait.ele_displayed('.List-item')
+                self.tab.wait(3)
+                # Scroll to the bottom, then nudge up, to trigger further loading.
+                self.tab.scroll.to_bottom()
+                self.tab.wait(1)
+                self.tab.scroll.up(100)
+
+            # Question metadata: title, keywords, creation/modification date,
+            # answer count, comment count and follower count.
+            question_page = self.tab.ele('.QuestionPage')
+            # The attributes live in the first nine <meta> tags of QuestionPage.
+            metas = question_page.eles('tag:meta')[0:9]
+            title = get_content_from_meta(metas, 'name')
+            answer_count = get_content_from_meta(metas, 'answerCount')
+            comment_count = get_content_from_meta(metas, 'commentCount')
+            keywords = get_content_from_meta(metas, 'keywords')
+            date_created = get_content_from_meta(metas, 'dateCreated')
+            date_modified = get_content_from_meta(metas, 'dateModified')
+            follower_count = get_content_from_meta(metas, 'zhihu:followerCount')
+            try:
+                topic_description = question_page.ele('.RichText ztext css-ob6uua').text
+            except ElementNotFoundError:
+                logger.error(f"元素缺失：不存在topic_description")
+
+            # All answer entries.
+            content_items = self.tab.ele('.Question-mainColumn').eles('.List-item')
+
+            total_characters = 0
+            for item in content_items:
+                try:
+                    content = item.ele('.RichContent-inner').text
+                    # Length of this answer in characters.
+                    content_len = len(content)
+                    print(content_len)
+                    if content_len > 1000 or content_len < 100:
+                        logger.error(f"skip本条内容，内容长度：{content_len}")
+                        continue
+                    if total_characters > 5000:
+                        logger.error(f"contents_result长度超过5000，跳出循环")
+                        break
+                    total_characters += content_len
+                    contents_result.append(content)
+                    # Log how many answers have been kept so far.
+                    logger.info(f"contents_result长度：{len(contents_result)}")
+                except ElementNotFoundError as e:
+                    logger.error(f"元素缺失：{str(e)}")
+                except ValueError as e:
+                    logger.error(f"热度值转换失败：{str(e)}")
+
+        except ElementNotFoundError as e:
+            logger.error(f"热榜容器元素未找到：{str(e)}")
+        except Exception as e:
+            logger.error(f"获取热榜数据异常：{str(e)}")
+        finally:
+            if self.tab:
+                self.tab.close()
+        # Return the collected data as JSON.
+        return json.dumps({
+            'title': title,
+            'answer_count': answer_count,
+            'comment_count': comment_count,
+            'topic_description': topic_description,
+            'keywords': keywords,
+            'date_created': date_created,
+            'date_modified': date_modified,
+            'follower_count': follower_count,
+            'contents': contents_result
+        }, ensure_ascii=False)
+
+
+if __name__ == '__main__':
+    # Manual test run.
+    logger.info('知乎采集测试')
+    # Scrape one sample question and print the JSON length and text.
+    zhihu = Zhihu()
+    result = zhihu.get_content('https://www.zhihu.com/question/588507809')
+    print(len(result))
+    print(result)
+    logger.info('测试完成')
+
--- a/seek/zhihu_com/hot.py
+++ b/seek/zhihu_com/hot.py
@ -0,0 +1,85 @@
+import datetime
+import re  # 添加正则表达式库的导入
+
+from DrissionPage.errors import ElementNotFoundError
+
+from database.database import get_session
+from database.tinformationsource.model import TInformationSource
+from database.tnews.model import TNews
+from log.log_manager import logger
+from seek.seek_base import SeekBase
+
+
+class ZhihuHot(SeekBase):
+    """Seeker that scrapes the Zhihu hot list through the browser tab."""
+
+    def get_news(self):
+        """Collect the current Zhihu hot-list entries as ``TNews`` items.
+
+        Returns:
+            list: the items parsed so far. Errors are logged, never raised;
+            a failure outside the per-item handlers ends the scrape early.
+        """
+        news_result = []
+        try:
+            # Open the hot-list page.
+            self.tab.get('https://www.zhihu.com/hot')
+
+            # Wait until hot-list entries are displayed.
+            self.tab.wait.ele_displayed('.HotItem')
+
+            # Grab every hot-list entry.
+            hot_items = self.tab.ele('.HotList-list').eles('.HotItem')
+
+            for item in hot_items:
+                try:
+                    news = TNews()
+                    # Title and link both come from the entry's first <a>.
+                    anchor = item.ele('tag:a')
+                    # Keep the title attribute verbatim: the former trailing
+                    # .title() call was str.title(), which re-capitalises
+                    # every word and raises on a missing attribute.
+                    news.title = anchor.attr('title')
+                    news.url = anchor.link
+
+                    # Heat text of the entry.
+                    heat_value = item('.HotItem-metrics HotItem-metrics--bottom').text
+                    logger.info(f"热度值：{heat_value}")
+                    # Keep only the "<digits> 万" part.
+                    match = re.search(r'(\d+\s*万)', heat_value)
+
+                    if match:
+                        news.heat = match.group(1).replace(' ', '')  # strip spaces
+                    else:
+                        logger.error(f"无法提取热度值：{heat_value}")
+                    logger.info(f"提取到的热度值：{news.heat}")
+
+                    news.source = self.information_source.title
+                    news.occurrence_date = datetime.datetime.now()
+                    news_result.append(news)
+                except ElementNotFoundError as e:
+                    logger.error(f"元素缺失：{str(e)}")
+                except ValueError as e:
+                    logger.error(f"热度值转换失败：{str(e)}")
+
+        except ElementNotFoundError as e:
+            logger.error(f"热榜容器元素未找到：{str(e)}")
+        except Exception as e:
+            logger.error(f"获取热榜数据异常：{str(e)}")
+
+        return news_result
+
+def get_news(information_source: TInformationSource) -> list:
+    """Public entry point: scrape the hot list and return the news items.
+
+    ``finish()`` runs in a ``finally`` clause so the tab opened by the
+    ``ZhihuHot`` constructor is closed after every call; previously it was
+    never closed.
+    """
+    zhihu = ZhihuHot(information_source)
+    try:
+        return zhihu.get_news()
+    finally:
+        zhihu.finish()
+
+def news_task(information_source: TInformationSource):
+    """Task entry point: scrape the hot list and log every item.
+
+    A database session is opened around the scrape, but persisting the items
+    is currently disabled, so the session itself is not used.
+    """
+    with get_session():
+        collected = get_news(information_source)
+        # create_news_list_if_url_not_exists(db, news_list)
+        for news in collected:
+            logger.info(f"采集到新闻：{news}")
+
+if __name__ == '__main__':
+    # Manual test run.
+    logger.info('知乎热榜采集测试')
+    information_source_ = TInformationSource()
+    information_source_.is_static = False  # Zhihu needs browser rendering
+    information_source_.url = 'https://www.zhihu.com/hot'
+    information_source_.title = '热榜_知乎'
+
+    # Run the scraping task.
+    news_task(information_source_)
+    logger.info('测试完成')
+
--- a/seek/zhihu_com/zhihu.py
+++ b/seek/zhihu_com/zhihu.py
@ -0,0 +1,173 @@
+import re
+
+from DrissionPage import Chromium
+from DrissionPage import ChromiumOptions
+from DrissionPage.errors import ElementNotFoundError
+
+from database.database import get_session
+from database.thotcontent.crud import create_contents_top3_if_url_not_exists
+from database.thotcontent.model import THotContent
+from database.thottopic.crud import create_topics_if_url_not_exists, update_hot_topic
+from database.thottopic.model import THotTopic
+from log.log_manager import logger
+
+def get_content_from_meta(metas, itemprop):
+    """Look up a meta value by its ``itemprop``.
+
+    Walks all of ``metas`` and returns the ``content`` attribute of the last
+    element whose ``itemprop`` attribute equals ``itemprop``; returns None
+    when no element matches.
+    """
+    found = None
+    for candidate in metas:
+        if candidate.attr('itemprop') != itemprop:
+            continue
+        found = candidate.attr('content')
+    return found
+
+
+class Zhihu:
+    """Browser-driven scraper for Zhihu recommended topics and their answers."""
+
+    def __init__(self):
+        # The ChromiumOptions object created here before was never passed to
+        # Chromium(), so it has been dropped.
+        self.browser = Chromium()
+        self.tab = None
+
+    @staticmethod
+    def _to_int(raw):
+        """Convert a meta value to int; return None if it is missing or not numeric."""
+        try:
+            return int(raw)
+        except (TypeError, ValueError):
+            return None
+
+    @staticmethod
+    def _parse_upvotes(text):
+        """Parse an upvote label such as ``'1.2 万'`` or ``'356'`` into an int (0 if no number)."""
+        match = re.search(r'(\d+\.?\d*)\s*万?', text)
+        if not match:
+            return 0
+        number = float(match.group(1))
+        # round() rather than int() so float error cannot truncate the scaled value.
+        return round(number * 10000) if '万' in text else int(number)
+
+    @staticmethod
+    def _parse_comment_count(text):
+        """Parse a comment label such as ``'1,234 条评论'`` into an int (0 if no number)."""
+        match = re.search(r'(\d{1,3}(?:,\d{3})*)', text)
+        return int(match.group(1).replace(',', '')) if match else 0
+
+    def get_topics(self):
+        """Scrape the Zhihu home page and return question topics.
+
+        Only entries whose link starts with a ``/question/<id>`` URL are
+        kept, and the URL is cut down to that prefix. Errors are logged and
+        whatever was collected so far is returned.
+        """
+        topics_result = []
+        try:
+            self.tab = self.browser.new_tab()
+            # Open the Zhihu home page.
+            self.tab.get('https://www.zhihu.com')
+
+            # Wait for the recommended entries to load.
+            self.tab.wait.ele_displayed('.Card TopstoryItem TopstoryItem-isRecommend')
+
+            # Grab every recommended entry.
+            hot_items = self.tab.ele('.Topstory-content').eles('.Card TopstoryItem TopstoryItem-isRecommend')
+
+            pattern = r'^https://www\.zhihu\.com/question/\d+'
+            for item in hot_items:
+                try:
+                    topic = THotTopic()
+                    topic.source = '知乎'
+                    # Title and link both come from the <a> inside the <h2>.
+                    anchor = item.ele('tag:h2').ele('tag:a')
+                    topic.topic = anchor.text
+                    # A missing link is treated as "not a question" instead of
+                    # raising inside re and aborting the whole loop.
+                    match = re.match(pattern, anchor.link or '')
+                    if not match:
+                        continue
+                    topic.url = match.group(0)
+                    topics_result.append(topic)
+                except ElementNotFoundError as e:
+                    logger.error(f"元素缺失：{str(e)}")
+                except ValueError as e:
+                    logger.error(f"热度值转换失败：{str(e)}")
+
+        except ElementNotFoundError as e:
+            logger.error(f"热榜容器元素未找到：{str(e)}")
+        except Exception as e:
+            logger.error(f"获取热榜数据异常：{str(e)}")
+        finally:
+            if self.tab:
+                self.tab.close()
+        return topics_result
+
+    def get_content(self, topic: THotTopic, db):
+        """Scrape a question page, update ``topic`` and return its answers.
+
+        The topic's counts, keywords, dates and description are filled from
+        the page and saved with ``update_hot_topic(db, topic)``. A count whose
+        meta value is missing or not numeric is left untouched rather than
+        aborting the scrape. Returns a list of ``THotContent``; errors are
+        logged and whatever was collected so far is returned.
+        """
+        contents_result = []
+        try:
+            self.tab = self.browser.new_tab()
+            # Open the topic/question page.
+            self.tab.get(topic.url)
+
+            for _ in range(10):
+                # Wait for answers to load.
+                self.tab.wait.ele_displayed('.List-item')
+                self.tab.wait(3)
+                # Scroll to the bottom, then nudge up, to trigger further loading.
+                self.tab.scroll.to_bottom()
+                self.tab.wait(1)
+                self.tab.scroll.up(100)
+
+            # Question metadata: keywords, creation/modification date,
+            # answer count, comment count and follower count.
+            question_page = self.tab.ele('.QuestionPage')
+            # The attributes live in the first nine <meta> tags of QuestionPage.
+            metas = question_page.eles('tag:meta')[0:9]
+            counts = {
+                'content_count': self._to_int(get_content_from_meta(metas, 'answerCount')),
+                'comment_count': self._to_int(get_content_from_meta(metas, 'commentCount')),
+                'follower_count': self._to_int(get_content_from_meta(metas, 'zhihu:followerCount')),
+            }
+            for field, value in counts.items():
+                if value is not None:
+                    setattr(topic, field, value)
+            topic.keywords = get_content_from_meta(metas, 'keywords')
+            topic.date_created = get_content_from_meta(metas, 'dateCreated')
+            topic.date_modified = get_content_from_meta(metas, 'dateModified')
+            try:
+                topic.topic_description = question_page.ele('.RichText ztext css-ob6uua').text
+            except ElementNotFoundError:
+                logger.error(f"元素缺失：不存在topic_description")
+            update_hot_topic(db, topic)
+
+            # All answer entries.
+            content_items = self.tab.ele('.Question-mainColumn').eles('.List-item')
+
+            for item in content_items:
+                try:
+                    content = THotContent()
+                    content.topic_id = topic.id
+                    content.url = item.ele('.ContentItem-time').ele('tag:a').link
+                    upvote_str = item.ele('.Button VoteButton VoteButton--up FEfUrdfMIKpQDJDqkjte').text
+                    content.content_upvote_count = self._parse_upvotes(upvote_str)
+                    comment_str = item.ele('.Button ContentItem-action FEfUrdfMIKpQDJDqkjte Button--plain Button--withIcon Button--withLabel fEPKGkUK5jyc4fUuT0QP B46v1Ak6Gj5sL2JTS4PY RuuQ6TOh2cRzJr6WlyQp').text
+                    content.content_comment_count = self._parse_comment_count(comment_str)
+                    content.content = item.ele('.RichContent-inner').text
+                    contents_result.append(content)
+                except ElementNotFoundError as e:
+                    logger.error(f"元素缺失：{str(e)}")
+                except ValueError as e:
+                    logger.error(f"热度值转换失败：{str(e)}")
+
+        except ElementNotFoundError as e:
+            logger.error(f"热榜容器元素未找到：{str(e)}")
+        except Exception as e:
+            logger.error(f"获取热榜数据异常：{str(e)}")
+        finally:
+            if self.tab:
+                self.tab.close()
+        return contents_result
+
+
+
+def get_topics() -> list:
+    """Create a ``Zhihu`` scraper and return the result of its ``get_topics()``."""
+    return Zhihu().get_topics()
+
+def gather_task():
+    """Task entry point: store new topics, then scrape and store their answers.
+
+    Topics are scraped and passed to ``create_topics_if_url_not_exists``;
+    for each topic it returns, the question page is scraped and the answers
+    are handed to ``create_contents_top3_if_url_not_exists``.
+    """
+    with get_session() as db:
+        scraper = Zhihu()
+        new_topics = create_topics_if_url_not_exists(db, scraper.get_topics())
+        for topic in new_topics:
+            logger.info(f"采集到话题：{topic}")
+            answers = scraper.get_content(topic, db)
+            create_contents_top3_if_url_not_exists(db, answers)
+
+
+if __name__ == '__main__':
+    # Manual test run.
+    logger.info('知乎采集测试')
+    # Run the gather task once.
+    gather_task()
+    logger.info('测试完成')
+
--- a/seek/zhihu_com/zhihu_hot.py
+++ b/seek/zhihu_com/zhihu_hot.py
@ -0,0 +1,156 @@
+from DrissionPage import Chromium
+from DrissionPage import ChromiumOptions
+from DrissionPage.errors import ElementNotFoundError
+
+from log.log_manager import logger
+
+
+def get_content_from_meta(metas, itemprop):
+    """Return the ``content`` attribute of the last meta whose ``itemprop`` equals *itemprop*.
+
+    Every element of *metas* is inspected in order; when several match, the
+    final one wins. Returns ``None`` when nothing matches.
+    """
+    matching = [meta.attr('content') for meta in metas if meta.attr('itemprop') == itemprop]
+    return matching[-1] if matching else None
+
+
+class ZhihuHot:
+    """Scraper for the Zhihu hot list and question pages, driven through DrissionPage.
+
+    Every public method opens a new tab on ``self.browser`` and closes it in a
+    ``finally`` clause before returning. Scraping errors are logged, not raised.
+    """
+
+    # Number of wait/scroll rounds performed so that more answers get loaded.
+    SCROLL_ROUNDS = 10
+    # Answers shorter than MIN or longer than MAX characters are skipped.
+    MIN_ANSWER_CHARS = 100
+    MAX_ANSWER_CHARS = 1000
+    # No further answers are collected once the running total exceeds this.
+    MAX_TOTAL_CHARS = 5000
+
+    def __init__(self):
+        """Create the ``Chromium`` browser object shared by all scraping methods."""
+        self.browser = Chromium()
+
+    def get_topic_url_list(self) -> list:
+        """Return the link of every entry on the Zhihu hot list.
+
+        Returns:
+            list: One URL per ``.HotItem`` entry inside ``.HotList-list``, read
+            from the entry's ``tag:a`` element. Errors are logged rather than
+            raised, so the list may be partial or empty.
+        """
+        _topic_url_list = []
+        _tab = None
+        try:
+            _tab = self.browser.new_tab()
+            # Open the hot-list page.
+            _tab.get('https://www.zhihu.com/hot')
+
+            # Wait for the hot-list entries to be displayed.
+            _tab.wait.ele_displayed('.HotItem')
+
+            # Fetch all hot-list entries.
+            hot_items = _tab.ele('.HotList-list').eles('.HotItem')
+
+            for item in hot_items:
+                try:
+                    _topic_url_list.append(item.ele('tag:a').link)
+                except ElementNotFoundError as e:
+                    # A broken entry is logged and skipped; the others are kept.
+                    logger.error(f"元素缺失：{str(e)}")
+                except ValueError as e:
+                    logger.error(f"热度值转换失败：{str(e)}")
+
+        except ElementNotFoundError as e:
+            logger.error(f"热榜容器元素未找到：{str(e)}")
+        except Exception as e:
+            logger.error(f"获取热榜数据异常：{str(e)}")
+        finally:
+            if _tab:
+                _tab.close()
+
+        return _topic_url_list
+
+    def get_content(self, url) -> dict:
+        """Collect metadata and answer texts from one question page.
+
+        Args:
+            url: Address of the question page to open.
+
+        Returns:
+            dict: Keys ``title``, ``topic_description``, ``keywords``, ``url``,
+            ``contents``, ``date_created``, ``date_modified``,
+            ``follower_count``, ``answer_count`` and ``comment_count``.
+            Metadata values are read from ``<meta itemprop=...>`` tags and stay
+            ``None`` when they could not be read; ``topic_description``
+            defaults to ``""`` and ``contents`` to ``[]``. Errors are logged
+            rather than raised.
+        """
+        contents_result = []
+        # Plain locals with defaults instead of module globals: the return
+        # below is always well defined, and a failed call can neither raise
+        # NameError nor hand back values left over from a previous call.
+        title = keywords = date_created = date_modified = None
+        follower_count = comment_count = answer_count = None
+        topic_description = ""
+        _tab = None
+        try:
+            _tab = self.browser.new_tab()
+            # Open the topic/question page.
+            _tab.get(url)
+            self._load_more_answers(_tab)
+
+            question_page = _tab.ele('.QuestionPage')
+            # Only the first nine <meta> tags of the question page are inspected.
+            metas = question_page.eles('tag:meta')[0:9]
+            title = get_content_from_meta(metas, 'name')
+            answer_count = get_content_from_meta(metas, 'answerCount')
+            comment_count = get_content_from_meta(metas, 'commentCount')
+            keywords = get_content_from_meta(metas, 'keywords')
+            date_created = get_content_from_meta(metas, 'dateCreated')
+            date_modified = get_content_from_meta(metas, 'dateModified')
+            follower_count = get_content_from_meta(metas, 'zhihu:followerCount')
+
+            topic_description = self._read_topic_description(question_page)
+            self._collect_answers(_tab, contents_result)
+
+        # NOTE(review): both messages below mention the hot list although this
+        # method scrapes a question page; the wording is kept unchanged.
+        except ElementNotFoundError as e:
+            logger.error(f"热榜容器元素未找到：{str(e)}")
+        except Exception as e:
+            logger.error(f"获取热榜数据异常：{str(e)}")
+        finally:
+            if _tab:
+                _tab.close()
+        return {
+            'title': title,
+            'topic_description': topic_description,
+            'keywords': keywords,
+            'url': url,
+            'contents': contents_result,
+            'date_created': date_created,
+            'date_modified': date_modified,
+            'follower_count': follower_count,
+            'answer_count': answer_count,
+            'comment_count': comment_count,
+        }
+
+    def _load_more_answers(self, tab):
+        """Run ``SCROLL_ROUNDS`` wait/scroll rounds on *tab* so more answers load."""
+        for _ in range(self.SCROLL_ROUNDS):
+            # Wait for answer items to be displayed, then pause.
+            tab.wait.ele_displayed('.List-item')
+            tab.wait(3)
+            # Jump to the bottom, pause, then scroll back up by 100.
+            tab.scroll.to_bottom()
+            tab.wait(1)
+            tab.scroll.up(100)
+
+    @staticmethod
+    def _read_topic_description(question_page):
+        """Return the question's rich-text description, or ``""`` if an element is missing."""
+        try:
+            # Click the "more" button first when the lookup returns a truthy element.
+            more_button = question_page.ele('.^Button QuestionRichText-more')
+            if more_button:
+                more_button.click()
+            return question_page.ele('.^QuestionRichText').text
+        except ElementNotFoundError:
+            logger.error("元素缺失：不存在topic_description")
+            return ""
+
+    def _collect_answers(self, tab, contents_result):
+        """Append qualifying answer texts from *tab* to *contents_result*.
+
+        The list is filled in place (not returned) so that answers gathered
+        before an unexpected exception are still visible to the caller.
+        """
+        total_characters = 0
+        for item in tab.ele('.Question-mainColumn').eles('.List-item'):
+            try:
+                content = item.ele('.RichContent-inner').text
+                content_len = len(content)
+                if content_len > self.MAX_ANSWER_CHARS or content_len < self.MIN_ANSWER_CHARS:
+                    logger.error(f"skip本条内容，内容长度：{content_len}")
+                    continue
+                # The budget is checked before adding, so the final total may
+                # exceed MAX_TOTAL_CHARS by up to one answer.
+                if total_characters > self.MAX_TOTAL_CHARS:
+                    logger.error(f"contents_result长度超过{self.MAX_TOTAL_CHARS}，跳出循环")
+                    break
+                total_characters += content_len
+                contents_result.append(content)
+                logger.info(f"contents_result长度：{len(contents_result)}")
+            except ElementNotFoundError as e:
+                logger.error(f"元素缺失：{str(e)}")
+            except ValueError as e:
+                logger.error(f"热度值转换失败：{str(e)}")
+
+
+if __name__ == '__main__':
+    # Test case: fetch one hard-coded question page and print what was scraped.
+    logger.info('知乎采集测试')
+    # Run the collection task.
+    zhihu_hot = ZhihuHot()
+    result = zhihu_hot.get_content('https://www.zhihu.com/question/14351228309')
+    # get_content returns a dict, so this prints its number of keys, not the number of answers.
+    print(len(result))
+    print(result)
+    # topic_url_list = zhihu_hot.get_topic_url_list()
+    # print(topic_url_list)
+    logger.info('测试完成')
+