This commit is contained in:
110
llm/generate_daily_article.py
Normal file
110
llm/generate_daily_article.py
Normal file
@ -0,0 +1,110 @@
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from config.settings import settings
|
||||
from llm import prompt as prompts
|
||||
from utils.logger import logger
|
||||
from llm.prompts.daily_article_prompt import PROMPT_DAILY_ARTICLE
|
||||
|
||||
|
||||
# Endpoint passed to the OpenAI SDK client as ``base_url`` (DashScope "compatible-mode" API path).
BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
# Model identifier sent as ``model`` on every chat-completion request made by this module.
MODEL = "deepseek-v3.2"
|
||||
|
||||
|
||||
def _make_client() -> OpenAI:
    """Build an OpenAI SDK client pointed at ``BASE_URL``.

    The API key is read from ``settings.DASHSCOPE_API_KEY`` each time the
    function is called; no client instance is cached.
    """
    client = OpenAI(base_url=BASE_URL, api_key=settings.DASHSCOPE_API_KEY)
    return client
|
||||
|
||||
|
||||
def _call_model(system_prompt: Optional[str], user_prompt: str, stream: bool = False, enable_search: bool = False) -> Any:
    """Send one chat-completion request and return the reply.

    Args:
        system_prompt: Optional system message; omitted from the request
            when falsy.
        user_prompt: Content of the single user message.
        stream: Forwarded unchanged as the SDK's ``stream`` argument.
        enable_search: Forwarded to the backend inside ``extra_body`` under
            the ``enable_search`` key.

    Returns:
        ``resp.choices[0].message.content`` when that path exists, otherwise
        ``resp.choices[0].text``, otherwise the raw response object. A
        response that exposes neither path (for example a streaming
        response) is therefore handed back untouched for the caller to deal
        with.
    """
    client = _make_client()

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": user_prompt})

    resp = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        stream=stream,
        extra_body={"enable_search": enable_search},
    )

    # Probe the known response layouts in order of preference. Only lookup
    # failures mean "this layout does not apply"; any other error is a real
    # problem and is allowed to propagate instead of being swallowed.
    accessors = (
        lambda r: r.choices[0].message.content,  # chat-completion layout
        lambda r: r.choices[0].text,  # legacy completion layout
    )
    for read in accessors:
        try:
            return read(resp)
        except (AttributeError, IndexError, KeyError, TypeError):
            continue

    # Last resort: return the raw response object.
    return resp
|
||||
|
||||
|
||||
def _extract_json(text: str) -> str:
|
||||
"""Attempt to extract the first JSON object/array from text."""
|
||||
if not isinstance(text, str):
|
||||
raise ValueError("Expected text to be str")
|
||||
# Find first '[' or '{'
|
||||
start_idx = None
|
||||
for i, ch in enumerate(text):
|
||||
if ch in "[{":
|
||||
start_idx = i
|
||||
break
|
||||
if start_idx is None:
|
||||
raise ValueError("No JSON object/array found in text")
|
||||
|
||||
# Try to find a matching closing bracket by scanning and counting
|
||||
stack = []
|
||||
for j in range(start_idx, len(text)):
|
||||
ch = text[j]
|
||||
if ch in "{[":
|
||||
stack.append(ch)
|
||||
elif ch in "]}":
|
||||
if not stack:
|
||||
continue
|
||||
opening = stack.pop()
|
||||
if (opening == "{" and ch != "}") or (opening == "[" and ch != "]"):
|
||||
# mismatched, continue
|
||||
continue
|
||||
if not stack:
|
||||
return text[start_idx : j + 1]
|
||||
|
||||
# Fallback: try regex to capture last '}' or ']' occurrence
|
||||
m = re.search(r"(\{.*\}|\[.*\])", text, re.S)
|
||||
if m:
|
||||
return m.group(1)
|
||||
raise ValueError("Could not extract JSON from model output")
|
||||
|
||||
|
||||
def _parse_json_safe(text: str) -> Any:
|
||||
try:
|
||||
return json.loads(text)
|
||||
except Exception:
|
||||
# try to extract JSON substring
|
||||
jtext = _extract_json(text)
|
||||
return json.loads(jtext)
|
||||
|
||||
|
||||
def generate_daily_article() -> Dict[str, Any]:
    """Run ``PROMPT_DAILY_ARTICLE`` through the model and return the decoded JSON.

    The prompt is sent as the only user message with ``enable_search=True``.
    The prompt's template asks for a single JSON object, which is why the
    return type is a dict; if the model call already yields a ``dict`` or
    ``list`` it is returned unchanged.

    Returns:
        The decoded JSON produced by the model.

    Raises:
        ValueError: If the model returned no content, or its output does not
            contain parseable JSON.
    """
    logger.debug(f"prompt for generate_daily_article:\n{PROMPT_DAILY_ARTICLE}")

    content = _call_model(system_prompt=None, user_prompt=PROMPT_DAILY_ARTICLE, enable_search=True)
    logger.debug(f"raw output from generate_daily_article:\n{content}")

    # Already structured: nothing to parse.
    if isinstance(content, (dict, list)):
        return content

    # An empty reply would otherwise be stringified to "None" and fail later
    # with a message about missing brackets; fail here with the real cause.
    if content is None:
        raise ValueError("Model returned no content for generate_daily_article")

    # Anything that is not a string (e.g. a raw response object) is
    # stringified so that JSON extraction can still be attempted.
    text = content if isinstance(content, str) else str(content)
    data = _parse_json_safe(text)
    logger.debug(f"result for generate_daily_article:\n{data}")
    return data
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: generate one article and print only its body text.
    result = generate_daily_article()
    print(result["阶段4_今日文章"]["文章正文"])
|
||||
@ -11,14 +11,14 @@ from utils.logger import logger
|
||||
|
||||
|
||||
BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
MODEL = "deepseek-v3.2-exp"
|
||||
MODEL = "deepseek-v3.2"
|
||||
|
||||
|
||||
def _make_client() -> OpenAI:
|
||||
return OpenAI(api_key=settings.DASHSCOPE_API_KEY, base_url=BASE_URL)
|
||||
|
||||
|
||||
def _call_model(system_prompt: Optional[str], user_prompt: str, stream: bool = False) -> Any:
|
||||
def _call_model(system_prompt: Optional[str], user_prompt: str, stream: bool = False, enable_search: bool = False) -> Any:
|
||||
client = _make_client()
|
||||
messages = []
|
||||
if system_prompt:
|
||||
@ -26,7 +26,7 @@ def _call_model(system_prompt: Optional[str], user_prompt: str, stream: bool = F
|
||||
messages.append({"role": "user", "content": user_prompt})
|
||||
|
||||
# Non-streaming call for simplicity
|
||||
resp = client.chat.completions.create(model=MODEL, messages=messages, stream=stream)
|
||||
resp = client.chat.completions.create(model=MODEL, messages=messages, stream=stream, extra_body={"enable_search": enable_search})
|
||||
# When stream=False the SDK typically returns a full object; content location may vary.
|
||||
# We'll try common access patterns.
|
||||
try:
|
||||
@ -118,7 +118,7 @@ def generate_topics(start_time: Optional[str] = None, end_time: Optional[str] =
|
||||
|
||||
logger.debug(f"prompt for generate_topics:\n{user_prompt}")
|
||||
|
||||
content = _call_model(system_prompt=None, user_prompt=user_prompt)
|
||||
content = _call_model(system_prompt=None, user_prompt=user_prompt, enable_search=True)
|
||||
logger.debug(f"raw output from generate_topics:\n{content}")
|
||||
if isinstance(content, (dict, list)):
|
||||
return content
|
||||
|
||||
@ -42,7 +42,7 @@ prompt_b1 = """
|
||||
- meme_name:要写段子的梗名称(字符串)
|
||||
- research:关于该梗的深度研究文本(字符串)
|
||||
|
||||
根据以上输入,创作3篇风格不同的脱口秀段子,要求如下并严格返回 JSON 对象(仅输出 JSON):
|
||||
根据以上输入,创作1篇风格不同的脱口秀段子,要求如下并严格返回 JSON 对象(仅输出 JSON):
|
||||
{
|
||||
"meme": "梗名称",
|
||||
"style": "观察生活",
|
||||
@ -57,7 +57,7 @@ prompt_b2 = """
|
||||
- meme_name:要写段子的梗名称(字符串)
|
||||
- research:关于该梗的深度研究文本(字符串)
|
||||
|
||||
根据以上输入,创作3篇风格不同的脱口秀段子,要求如下并严格返回 JSON 对象(仅输出 JSON):
|
||||
根据以上输入,创作1篇风格不同的脱口秀段子,要求如下并严格返回 JSON 对象(仅输出 JSON):
|
||||
{
|
||||
"meme": "梗名称",
|
||||
"style": "夸张讽刺",
|
||||
@ -72,7 +72,7 @@ prompt_b3 = """
|
||||
- meme_name:要写段子的梗名称(字符串)
|
||||
- research:关于该梗的深度研究文本(字符串)
|
||||
|
||||
根据以上输入,创作3篇风格不同的脱口秀段子,要求如下并严格返回 JSON 对象(仅输出 JSON):
|
||||
根据以上输入,创作1篇风格不同的脱口秀段子,要求如下并严格返回 JSON 对象(仅输出 JSON):
|
||||
{
|
||||
"meme": "梗名称",
|
||||
"style": "角色扮演",
|
||||
@ -87,27 +87,37 @@ prompt_c = """
|
||||
- meme_name:梗名称(字符串)
|
||||
- materials:包含“深度研究”与若干脱口秀段子的文本(字符串),已由人工筛选
|
||||
|
||||
任务:把 materials 整合成一篇完整的播客文稿,结构严格按照:开场白 -> 梗介绍 -> 起源考据 -> 传播路径 -> 影响分析 -> 脱口秀环节(插入2-3个段子) -> 结束语
|
||||
任务:将 materials 整合为一档四人播客的完整文稿。
|
||||
|
||||
输出格式(严格 JSON,对话按顺序列出,角色限定为 host/guest):
|
||||
节目设定:
|
||||
- 主持人 Host(1人):理性、引导节奏、串联全场。
|
||||
- 脱口秀演员 Guest_A / Guest_B / Guest_C(3人):各有幽默风格,可即兴互动,负责讲段子与分析。
|
||||
|
||||
文稿结构(请严格按以下流程撰写):
|
||||
1. 开场白(Host 开场,介绍节目与三位演员,轻松互动,40-80字)
|
||||
2. 梗介绍(Host 简明引入梗,可向演员提问互动,40-100字)
|
||||
3. 起源考据(由一位演员结合材料讲述,可穿插其他人简短反应,60-150字)
|
||||
4. 传播路径(Host 引导,可由不同演员补充案例,50-120字)
|
||||
5. 影响分析(演员轮流发表观点,Host 总结,80-180字)
|
||||
6. 脱口秀环节(Host 引入,三位演员依次表演段子,每个段子 1000 - 1200 字,段子之间可有简短互动或调侃)
|
||||
7. 结束语(Host 收尾,感谢演员,邀请听众互动,30-60字)
|
||||
|
||||
输出格式(严格 JSON):
|
||||
{
|
||||
"title": "节目标题(建议不超12字)",
|
||||
"title": "节目标题(12字以内,吸引人)",
|
||||
"script": [
|
||||
{"role": "host", "text": "开场白(口语化,20-60字)"},
|
||||
{"role": "host", "text": "梗介绍(简明,30-80字)"},
|
||||
{"role": "guest", "text": "起源考据(40-120字)"},
|
||||
{"role": "host", "text": "传播路径(30-80字)"},
|
||||
{"role": "guest", "text": "影响分析(40-120字)"},
|
||||
{"role": "host", "text": "转入脱口秀环节的台词(15-40字)"},
|
||||
{"role": "guest", "text": "段子A(来自 materials,1000-1200字)"},
|
||||
{"role": "guest", "text": "段子B(来自 materials,1000-1200字)"},
|
||||
{"role": "guest", "text": "段子C(来自 materials,1000-1200字)"},
|
||||
{"role": "host", "text": "结束语(15-40字)"}
|
||||
{"role": "host", "text": "..."},
|
||||
{"role": "guest_a", "text": "..."},
|
||||
{"role": "guest_b", "text": "..."},
|
||||
{"role": "guest_c", "text": "..."},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
要求:
|
||||
- 语言口语化,避免书面语;角色语气分别为:host(理性、引导)、guest(幽默、即兴)。
|
||||
- 在 script 中只保留最终可直接朗读的台词,不要加入编剧说明或括注。每段尽量简洁,便于主播读出。
|
||||
- 严格输出 JSON,不要额外解释或多余文本。
|
||||
- 语言高度口语化,符合聊天氛围,避免书面语。
|
||||
- 角色区分明显:host 控场理性,guest_a/b/c 幽默且风格可略有不同(可自设特点,如冷笑话、夸张、吐槽等)。
|
||||
- script 中只放最终台词,不添加说明。每段台词不宜过长,确保可朗读。
|
||||
- 在合适处允许演员之间简短对话(如提问、接梗、调侃),增强现场感。
|
||||
- 严格仅输出 JSON,无任何额外文本。
|
||||
"""
|
||||
105
llm/prompts/daily_article_prompt.py
Normal file
105
llm/prompts/daily_article_prompt.py
Normal file
@ -0,0 +1,105 @@
|
||||
PROMPT_DAILY_ARTICLE = """
|
||||
你是【智能写作素材生成系统】。
|
||||
|
||||
你的任务是严格按照下述【四个阶段】执行,并且【只允许输出一个 JSON 对象】。
|
||||
❗除 JSON 外,不得输出任何解释、说明、注释、Markdown、代码块或多余文本。
|
||||
|
||||
====================
|
||||
【通用强制规则】
|
||||
1. 最终输出必须是一个合法 JSON(UTF-8,无注释)
|
||||
2. 字段名、层级结构、顺序必须与下方模板完全一致
|
||||
3. 不允许新增、删除、重命名任何字段
|
||||
4. 所有字符串必须是中文
|
||||
5. 所有数组必须按要求数量输出(不可多不可少)
|
||||
6. 需要联网获取信息(今日热点 / 文化日历 / 写作趋势 / 天气季节)
|
||||
====================
|
||||
|
||||
【阶段1:信息采集(联网)】
|
||||
- 搜索今日热点,提取 5 个“写作灵感关键词”
|
||||
- 查询今日文化日历事件(至少 2 条)
|
||||
- 分析当前热门写作趋势(至少 3 条,来自写作/内容社区)
|
||||
- 获取今日天气与季节特征(概括性描述)
|
||||
|
||||
【阶段2:主题生成】
|
||||
基于阶段1信息,生成 3 个写作主题:
|
||||
- 主题A:结合“热点 + 文化事件”
|
||||
- 主题B:回应“社会情绪 + 季节特征”
|
||||
- 主题C:实验性主题(新兴写作形式或叙事结构)
|
||||
|
||||
【阶段3:风格匹配】
|
||||
为 主题A / 主题B / 主题C 分别给出:
|
||||
- 写作风格
|
||||
- 叙事视角
|
||||
- 重点训练技巧
|
||||
- 应避免的常见问题
|
||||
|
||||
【阶段4:生成今日文章】
|
||||
- 在 A / B / C 中选择综合质量最高的一个
|
||||
- 生成一篇 800–1000 字中文文章
|
||||
- 文章必须完整、可直接发表
|
||||
|
||||
====================
|
||||
【❗唯一允许的输出 JSON 模板如下】
|
||||
(必须严格匹配,不得修改结构)
|
||||
|
||||
{
|
||||
"阶段1_信息采集": {
|
||||
"今日热点关键词": [
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
""
|
||||
],
|
||||
"今日文化日历事件": [
|
||||
"",
|
||||
""
|
||||
],
|
||||
"当前热门写作趋势": [
|
||||
"",
|
||||
"",
|
||||
""
|
||||
],
|
||||
"今日天气与季节特征": ""
|
||||
},
|
||||
"阶段2_主题生成": {
|
||||
"主题A": {
|
||||
"标题": "",
|
||||
"主题说明": ""
|
||||
},
|
||||
"主题B": {
|
||||
"标题": "",
|
||||
"主题说明": ""
|
||||
},
|
||||
"主题C": {
|
||||
"标题": "",
|
||||
"主题说明": ""
|
||||
}
|
||||
},
|
||||
"阶段3_风格匹配": {
|
||||
"主题A": {
|
||||
"写作风格": "",
|
||||
"叙事视角": "",
|
||||
"重点训练技巧": "",
|
||||
"应避免的常见问题": ""
|
||||
},
|
||||
"主题B": {
|
||||
"写作风格": "",
|
||||
"叙事视角": "",
|
||||
"重点训练技巧": "",
|
||||
"应避免的常见问题": ""
|
||||
},
|
||||
"主题C": {
|
||||
"写作风格": "",
|
||||
"叙事视角": "",
|
||||
"重点训练技巧": "",
|
||||
"应避免的常见问题": ""
|
||||
}
|
||||
},
|
||||
"阶段4_今日文章": {
|
||||
"选定主题": "主题A / 主题B / 主题C(三选一)",
|
||||
"文章标题": "",
|
||||
"文章正文": ""
|
||||
}
|
||||
}
|
||||
"""
|
||||
Reference in New Issue
Block a user