This commit is contained in:
110
llm/generate_daily_article.py
Normal file
110
llm/generate_daily_article.py
Normal file
@ -0,0 +1,110 @@
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from config.settings import settings
|
||||
from llm import prompt as prompts
|
||||
from utils.logger import logger
|
||||
from llm.prompts.daily_article_prompt import PROMPT_DAILY_ARTICLE
|
||||
|
||||
|
||||
# Endpoint handed to the OpenAI SDK client as ``base_url`` in ``_make_client``.
BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
# Model identifier sent as ``model=`` on every chat-completion request in ``_call_model``.
MODEL = "deepseek-v3.2"
|
||||
|
||||
|
||||
def _make_client() -> OpenAI:
    """Build an OpenAI SDK client bound to ``BASE_URL``.

    The API key is read from ``settings.DASHSCOPE_API_KEY`` each time the
    function is called; a new client object is returned on every call.
    """
    client_options = {
        "api_key": settings.DASHSCOPE_API_KEY,
        "base_url": BASE_URL,
    }
    return OpenAI(**client_options)
|
||||
|
||||
|
||||
def _call_model(system_prompt: Optional[str], user_prompt: str, stream: bool = False, enable_search: bool = False) -> Any:
    """Send one chat-completion request and return the reply.

    Args:
        system_prompt: Optional system message; omitted from the request when
            falsy (``None`` or empty string).
        user_prompt: Content of the single user message.
        stream: Forwarded to the SDK as ``stream=``. When true, the SDK's
            response object is returned untouched so the caller can iterate it.
        enable_search: Forwarded to the backend via
            ``extra_body={"enable_search": ...}``.

    Returns:
        For non-streaming calls, ``resp.choices[0].message.content`` when that
        attribute path exists, otherwise ``resp.choices[0].text``, otherwise the
        raw response object. For streaming calls, the raw response object.
    """
    client = _make_client()

    messages: List[Dict[str, str]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": user_prompt})

    resp = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        stream=stream,
        extra_body={"enable_search": enable_search},
    )

    if stream:
        # A streaming response has no ``choices`` attribute of its own.
        # Previously it was returned only because two blanket ``except``
        # clauses swallowed the resulting error; make that outcome explicit.
        return resp

    # Only "the response does not have this shape" errors trigger a fallback;
    # anything else is a real failure and must propagate.
    shape_errors = (AttributeError, IndexError, TypeError)

    try:
        # OpenAI-compatible chat shape.
        return resp.choices[0].message.content
    except shape_errors:
        pass

    try:
        # Legacy completion shape.
        return resp.choices[0].text
    except shape_errors:
        # Unknown shape: hand the raw response back to the caller.
        return resp
|
||||
|
||||
|
||||
def _extract_json(text: str) -> str:
    """Return the first JSON object/array embedded in ``text`` as a substring.

    Strategy:
      1. At every ``{`` or ``[`` (left to right), ask the standard JSON decoder
         to parse a value starting there. The first position that yields valid
         JSON wins. This handles brackets inside string literals and skips
         bracketed prose such as ``[see below]`` that precedes the real payload.
      2. If nothing parses (e.g. the payload is almost-JSON), fall back to a
         bracket-matching scan from the first bracket that ignores brackets
         inside double-quoted strings.
      3. As a last resort, grab the widest ``{...}`` / ``[...]`` span by regex.

    Args:
        text: Raw model output that may wrap JSON in prose or code fences.

    Returns:
        The substring of ``text`` holding the JSON candidate. It is guaranteed
        to be valid JSON only when step 1 succeeded.

    Raises:
        ValueError: If ``text`` is not a ``str``, contains no ``{``/``[``, or
            no candidate span can be found.
    """
    if not isinstance(text, str):
        raise ValueError("Expected text to be str")

    candidates = [idx for idx, ch in enumerate(text) if ch in "[{"]
    if not candidates:
        raise ValueError("No JSON object/array found in text")

    # Step 1: let the real JSON parser decide where a value starts and ends.
    decoder = json.JSONDecoder()
    for start in candidates:
        try:
            _, end = decoder.raw_decode(text, start)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        return text[start:end]

    # Step 2: string-aware bracket matching from the first bracket.
    start = candidates[0]
    closer_for = {"{": "}", "[": "]"}
    expected_closers: List[str] = []
    in_string = False
    escaped = False
    for pos in range(start, len(text)):
        ch = text[pos]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch in closer_for:
            expected_closers.append(closer_for[ch])
        elif ch in "]}":
            # Ignore stray/mismatched closers instead of corrupting the stack.
            if expected_closers and expected_closers[-1] == ch:
                expected_closers.pop()
                if not expected_closers:
                    return text[start : pos + 1]

    # Step 3: greedy regex over the whole text.
    m = re.search(r"(\{.*\}|\[.*\])", text, re.S)
    if m:
        return m.group(1)
    raise ValueError("Could not extract JSON from model output")
|
||||
|
||||
|
||||
def _parse_json_safe(text: str) -> Any:
    """Decode ``text`` as JSON, tolerating non-JSON text around the payload.

    The whole string is decoded first. If that raises, the substring returned
    by ``_extract_json`` is decoded instead; any error from that second attempt
    propagates to the caller.
    """
    try:
        parsed = json.loads(text)
    except Exception:
        # Whole-string decode failed: retry on the embedded JSON fragment.
        fragment = _extract_json(text)
        parsed = json.loads(fragment)
    return parsed
|
||||
|
||||
|
||||
def generate_daily_article() -> Any:
    """Run ``PROMPT_DAILY_ARTICLE`` through the model and return the parsed JSON.

    The prompt is sent as a single user message with ``enable_search=True``.
    If ``_call_model`` already returns a ``dict``/``list`` it is passed through
    unchanged; otherwise the reply is stringified and decoded with
    ``_parse_json_safe``.

    Returns:
        The decoded JSON payload. The ``__main__`` block of this file indexes
        the result with string keys, so a JSON object is what callers appear to
        expect; the previous ``List[Dict[str, Any]]`` annotation contradicted
        that usage, hence ``Any``.

    Raises:
        ValueError: If the model returned no content, or no JSON could be
            extracted/decoded from the reply.
    """
    logger.debug(f"prompt for generate_daily_article:\n{PROMPT_DAILY_ARTICLE}")

    content = _call_model(system_prompt=None, user_prompt=PROMPT_DAILY_ARTICLE, enable_search=True)
    logger.debug(f"raw output from generate_daily_article:\n{content}")

    if content is None:
        # Without this guard ``str(None)`` would be fed to the JSON parser and
        # fail with a misleading "no JSON found" message.
        raise ValueError("Model returned no content")

    if isinstance(content, (dict, list)):
        return content

    text = content if isinstance(content, str) else str(content)
    data = _parse_json_safe(text)
    logger.debug(f"result for generate_daily_article:\n{data}")
    return data
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: generate the article and print only its body text.
    # Key names are in Chinese: "阶段4_今日文章" = "stage 4: today's article",
    # "文章正文" = "article body".
    result = generate_daily_article()
    stage_four = result["阶段4_今日文章"]
    print(stage_four["文章正文"])
|
||||
Reference in New Issue
Block a user