Stable JSON output from vLLM
vLLM version
0.6.6
Request example
def send_call_json(prompt="You are a helpful assistant.",
                   msg="",
                   top_p=1.0,
                   temperature=0.7):
    import requests, json, traceback, time
    retry_count = 5
    data = {
        "model": "Qwen2__5-72B-Instruct",
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": msg},
        ],
        "stream": False,
        "stop": ["<|EOT|>", "<|im_end|>"],
        "top_p": top_p,
        "repetition_penalty": 1.05,
        "temperature": temperature,
        # This parameter forces the response to be valid JSON
        "response_format": {
            "type": "json_object"
        }
    }
    for i in range(retry_count):
        try:
            url = "your own endpoint"  # typically http://<host>:<port>/v1/chat/completions
            headers = {"Content-Type": "application/json"}
            print(f"request\n{data}")
            response = requests.post(
                url,
                data=json.dumps(data, ensure_ascii=False).encode('utf8'),
                headers=headers,
                timeout=300)
            response.encoding = 'utf-8'
            print(f"response\n{response.text}")
            response_json = response.json()
            content = response_json["choices"][0]["message"]["content"]
            return content
        except Exception:
            # If the model keeps generating until the request times out,
            # reset these two sampling parameters to their defaults.
            data["top_p"] = 1.0
            data["temperature"] = 0.7
            print(f"error, retry {i}: {traceback.format_exc()}")
            time.sleep(5)
    return ""