-
Notifications
You must be signed in to change notification settings - Fork 46
Expand file tree
/
Copy pathserver.py
More file actions
121 lines (104 loc) · 4.76 KB
/
server.py
File metadata and controls
121 lines (104 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from flask import Flask, request, Response, stream_with_context
import requests
import json
import re
# Flask application object; the route handler below registers itself on it.
app = Flask(__name__)
# Matches one <think>...</think> span plus any whitespace that follows it.
_THINK_BLOCK_RE = re.compile(r'<think>.*?</think>\s*\n*', flags=re.DOTALL)


def _strip_think_blocks(messages):
    """Return a copy of *messages* with ``<think>...</think>`` spans removed.

    Only string ``content`` values are rewritten. Messages whose content is
    ``None`` or a list of parts are passed through unchanged instead of
    raising ``TypeError`` inside the regex substitution.
    """
    cleaned = []
    for msg in messages:
        if not isinstance(msg, dict):
            cleaned.append(msg)
            continue
        content = msg.get('content', '')
        if not isinstance(content, str):
            cleaned.append(msg)
            continue
        cleaned_msg = msg.copy()
        cleaned_msg['content'] = _THINK_BLOCK_RE.sub('', content).strip()
        cleaned.append(cleaned_msg)
    return cleaned


def _content_event(text):
    """Format *text* as an SSE event carrying a minimal ``content`` delta."""
    payload = json.dumps({'choices': [{'delta': {'content': text}}]})
    return f"data: {payload}\n\n"


def _rewrite_stream(lines):
    """Yield SSE events with ``reasoning_content`` folded into ``<think>`` tags.

    *lines* is an iterable of raw ``bytes`` lines from the upstream event
    stream. Reasoning deltas are re-emitted as ordinary ``content`` deltas
    wrapped in ``<think>`` / ``</think>``; every other event is forwarded
    with its original payload.
    """
    in_reasoning = False
    for line in lines:
        if not line:
            continue
        payload = line.decode('utf-8')
        # Strip only the leading SSE field name. Replacing every occurrence
        # of "data: " would corrupt model output that contains that text.
        if payload.startswith('data:'):
            payload = payload[len('data:'):].strip()

        if payload == '[DONE]':
            if in_reasoning:
                # The stream ended while still reasoning: close the tag.
                yield _content_event("</think>")
            yield 'data: [DONE]\n\n'
            break

        try:
            event = json.loads(payload)
        except json.JSONDecodeError:
            yield f"data: {payload}\n\n"
            continue

        delta = None
        if isinstance(event, dict):
            choices = event.get('choices')
            if isinstance(choices, list) and choices:
                first_choice = choices[0]
                if isinstance(first_choice, dict):
                    delta = first_choice.get('delta')
        if not isinstance(delta, dict):
            # Nothing to rewrite (no choices / no delta): forward untouched.
            yield f"data: {payload}\n\n"
            continue

        # Handle reasoning_content.
        reasoning = delta.get('reasoning_content', '')
        if reasoning:
            if not in_reasoning:
                # Open a tag on every transition into reasoning so the tags
                # stay balanced even if reasoning resumes after content.
                yield _content_event("<think>")
                in_reasoning = True
            yield _content_event(reasoning)

        # Handle content.
        content = delta.get('content', '')
        if content:
            if in_reasoning:
                yield _content_event("</think>\n\n")
                in_reasoning = False
            yield f"data: {payload}\n\n"

        if not (reasoning or content):
            yield f"data: {payload}\n\n"


@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Proxy a chat-completion request and inline reasoning as ``<think>`` text.

    The incoming JSON body is forwarded to the SiliconFlow chat-completions
    endpoint with ``stream`` forced to ``True``. Before forwarding,
    ``<think>...</think>`` spans are removed from string message contents.
    The upstream event stream is then rewritten so ``reasoning_content``
    deltas appear as ``content`` wrapped in ``<think>`` tags.

    Returns:
        A streaming ``text/event-stream`` response that carries the upstream
        HTTP status code, or a 400 JSON response when the request body is
        not a JSON object.
    """
    user_data = request.json
    if not isinstance(user_data, dict):
        error_body = json.dumps(
            {'error': {'message': 'Request body must be a JSON object'}}
        )
        return Response(error_body, status=400, mimetype='application/json')

    if isinstance(user_data.get('messages'), list):
        user_data['messages'] = _strip_think_blocks(user_data['messages'])

    headers = {
        "Authorization": request.headers.get('Authorization'),
        "Content-Type": "application/json"
    }
    user_data['stream'] = True
    # Forward the request upstream.
    response = requests.post(
        "https://api.siliconflow.cn/v1/chat/completions",
        json=user_data,
        headers=headers,
        stream=True
    )

    def generate():
        try:
            yield from _rewrite_stream(response.iter_lines())
        finally:
            # Release the upstream connection once streaming stops, whether
            # it finished normally or the generator was closed early.
            response.close()

    return Response(
        stream_with_context(generate()),
        status=response.status_code,
        mimetype='text/event-stream'
    )
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the port execute arbitrary code. Because this server
    # binds to all interfaces, debug mode is kept off.
    app.run(debug=False, host='0.0.0.0', port=9006)