tianlong12 committed
Commit ea2c97f
1 Parent(s): 2cbc27e

Update app.py

Files changed (1):
  1. app.py +117 -29
app.py CHANGED
@@ -1,51 +1,92 @@
 from flask import Flask, request, Response, stream_with_context, jsonify
 from openai import OpenAI
 import json
+import tiktoken
+#import httpx
 
 app = Flask(__name__)
+
+# Specify your API key name in the request header
+#MY_API_KEY = "sk-gyxzhao"
+
+# Maximum context length per model
+MODEL_MAX_CONTEXT_LENGTH = {
+    "gpt-4": 8192,
+    "gpt-4-0613": 8192,
+    "gpt-4o": 4096,
+    "gpt-4-turbo": 4096,
+    "claude-3-opus-20240229": 4096
+}
+
+def calculate_max_tokens(model_name, messages, requested_max_tokens):
+    if model_name in ["gpt-4", "gpt-4-0613"]:
+        try:
+            encoding = tiktoken.encoding_for_model(model_name)
+        except Exception as e:
+            print(f"Error getting encoding for model {model_name}: {e}")
+            encoding = tiktoken.get_encoding("cl100k_base")  # fall back to the generic encoding
+
+        max_context_length = MODEL_MAX_CONTEXT_LENGTH[model_name]
+
+        tokens_per_message = 3  # fixed tokens per message (role + content + message boundary tokens)
+        tokens_per_name = 1  # extra token when a message includes a 'name' field
+        messages_length = 3  # starting message length (priming overhead)
+
+        for message in messages:
+            messages_length += tokens_per_message
+            for key, value in message.items():
+                messages_length += len(encoding.encode(value))
+                if key == 'name':
+                    messages_length += tokens_per_name
+
+        #print(f"Message length in tokens: {messages_length}")  # print the message length for debugging
+
+        max_tokens = max_context_length - messages_length
+        if requested_max_tokens:
+            max_tokens = min(max_tokens, requested_max_tokens)
+
+        return max(100, max_tokens)  # ensure max_tokens is at least 100
+
+    else:
+        return MODEL_MAX_CONTEXT_LENGTH.get(model_name, 4096)  # other models: return their maximum token count directly
 
 @app.route('/')
 def index():
-    return "Hello, this is the root page of your Flask application!"
-
-@app.route('/hf/v1/chat/completions', methods=['POST'])
-def chat():
-    try:
-        # Validate the API key in the request headers
-        auth_header = request.headers.get('Authorization')
-        if not auth_header or not auth_header.startswith('Bearer '):
             return jsonify({"error": "Unauthorized"}), 401
 
         api_key = auth_header.split(" ")[1]
-        base_url= auth_header.split(" ")[2]
+
 
         data = request.json
         #print("Received data:", data)  # print the request body for debugging
-
-        # Validate the request format
-        if not data or 'messages' not in data or 'model' not in data:
-            return jsonify({"error": "Missing 'messages' or 'model' in request body"}), 400
-
         model = data['model']
         messages = data['messages']
         temperature = data.get('temperature', 0.7)  # default 0.7
+        requested_max_tokens = data.get('max_tokens', MODEL_MAX_CONTEXT_LENGTH.get(model, 4096))
         #max_tokens = calculate_max_tokens(model, messages, requested_max_tokens)
         top_p = data.get('top_p', 1.0)  # default 1.0
         n = data.get('n', 1)  # default 1
-        stream = data.get('stream', False)  # default False
         functions = data.get('functions', None)  # Functions for function calling
         function_call = data.get('function_call', None)  # Specific function call request
 
+        # For Claude models, adjust the message format
+        system_message = None
+        if model.startswith("claude"):
+            messages = [msg for msg in messages if msg['role'] != 'system']
+            if 'system' in data:
+                system_message = data['system']
+
         # Create a per-request OpenAI client instance
         client = OpenAI(
             api_key=api_key,
-            base_url=base_url,
+            base_url="https://api.aimlapi.com",
         )
 
         # Handle the model response
         if stream:
             # Streaming response
             def generate():
+                if model.startswith("claude"):
                     response = client.chat.completions.create(
                         model=model,
                         messages=messages,
@@ -53,17 +94,34 @@ def chat():
                         #max_tokens=max_tokens,
                         top_p=top_p,
                         n=n,
-                        stream=True,
                         functions=functions,
-                        function_call=function_call
+                        function_call=function_call,
+                        #system=system_message  # pass system_message as a top-level parameter
                     )
-                    for chunk in response:
-                        yield f"data: {json.dumps(chunk.to_dict())}\n\n"
+                    content = response.choices[0].message.content
+                    for i in range(0, len(content), 20):  # split into chunks of 20 characters
+                        chunk = content[i:i+20]
+                        yield f"data: {json.dumps({'choices': [{'delta': {'content': chunk}}]})}\n\n"
+                else:
+                    response = client.chat.completions.create(
+                        model=model,
+                        messages=messages,
 
             return Response(stream_with_context(generate()), content_type='text/event-stream')
         else:
-            # Non-streaming response
-            response = client.chat.completions.create(
+            # Non-streaming response
+            if model.startswith("claude"):
+                response = client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    n=n,
+                    functions=functions,
+                    function_call=function_call,
+                    #system=system_message  # pass system_message as a top-level parameter
+                )
+            else:
+                response = client.chat.completions.create(
                 model=model,
                 messages=messages,
                 temperature=temperature,
@@ -72,12 +130,42 @@ def chat():
                 n=n,
                 functions=functions,
                 function_call=function_call,
-            )
-            return jsonify(response.to_dict())
+                )
 
-    except Exception as e:
-        print("Exception:", e)
-        return jsonify({"error": str(e)}), 500
 
-if __name__ == "__main__":
-    app.run(host='0.0.0.0', port=7860, threaded=True)
+            # Print the response
+            #print("API response:", response)
+
+            # Convert the response to a dict
+            response_dict = {
+                "id": response.id,
+                "object": response.object,
+                "created": response.created,
+                "model": response.model,
+                "choices": [
+                    {
+                        "message": {
+                            "role": choice.message.role,
+                            "content": choice.message.content
+                        },
+                        "index": choice.index,
+                        "finish_reason": choice.finish_reason,
+                        "logprobs": choice.logprobs.__dict__ if choice.logprobs else None  # convert ChoiceLogprobs to a dict
+                    }
+                    for choice in response.choices
+                ],
+                "usage": {
+                    "prompt_tokens": response.usage.prompt_tokens,
+                    "completion_tokens": response.usage.completion_tokens,
+                    "total_tokens": response.usage.total_tokens
+                }
+            }
+
+            # Print the response dict as JSON
+            #print("Response dict:", json.dumps(response_dict, ensure_ascii=False, indent=2))
+
+            # Make sure the returned JSON is well-formed
+            return jsonify(response_dict), 200
+
+    except Exception as e:
+        print("Exception:", e)
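
For context, the max_tokens budgeting this commit introduces can be exercised on its own. Below is a minimal standalone sketch of that arithmetic, not the committed function itself: the example message is made up, and the constants (3 tokens per message, a floor of 100, the 8192 gpt-4 context, a requested cap of 512) mirror the values in the diff.

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
messages = [{"role": "user", "content": "Hello, how are you?"}]

used = 3  # priming overhead, mirroring messages_length = 3 in the diff
for message in messages:
    used += 3  # tokens_per_message
    for key, value in message.items():
        used += len(encoding.encode(value))
        if key == "name":
            used += 1  # tokens_per_name

budget = 8192 - used       # gpt-4 entry in MODEL_MAX_CONTEXT_LENGTH
budget = min(budget, 512)  # cap by a hypothetical requested max_tokens of 512
print(max(100, budget))    # floor of 100, as in the diff -> prints 512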
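
A hedged usage sketch for the proxied endpoint follows: a non-streaming request to /hf/v1/chat/completions, assuming the Space is reachable locally on port 7860. The host and the Bearer token are placeholders; after this commit the token is forwarded upstream as the api_key, since the base_url segment of the header was replaced by the hard-coded https://api.aimlapi.com.

import requests

resp = requests.post(
    "http://localhost:7860/hf/v1/chat/completions",
    headers={"Authorization": "Bearer sk-placeholder"},  # placeholder key, forwarded upstream
    json={
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Say hello"}],
        "temperature": 0.7,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])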
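
On the streaming path, the new Claude branch does not proxy upstream deltas: it fetches the full completion, then re-emits it as SSE events of 20 characters each in the OpenAI delta shape. A client consumes this like ordinary SSE, as in the sketch below (same placeholder host and key; note the diff also removes the line that read the stream flag from the request body, so this assumes that flag is still wired up).

import json
import requests

with requests.post(
    "http://localhost:7860/hf/v1/chat/completions",
    headers={"Authorization": "Bearer sk-placeholder"},  # placeholder key
    json={
        "model": "claude-3-opus-20240229",
        "messages": [{"role": "user", "content": "Say hello"}],
        "stream": True,
    },
    stream=True,
    timeout=60,
) as resp:
    for line in resp.iter_lines():
        if line.startswith(b"data: "):  # each SSE event carries one 20-char chunk
            event = json.loads(line[len(b"data: "):])
            print(event["choices"][0]["delta"]["content"], end="", flush=True)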