yangtb24 committed on
Commit
22197b0
1 Parent(s): 7a2a875

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +330 -99
app.py CHANGED
@@ -54,10 +54,15 @@ def get_credit_summary(api_key):
54
  logging.error(f"解析额度信息失败,API Key:{api_key},错误信息:{e}")
55
  return None
56
  except ValueError as e:
57
- logging.error(f"total_balance 无法转换为浮点数,API Key:{api_key},错误信息:{e}")
 
 
 
58
  return None
59
 
60
- FREE_MODEL_TEST_KEY = "sk-bmjbjzleaqfgtqfzmcnsbagxrlohriadnxqrzfocbizaxukw"
 
 
61
 
62
  def test_model_availability(api_key, model_name):
63
  """
@@ -68,37 +73,50 @@ def test_model_availability(api_key, model_name):
68
  "Content-Type": "application/json"
69
  }
70
  try:
71
- response = requests.post(TEST_MODEL_ENDPOINT,
72
- headers=headers,
73
- json={
74
- "model": model_name,
75
- "messages": [{"role": "user", "content": "hi"}],
76
- "max_tokens": 5,
77
- "stream": False
78
- },
79
- timeout=10)
 
 
80
  if response.status_code == 429 or response.status_code == 200:
81
  return True
82
  else:
83
  return False
84
  except requests.exceptions.RequestException as e:
85
- logging.error(f"测试模型 {model_name} 可用性失败,API Key:{api_key},错误信息:{e}")
 
 
 
86
  return False
87
 
88
  def refresh_models():
89
  """
90
  刷新模型列表和免费模型列表。
91
  """
92
- global all_models, free_models, embedding_models, free_embedding_models
 
93
 
94
  all_models = get_all_models(FREE_MODEL_TEST_KEY, "chat")
95
  embedding_models = get_all_models(FREE_MODEL_TEST_KEY, "embedding")
96
  free_models = []
97
  free_embedding_models = []
98
 
99
- with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
100
- future_to_model = {executor.submit(test_model_availability, FREE_MODEL_TEST_KEY, model): model for model in
101
- all_models}
 
 
 
 
 
 
 
102
  for future in concurrent.futures.as_completed(future_to_model):
103
  model = future_to_model[future]
104
  try:
@@ -108,10 +126,15 @@ def refresh_models():
108
  except Exception as exc:
109
  logging.error(f"模型 {model} 测试生成异常: {exc}")
110
 
111
- with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
 
 
112
  future_to_model = {
113
- executor.submit(test_embedding_model_availability, FREE_MODEL_TEST_KEY, model): model for model in
114
- embedding_models}
 
 
 
115
  for future in concurrent.futures.as_completed(future_to_model):
116
  model = future_to_model[future]
117
  try:
@@ -135,42 +158,64 @@ def test_embedding_model_availability(api_key, model_name):
135
  "Content-Type": "application/json"
136
  }
137
  try:
138
- response = requests.post(EMBEDDINGS_ENDPOINT,
139
- headers=headers,
140
- json={
141
- "model": model_name,
142
- "input": ["hi"],
143
- },
144
- timeout=10)
 
 
145
  if response.status_code == 429 or response.status_code == 200:
146
  return True
147
  else:
148
  return False
149
  except requests.exceptions.RequestException as e:
150
- logging.error(f"测试向量模型 {model_name} 可用性失败,API Key:{api_key},错误信息:{e}")
 
 
 
151
  return False
152
 
153
  def load_keys():
154
  """
155
- 从环境变量中加载 keys,并根据额度和模型可用性进行分类,然后记录到日志中。
 
 
156
  使用线程池并发处理每个 key。
157
  """
158
  keys_str = os.environ.get("KEYS")
159
- test_model = os.environ.get("TEST_MODEL", "Pro/google/gemma-2-9b-it")
 
 
 
160
 
161
  if keys_str:
162
  keys = [key.strip() for key in keys_str.split(',')]
163
- logging.info(f"加载的 keys:{keys}")
164
-
165
- with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
166
- future_to_key = {executor.submit(process_key, key, test_model): key for key in keys}
 
 
 
 
 
 
 
 
 
 
167
 
168
  invalid_keys = []
169
  free_keys = []
170
  unverified_keys = []
171
  valid_keys = []
172
 
173
- for future in concurrent.futures.as_completed(future_to_key):
 
 
174
  key = future_to_key[future]
175
  try:
176
  key_type = future.result()
@@ -190,7 +235,8 @@ def load_keys():
190
  logging.info(f"未实名 KEY:{unverified_keys}")
191
  logging.info(f"有效 KEY:{valid_keys}")
192
 
193
- global invalid_keys_global, free_keys_global, unverified_keys_global, valid_keys_global
 
194
  invalid_keys_global = invalid_keys
195
  free_keys_global = free_keys
196
  unverified_keys_global = unverified_keys
@@ -225,20 +271,36 @@ def get_all_models(api_key, sub_type):
225
  "Content-Type": "application/json"
226
  }
227
  try:
228
- response = requests.get(MODELS_ENDPOINT, headers=headers, params={"sub_type": sub_type})
 
 
 
 
229
  response.raise_for_status()
230
  data = response.json()
231
- if isinstance(data, dict) and 'data' in data and isinstance(data['data'], list):
232
- return [model.get("id") for model in data["data"] if
233
- isinstance(model, dict) and "id" in model]
 
 
 
 
 
 
234
  else:
235
  logging.error("获取模型列表失败:响应数据格式不正确")
236
  return []
237
  except requests.exceptions.RequestException as e:
238
- logging.error(f"获取模型列表失败,API Key:{api_key},错误信息:{e}")
 
 
 
239
  return []
240
  except (KeyError, TypeError) as e:
241
- logging.error(f"解析模型列表失败,API Key:{api_key},错误信息:{e}")
 
 
 
242
  return []
243
 
244
  def determine_request_type(model_name, model_list, free_model_list):
@@ -254,14 +316,23 @@ def determine_request_type(model_name, model_list, free_model_list):
254
 
255
  def select_key(request_type, model_name):
256
  """
257
- 根据请求类型和模型名称选择合适的 KEY,并实现轮询和重试机制。
 
258
  """
259
  if request_type == "free":
260
- available_keys = free_keys_global + unverified_keys_global + valid_keys_global
 
 
 
 
261
  elif request_type == "paid":
262
  available_keys = unverified_keys_global + valid_keys_global
263
  else:
264
- available_keys = free_keys_global + unverified_keys_global + valid_keys_global
 
 
 
 
265
 
266
  if not available_keys:
267
  return None
@@ -276,14 +347,17 @@ def select_key(request_type, model_name):
276
  model_key_indices[model_name] = current_index
277
  return key
278
  else:
279
- logging.warning(f"KEY {key} 无效或达到限制,尝试下一个 KEY")
 
 
280
 
281
  model_key_indices[model_name] = 0
282
  return None
283
 
284
  def key_is_valid(key, request_type):
285
  """
286
- 检查 KEY 是否有效,根据不同的请求类型进行不同的检查。
 
287
  """
288
  if request_type == "invalid":
289
  return False
@@ -303,7 +377,8 @@ def key_is_valid(key, request_type):
303
 
304
  def check_authorization(request):
305
  """
306
- 检查请求头中的 Authorization 字段是否匹配环境变量 AUTHORIZATION_KEY。
 
307
  """
308
  authorization_key = os.environ.get("AUTHORIZATION_KEY")
309
  if not authorization_key:
@@ -323,7 +398,8 @@ def check_authorization(request):
323
 
324
  scheduler = BackgroundScheduler()
325
  scheduler.add_job(load_keys, 'interval', hours=1)
326
- scheduler.add_job(refresh_models, 'interval', minutes=10)
 
327
 
328
  @app.route('/')
329
  def index():
@@ -332,10 +408,19 @@ def index():
332
  @app.route('/check_tokens', methods=['POST'])
333
  def check_tokens():
334
  tokens = request.json.get('tokens', [])
335
- test_model = os.environ.get("TEST_MODEL", "Pro/google/gemma-2-9b-it")
336
-
337
- with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
338
- future_to_token = {executor.submit(process_key, token, test_model): token for token in tokens}
 
 
 
 
 
 
 
 
 
339
 
340
  results = []
341
  for future in concurrent.futures.as_completed(future_to_token):
@@ -343,20 +428,50 @@ def check_tokens():
343
  try:
344
  key_type = future.result()
345
  credit_summary = get_credit_summary(token)
346
- balance = credit_summary.get("total_balance", 0) if credit_summary else 0
 
 
 
347
  if key_type == "invalid":
348
  results.append(
349
- {"token": token, "type": "无效 KEY", "balance": balance, "message": "无法获取额度信息"})
 
 
 
 
 
 
350
  elif key_type == "free":
351
- results.append({"token": token, "type": "免费 KEY", "balance": balance, "message": "额度不足"})
 
 
 
 
 
 
 
352
  elif key_type == "unverified":
353
  results.append(
354
- {"token": token, "type": "未实名 KEY", "balance": balance, "message": "无法使用指定模型"})
 
 
 
 
 
 
355
  elif key_type == "valid":
356
  results.append(
357
- {"token": token, "type": "有效 KEY", "balance": balance, "message": "可以使用指定模型"})
 
 
 
 
 
 
358
  except Exception as exc:
359
- logging.error(f"处理 Token {token} 生成异常: {exc}")
 
 
360
 
361
  return jsonify(results)
362
 
@@ -370,12 +485,23 @@ def handsome_chat_completions():
370
  return jsonify({"error": "Invalid request data"}), 400
371
 
372
  model_name = data['model']
373
- request_type = determine_request_type(model_name, all_models, free_models)
 
 
 
 
374
  api_key = select_key(request_type, model_name)
375
 
376
  if not api_key:
377
  return jsonify(
378
- {"error": "No available API key for this request type or all keys have reached their limits"}), 429
 
 
 
 
 
 
 
379
 
380
  headers = {
381
  "Authorization": f"Bearer {api_key}",
@@ -407,7 +533,10 @@ def handsome_chat_completions():
407
  yield chunk
408
 
409
  end_time = time.time()
410
- first_token_time = first_chunk_time - start_time if first_chunk_time else 0
 
 
 
411
  total_time = end_time - start_time
412
 
413
  prompt_tokens = 0
@@ -421,19 +550,43 @@ def handsome_chat_completions():
421
  try:
422
  response_json = json.loads(line)
423
 
424
- if "usage" in response_json and "completion_tokens" in response_json["usage"]:
425
- completion_tokens = response_json["usage"]["completion_tokens"]
426
-
427
- if "choices" in response_json and len(response_json["choices"]) > 0 and "delta" in \
428
- response_json["choices"][0] and "content" in response_json["choices"][0][
429
- "delta"]:
430
- response_content += response_json["choices"][0]["delta"]["content"]
431
-
432
- if "usage" in response_json and "prompt_tokens" in response_json["usage"]:
433
- prompt_tokens = response_json["usage"]["prompt_tokens"]
434
-
435
- except (KeyError, ValueError, IndexError) as e:
436
- logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
  user_content = ""
439
  messages = data.get("messages", [])
@@ -443,19 +596,39 @@ def handsome_chat_completions():
443
  user_content += message["content"] + " "
444
  elif isinstance(message["content"], list):
445
  for item in message["content"]:
446
- if isinstance(item, dict) and item.get("type") == "text":
447
- user_content += item.get("text", "") + " "
 
 
 
 
 
 
448
 
449
  user_content = user_content.strip()
450
 
451
- user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
452
- response_content_replaced = response_content.replace('\n', '\\n').replace('\r', '\\n')
 
 
 
 
453
 
454
  logging.info(
455
- f"使用的key: {api_key}, 提示token: {prompt_tokens}, 输出token: {completion_tokens}, 首字用时: {first_token_time:.4f}秒, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}, 用户的内容: {user_content_replaced}, 输出的内容: {response_content_replaced}"
 
 
 
 
 
 
 
456
  )
457
 
458
- return Response(stream_with_context(generate()), content_type=response.headers['Content-Type'])
 
 
 
459
  else:
460
  response.raise_for_status()
461
  end_time = time.time()
@@ -464,10 +637,17 @@ def handsome_chat_completions():
464
 
465
  try:
466
  prompt_tokens = response_json["usage"]["prompt_tokens"]
467
- completion_tokens = response_json["usage"]["completion_tokens"]
468
- response_content = response_json["choices"][0]["message"]["content"]
 
 
 
 
469
  except (KeyError, ValueError, IndexError) as e:
470
- logging.error(f"解析非流式响应 JSON 失败: {e}, 完整内容: {response_json}")
 
 
 
471
  prompt_tokens = 0
472
  completion_tokens = 0
473
  response_content = ""
@@ -480,16 +660,32 @@ def handsome_chat_completions():
480
  user_content += message["content"] + " "
481
  elif isinstance(message["content"], list):
482
  for item in message["content"]:
483
- if isinstance(item, dict) and item.get("type") == "text":
484
- user_content += item.get("text", "") + " "
 
 
 
 
 
485
 
486
  user_content = user_content.strip()
487
 
488
- user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
489
- response_content_replaced = response_content.replace('\n', '\\n').replace('\r', '\\n')
 
 
 
 
490
 
491
  logging.info(
492
- f"使用的key: {api_key}, 提示token: {prompt_tokens}, 输出token: {completion_tokens}, 首字用时: 0, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}, 用户的内容: {user_content_replaced}, 输出的内容: {response_content_replaced}"
 
 
 
 
 
 
 
493
  )
494
  return jsonify(response_json)
495
 
@@ -538,14 +734,21 @@ def get_billing_info():
538
  keys = valid_keys_global + unverified_keys_global
539
  total_balance = 0
540
 
541
- with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
542
- futures = [executor.submit(get_credit_summary, key) for key in keys]
 
 
 
 
543
 
544
  for future in concurrent.futures.as_completed(futures):
545
  try:
546
  credit_summary = future.result()
547
  if credit_summary:
548
- total_balance += credit_summary.get("total_balance", 0)
 
 
 
549
  except Exception as exc:
550
  logging.error(f"获取额度信息生成异常: {exc}")
551
 
@@ -574,7 +777,10 @@ def billing_usage():
574
  "total_usage": 0
575
  })
576
 
577
- @app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
 
 
 
578
  def billing_subscription():
579
  if not check_authorization(request):
580
  return jsonify({"error": "Unauthorized"}), 401
@@ -616,12 +822,23 @@ def handsome_embeddings():
616
  return jsonify({"error": "Invalid request data"}), 400
617
 
618
  model_name = data['model']
619
- request_type = determine_request_type(model_name, embedding_models, free_embedding_models)
 
 
 
 
620
  api_key = select_key(request_type, model_name)
621
 
622
  if not api_key:
623
  return jsonify(
624
- {"error": "No available API key for this request type or all keys have reached their limits"}), 429
 
 
 
 
 
 
 
625
 
626
  headers = {
627
  "Authorization": f"Bearer {api_key}",
@@ -634,7 +851,7 @@ def handsome_embeddings():
634
  EMBEDDINGS_ENDPOINT,
635
  headers=headers,
636
  json=data,
637
- timeout=60
638
  )
639
 
640
  if response.status_code == 429:
@@ -649,12 +866,18 @@ def handsome_embeddings():
649
  prompt_tokens = response_json["usage"]["prompt_tokens"]
650
  embedding_data = response_json["data"]
651
  except (KeyError, ValueError, IndexError) as e:
652
- logging.error(f"解析响应 JSON 失败: {e}, 完整内容: {response_json}")
 
 
 
653
  prompt_tokens = 0
654
  embedding_data = []
655
 
656
  logging.info(
657
- f"使用的key: {api_key}, 提示token: {prompt_tokens}, 总共用时: {total_time:.4f}秒, 使用的模型: {model_name}"
 
 
 
658
  )
659
 
660
  return jsonify({
@@ -679,12 +902,20 @@ if __name__ == '__main__':
679
  unverified_keys_global = []
680
  valid_keys_global = []
681
 
 
 
 
 
682
  scheduler.start()
683
 
684
- load_keys()
685
  logging.info("首次加载 keys 已手动触发执行")
686
 
687
  refresh_models()
688
  logging.info("首次刷新模型列表已手动触发执行")
689
 
690
- app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
 
 
 
 
 
54
  logging.error(f"解析额度信息失败,API Key:{api_key},错误信息:{e}")
55
  return None
56
  except ValueError as e:
57
+ logging.error(
58
+ f"total_balance 无法转换为浮点数,"
59
+ f"API Key:{api_key},错误信息:{e}"
60
+ )
61
  return None
62
 
63
+ FREE_MODEL_TEST_KEY = (
64
+ "sk-bmjbjzleaqfgtqfzmcnsbagxrlohriadnxqrzfocbizaxukw"
65
+ )
66
 
67
  def test_model_availability(api_key, model_name):
68
  """
 
73
  "Content-Type": "application/json"
74
  }
75
  try:
76
+ response = requests.post(
77
+ TEST_MODEL_ENDPOINT,
78
+ headers=headers,
79
+ json={
80
+ "model": model_name,
81
+ "messages": [{"role": "user", "content": "hi"}],
82
+ "max_tokens": 5,
83
+ "stream": False
84
+ },
85
+ timeout=10
86
+ )
87
  if response.status_code == 429 or response.status_code == 200:
88
  return True
89
  else:
90
  return False
91
  except requests.exceptions.RequestException as e:
92
+ logging.error(
93
+ f"测试模型 {model_name} 可用性失败,"
94
+ f"API Key:{api_key},错误信息:{e}"
95
+ )
96
  return False
97
 
98
  def refresh_models():
99
  """
100
  刷新模型列表和免费模型列表。
101
  """
102
+ global all_models, free_models
103
+ global embedding_models, free_embedding_models
104
 
105
  all_models = get_all_models(FREE_MODEL_TEST_KEY, "chat")
106
  embedding_models = get_all_models(FREE_MODEL_TEST_KEY, "embedding")
107
  free_models = []
108
  free_embedding_models = []
109
 
110
+ with concurrent.futures.ThreadPoolExecutor(
111
+ max_workers=10
112
+ ) as executor:
113
+ future_to_model = {
114
+ executor.submit(
115
+ test_model_availability,
116
+ FREE_MODEL_TEST_KEY,
117
+ model
118
+ ): model for model in all_models
119
+ }
120
  for future in concurrent.futures.as_completed(future_to_model):
121
  model = future_to_model[future]
122
  try:
 
126
  except Exception as exc:
127
  logging.error(f"模型 {model} 测试生成异常: {exc}")
128
 
129
+ with concurrent.futures.ThreadPoolExecutor(
130
+ max_workers=10
131
+ ) as executor:
132
  future_to_model = {
133
+ executor.submit(
134
+ test_embedding_model_availability,
135
+ FREE_MODEL_TEST_KEY, model
136
+ ): model for model in embedding_models
137
+ }
138
  for future in concurrent.futures.as_completed(future_to_model):
139
  model = future_to_model[future]
140
  try:
 
158
  "Content-Type": "application/json"
159
  }
160
  try:
161
+ response = requests.post(
162
+ EMBEDDINGS_ENDPOINT,
163
+ headers=headers,
164
+ json={
165
+ "model": model_name,
166
+ "input": ["hi"],
167
+ },
168
+ timeout=10
169
+ )
170
  if response.status_code == 429 or response.status_code == 200:
171
  return True
172
  else:
173
  return False
174
  except requests.exceptions.RequestException as e:
175
+ logging.error(
176
+ f"测试向量模型 {model_name} 可用性失败,"
177
+ f"API Key:{api_key},错误信息:{e}"
178
+ )
179
  return False
180
 
181
  def load_keys():
182
  """
183
+ 从环境变量中加载 keys,进行去重,
184
+ 并根据额度和模型可用性进行分类,
185
+ 然后记录到日志中。
186
  使用线程池并发处理每个 key。
187
  """
188
  keys_str = os.environ.get("KEYS")
189
+ test_model = os.environ.get(
190
+ "TEST_MODEL",
191
+ "Pro/google/gemma-2-9b-it"
192
+ )
193
 
194
  if keys_str:
195
  keys = [key.strip() for key in keys_str.split(',')]
196
+ unique_keys = list(set(keys))
197
+ keys_str = ','.join(unique_keys)
198
+ os.environ["KEYS"] = keys_str
199
+
200
+ logging.info(f"加载的 keys:{unique_keys}")
201
+
202
+ with concurrent.futures.ThreadPoolExecutor(
203
+ max_workers=20
204
+ ) as executor:
205
+ future_to_key = {
206
+ executor.submit(
207
+ process_key, key, test_model
208
+ ): key for key in unique_keys
209
+ }
210
 
211
  invalid_keys = []
212
  free_keys = []
213
  unverified_keys = []
214
  valid_keys = []
215
 
216
+ for future in concurrent.futures.as_completed(
217
+ future_to_key
218
+ ):
219
  key = future_to_key[future]
220
  try:
221
  key_type = future.result()
 
235
  logging.info(f"未实名 KEY:{unverified_keys}")
236
  logging.info(f"有效 KEY:{valid_keys}")
237
 
238
+ global invalid_keys_global, free_keys_global
239
+ global unverified_keys_global, valid_keys_global
240
  invalid_keys_global = invalid_keys
241
  free_keys_global = free_keys
242
  unverified_keys_global = unverified_keys
 
271
  "Content-Type": "application/json"
272
  }
273
  try:
274
+ response = requests.get(
275
+ MODELS_ENDPOINT,
276
+ headers=headers,
277
+ params={"sub_type": sub_type}
278
+ )
279
  response.raise_for_status()
280
  data = response.json()
281
+ if (
282
+ isinstance(data, dict) and
283
+ 'data' in data and
284
+ isinstance(data['data'], list)
285
+ ):
286
+ return [
287
+ model.get("id") for model in data["data"]
288
+ if isinstance(model, dict) and "id" in model
289
+ ]
290
  else:
291
  logging.error("获取模型列表失败:响应数据格式不正确")
292
  return []
293
  except requests.exceptions.RequestException as e:
294
+ logging.error(
295
+ f"获取模型列表失败,"
296
+ f"API Key:{api_key},错误信息:{e}"
297
+ )
298
  return []
299
  except (KeyError, TypeError) as e:
300
+ logging.error(
301
+ f"解析模型列表失败,"
302
+ f"API Key:{api_key},错误信息:{e}"
303
+ )
304
  return []
305
 
306
  def determine_request_type(model_name, model_list, free_model_list):
 
316
 
317
  def select_key(request_type, model_name):
318
  """
319
+ 根据请求类型和模型名称选择合适的 KEY
320
+ 并实现轮询和重试机制。
321
  """
322
  if request_type == "free":
323
+ available_keys = (
324
+ free_keys_global +
325
+ unverified_keys_global +
326
+ valid_keys_global
327
+ )
328
  elif request_type == "paid":
329
  available_keys = unverified_keys_global + valid_keys_global
330
  else:
331
+ available_keys = (
332
+ free_keys_global +
333
+ unverified_keys_global +
334
+ valid_keys_global
335
+ )
336
 
337
  if not available_keys:
338
  return None
 
347
  model_key_indices[model_name] = current_index
348
  return key
349
  else:
350
+ logging.warning(
351
+ f"KEY {key} 无效或达到限制,尝试下一个 KEY"
352
+ )
353
 
354
  model_key_indices[model_name] = 0
355
  return None
356
 
357
  def key_is_valid(key, request_type):
358
  """
359
+ 检查 KEY 是否有效,
360
+ 根据不同的请求类型进行不同的检查。
361
  """
362
  if request_type == "invalid":
363
  return False
 
377
 
378
  def check_authorization(request):
379
  """
380
+ 检查请求头中的 Authorization 字段
381
+ 是否匹配环境变量 AUTHORIZATION_KEY。
382
  """
383
  authorization_key = os.environ.get("AUTHORIZATION_KEY")
384
  if not authorization_key:
 
398
 
399
  scheduler = BackgroundScheduler()
400
  scheduler.add_job(load_keys, 'interval', hours=1)
401
+ scheduler.remove_all_jobs()
402
+ scheduler.add_job(refresh_models, 'interval', hours=1)
403
 
404
  @app.route('/')
405
  def index():
 
408
  @app.route('/check_tokens', methods=['POST'])
409
  def check_tokens():
410
  tokens = request.json.get('tokens', [])
411
+ test_model = os.environ.get(
412
+ "TEST_MODEL",
413
+ "Pro/google/gemma-2-9b-it"
414
+ )
415
+
416
+ with concurrent.futures.ThreadPoolExecutor(
417
+ max_workers=20
418
+ ) as executor:
419
+ future_to_token = {
420
+ executor.submit(
421
+ process_key, token, test_model
422
+ ): token for token in tokens
423
+ }
424
 
425
  results = []
426
  for future in concurrent.futures.as_completed(future_to_token):
 
428
  try:
429
  key_type = future.result()
430
  credit_summary = get_credit_summary(token)
431
+ balance = (
432
+ credit_summary.get("total_balance", 0)
433
+ if credit_summary else 0
434
+ )
435
  if key_type == "invalid":
436
  results.append(
437
+ {
438
+ "token": token,
439
+ "type": "无效 KEY",
440
+ "balance": balance,
441
+ "message": "无法获取额度信息"
442
+ }
443
+ )
444
  elif key_type == "free":
445
+ results.append(
446
+ {
447
+ "token": token,
448
+ "type": "免费 KEY",
449
+ "balance": balance,
450
+ "message": "额度不足"
451
+ }
452
+ )
453
  elif key_type == "unverified":
454
  results.append(
455
+ {
456
+ "token": token,
457
+ "type": "未实名 KEY",
458
+ "balance": balance,
459
+ "message": "无法使用指定模型"
460
+ }
461
+ )
462
  elif key_type == "valid":
463
  results.append(
464
+ {
465
+ "token": token,
466
+ "type": "有效 KEY",
467
+ "balance": balance,
468
+ "message": "可以使用指定模型"
469
+ }
470
+ )
471
  except Exception as exc:
472
+ logging.error(
473
+ f"处理 Token {token} 生成异常: {exc}"
474
+ )
475
 
476
  return jsonify(results)
477
 
 
485
  return jsonify({"error": "Invalid request data"}), 400
486
 
487
  model_name = data['model']
488
+ request_type = determine_request_type(
489
+ model_name,
490
+ all_models,
491
+ free_models
492
+ )
493
  api_key = select_key(request_type, model_name)
494
 
495
  if not api_key:
496
  return jsonify(
497
+ {
498
+ "error": (
499
+ "No available API key for this "
500
+ "request type or all keys have "
501
+ "reached their limits"
502
+ )
503
+ }
504
+ ), 429
505
 
506
  headers = {
507
  "Authorization": f"Bearer {api_key}",
 
533
  yield chunk
534
 
535
  end_time = time.time()
536
+ first_token_time = (
537
+ first_chunk_time - start_time
538
+ if first_chunk_time else 0
539
+ )
540
  total_time = end_time - start_time
541
 
542
  prompt_tokens = 0
 
550
  try:
551
  response_json = json.loads(line)
552
 
553
+ if (
554
+ "usage" in response_json and
555
+ "completion_tokens" in response_json["usage"]
556
+ ):
557
+ completion_tokens = response_json[
558
+ "usage"
559
+ ]["completion_tokens"]
560
+
561
+ if (
562
+ "choices" in response_json and
563
+ len(response_json["choices"]) > 0 and
564
+ "delta" in response_json["choices"][0] and
565
+ "content" in response_json[
566
+ "choices"
567
+ ][0]["delta"]
568
+ ):
569
+ response_content += response_json[
570
+ "choices"
571
+ ][0]["delta"]["content"]
572
+
573
+ if (
574
+ "usage" in response_json and
575
+ "prompt_tokens" in response_json["usage"]
576
+ ):
577
+ prompt_tokens = response_json[
578
+ "usage"
579
+ ]["prompt_tokens"]
580
+
581
+ except (
582
+ KeyError,
583
+ ValueError,
584
+ IndexError
585
+ ) as e:
586
+ logging.error(
587
+ f"解析流式响应单行 JSON 失败: {e}, "
588
+ f"行内容: {line}"
589
+ )
590
 
591
  user_content = ""
592
  messages = data.get("messages", [])
 
596
  user_content += message["content"] + " "
597
  elif isinstance(message["content"], list):
598
  for item in message["content"]:
599
+ if (
600
+ isinstance(item, dict) and
601
+ item.get("type") == "text"
602
+ ):
603
+ user_content += (
604
+ item.get("text", "") +
605
+ " "
606
+ )
607
 
608
  user_content = user_content.strip()
609
 
610
+ user_content_replaced = user_content.replace(
611
+ '\n', '\\n'
612
+ ).replace('\r', '\\n')
613
+ response_content_replaced = response_content.replace(
614
+ '\n', '\\n'
615
+ ).replace('\r', '\\n')
616
 
617
  logging.info(
618
+ f"使用的key: {api_key}, "
619
+ f"提示token: {prompt_tokens}, "
620
+ f"输出token: {completion_tokens}, "
621
+ f"首字用时: {first_token_time:.4f}秒, "
622
+ f"总共用时: {total_time:.4f}秒, "
623
+ f"使用的模型: {model_name}, "
624
+ f"用户的内容: {user_content_replaced}, "
625
+ f"输出的内容: {response_content_replaced}"
626
  )
627
 
628
+ return Response(
629
+ stream_with_context(generate()),
630
+ content_type=response.headers['Content-Type']
631
+ )
632
  else:
633
  response.raise_for_status()
634
  end_time = time.time()
 
637
 
638
  try:
639
  prompt_tokens = response_json["usage"]["prompt_tokens"]
640
+ completion_tokens = response_json[
641
+ "usage"
642
+ ]["completion_tokens"]
643
+ response_content = response_json[
644
+ "choices"
645
+ ][0]["message"]["content"]
646
  except (KeyError, ValueError, IndexError) as e:
647
+ logging.error(
648
+ f"解析非流式响应 JSON 失败: {e}, "
649
+ f"完整内容: {response_json}"
650
+ )
651
  prompt_tokens = 0
652
  completion_tokens = 0
653
  response_content = ""
 
660
  user_content += message["content"] + " "
661
  elif isinstance(message["content"], list):
662
  for item in message["content"]:
663
+ if (
664
+ isinstance(item, dict) and
665
+ item.get("type") == "text"
666
+ ):
667
+ user_content += (
668
+ item.get("text", "") + " "
669
+ )
670
 
671
  user_content = user_content.strip()
672
 
673
+ user_content_replaced = user_content.replace(
674
+ '\n', '\\n'
675
+ ).replace('\r', '\\n')
676
+ response_content_replaced = response_content.replace(
677
+ '\n', '\\n'
678
+ ).replace('\r', '\\n')
679
 
680
  logging.info(
681
+ f"使用的key: {api_key}, "
682
+ f"提示token: {prompt_tokens}, "
683
+ f"输出token: {completion_tokens}, "
684
+ f"首字用时: 0, "
685
+ f"总共用时: {total_time:.4f}秒, "
686
+ f"使用的模型: {model_name}, "
687
+ f"用户的内容: {user_content_replaced}, "
688
+ f"输出的内容: {response_content_replaced}"
689
  )
690
  return jsonify(response_json)
691
 
 
734
  keys = valid_keys_global + unverified_keys_global
735
  total_balance = 0
736
 
737
+ with concurrent.futures.ThreadPoolExecutor(
738
+ max_workers=20
739
+ ) as executor:
740
+ futures = [
741
+ executor.submit(get_credit_summary, key) for key in keys
742
+ ]
743
 
744
  for future in concurrent.futures.as_completed(futures):
745
  try:
746
  credit_summary = future.result()
747
  if credit_summary:
748
+ total_balance += credit_summary.get(
749
+ "total_balance",
750
+ 0
751
+ )
752
  except Exception as exc:
753
  logging.error(f"获取额度信息生成异常: {exc}")
754
 
 
777
  "total_usage": 0
778
  })
779
 
780
+ @app.route(
781
+ '/handsome/v1/dashboard/billing/subscription',
782
+ methods=['GET']
783
+ )
784
  def billing_subscription():
785
  if not check_authorization(request):
786
  return jsonify({"error": "Unauthorized"}), 401
 
822
  return jsonify({"error": "Invalid request data"}), 400
823
 
824
  model_name = data['model']
825
+ request_type = determine_request_type(
826
+ model_name,
827
+ embedding_models,
828
+ free_embedding_models
829
+ )
830
  api_key = select_key(request_type, model_name)
831
 
832
  if not api_key:
833
  return jsonify(
834
+ {
835
+ "error": (
836
+ "No available API key for this "
837
+ "request type or all keys have "
838
+ "reached their limits"
839
+ )
840
+ }
841
+ ), 429
842
 
843
  headers = {
844
  "Authorization": f"Bearer {api_key}",
 
851
  EMBEDDINGS_ENDPOINT,
852
  headers=headers,
853
  json=data,
854
+ timeout=120
855
  )
856
 
857
  if response.status_code == 429:
 
866
  prompt_tokens = response_json["usage"]["prompt_tokens"]
867
  embedding_data = response_json["data"]
868
  except (KeyError, ValueError, IndexError) as e:
869
+ logging.error(
870
+ f"解析响应 JSON 失败: {e}, "
871
+ f"完整内容: {response_json}"
872
+ )
873
  prompt_tokens = 0
874
  embedding_data = []
875
 
876
  logging.info(
877
+ f"使用的key: {api_key}, "
878
+ f"提示token: {prompt_tokens}, "
879
+ f"总共用时: {total_time:.4f}秒, "
880
+ f"使用的模型: {model_name}"
881
  )
882
 
883
  return jsonify({
 
902
  unverified_keys_global = []
903
  valid_keys_global = []
904
 
905
+ # 启动时先调用一次 load_keys()
906
+ load_keys()
907
+ logging.info("程序启动时首次加载 keys 已执行")
908
+
909
  scheduler.start()
910
 
911
+ # load_keys()
912
  logging.info("首次加载 keys 已手动触发执行")
913
 
914
  refresh_models()
915
  logging.info("首次刷新模型列表已手动触发执行")
916
 
917
+ app.run(
918
+ debug=False,
919
+ host='0.0.0.0',
920
+ port=int(os.environ.get('PORT', 7860))
921
+ )