yangtb24 committed on
Commit
df482ac
·
verified ·
1 Parent(s): 4078885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -90
app.py CHANGED
@@ -1305,11 +1305,11 @@ def handsome_chat_completions():
1305
  try:
1306
  start_time = time.time()
1307
  response = requests.post(
1308
- "https://api.siliconflow.cn/v1/chat/completions",
1309
  headers=headers,
1310
  json=data,
1311
- timeout=120,
1312
- stream=data.get("stream", False)
1313
  )
1314
 
1315
  if response.status_code == 429:
@@ -1317,33 +1317,116 @@ def handsome_chat_completions():
1317
 
1318
  if data.get("stream", False):
1319
  def generate():
1320
- try:
1321
- response.raise_for_status()
1322
- for chunk in response.iter_lines():
1323
- if chunk:
1324
- chunk = chunk.decode('utf-8')
1325
- yield f"{chunk}\n\n".encode('utf-8')
1326
- except requests.exceptions.RequestException as e:
1327
- logging.error(f"请求转发异常: {e}")
1328
- error_chunk_data = {
1329
- "id": f"chatcmpl-{uuid.uuid4()}",
1330
- "object": "chat.completion.chunk",
1331
- "created": int(time.time()),
1332
- "model": model_name,
1333
- "choices": [
1334
- {
1335
- "index": 0,
1336
- "delta": {
1337
- "role": "assistant",
1338
- "content": "Failed to process data"
1339
- },
1340
- "finish_reason": "stop"
1341
- }
1342
- ]
1343
- }
1344
- yield f"data: {json.dumps(error_chunk_data)}\n\n".encode('utf-8')
1345
- yield "data: [DONE]\n\n".encode('utf-8')
1346
- return Response(stream_with_context(generate()), content_type='text/event-stream')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1347
  else:
1348
  response.raise_for_status()
1349
  end_time = time.time()
@@ -1351,74 +1434,66 @@ def handsome_chat_completions():
1351
  total_time = end_time - start_time
1352
 
1353
  try:
1354
- choices = response_json.get("choices", [])
1355
- if choices and isinstance(choices[0], dict):
1356
- message = choices[0].get("message",{})
1357
- content = message.get("content")
1358
- response_data = {
1359
- "id": f"chatcmpl-{uuid.uuid4()}",
1360
- "object": "chat.completion",
1361
- "created": int(time.time()),
1362
- "model": model_name,
1363
- "choices": [
1364
- {
1365
- "index": 0,
1366
- "message": {
1367
- "role": "assistant",
1368
- "content": content
1369
- },
1370
- "finish_reason": "stop"
1371
- }
1372
- ],
1373
- }
1374
- else:
1375
- response_data = {
1376
- "id": f"chatcmpl-{uuid.uuid4()}",
1377
- "object": "chat.completion",
1378
- "created": int(time.time()),
1379
- "model": model_name,
1380
- "choices": [
1381
- {
1382
- "index": 0,
1383
- "message": {
1384
- "role": "assistant",
1385
- "content": "No response content"
1386
- },
1387
- "finish_reason": "stop"
1388
- }
1389
- ]
1390
- }
1391
  except (KeyError, ValueError, IndexError) as e:
1392
  logging.error(
1393
- f"解析响应 JSON 失败: {e}, "
1394
  f"完整内容: {response_json}"
1395
  )
1396
- response_data = {
1397
- "id": f"chatcmpl-{uuid.uuid4()}",
1398
- "object": "chat.completion",
1399
- "created": int(time.time()),
1400
- "model": model_name,
1401
- "choices": [
1402
- {
1403
- "index": 0,
1404
- "message": {
1405
- "role": "assistant",
1406
- "content": "Failed to process data"
1407
- },
1408
- "finish_reason": "stop"
1409
- }
1410
- ]
1411
- }
1412
-
 
 
 
 
 
 
 
 
 
 
 
 
1413
  logging.info(
1414
- f"使用的key: {api_key}, "
1415
- f"总共用时: {total_time:.4f}秒, "
1416
- f"使用的模型: {model_name}"
 
 
 
 
 
1417
  )
1418
  with data_lock:
1419
  request_timestamps.append(time.time())
1420
- token_counts.append(0)
1421
- return jsonify(response_data)
 
 
 
 
 
1422
  except requests.exceptions.RequestException as e:
1423
  logging.error(f"请求转发异常: {e}")
1424
  return jsonify({"error": str(e)}), 500
 
1305
  try:
1306
  start_time = time.time()
1307
  response = requests.post(
1308
+ TEST_MODEL_ENDPOINT,
1309
  headers=headers,
1310
  json=data,
1311
+ stream=data.get("stream", False),
1312
+ timeout=60
1313
  )
1314
 
1315
  if response.status_code == 429:
 
1317
 
1318
  if data.get("stream", False):
1319
  def generate():
1320
+ first_chunk_time = None
1321
+ full_response_content = ""
1322
+ for chunk in response.iter_content(chunk_size=1024):
1323
+ if chunk:
1324
+ if first_chunk_time is None:
1325
+ first_chunk_time = time.time()
1326
+ full_response_content += chunk.decode("utf-8")
1327
+ yield chunk
1328
+
1329
+ end_time = time.time()
1330
+ first_token_time = (
1331
+ first_chunk_time - start_time
1332
+ if first_chunk_time else 0
1333
+ )
1334
+ total_time = end_time - start_time
1335
+
1336
+ prompt_tokens = 0
1337
+ completion_tokens = 0
1338
+ response_content = ""
1339
+ for line in full_response_content.splitlines():
1340
+ if line.startswith("data:"):
1341
+ line = line[5:].strip()
1342
+ if line == "[DONE]":
1343
+ continue
1344
+ try:
1345
+ response_json = json.loads(line)
1346
+
1347
+ if (
1348
+ "usage" in response_json and
1349
+ "completion_tokens" in response_json["usage"]
1350
+ ):
1351
+ completion_tokens = response_json[
1352
+ "usage"
1353
+ ]["completion_tokens"]
1354
+
1355
+ if (
1356
+ "choices" in response_json and
1357
+ len(response_json["choices"]) > 0 and
1358
+ "delta" in response_json["choices"][0] and
1359
+ "content" in response_json[
1360
+ "choices"
1361
+ ][0]["delta"]
1362
+ ):
1363
+ response_content += response_json[
1364
+ "choices"
1365
+ ][0]["delta"]["content"]
1366
+
1367
+ if (
1368
+ "usage" in response_json and
1369
+ "prompt_tokens" in response_json["usage"]
1370
+ ):
1371
+ prompt_tokens = response_json[
1372
+ "usage"
1373
+ ]["prompt_tokens"]
1374
+
1375
+ except (
1376
+ KeyError,
1377
+ ValueError,
1378
+ IndexError
1379
+ ) as e:
1380
+ logging.error(
1381
+ f"解析流式响应单行 JSON 失败: {e}, "
1382
+ f"行内容: {line}"
1383
+ )
1384
+
1385
+ user_content = ""
1386
+ messages = data.get("messages", [])
1387
+ for message in messages:
1388
+ if message["role"] == "user":
1389
+ if isinstance(message["content"], str):
1390
+ user_content += message["content"] + " "
1391
+ elif isinstance(message["content"], list):
1392
+ for item in message["content"]:
1393
+ if (
1394
+ isinstance(item, dict) and
1395
+ item.get("type") == "text"
1396
+ ):
1397
+ user_content += (
1398
+ item.get("text", "") +
1399
+ " "
1400
+ )
1401
+
1402
+ user_content = user_content.strip()
1403
+
1404
+ user_content_replaced = user_content.replace(
1405
+ '\n', '\\n'
1406
+ ).replace('\r', '\\n')
1407
+ response_content_replaced = response_content.replace(
1408
+ '\n', '\\n'
1409
+ ).replace('\r', '\\n')
1410
+
1411
+ logging.info(
1412
+ f"使用的key: {api_key}, "
1413
+ f"提示token: {prompt_tokens}, "
1414
+ f"输出token: {completion_tokens}, "
1415
+ f"首字用时: {first_token_time:.4f}秒, "
1416
+ f"总共用时: {total_time:.4f}秒, "
1417
+ f"使用的模型: {model_name}, "
1418
+ f"用户的内容: {user_content_replaced}, "
1419
+ f"输出的内容: {response_content_replaced}"
1420
+ )
1421
+
1422
+ with data_lock:
1423
+ request_timestamps.append(time.time())
1424
+ token_counts.append(prompt_tokens+completion_tokens)
1425
+
1426
+ return Response(
1427
+ stream_with_context(generate()),
1428
+ content_type=response.headers['Content-Type']
1429
+ )
1430
  else:
1431
  response.raise_for_status()
1432
  end_time = time.time()
 
1434
  total_time = end_time - start_time
1435
 
1436
  try:
1437
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
1438
+ completion_tokens = response_json[
1439
+ "usage"
1440
+ ]["completion_tokens"]
1441
+ response_content = response_json[
1442
+ "choices"
1443
+ ][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1444
  except (KeyError, ValueError, IndexError) as e:
1445
  logging.error(
1446
+ f"解析非流式响应 JSON 失败: {e}, "
1447
  f"完整内容: {response_json}"
1448
  )
1449
+ prompt_tokens = 0
1450
+ completion_tokens = 0
1451
+ response_content = ""
1452
+
1453
+ user_content = ""
1454
+ messages = data.get("messages", [])
1455
+ for message in messages:
1456
+ if message["role"] == "user":
1457
+ if isinstance(message["content"], str):
1458
+ user_content += message["content"] + " "
1459
+ elif isinstance(message["content"], list):
1460
+ for item in message["content"]:
1461
+ if (
1462
+ isinstance(item, dict) and
1463
+ item.get("type") == "text"
1464
+ ):
1465
+ user_content += (
1466
+ item.get("text", "") + " "
1467
+ )
1468
+
1469
+ user_content = user_content.strip()
1470
+
1471
+ user_content_replaced = user_content.replace(
1472
+ '\n', '\\n'
1473
+ ).replace('\r', '\\n')
1474
+ response_content_replaced = response_content.replace(
1475
+ '\n', '\\n'
1476
+ ).replace('\r', '\\n')
1477
+
1478
  logging.info(
1479
+ f"使用的key: {api_key}, "
1480
+ f"提示token: {prompt_tokens}, "
1481
+ f"输出token: {completion_tokens}, "
1482
+ f"首字用时: 0, "
1483
+ f"总共用时: {total_time:.4f}秒, "
1484
+ f"使用的模型: {model_name}, "
1485
+ f"用户的内容: {user_content_replaced}, "
1486
+ f"输出的内容: {response_content_replaced}"
1487
  )
1488
  with data_lock:
1489
  request_timestamps.append(time.time())
1490
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
1491
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
1492
+ else:
1493
+ token_counts.append(0)
1494
+
1495
+ return jsonify(response_json)
1496
+
1497
  except requests.exceptions.RequestException as e:
1498
  logging.error(f"请求转发异常: {e}")
1499
  return jsonify({"error": str(e)}), 500