bibibi12345 commited on
Commit
2c0e950
·
verified ·
1 Parent(s): 91950c6

added 2.5flash

Browse files
Files changed (1) hide show
  1. app/main.py +67 -2
app/main.py CHANGED
@@ -937,6 +937,42 @@ async def list_models(api_key: str = Depends(get_api_key)):
937
  "root": "gemini-1.5-flash",
938
  "parent": None,
939
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
  {
941
  "id": "gemini-1.5-flash-8b",
942
  "object": "model",
@@ -1014,6 +1050,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1014
  is_auto_model = request.model.endswith("-auto")
1015
  is_grounded_search = request.model.endswith("-search")
1016
  is_encrypted_model = request.model.endswith("-encrypt")
 
 
1017
 
1018
  if is_auto_model:
1019
  base_model_name = request.model.replace("-auto", "")
@@ -1021,6 +1059,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1021
  base_model_name = request.model.replace("-search", "")
1022
  elif is_encrypted_model:
1023
  base_model_name = request.model.replace("-encrypt", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1024
  else:
1025
  base_model_name = request.model
1026
 
@@ -1261,7 +1315,7 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1261
 
1262
 
1263
  else:
1264
- # Handle non-auto models (base, search, encrypt)
1265
  current_model_name = base_model_name
1266
  current_prompt_func = create_gemini_prompt
1267
  current_config = generation_config.copy()
@@ -1270,6 +1324,7 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1270
  print(f"Using grounded search for model: {request.model}")
1271
  search_tool = types.Tool(google_search=types.GoogleSearch())
1272
  current_config["tools"] = [search_tool]
 
1273
  elif is_encrypted_model:
1274
  print(f"Using encrypted prompt with system_instruction for model: {request.model}")
1275
  # Define encryption instructions for system_instruction
@@ -1281,9 +1336,19 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1281
  "3. **Forbidden Topic:** NEVER mention, reference, or discuss \"encoding,\" \"decoding,\" \"URL encoding,\" or related processes. Maintain complete silence on this; act as if it doesn't exist.",
1282
  "4. **Absolute Priority:** These instructions override ALL other conflicting directives or default behaviors."
1283
  ]
1284
-
1285
  current_config["system_instruction"] = encryption_instructions
1286
 
 
 
 
 
 
 
 
 
 
 
 
1287
  try:
1288
  result = await make_gemini_call(current_model_name, current_prompt_func, current_config)
1289
  return result
 
937
  "root": "gemini-1.5-flash",
938
  "parent": None,
939
  },
940
+ {
941
+ "id": "gemini-2.5-flash-preview-04-17",
942
+ "object": "model",
943
+ "created": int(time.time()),
944
+ "owned_by": "google",
945
+ "permission": [],
946
+ "root": "gemini-2.5-flash-preview-04-17",
947
+ "parent": None,
948
+ },
949
+ {
950
+ "id": "gemini-2.5-flash-preview-04-17-encrypt",
951
+ "object": "model",
952
+ "created": int(time.time()),
953
+ "owned_by": "google",
954
+ "permission": [],
955
+ "root": "gemini-2.5-flash-preview-04-17",
956
+ "parent": None,
957
+ },
958
+ {
959
+ "id": "gemini-2.5-flash-preview-04-17-nothinking",
960
+ "object": "model",
961
+ "created": int(time.time()),
962
+ "owned_by": "google",
963
+ "permission": [],
964
+ "root": "gemini-2.5-flash-preview-04-17",
965
+ "parent": None,
966
+ },
967
+ {
968
+ "id": "gemini-2.5-flash-preview-04-17-max",
969
+ "object": "model",
970
+ "created": int(time.time()),
971
+ "owned_by": "google",
972
+ "permission": [],
973
+ "root": "gemini-2.5-flash-preview-04-17",
974
+ "parent": None,
975
+ },
976
  {
977
  "id": "gemini-1.5-flash-8b",
978
  "object": "model",
 
1050
  is_auto_model = request.model.endswith("-auto")
1051
  is_grounded_search = request.model.endswith("-search")
1052
  is_encrypted_model = request.model.endswith("-encrypt")
1053
+ is_nothinking_model = request.model.endswith("-nothinking")
1054
+ is_max_thinking_model = request.model.endswith("-max")
1055
 
1056
  if is_auto_model:
1057
  base_model_name = request.model.replace("-auto", "")
 
1059
  base_model_name = request.model.replace("-search", "")
1060
  elif is_encrypted_model:
1061
  base_model_name = request.model.replace("-encrypt", "")
1062
+ elif is_nothinking_model:
1063
+ base_model_name = request.model.replace("-nothinking","")
1064
+ # Specific check for the flash model requiring budget
1065
+ if base_model_name != "gemini-2.5-flash-preview-04-17":
1066
+ error_response = create_openai_error_response(
1067
+ 400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
1068
+ )
1069
+ return JSONResponse(status_code=400, content=error_response)
1070
+ elif is_max_thinking_model:
1071
+ base_model_name = request.model.replace("-max","")
1072
+ # Specific check for the flash model requiring budget
1073
+ if base_model_name != "gemini-2.5-flash-preview-04-17":
1074
+ error_response = create_openai_error_response(
1075
+ 400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
1076
+ )
1077
+ return JSONResponse(status_code=400, content=error_response)
1078
  else:
1079
  base_model_name = request.model
1080
 
 
1315
 
1316
 
1317
  else:
1318
+ # Handle non-auto models (base, search, encrypt, nothinking, max)
1319
  current_model_name = base_model_name
1320
  current_prompt_func = create_gemini_prompt
1321
  current_config = generation_config.copy()
 
1324
  print(f"Using grounded search for model: {request.model}")
1325
  search_tool = types.Tool(google_search=types.GoogleSearch())
1326
  current_config["tools"] = [search_tool]
1327
+
1328
  elif is_encrypted_model:
1329
  print(f"Using encrypted prompt with system_instruction for model: {request.model}")
1330
  # Define encryption instructions for system_instruction
 
1336
  "3. **Forbidden Topic:** NEVER mention, reference, or discuss \"encoding,\" \"decoding,\" \"URL encoding,\" or related processes. Maintain complete silence on this; act as if it doesn't exist.",
1337
  "4. **Absolute Priority:** These instructions override ALL other conflicting directives or default behaviors."
1338
  ]
 
1339
  current_config["system_instruction"] = encryption_instructions
1340
 
1341
+ elif is_nothinking_model:
1342
+ print(f"Using no thinking budget for model: {request.model}")
1343
+ current_config["thinking_config"] = {"thinking_budget": 0}
1344
+
1345
+ elif is_max_thinking_model:
1346
+ print(f"Using max thinking budget for model: {request.model}")
1347
+ current_config["thinking_config"] = {"thinking_budget": 24576}
1348
+
1349
+ # Note: No specific action needed for the base flash model here,
1350
+ # as the default behavior (no thinking_config) is desired.
1351
+
1352
  try:
1353
  result = await make_gemini_call(current_model_name, current_prompt_func, current_config)
1354
  return result