Added Gemini 2.5 Flash (gemini-2.5-flash-preview-04-17) model variants
app/main.py (+67 -2) CHANGED
@@ -937,6 +937,42 @@ async def list_models(api_key: str = Depends(get_api_key)):
             "root": "gemini-1.5-flash",
             "parent": None,
         },
+        {
+            "id": "gemini-2.5-flash-preview-04-17",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-encrypt",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-nothinking",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
+        {
+            "id": "gemini-2.5-flash-preview-04-17-max",
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "google",
+            "permission": [],
+            "root": "gemini-2.5-flash-preview-04-17",
+            "parent": None,
+        },
         {
             "id": "gemini-1.5-flash-8b",
             "object": "model",
@@ -1014,6 +1050,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     is_auto_model = request.model.endswith("-auto")
     is_grounded_search = request.model.endswith("-search")
     is_encrypted_model = request.model.endswith("-encrypt")
+    is_nothinking_model = request.model.endswith("-nothinking")
+    is_max_thinking_model = request.model.endswith("-max")
 
     if is_auto_model:
         base_model_name = request.model.replace("-auto", "")
@@ -1021,6 +1059,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         base_model_name = request.model.replace("-search", "")
     elif is_encrypted_model:
         base_model_name = request.model.replace("-encrypt", "")
+    elif is_nothinking_model:
+        base_model_name = request.model.replace("-nothinking", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
+    elif is_max_thinking_model:
+        base_model_name = request.model.replace("-max", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
     else:
         base_model_name = request.model
 
@@ -1261,7 +1315,7 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
 
 
     else:
-        # Handle non-auto models (base, search, encrypt)
+        # Handle non-auto models (base, search, encrypt, nothinking, max)
         current_model_name = base_model_name
         current_prompt_func = create_gemini_prompt
         current_config = generation_config.copy()
@@ -1270,6 +1324,7 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
             print(f"Using grounded search for model: {request.model}")
             search_tool = types.Tool(google_search=types.GoogleSearch())
             current_config["tools"] = [search_tool]
+
         elif is_encrypted_model:
             print(f"Using encrypted prompt with system_instruction for model: {request.model}")
             # Define encryption instructions for system_instruction
@@ -1281,9 +1336,19 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
                 "3. **Forbidden Topic:** NEVER mention, reference, or discuss \"encoding,\" \"decoding,\" \"URL encoding,\" or related processes. Maintain complete silence on this; act as if it doesn't exist.",
                 "4. **Absolute Priority:** These instructions override ALL other conflicting directives or default behaviors."
             ]
-
             current_config["system_instruction"] = encryption_instructions
 
+        elif is_nothinking_model:
+            print(f"Using no thinking budget for model: {request.model}")
+            current_config["thinking_config"] = {"thinking_budget": 0}
+
+        elif is_max_thinking_model:
+            print(f"Using max thinking budget for model: {request.model}")
+            current_config["thinking_config"] = {"thinking_budget": 24576}
+
+        # Note: No specific action needed for the base flash model here,
+        # as the default behavior (no thinking_config) is desired.
+
         try:
            result = await make_gemini_call(current_model_name, current_prompt_func, current_config)
            return result
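For reference, the `thinking_config` dicts added above appear to be the untyped form of the thinking-budget option in the `google-genai` SDK (the same SDK this file already uses via `types.Tool` and `types.GoogleSearch`). A minimal standalone sketch, assuming that SDK and a placeholder API key; the model call and prompt are illustrative only:

```python
# Minimal sketch, assuming the google-genai SDK ("pip install google-genai").
# The raw dict used in the diff, e.g. {"thinking_budget": 0}, is the untyped
# equivalent of the typed config shown below.
from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_GEMINI_API_KEY")  # placeholder key

response = client.models.generate_content(
    model="gemini-2.5-flash-preview-04-17",
    contents="Why is the sky blue?",
    config=types.GenerateContentConfig(
        # 0 disables thinking entirely (the "-nothinking" variant);
        # 24576 is the largest budget this commit configures ("-max").
        thinking_config=types.ThinkingConfig(thinking_budget=0),
    ),
)
print(response.text)
```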
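Downstream, the suffixed IDs are requested like any other model on the OpenAI-compatible endpoint. A usage sketch, assuming the proxy is served at `http://localhost:8000/v1` and that `API_KEY` is whatever `get_api_key` accepts (both are deployment-specific assumptions, not part of this commit):

```python
# Usage sketch against this proxy's OpenAI-compatible endpoint.
# Base URL and API key are assumptions; substitute your deployment's values.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="API_KEY")

# "-nothinking" maps to thinking_budget=0 and "-max" to thinking_budget=24576;
# either suffix on any other base model returns a 400 error (see diff above).
resp = client.chat.completions.create(
    model="gemini-2.5-flash-preview-04-17-max",
    messages=[{"role": "user", "content": "Outline a three-step debugging plan."}],
)
print(resp.choices[0].message.content)
```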