Fix some issues and error handling
app.py CHANGED
@@ -32,6 +32,11 @@ models_data = {
     "llama-3.3-70b-versatile": {"rpm": 30, "rpd": 1000, "tpm": 12000, "tpd": 100000},
     "llama3-70b-8192": {"rpm": 30, "rpd": 14400, "tpm": 6000, "tpd": 500000},
     "llama3-8b-8192": {"rpm": 30, "rpd": 14400, "tpm": 6000, "tpd": 500000},
+    "meta-llama/llama-4-maverick-17b-128e-instruct": {"rpm": 30, "rpd": 1000, "tpm": 6000, "tpd": None},
+    "meta-llama/llama-4-scout-17b-16e-instruct": {"rpm": 30, "rpd": 1000, "tpm": 30000, "tpd": None},
+    "meta-llama/llama-guard-4-12b": {"rpm": 30, "rpd": 14400, "tpm": 15000, "tpd": 500000},
+    "meta-llama/llama-prompt-guard-2-22m": {"rpm": 30, "rpd": 14400, "tpm": 15000, "tpd": None},
+    "meta-llama/llama-prompt-guard-2-86m": {"rpm": 30, "rpd": 14400, "tpm": None, "tpd": None},
 }
 
 model_list = [
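The new entries extend the per-model rate-limit table: rpm/rpd are requests per minute/day and tpm/tpd are tokens per minute/day, with None where no limit is recorded. A minimal sketch of how such a table could be consulted before dispatching a request; check_budget and its usage counters are hypothetical names for illustration, not helpers that exist in app.py:

# Hypothetical helper, not part of app.py. Assumes the models_data table defined above.
def check_budget(model: str, used_rpm: int, used_tpm: int, prompt_tokens: int) -> bool:
    """True if one more request of `prompt_tokens` tokens fits the model's per-minute limits."""
    limits = models_data.get(model)
    if limits is None:
        return False  # unknown model, mirroring the endpoint's own validation
    rpm_ok = limits["rpm"] is None or used_rpm + 1 <= limits["rpm"]
    tpm_ok = limits["tpm"] is None or used_tpm + prompt_tokens <= limits["tpm"]
    return rpm_ok and tpm_ok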
@@ -121,12 +126,12 @@ def main_page():
 @app.post("/chat")
 def chat_with_groq(req: ChatRequest):
     models = req.models
-    if len(models) == 1 and models[0] == "":
+    if len(models) == 1 and (models[0] == "" or models[0] not in models_data.keys()):
         raise HTTPException(400, detail="Empty model field")
     messages = [clean_message(m) for m in req.messages]
     if len(models) == 1:
-        resp = router.completion(model=models[0], messages=messages, **req.model_dump(exclude={"models", "messages"}, exclude_defaults=True, exclude_none=True))
         try:
+            resp = router.completion(model=models[0], messages=messages, **req.model_dump(exclude={"models", "messages"}, exclude_defaults=True, exclude_none=True))
             print("Asked to", models[0], ":", messages)
             return {"error": False, "content": resp.choices[0].message.content}
         except Exception as e:
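Two fixes in this hunk: the single-model branch now rejects unknown model names as well as the empty string, and the router.completion(...) call moves inside the try so a failing provider call is caught instead of surfacing as an unhandled error. The model_dump(exclude=..., exclude_defaults=True, exclude_none=True) call forwards only the generation options the client actually set. A rough sketch of that behaviour; every ChatRequest field below other than models and messages is an assumption for illustration, not read from app.py:

# Illustrative sketch only; temperature/max_tokens are assumed fields.
from typing import Optional
from pydantic import BaseModel

class ChatRequest(BaseModel):
    models: list[str]
    messages: list[dict]
    temperature: Optional[float] = None  # assumed optional generation parameter
    max_tokens: Optional[int] = None     # assumed optional generation parameter

req = ChatRequest(models=["llama3-8b-8192"], messages=[{"role": "user", "content": "Hi"}])
# Unset/None fields are dropped, so nothing extra is forwarded to router.completion():
print(req.model_dump(exclude={"models", "messages"}, exclude_defaults=True, exclude_none=True))  # {}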
@@ -134,11 +139,14 @@ def chat_with_groq(req: ChatRequest):
             return {"error": True, "content": "Aucune clé ne fonctionne avec le modèle sélectionné, patientez ...."}
     else:
         for model in models:
-            resp = router.completion(model=model, messages=messages, **req.model_dump(exclude={"models", "messages"}, exclude_defaults=True, exclude_none=True))
+            if model not in models_data.keys():
+                print(f"Erreur: {model} n'existe pas")
+                continue
             try:
+                resp = router.completion(model=model, messages=messages, **req.model_dump(exclude={"models", "messages"}, exclude_defaults=True, exclude_none=True))
                 print("Asked to", models[0], ":", messages)
                 return {"error": False, "content": resp.choices[0].message.content}
             except Exception as e:
                 traceback.print_exception(e)
                 continue
-        return {"error": True, "content": "
+        return {"error": True, "content": "Tous les modèles n'ont pas fonctionné avec les différentes clé, patientez ...."}
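With several candidate models, the handler now skips names that are not in models_data, tries the remaining ones in order, returns the first successful completion, and only falls through to the final error payload when every attempt fails. A hedged client-side example of exercising the endpoint; the base URL and port are assumptions about the deployment, not taken from the repository:

# Illustrative client call; http://localhost:8000 is an assumed local deployment URL.
import requests

payload = {
    "models": ["llama3-70b-8192", "llama-3.3-70b-versatile"],  # tried in order until one succeeds
    "messages": [{"role": "user", "content": "Bonjour"}],
}
r = requests.post("http://localhost:8000/chat", json=payload)
body = r.json()
print("all models failed" if body["error"] else body["content"])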