Spaces:

Simonc-44
/

API

Running

App Files Files Community

Simonc-44 committed 23 days ago

Commit

0fcecd1

verified ·

1 Parent(s): f6a33d3

Update main.py

Browse files

Files changed (1) hide show

main.py +32 -30

main.py CHANGED Viewed

@@ -11,35 +11,22 @@ app = FastAPI(title="CygnisAI Studio API")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
-# Mapping vers des modèles DISPONIBLES et STABLES sur le routeur Hugging Face
 MODELS = {
-    # Gemma 2 9B (Google)
     "google/gemma-3-27b-it": "google/gemma-2-9b-it",
-    # Llama 3.1 70B (Meta)
     "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-    # Qwen 2.5 7B (Alibaba)
     "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
-    # Phi 3.5 (Microsoft)
     "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
-    # DeepSeek R1 (Distill Llama 8B)
     "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-    # Llama 3.2 3B (Meta)
     "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
-    # Llama 3.1 8B (Meta)
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    # Défaut : Gemma 2 2B (Plus récent et dispo que le 2b-it)
-    "default": "google/gemma-2-2b-it"
 }
-# Modèle de secours ultime (toujours dispo)
-SAFETY_NET_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 # URL de base UNIQUE pour le routeur HF
 HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
@@ -59,16 +46,16 @@ class ChatResponse(BaseModel):
 async def verify_api_key(authorization: str = Header(None)):
     if not authorization:
         print("⚠️ Missing Authorization header")
-        raise HTTPException(status_code=401, detail="Missing Authorization header")
     try:
         scheme, token = authorization.split()
         if scheme.lower() != 'bearer':
             raise HTTPException(status_code=401, detail="Invalid authentication scheme")
         if token != CYGNIS_API_KEY:
             print(f"⚠️ Invalid API Key: {token}")
-            raise HTTPException(status_code=403, detail="Invalid API Key")
     except ValueError:
-        raise HTTPException(status_code=401, detail="Invalid authorization header format")
 @app.get("/")
 def read_root():
@@ -103,9 +90,9 @@ def call_hf_api(model_id, messages, req):
         for msg in messages:
             role = msg['role']
             content = msg['content']
-            if role == 'system': prompt_str += f"<|system|>\n{content}</s>\n"
-            elif role == 'user': prompt_str += f"<|user|>\n{content}</s>\n"
-            elif role == 'assistant': prompt_str += f"<|assistant|>\n{content}</s>\n"
         prompt_str += "<|assistant|>\n"
         payload_standard = {
@@ -127,7 +114,12 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
     if not HF_TOKEN:
         print("❌ CRITICAL: HF_TOKEN is missing!")
-        raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
     model_id = MODELS.get(req.model, MODELS["default"])
     print(f"🤖 Routing request to: {model_id}")
@@ -138,18 +130,23 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
     messages.append({"role": "user", "content": req.question})
     try:
-        # Premier essai avec le modèle demandé
         response = call_hf_api(model_id, messages, req)
-        # Si 404/503/500, on tente le SAFETY NET
         if response.status_code != 200:
             print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}")
             model_id = SAFETY_NET_MODEL
             response = call_hf_api(SAFETY_NET_MODEL, messages, req)
         if response.status_code != 200:
-            print(f"❌ HF Error ({response.status_code}): {response.text}")
-            raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
         data = response.json()
@@ -172,7 +169,12 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
     except Exception as e:
         print(f"❌ Internal Exception: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
 if __name__ == "__main__":
     import uvicorn

 # Hugging Face access token read from the environment (None when HF_TOKEN is unset).
 HF_TOKEN = os.environ.get("HF_TOKEN")
 # API key that client bearer tokens are compared against; falls back to the
 # hard-coded demo key below when CYGNIS_API_KEY is unset.
 # NOTE(review): confirm this fallback is never relied on in a real deployment.
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
+# Mapping to NON-GATED and POPULAR models
 # Keys are the model names accepted in requests; values are the model IDs the
 # request is actually routed to.
 MODELS = {
     "google/gemma-3-27b-it": "google/gemma-2-9b-it",
     "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
     "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
     "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
     "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
     "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    # Default: Qwen 2.5 (very robust and often available)
+    "default": "Qwen/Qwen2.5-7B-Instruct"
 }
+# Ultimate fallback model (Microsoft Phi 3.5 is very lightweight and often available)
+SAFETY_NET_MODEL = "microsoft/Phi-3.5-mini-instruct"
 # SINGLE base URL for the HF router
 HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
 async def verify_api_key(authorization: str = Header(None)):
     """Validate the ``Authorization: Bearer <key>`` header of a request.

     Used as a FastAPI dependency. Returns ``None`` when the header carries
     the expected API key; otherwise the request is rejected.

     Args:
         authorization: Raw value of the ``Authorization`` header, or ``None``
             when the client did not send one.

     Raises:
         HTTPException: 401 when the header is missing, is not exactly
             ``<scheme> <token>``, or uses a scheme other than ``Bearer``;
             403 when the token does not match ``CYGNIS_API_KEY``.
     """
     # Function-scope import keeps this block self-contained.
     import hmac

     if not authorization:
         print("⚠️ Missing Authorization header")
         # Reject here: falling through would call .split() on None and
         # surface as an unhandled AttributeError (HTTP 500).
         raise HTTPException(status_code=401, detail="Missing Authorization header")

     parts = authorization.split()
     if len(parts) != 2:
         raise HTTPException(status_code=401, detail="Invalid authorization header format")

     scheme, token = parts
     if scheme.lower() != 'bearer':
         raise HTTPException(status_code=401, detail="Invalid authentication scheme")

     # Constant-time comparison on bytes: avoids leaking key prefixes through
     # timing and tolerates non-ASCII characters in the submitted token.
     if not hmac.compare_digest(token.encode("utf-8"), CYGNIS_API_KEY.encode("utf-8")):
         # Never log the submitted token: it may be a real (mistyped) secret.
         print("⚠️ Invalid API Key")
         raise HTTPException(status_code=403, detail="Invalid API Key")
 @app.get("/")
 def read_root():
         for msg in messages:
             role = msg['role']
             content = msg['content']
+            if role == 'system': prompt_str += f"<|system|>\n{content}\n"
+            elif role == 'user': prompt_str += f"<|user|>\n{content}\n"
+            elif role == 'assistant': prompt_str += f"<|assistant|>\n{content}\n"
         prompt_str += "<|assistant|>\n"
         payload_standard = {
     if not HF_TOKEN:
         print("❌ CRITICAL: HF_TOKEN is missing!")
+        # Mock response instead of crash
+        return {
+            "answer": "Configuration Error: HF_TOKEN is missing on the server.",
+            "model_used": "error-handler",
+            "sources": []
+        }
     model_id = MODELS.get(req.model, MODELS["default"])
     print(f"🤖 Routing request to: {model_id}")
     messages.append({"role": "user", "content": req.question})
     try:
+        # Premier essai
         response = call_hf_api(model_id, messages, req)
+        # Si échec, Safety Net
         if response.status_code != 200:
             print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}")
             model_id = SAFETY_NET_MODEL
             response = call_hf_api(SAFETY_NET_MODEL, messages, req)
+        # Si tout échoue, Mock Response (ULTIMATE FALLBACK)
         if response.status_code != 200:
+            print(f"❌ ALL MODELS FAILED. Returning mock response. Last error: {response.text}")
+            return {
+                "answer": "Je suis désolé, mes serveurs de réflexion sont actuellement surchargés ou inaccessibles. Je ne peux pas traiter votre demande pour le moment. Veuillez réessayer dans quelques minutes.",
+                "model_used": "fallback-mock",
+                "sources": []
+            }
         data = response.json()
     except Exception as e:
         print(f"❌ Internal Exception: {str(e)}")
+        # Mock response on crash
+        return {
+            "answer": "Une erreur interne inattendue s'est produite. Mes excuses.",
+            "model_used": "exception-handler",
+            "sources": []
+        }
 if __name__ == "__main__":
     import uvicorn