adityaverma977 committed
Commit 8bcd5b0 · Parent: c0a08aa
Add 15+ free HF models, expand model selection, add debug logging

Files changed:
- backend/app/groq_client.py (+16 -0)
- backend/app/hf_spaces.py (+52 -3)
- backend/app/main.py (+3 -2)
backend/app/groq_client.py
CHANGED
@@ -22,11 +22,27 @@ GROQ_PREMIUM_MODELS = [
 ]
 
 # Open-source models available via HF Inference API (unlimited calls)
+# Expanded list of free HF models (add your API token to access)
 HF_MODELS = [
+    # Fast, reliable models
     "mistralai/Mistral-7B-Instruct-v0.2",
+    "mistralai/Mistral-7B-Instruct-v0.1",
+    "HuggingFaceH4/zephyr-7b-beta",
+    "HuggingFaceH4/zephyr-7b",
+    # Quality-focused models
     "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
+    "NousResearch/Nous-Hermes-2-7b",
+    # Meta models
     "meta-llama/Llama-2-7b-chat-hf",
+    "meta-llama/Llama-2-13b-chat-hf",
+    "meta-llama/Llama-3-8b-Instruct",
+    # Instruction-tuned models
     "google/flan-t5-large",
+    "google/flan-t5-base",
+    # Falcon models
+    "tiiuae/falcon-7b-instruct",
+    # Other strong models
+    "EleutherAI/gpt-j-6B",
 ]
 
 
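The code that actually calls these models is not part of this commit, so the following is only a minimal sketch of how one HF_MODELS entry could be queried through the Hugging Face Inference API with huggingface_hub; the HF_TOKEN environment variable, the default model choice, and the generation parameters are assumptions, not code from groq_client.py.

# Sketch only: not taken from groq_client.py. Assumes an HF_TOKEN env var
# and that the chosen model is served by the HF Inference API.
import os
from huggingface_hub import InferenceClient

def generate(prompt: str, model: str = "mistralai/Mistral-7B-Instruct-v0.2") -> str:
    # One client per call keeps the sketch self-contained; a real backend
    # would likely reuse a single client instance.
    client = InferenceClient(model=model, token=os.environ.get("HF_TOKEN"))
    # text_generation returns the generated continuation as a string
    return client.text_generation(prompt, max_new_tokens=256, temperature=0.7)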
backend/app/hf_spaces.py
CHANGED
@@ -20,30 +20,79 @@ ALL_MODELS = [
         "name": "Llama 2 70B",
         "description": "Meta's large 70B instruction-tuned model",
     },
-    # Open-source HF models
+    # Open-source HF models - Fast & Reliable
     {
         "id": "mistralai/Mistral-7B-Instruct-v0.2",
-        "name": "Mistral 7B Instruct",
+        "name": "Mistral 7B Instruct v0.2",
         "description": "Fast, reliable 7B instruction-tuned model",
     },
+    {
+        "id": "mistralai/Mistral-7B-Instruct-v0.1",
+        "name": "Mistral 7B Instruct v0.1",
+        "description": "Original Mistral 7B instruct version",
+    },
+    {
+        "id": "HuggingFaceH4/zephyr-7b-beta",
+        "name": "Zephyr 7B Beta",
+        "description": "HF's high-quality 7B chat model",
+    },
+    {
+        "id": "HuggingFaceH4/zephyr-7b",
+        "name": "Zephyr 7B",
+        "description": "Fast, well-aligned 7B model",
+    },
+    # Open-source HF models - Quality-Focused
     {
         "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-        "name": "Nous Hermes 2",
+        "name": "Nous Hermes 2 Mistral",
         "description": "High-quality 7B with DPO training",
     },
+    {
+        "id": "NousResearch/Nous-Hermes-2-7b",
+        "name": "Nous Hermes 2 7B",
+        "description": "Quality-focused 7B model",
+    },
+    # Open-source HF models - Meta's Llama
     {
         "id": "meta-llama/Llama-2-7b-chat-hf",
         "name": "Llama 2 7B Chat",
         "description": "Meta's Llama 2 7B chat variant",
     },
+    {
+        "id": "meta-llama/Llama-2-13b-chat-hf",
+        "name": "Llama 2 13B Chat",
+        "description": "Meta's Llama 2 13B chat variant",
+    },
+    {
+        "id": "meta-llama/Llama-3-8b-Instruct",
+        "name": "Llama 3 8B Instruct",
+        "description": "Meta's latest Llama 3 8B model",
+    },
+    # Open-source HF models - Google & Others
     {
         "id": "google/flan-t5-large",
         "name": "FLAN-T5 Large",
         "description": "Google's instruction-tuned T5 model",
     },
+    {
+        "id": "google/flan-t5-base",
+        "name": "FLAN-T5 Base",
+        "description": "Google's FLAN-T5 base variant",
+    },
+    {
+        "id": "tiiuae/falcon-7b-instruct",
+        "name": "Falcon 7B Instruct",
+        "description": "TII's Falcon 7B instruction-tuned",
+    },
+    {
+        "id": "EleutherAI/gpt-j-6B",
+        "name": "GPT-J 6B",
+        "description": "EleutherAI's 6B GPT model",
+    },
 ]
 
 
+
 async def get_available_models() -> dict:
     """
     Get unified list of all available models (Groq + HF).
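The body of get_available_models() lies outside the changed hunk, so as a sketch only: one way a caller could group the expanded ALL_MODELS registry by provider, assuming HF repo ids are the entries containing a "/" (this helper and that heuristic are not part of the commit).

# Sketch only: split_by_provider is a hypothetical helper, not part of hf_spaces.py.
from typing import Dict, List

def split_by_provider(all_models: List[Dict[str, str]]) -> Dict[str, List[Dict[str, str]]]:
    """Group model entries: HF repo ids contain a '/', Groq model ids typically do not."""
    groups: Dict[str, List[Dict[str, str]]] = {"groq": [], "hf": []}
    for entry in all_models:
        provider = "hf" if "/" in entry["id"] else "groq"
        groups[provider].append(entry)
    return groups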
backend/app/main.py
CHANGED
@@ -66,14 +66,14 @@ async def root():
     return {
         "service": "rush-agents-backend",
         "status": "ok",
-        "
+        "inference_ready": groq_client.is_ready(),
     }
 
 @app.get("/wake")
 async def wake():
     return {
         "warm": True,
-        "
+        "inference_ready": groq_client.is_ready(),
         "uptime_seconds": int(time.time() - START_TIME),
     }
 
@@ -106,6 +106,7 @@ async def start_simulation(req: StartSimulationRequest):
 
 @app.post("/place-fire", response_model=SimulationState)
 def place_fire(req: PlaceFireRequest):
+    print(f"\n[PLACE_FIRE] Called - sim_id={req.simulation_id}, total_active={len(active_simulations)}, keys={list(active_simulations.keys())}")
     sim = _get_or_404(req.simulation_id)
     if sim.status != "waiting_for_scenario":
         raise HTTPException(status_code=409, detail="Fire already placed or simulation finished.")
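Since both / and /wake now expose inference_ready, a client can poll /wake until the backend is warm and inference is available. A minimal sketch, assuming a local placeholder base URL and the requests library (neither is part of the commit):

# Sketch only: not part of main.py. BASE_URL is an assumed local dev address.
import time
import requests

BASE_URL = "http://localhost:8000"

def wait_until_ready(timeout_s: float = 60.0, poll_s: float = 2.0) -> bool:
    """Poll /wake until the backend reports inference_ready, or give up."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        resp = requests.get(f"{BASE_URL}/wake", timeout=10)
        if resp.ok and resp.json().get("inference_ready"):
            return True
        time.sleep(poll_s)
    return False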