adityaverma977 committed
Commit · 593e9c0
1 Parent(s): d4d710c

Remove Groq models; switch registry and clients to HF-only curated list
Files changed:
- app/groq_client.py  +40 -75
- app/hf_spaces.py  +25 -91
- backend/app/groq_client.py  +49 -127
- backend/app/hf_spaces.py  +36 -99
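
All four files converge on one call pattern: POST the prompt to the Hugging Face Inference API with httpx and read `generated_text` out of the response. A minimal standalone sketch of that pattern, with the endpoint and payload shape taken from the diffs below (the function name and example model are mine, not the repo's):

import asyncio
import os

import httpx

HF_API_BASE = "https://api-inference.huggingface.co/models"
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")  # optional; anonymous calls are heavily rate-limited

async def generate(model_id: str, prompt: str) -> str:
    # Text-generation endpoints usually answer with [{"generated_text": ...}],
    # but some backends return a single dict, so handle both shapes.
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.post(
            f"{HF_API_BASE}/{model_id}",
            headers=headers,
            json={"inputs": prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
        )
        resp.raise_for_status()
        data = resp.json()
    item = data[0] if isinstance(data, list) and data else data
    return item.get("generated_text", "")

# e.g. asyncio.run(generate("mistralai/Mistral-7B-Instruct-v0.2", "Say hi"))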
app/groq_client.py
@@ -3,22 +3,20 @@ import os
 import random
 import math
 import httpx
-from groq import AsyncGroq
 from dotenv import load_dotenv
 
 load_dotenv()
 
-_GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
-_client = AsyncGroq(api_key=_GROQ_API_KEY) if _GROQ_API_KEY else None
 _HF_API_BASE = "https://api-inference.huggingface.co/models"
 
+# Default HF fallback
+DEFAULT_DECISION_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
 MAX_AGENT_SPEED = 80
 
 
 def is_ready():
-    return …
+    return _HF_API_TOKEN is not None
 
 
 def _build_fire_state_summary(agent, fire, all_agents) -> str:
@@ -119,77 +117,44 @@ RECENT RADIO CHAT:
 What do you do?"""
 
     try:
-        …
-        if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
-            action = "collect_water"
-        elif agent.water_collected and dist_to_fire <= 350:
-            action = "extinguish_fire"
-
-        return {
-            "action": action,
-            "vote_for": decision.get("vote_for"),
-            "message": decision.get("message", "Moving strategically."),
-            "reasoning": decision.get("reasoning", "Survival and teamwork.")
-        }
-    except Exception as e:
-        # If Groq fails (rate limits, network), try a HF fallback when possible
-        print(f"Error calling groq for {agent.model_name}: {e}")
-        err = str(e).lower()
-        if _HF_API_TOKEN and ("rate limit" in err or "rate_limit" in err or "429" in err):
-            fallback_hf = "mistralai/Mistral-7B-Instruct-v0.2"
-            try:
-                …
-                if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
-                    action = "collect_water"
-                elif agent.water_collected and dist_to_fire <= 350:
-                    action = "extinguish_fire"
-
-                return {
-                    "action": action,
-                    "vote_for": decision.get("vote_for"),
-                    "message": decision.get("message", "Moving strategically."),
-                    "reasoning": decision.get("reasoning", "Survival and teamwork.")
-                }
-            except Exception as e2:
-                print(f"HF fallback failed: {e2}")
-                return _fallback_escape(agent, fire)
+        # Use HF Inference API directly for the requested model (or default)
+        target_model = agent.model_name if agent.model_name else DEFAULT_DECISION_MODEL
+        async with httpx.AsyncClient(timeout=15.0) as client:
+            resp = await client.post(
+                f"{_HF_API_BASE}/{target_model}",
+                headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
+                json={"inputs": system_prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
+            )
+            resp.raise_for_status()
+            data = resp.json()
+        if isinstance(data, list) and len(data) > 0:
+            text = data[0].get("generated_text", "")
+        else:
+            text = data.get("generated_text", "")
+        text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
+        try:
+            js = text[text.find('{'):text.rfind('}')+1]
+            decision = json.loads(js)
+        except Exception:
+            decision = {}
+
+        action = decision.get("action", "escape")
+        if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
+            action = "escape"
+
+        if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
+            action = "collect_water"
+        elif agent.water_collected and dist_to_fire <= 350:
+            action = "extinguish_fire"
+
+        return {
+            "action": action,
+            "vote_for": decision.get("vote_for"),
+            "message": decision.get("message", "Moving strategically."),
+            "reasoning": decision.get("reasoning", "Survival and teamwork.")
+        }
+    except Exception as e:
+        print(f"HF inference failed for {agent.model_name}: {e}")
         return _fallback_escape(agent, fire)
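
Both the old and the new code recover the decision by slicing from the first `{` to the last `}` in the raw completion and parsing that span. The logic is small enough to test on its own; a self-contained sketch (the function name is mine, the slicing rule is the one in the diff):

import json

def extract_decision(text: str) -> dict:
    """Parse the first {...} span in model output; return {} if absent or malformed."""
    start, end = text.find('{'), text.rfind('}') + 1
    if start < 0 or end <= start:
        return {}
    try:
        return json.loads(text[start:end])
    except json.JSONDecodeError:
        return {}

assert extract_decision('noise {"action": "escape"} tail') == {"action": "escape"}
assert extract_decision("no json here") == {}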
app/hf_spaces.py
@@ -5,102 +5,36 @@ import os
 import httpx
 from typing import Optional
 
-HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
+HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or os.environ.get("HF_API_TOKEN")
 
-# …
-KNOWN_SPACES_MODELS = [
-    …
-    {
-        "id": "HuggingFaceH4/zephyr-7b",
-        "name": "Zephyr-7B",
-        "space_url": "https://huggingface.co/spaces/HuggingFaceH4/zephyr-7b-beta",
-        "description": "Zephyr 7B fine-tuned model",
-    },
-    {
-        "id": "teknium/OpenHermes-2.5-Mistral-7B",
-        "name": "OpenHermes-7B",
-        "space_url": "https://huggingface.co/spaces/teknium/OpenHermes-2.5-Mistral-7B",
-        "description": "OpenHermes instruction-tuned 7B",
-    },
-]
-
-# Groq models (built-in)
-GROQ_MODELS = [
-    {"id": "mixtral-8x7b-32768", "name": "Mixtral 8x7B", "backend": "groq"},
-    {"id": "llama2-70b-4096", "name": "Llama 2 70B", "backend": "groq"},
+# Unified HF-only list for the frontend (curated small→large)
+ALL_MODELS = [
+    {"id": "google/flan-t5-small", "name": "FLAN-T5 Small", "size": "small"},
+    {"id": "google/flan-t5-base", "name": "FLAN-T5 Base", "size": "small"},
+    {"id": "google/flan-t5-large", "name": "FLAN-T5 Large", "size": "medium"},
+    {"id": "bigscience/bloom-3b", "name": "BLOOM 3B", "size": "medium"},
+    {"id": "EleutherAI/gpt-neo-2.7B", "name": "GPT-Neo 2.7B", "size": "medium"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B Instruct v0.2", "size": "medium"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B Instruct v0.1", "size": "medium"},
+    {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
+    {"id": "HuggingFaceH4/zephyr-7b", "name": "Zephyr 7B", "size": "medium"},
+    {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
+    {"id": "EleutherAI/gpt-j-6B", "name": "GPT-J 6B", "size": "medium"},
+    {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama 2 70B Chat", "size": "xlarge"},
+    {"id": "bigscience/bloom-176b", "name": "BLOOM 176B", "size": "xlarge"},
+    {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
+    {"id": "meta-llama/Llama-3-8b-Instruct", "name": "Llama 3 8B Instruct", "size": "large"},
 ]
 
 
 async def get_available_models() -> dict:
-    """
-    Get list of available models from Groq and HF Spaces.
-    Returns both for frontend model selector.
-    """
-    return {
-        "groq_models": GROQ_MODELS,
-        "hf_spaces_models": KNOWN_SPACES_MODELS,
-        "total": len(GROQ_MODELS) + len(KNOWN_SPACES_MODELS),
-    }
-
-
-async def query_hf_space_model(model_id: str, prompt: str) -> Optional[str]:
-    """
-    Query a model on HuggingFace Spaces.
-    This is a fallback if we want to use HF spaces directly.
-    Note: HF spaces may have rate limits and require authentication.
-    """
-    if not HF_API_TOKEN:
-        return None
-
-    # Try to find the space URL for this model
-    space = next((m for m in KNOWN_SPACES_MODELS if m["id"] == model_id), None)
-    if not space:
-        return None
-
-    try:
-        # This would hit the HF inference API
-        # For now, we focus on Groq which is more reliable
-        async with httpx.AsyncClient(timeout=5.0) as client:
-            headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
-            response = await client.post(
-                "https://api-inference.huggingface.co/models/" + model_id,
-                json={"inputs": prompt},
-                headers=headers,
-            )
-            if response.status_code == 200:
-                result = response.json()
-                # Extract generated text from response
-                if isinstance(result, list) and len(result) > 0:
-                    return result[0].get("generated_text", "")
-    except Exception as e:
-        print(f"Error querying HF space {model_id}: {e}")
-
-    return None
+    return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 def get_model_display_name(model_id: str) -> str:
-    …
-        if model["id"] == model_id:
-            return model["name"]
-
-    # Fallback: clean up the ID
+    for m in ALL_MODELS:
+        if m["id"] == model_id:
+            return m["name"]
     return model_id.split("/")[-1].split("-")[0].capitalize()
backend/app/groq_client.py
@@ -3,69 +3,44 @@ import os
 import random
 import math
 import httpx
-from groq import AsyncGroq
 from dotenv import load_dotenv
 
 load_dotenv()
 
-_GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
-# Accept either HF_API_TOKEN or HUGGINGFACE_API_TOKEN for compatibility
+# Use HF tokens only — Groq models removed from registry
 _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
-_groq_client = AsyncGroq(api_key=_GROQ_API_KEY) if _GROQ_API_KEY else None
 _HF_API_BASE = "https://api-inference.huggingface.co/models"
 
 MAX_AGENT_SPEED = 80
 
-# …
-GROQ_PREMIUM_MODELS = [
-    "mixtral-8x7b-32768",
-    "llama2-70b-4096",
-]
-
-# Open-source models available via HF Inference API (unlimited calls)
-# Expanded list of free HF models (add your API token to access)
+# Curated HF model ids (small → large)
 HF_MODELS = [
-    …
+    "google/flan-t5-small",
+    "google/flan-t5-base",
+    "google/flan-t5-large",
+    "bigscience/bloom-3b",
+    "EleutherAI/gpt-neo-2.7B",
     "mistralai/Mistral-7B-Instruct-v0.2",
     "mistralai/Mistral-7B-Instruct-v0.1",
-    "HuggingFaceH4/zephyr-7b-beta",
-    "HuggingFaceH4/zephyr-7b",
-    # Quality-focused models
-    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
     "NousResearch/Nous-Hermes-2-7b",
+    "HuggingFaceH4/zephyr-7b",
+    "tiiuae/falcon-7b-instruct",
+    "EleutherAI/gpt-j-6B",
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-chat-hf",
+    "meta-llama/Llama-2-70b-chat-hf",
+    "bigscience/bloom-176b",
+    "stabilityai/stablelm-tuned-alpha-3b",
     "meta-llama/Llama-3-8b-Instruct",
-    # Instruction-tuned models
-    "google/flan-t5-large",
-    "google/flan-t5-base",
-    # Falcon models
-    "tiiuae/falcon-7b-instruct",
-    # Other strong models
-    "EleutherAI/gpt-j-6B",
 ]
 
-# Mapping from premium Groq models to reasonable HF fallback model IDs
-# Used when Groq is unavailable but a HF token exists.
-GROQ_TO_HF_FALLBACK = {
-    "mixtral-8x7b-32768": "mistralai/Mistral-7B-Instruct-v0.2",
-    "llama2-70b-4096": "meta-llama/Llama-2-13b-chat-hf",
-}
-
 
 def is_ready():
-    """Check if …
-    return …
-
-
-def _is_groq_model(model_id: str) -> bool:
-    """Check if model is a Groq premium model."""
-    return model_id in GROQ_PREMIUM_MODELS
+    """Check if HF inference token is available."""
+    return _HF_API_TOKEN is not None
 
 
 def _is_hf_model(model_id: str) -> bool:
-    """Check if model is a HF model."""
     return model_id in HF_MODELS
@@ -166,96 +141,43 @@ Respond with ONLY valid JSON on a single line (no markdown, no code block):
 {{"action": "<search_water|collect_water|extinguish_fire|escape|vote_for_leader>", "vote_for": null, "message": "<sentence>", "reasoning": "<sentence>"}}"""
 
     try:
-        if …
-        …
-        # try to route the decision to a HF fallback model when possible.
-        elif _is_groq_model(agent.model_name) and not _groq_client and _HF_API_TOKEN:
-            fallback_model = GROQ_TO_HF_FALLBACK.get(agent.model_name)
-            if not fallback_model:
-                return _fallback_escape(agent, fire)
-
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                response = await client.post(
-                    f"{_HF_API_BASE}/{fallback_model}",
-                    headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
-                    json={
-                        "inputs": system_prompt,
-                        "parameters": {
-                            "max_new_tokens": 200,
-                            "temperature": 0.7,
-                            "top_p": 0.9,
-                        }
-                    }
-                )
-                …
-                    decision = {}
-                except json.JSONDecodeError:
-                    decision = {}
-        elif _is_hf_model(agent.model_name) and _HF_API_TOKEN:
-            # Use HF Inference API for open-source models
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                response = await client.post(
-                    f"{_HF_API_BASE}/{agent.model_name}",
-                    headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
-                    json={
-                        "inputs": system_prompt,
-                        "parameters": {
-                            "max_new_tokens": 200,
-                            "temperature": 0.7,
-                            "top_p": 0.9,
-                        }
-                    }
-                )
-                response.raise_for_status()
-                data = response.json()
-
-                if isinstance(data, list) and len(data) > 0:
-                    text = data[0].get("generated_text", "")
-                else:
-                    text = data.get("generated_text", "")
-
-                text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
-
-                try:
-                    json_start = text.find('{')
-                    json_end = text.rfind('}') + 1
-                    if json_start >= 0 and json_end > json_start:
-                        json_str = text[json_start:json_end]
-                        decision = json.loads(json_str)
-                    else:
-                        decision = {}
-                except json.JSONDecodeError:
-                    decision = {}
-
+        # Always prefer HF models — if agent requested a HF model use it, otherwise
+        # route to a default HF model from the list.
+        target_model = agent.model_name if _is_hf_model(agent.model_name) else HF_MODELS[0]
+
+        async with httpx.AsyncClient(timeout=15.0) as client:
+            response = await client.post(
+                f"{_HF_API_BASE}/{target_model}",
+                headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
+                json={
+                    "inputs": system_prompt,
+                    "parameters": {
+                        "max_new_tokens": 200,
+                        "temperature": 0.7,
+                        "top_p": 0.9,
+                    }
+                }
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            if isinstance(data, list) and len(data) > 0:
+                text = data[0].get("generated_text", "")
+            else:
+                text = data.get("generated_text", "")
+
+            text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
+
+            try:
+                json_start = text.find('{')
+                json_end = text.rfind('}') + 1
+                if json_start >= 0 and json_end > json_start:
+                    json_str = text[json_start:json_end]
+                    decision = json.loads(json_str)
+                else:
+                    decision = {}
+            except json.JSONDecodeError:
+                decision = {}
 
         action = decision.get("action", "escape")
         if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
backend/app/hf_spaces.py
@@ -1,114 +1,51 @@
 """
-Model registry
-…
+Model registry: return only Hugging Face models (no Groq entries).
+This file lists a curated set of small, medium and large HF models
+to populate the frontend model selector.
 """
 import os
-from . import groq_client
 
-HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
+HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
 
-# …
+# Curated HF model list grouped by rough size/role. This list focuses on
+# open-source models available via the HF Inference API. Availability
+# depends on your HF account and token privileges.
 ALL_MODELS = [
-    # …
-    …
-    {
-        "id": "HuggingFaceH4/zephyr-7b-beta",
-        "name": "Zephyr 7B Beta",
-        "description": "HF's high-quality 7B chat model",
-    },
-    {
-        "id": "HuggingFaceH4/zephyr-7b",
-        "name": "Zephyr 7B",
-        "description": "Fast, well-aligned 7B model",
-    },
-    # Open-source HF models - Quality-Focused
-    {
-        "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-        "name": "Nous Hermes 2 Mistral",
-        "description": "High-quality 7B with DPO training",
-    },
-    {
-        "id": "NousResearch/Nous-Hermes-2-7b",
-        "name": "Nous Hermes 2 7B",
-        "description": "Quality-focused 7B model",
-    },
-    # Open-source HF models - Meta's Llama
-    {
-        "id": "meta-llama/Llama-2-7b-chat-hf",
-        "name": "Llama 2 7B Chat",
-        "description": "Meta's Llama 2 7B chat variant",
-    },
-    {
-        "id": "meta-llama/Llama-2-13b-chat-hf",
-        "name": "Llama 2 13B Chat",
-        "description": "Meta's Llama 2 13B chat variant",
-    },
-    {
-        "id": "meta-llama/Llama-3-8b-Instruct",
-        "name": "Llama 3 8B Instruct",
-        "description": "Meta's latest Llama 3 8B model",
-    },
-    # Open-source HF models - Google & Others
-    {
-        "id": "google/flan-t5-large",
-        "name": "FLAN-T5 Large",
-        "description": "Google's instruction-tuned T5 model",
-    },
-    {
-        "id": "google/flan-t5-base",
-        "name": "FLAN-T5 Base",
-        "description": "Google's FLAN-T5 base variant",
-    },
-    {
-        "id": "tiiuae/falcon-7b-instruct",
-        "name": "Falcon 7B Instruct",
-        "description": "TII's Falcon 7B instruction-tuned",
-    },
-    {
-        "id": "EleutherAI/gpt-j-6B",
-        "name": "GPT-J 6B",
-        "description": "EleutherAI's 6B GPT model",
-    },
+    # Small / efficient
+    {"id": "google/flan-t5-small", "name": "FLAN-T5 Small", "size": "small"},
+    {"id": "google/flan-t5-base", "name": "FLAN-T5 Base", "size": "small"},
+    {"id": "google/flan-t5-large", "name": "FLAN-T5 Large", "size": "medium"},
+    {"id": "bigscience/bloom-3b", "name": "BLOOM 3B", "size": "medium"},
+    {"id": "EleutherAI/gpt-neo-2.7B", "name": "GPT-Neo 2.7B", "size": "medium"},
+
+    # Mid-size / strong instruction-tuned
+    {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B Instruct v0.2", "size": "medium"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B Instruct v0.1", "size": "medium"},
+    {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
+    {"id": "HuggingFaceH4/zephyr-7b", "name": "Zephyr 7B", "size": "medium"},
+    {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
+    {"id": "EleutherAI/gpt-j-6B", "name": "GPT-J 6B", "size": "medium"},
+
+    # Large / chat-capable
+    {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama 2 70B Chat", "size": "xlarge"},
+    {"id": "bigscience/bloom-176b", "name": "BLOOM 176B", "size": "xlarge"},
+
+    # Other notable models
+    {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
+    {"id": "meta-llama/Llama-3-8b-Instruct", "name": "Llama 3 8B Instruct", "size": "large"},
 ]
 
 
 async def get_available_models() -> dict:
-    """
-    …
-    Frontend receives models without backend categorization.
-    """
-    return {
-        "models": ALL_MODELS,
-        "total": len(ALL_MODELS),
-    }
+    """Return unified HF-only list for the frontend."""
+    return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 def get_model_display_name(model_id: str) -> str:
-    …
-            return model["name"]
-    # Fallback
+    for m in ALL_MODELS:
+        if m["id"] == model_id:
+            return m["name"]
     return model_id.split("/")[-1].split("-")[0].capitalize()
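
One design wrinkle in this commit: the curated list now lives twice, as bare ids in `groq_client.HF_MODELS` and as dicts in `hf_spaces.ALL_MODELS`, in both the root and backend trees. A small test can keep the copies from drifting; a sketch assuming the backend package layout implied by the diffstat (adjust the import path to your project):

from backend.app import groq_client, hf_spaces

def test_registries_agree():
    """The id-only client list and the dict registry should name the same models."""
    registry_ids = {m["id"] for m in hf_spaces.ALL_MODELS}
    client_ids = set(groq_client.HF_MODELS)
    assert client_ids == registry_ids, client_ids ^ registry_ids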