adityaverma977 committed
Commit · 593e9c0
1 Parent(s): d4d710c

Remove Groq models; switch registry and clients to HF-only curated list
Files changed:
- app/groq_client.py  +40 -75
- app/hf_spaces.py  +25 -91
- backend/app/groq_client.py  +49 -127
- backend/app/hf_spaces.py  +36 -99
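
All four files converge on one call pattern: POST the prompt to the Hugging Face Inference API with httpx and read `generated_text` out of the response. A minimal standalone sketch of that pattern, with the endpoint and payload shape taken from the diffs below (the function name and example model are mine, not the repo's):

import asyncio
import os

import httpx

HF_API_BASE = "https://api-inference.huggingface.co/models"
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")  # optional; anonymous calls are heavily rate-limited

async def generate(model_id: str, prompt: str) -> str:
    # Text-generation endpoints usually answer with [{"generated_text": ...}],
    # but some backends return a single dict, so handle both shapes.
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.post(
            f"{HF_API_BASE}/{model_id}",
            headers=headers,
            json={"inputs": prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
        )
        resp.raise_for_status()
        data = resp.json()
    item = data[0] if isinstance(data, list) and data else data
    return item.get("generated_text", "")

# e.g. asyncio.run(generate("mistralai/Mistral-7B-Instruct-v0.2", "Say hi"))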
app/groq_client.py
@@ -3,22 +3,20 @@ import os
 import random
 import math
 import httpx
-from groq import AsyncGroq
 from dotenv import load_dotenv
 
 load_dotenv()
 
-_GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
-_client = AsyncGroq(api_key=_GROQ_API_KEY) if _GROQ_API_KEY else None
 _HF_API_BASE = "https://api-inference.huggingface.co/models"
 
+# Default HF fallback
+DEFAULT_DECISION_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
 MAX_AGENT_SPEED = 80
 
 
 def is_ready():
-    return …
+    return _HF_API_TOKEN is not None
 
 
 def _build_fire_state_summary(agent, fire, all_agents) -> str:
@@ -119,77 +117,44 @@ RECENT RADIO CHAT:
 What do you do?"""
 
     try:
-        …
-        if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
-            action = "collect_water"
-        elif agent.water_collected and dist_to_fire <= 350:
-            action = "extinguish_fire"
-
-        return {
-            "action": action,
-            "vote_for": decision.get("vote_for"),
-            "message": decision.get("message", "Moving strategically."),
-            "reasoning": decision.get("reasoning", "Survival and teamwork.")
-        }
-    except Exception as e:
-        # If Groq fails (rate limits, network), try a HF fallback when possible
-        print(f"Error calling groq for {agent.model_name}: {e}")
-        err = str(e).lower()
-        if _HF_API_TOKEN and ("rate limit" in err or "rate_limit" in err or "429" in err):
-            fallback_hf = "mistralai/Mistral-7B-Instruct-v0.2"
-            try:
-                …
-                if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
-                    action = "collect_water"
-                elif agent.water_collected and dist_to_fire <= 350:
-                    action = "extinguish_fire"
-
-                return {
-                    "action": action,
-                    "vote_for": decision.get("vote_for"),
-                    "message": decision.get("message", "Moving strategically."),
-                    "reasoning": decision.get("reasoning", "Survival and teamwork.")
-                }
-            except Exception as e2:
-                print(f"HF fallback failed: {e2}")
-                return _fallback_escape(agent, fire)
+        # Use HF Inference API directly for the requested model (or default)
+        target_model = agent.model_name if agent.model_name else DEFAULT_DECISION_MODEL
+        async with httpx.AsyncClient(timeout=15.0) as client:
+            resp = await client.post(
+                f"{_HF_API_BASE}/{target_model}",
+                headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
+                json={"inputs": system_prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
+            )
+            resp.raise_for_status()
+            data = resp.json()
+        if isinstance(data, list) and len(data) > 0:
+            text = data[0].get("generated_text", "")
+        else:
+            text = data.get("generated_text", "")
+        text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
+        try:
+            js = text[text.find('{'):text.rfind('}')+1]
+            decision = json.loads(js)
+        except Exception:
+            decision = {}
+
+        action = decision.get("action", "escape")
+        if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
+            action = "escape"
+
+        if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
+            action = "collect_water"
+        elif agent.water_collected and dist_to_fire <= 350:
+            action = "extinguish_fire"
+
+        return {
+            "action": action,
+            "vote_for": decision.get("vote_for"),
+            "message": decision.get("message", "Moving strategically."),
+            "reasoning": decision.get("reasoning", "Survival and teamwork.")
+        }
+    except Exception as e:
+        print(f"HF inference failed for {agent.model_name}: {e}")
         return _fallback_escape(agent, fire)
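
Both the old and the new code recover the decision by slicing from the first `{` to the last `}` in the raw completion and parsing that span. The logic is small enough to test on its own; a self-contained sketch (the function name is mine, the slicing rule is the one in the diff):

import json

def extract_decision(text: str) -> dict:
    """Parse the first {...} span in model output; return {} if absent or malformed."""
    start, end = text.find('{'), text.rfind('}') + 1
    if start < 0 or end <= start:
        return {}
    try:
        return json.loads(text[start:end])
    except json.JSONDecodeError:
        return {}

assert extract_decision('noise {"action": "escape"} tail') == {"action": "escape"}
assert extract_decision("no json here") == {}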
app/hf_spaces.py
@@ -5,102 +5,36 @@ import os
 import httpx
 from typing import Optional
 
-HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
+HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or os.environ.get("HF_API_TOKEN")
 
-# …
-KNOWN_SPACES_MODELS = [
-    …
-    {
-        "id": "HuggingFaceH4/zephyr-7b",
-        "name": "Zephyr-7B",
-        "space_url": "https://huggingface.co/spaces/HuggingFaceH4/zephyr-7b-beta",
-        "description": "Zephyr 7B fine-tuned model",
-    },
-    {
-        "id": "teknium/OpenHermes-2.5-Mistral-7B",
-        "name": "OpenHermes-7B",
-        "space_url": "https://huggingface.co/spaces/teknium/OpenHermes-2.5-Mistral-7B",
-        "description": "OpenHermes instruction-tuned 7B",
-    },
-]
-
-# Groq models (built-in)
-GROQ_MODELS = [
-    {"id": "mixtral-8x7b-32768", "name": "Mixtral 8x7B", "backend": "groq"},
-    {"id": "llama2-70b-4096", "name": "Llama 2 70B", "backend": "groq"},
+# Unified HF-only list for the frontend (curated small→large)
+ALL_MODELS = [
+    {"id": "google/flan-t5-small", "name": "FLAN-T5 Small", "size": "small"},
+    {"id": "google/flan-t5-base", "name": "FLAN-T5 Base", "size": "small"},
+    {"id": "google/flan-t5-large", "name": "FLAN-T5 Large", "size": "medium"},
+    {"id": "bigscience/bloom-3b", "name": "BLOOM 3B", "size": "medium"},
+    {"id": "EleutherAI/gpt-neo-2.7B", "name": "GPT-Neo 2.7B", "size": "medium"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B Instruct v0.2", "size": "medium"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B Instruct v0.1", "size": "medium"},
+    {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
+    {"id": "HuggingFaceH4/zephyr-7b", "name": "Zephyr 7B", "size": "medium"},
+    {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
+    {"id": "EleutherAI/gpt-j-6B", "name": "GPT-J 6B", "size": "medium"},
+    {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama 2 70B Chat", "size": "xlarge"},
+    {"id": "bigscience/bloom-176b", "name": "BLOOM 176B", "size": "xlarge"},
+    {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
+    {"id": "meta-llama/Llama-3-8b-Instruct", "name": "Llama 3 8B Instruct", "size": "large"},
 ]
 
 
 async def get_available_models() -> dict:
-    """
-    Get list of available models from Groq and HF Spaces.
-    Returns both for frontend model selector.
-    """
-    return {
-        "groq_models": GROQ_MODELS,
-        "hf_spaces_models": KNOWN_SPACES_MODELS,
-        "total": len(GROQ_MODELS) + len(KNOWN_SPACES_MODELS),
-    }
-
-
-async def query_hf_space_model(model_id: str, prompt: str) -> Optional[str]:
-    """
-    Query a model on HuggingFace Spaces.
-    This is a fallback if we want to use HF spaces directly.
-    Note: HF spaces may have rate limits and require authentication.
-    """
-    if not HF_API_TOKEN:
-        return None
-
-    # Try to find the space URL for this model
-    space = next((m for m in KNOWN_SPACES_MODELS if m["id"] == model_id), None)
-    if not space:
-        return None
-
-    try:
-        # This would hit the HF inference API
-        # For now, we focus on Groq which is more reliable
-        async with httpx.AsyncClient(timeout=5.0) as client:
-            headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
-            response = await client.post(
-                "https://api-inference.huggingface.co/models/" + model_id,
-                json={"inputs": prompt},
-                headers=headers,
-            )
-            if response.status_code == 200:
-                result = response.json()
-                # Extract generated text from response
-                if isinstance(result, list) and len(result) > 0:
-                    return result[0].get("generated_text", "")
-    except Exception as e:
-        print(f"Error querying HF space {model_id}: {e}")
-
-    return None
+    return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 def get_model_display_name(model_id: str) -> str:
-    …
-        if model["id"] == model_id:
-            return model["name"]
-
-    # Fallback: clean up the ID
+    for m in ALL_MODELS:
+        if m["id"] == model_id:
+            return m["name"]
     return model_id.split("/")[-1].split("-")[0].capitalize()
backend/app/groq_client.py
@@ -3,69 +3,44 @@ import os
 import random
 import math
 import httpx
-from groq import AsyncGroq
 from dotenv import load_dotenv
 
 load_dotenv()
 
-_GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
-# Accept either HF_API_TOKEN or HUGGINGFACE_API_TOKEN for compatibility
+# Use HF tokens only — Groq models removed from registry
 _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
-_groq_client = AsyncGroq(api_key=_GROQ_API_KEY) if _GROQ_API_KEY else None
 _HF_API_BASE = "https://api-inference.huggingface.co/models"
 
 MAX_AGENT_SPEED = 80
 
-# …
-GROQ_PREMIUM_MODELS = [
-    "mixtral-8x7b-32768",
-    "llama2-70b-4096",
-]
-
-# Open-source models available via HF Inference API (unlimited calls)
-# Expanded list of free HF models (add your API token to access)
+# Curated HF model ids (small → large)
 HF_MODELS = [
-    …
+    "google/flan-t5-small",
+    "google/flan-t5-base",
+    "google/flan-t5-large",
+    "bigscience/bloom-3b",
+    "EleutherAI/gpt-neo-2.7B",
     "mistralai/Mistral-7B-Instruct-v0.2",
     "mistralai/Mistral-7B-Instruct-v0.1",
-    "HuggingFaceH4/zephyr-7b-beta",
-    "HuggingFaceH4/zephyr-7b",
-    # Quality-focused models
-    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
     "NousResearch/Nous-Hermes-2-7b",
+    "HuggingFaceH4/zephyr-7b",
+    "tiiuae/falcon-7b-instruct",
+    "EleutherAI/gpt-j-6B",
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-chat-hf",
+    "meta-llama/Llama-2-70b-chat-hf",
+    "bigscience/bloom-176b",
+    "stabilityai/stablelm-tuned-alpha-3b",
     "meta-llama/Llama-3-8b-Instruct",
-    # Instruction-tuned models
-    "google/flan-t5-large",
-    "google/flan-t5-base",
-    # Falcon models
-    "tiiuae/falcon-7b-instruct",
-    # Other strong models
-    "EleutherAI/gpt-j-6B",
 ]
 
-# Mapping from premium Groq models to reasonable HF fallback model IDs
-# Used when Groq is unavailable but a HF token exists.
-GROQ_TO_HF_FALLBACK = {
-    "mixtral-8x7b-32768": "mistralai/Mistral-7B-Instruct-v0.2",
-    "llama2-70b-4096": "meta-llama/Llama-2-13b-chat-hf",
-}
-
 
 def is_ready():
-    """Check if …
-    return …
-
-
-def _is_groq_model(model_id: str) -> bool:
-    """Check if model is a Groq premium model."""
-    return model_id in GROQ_PREMIUM_MODELS
+    """Check if HF inference token is available."""
+    return _HF_API_TOKEN is not None
 
 
 def _is_hf_model(model_id: str) -> bool:
-    """Check if model is a HF model."""
     return model_id in HF_MODELS
@@ -166,96 +141,43 @@ Respond with ONLY valid JSON on a single line (no markdown, no code block):
 {{"action": "<search_water|collect_water|extinguish_fire|escape|vote_for_leader>", "vote_for": null, "message": "<sentence>", "reasoning": "<sentence>"}}"""
 
     try:
-        if …
-        …
-        # try to route the decision to a HF fallback model when possible.
-        elif _is_groq_model(agent.model_name) and not _groq_client and _HF_API_TOKEN:
-            fallback_model = GROQ_TO_HF_FALLBACK.get(agent.model_name)
-            if not fallback_model:
-                return _fallback_escape(agent, fire)
-
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                response = await client.post(
-                    f"{_HF_API_BASE}/{fallback_model}",
-                    headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
-                    json={
-                        "inputs": system_prompt,
-                        "parameters": {
-                            "max_new_tokens": 200,
-                            "temperature": 0.7,
-                            "top_p": 0.9,
-                        }
-                    }
-                )
-                …
-                    decision = {}
-                except json.JSONDecodeError:
-                    decision = {}
-        elif _is_hf_model(agent.model_name) and _HF_API_TOKEN:
-            # Use HF Inference API for open-source models
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                response = await client.post(
-                    f"{_HF_API_BASE}/{agent.model_name}",
-                    headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
-                    json={
-                        "inputs": system_prompt,
-                        "parameters": {
-                            "max_new_tokens": 200,
-                            "temperature": 0.7,
-                            "top_p": 0.9,
-                        }
-                    }
-                )
-                response.raise_for_status()
-                data = response.json()
-
-                if isinstance(data, list) and len(data) > 0:
-                    text = data[0].get("generated_text", "")
-                else:
-                    text = data.get("generated_text", "")
-
-                text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
-
-                try:
-                    json_start = text.find('{')
-                    json_end = text.rfind('}') + 1
-                    if json_start >= 0 and json_end > json_start:
-                        json_str = text[json_start:json_end]
-                        decision = json.loads(json_str)
-                    else:
-                        decision = {}
-                except json.JSONDecodeError:
-                    decision = {}
-
+        # Always prefer HF models — if agent requested a HF model use it, otherwise
+        # route to a default HF model from the list.
+        target_model = agent.model_name if _is_hf_model(agent.model_name) else HF_MODELS[0]
+
+        async with httpx.AsyncClient(timeout=15.0) as client:
+            response = await client.post(
+                f"{_HF_API_BASE}/{target_model}",
+                headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
+                json={
+                    "inputs": system_prompt,
+                    "parameters": {
+                        "max_new_tokens": 200,
+                        "temperature": 0.7,
+                        "top_p": 0.9,
+                    }
+                }
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            if isinstance(data, list) and len(data) > 0:
+                text = data[0].get("generated_text", "")
+            else:
+                text = data.get("generated_text", "")
+
+            text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
+
+            try:
+                json_start = text.find('{')
+                json_end = text.rfind('}') + 1
+                if json_start >= 0 and json_end > json_start:
+                    json_str = text[json_start:json_end]
+                    decision = json.loads(json_str)
+                else:
+                    decision = {}
+            except json.JSONDecodeError:
+                decision = {}
 
         action = decision.get("action", "escape")
         if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
backend/app/hf_spaces.py
@@ -1,114 +1,51 @@
 """
-Model registry
-…
+Model registry: return only Hugging Face models (no Groq entries).
+This file lists a curated set of small, medium and large HF models
+to populate the frontend model selector.
 """
 import os
-from . import groq_client
 
-HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
+HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
 
-# …
+# Curated HF model list grouped by rough size/role. This list focuses on
+# open-source models available via the HF Inference API. Availability
+# depends on your HF account and token privileges.
 ALL_MODELS = [
-    # …
-    …
-    {
-        "id": "HuggingFaceH4/zephyr-7b-beta",
-        "name": "Zephyr 7B Beta",
-        "description": "HF's high-quality 7B chat model",
-    },
-    {
-        "id": "HuggingFaceH4/zephyr-7b",
-        "name": "Zephyr 7B",
-        "description": "Fast, well-aligned 7B model",
-    },
-    # Open-source HF models - Quality-Focused
-    {
-        "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-        "name": "Nous Hermes 2 Mistral",
-        "description": "High-quality 7B with DPO training",
-    },
-    {
-        "id": "NousResearch/Nous-Hermes-2-7b",
-        "name": "Nous Hermes 2 7B",
-        "description": "Quality-focused 7B model",
-    },
-    # Open-source HF models - Meta's Llama
-    {
-        "id": "meta-llama/Llama-2-7b-chat-hf",
-        "name": "Llama 2 7B Chat",
-        "description": "Meta's Llama 2 7B chat variant",
-    },
-    {
-        "id": "meta-llama/Llama-2-13b-chat-hf",
-        "name": "Llama 2 13B Chat",
-        "description": "Meta's Llama 2 13B chat variant",
-    },
-    {
-        "id": "meta-llama/Llama-3-8b-Instruct",
-        "name": "Llama 3 8B Instruct",
-        "description": "Meta's latest Llama 3 8B model",
-    },
-    # Open-source HF models - Google & Others
-    {
-        "id": "google/flan-t5-large",
-        "name": "FLAN-T5 Large",
-        "description": "Google's instruction-tuned T5 model",
-    },
-    {
-        "id": "google/flan-t5-base",
-        "name": "FLAN-T5 Base",
-        "description": "Google's FLAN-T5 base variant",
-    },
-    {
-        "id": "tiiuae/falcon-7b-instruct",
-        "name": "Falcon 7B Instruct",
-        "description": "TII's Falcon 7B instruction-tuned",
-    },
-    {
-        "id": "EleutherAI/gpt-j-6B",
-        "name": "GPT-J 6B",
-        "description": "EleutherAI's 6B GPT model",
-    },
+    # Small / efficient
+    {"id": "google/flan-t5-small", "name": "FLAN-T5 Small", "size": "small"},
+    {"id": "google/flan-t5-base", "name": "FLAN-T5 Base", "size": "small"},
+    {"id": "google/flan-t5-large", "name": "FLAN-T5 Large", "size": "medium"},
+    {"id": "bigscience/bloom-3b", "name": "BLOOM 3B", "size": "medium"},
+    {"id": "EleutherAI/gpt-neo-2.7B", "name": "GPT-Neo 2.7B", "size": "medium"},
+
+    # Mid-size / strong instruction-tuned
+    {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B Instruct v0.2", "size": "medium"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B Instruct v0.1", "size": "medium"},
+    {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
+    {"id": "HuggingFaceH4/zephyr-7b", "name": "Zephyr 7B", "size": "medium"},
+    {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
+    {"id": "EleutherAI/gpt-j-6B", "name": "GPT-J 6B", "size": "medium"},
+
+    # Large / chat-capable
+    {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
+    {"id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama 2 70B Chat", "size": "xlarge"},
+    {"id": "bigscience/bloom-176b", "name": "BLOOM 176B", "size": "xlarge"},
+
+    # Other notable models
+    {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
+    {"id": "meta-llama/Llama-3-8b-Instruct", "name": "Llama 3 8B Instruct", "size": "large"},
 ]
 
 
 async def get_available_models() -> dict:
-    """
-    …
-    Frontend receives models without backend categorization.
-    """
-    return {
-        "models": ALL_MODELS,
-        "total": len(ALL_MODELS),
-    }
+    """Return unified HF-only list for the frontend."""
+    return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 def get_model_display_name(model_id: str) -> str:
-    …
-            return model["name"]
-    # Fallback
+    for m in ALL_MODELS:
+        if m["id"] == model_id:
+            return m["name"]
     return model_id.split("/")[-1].split("-")[0].capitalize()
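
One design wrinkle in this commit: the curated list now lives twice, as bare ids in `groq_client.HF_MODELS` and as dicts in `hf_spaces.ALL_MODELS`, in both the root and backend trees. A small test can keep the copies from drifting; a sketch assuming the backend package layout implied by the diffstat (adjust the import path to your project):

from backend.app import groq_client, hf_spaces

def test_registries_agree():
    """The id-only client list and the dict registry should name the same models."""
    registry_ids = {m["id"] for m in hf_spaces.ALL_MODELS}
    client_ids = set(groq_client.HF_MODELS)
    assert client_ids == registry_ids, client_ids ^ registry_ids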