adityaverma977 commited on
Commit
593e9c0
·
1 Parent(s): d4d710c

Remove Groq models; switch registry and clients to HF-only curated list

Browse files
app/groq_client.py CHANGED
@@ -3,22 +3,20 @@ import os
3
  import random
4
  import math
5
  import httpx
6
- from groq import AsyncGroq
7
  from dotenv import load_dotenv
8
 
9
  load_dotenv()
10
 
11
- _GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
  _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
13
- _client = AsyncGroq(api_key=_GROQ_API_KEY) if _GROQ_API_KEY else None
14
  _HF_API_BASE = "https://api-inference.huggingface.co/models"
15
 
16
- DEFAULT_DECISION_MODEL = "mixtral-8x7b-32768"
 
17
  MAX_AGENT_SPEED = 80
18
 
19
 
20
  def is_ready():
21
- return _client is not None
22
 
23
 
24
  def _build_fire_state_summary(agent, fire, all_agents) -> str:
@@ -119,77 +117,44 @@ RECENT RADIO CHAT:
119
  What do you do?"""
120
 
121
  try:
122
- completion = await _client.chat.completions.create(
123
- model=DEFAULT_DECISION_MODEL,
124
- messages=[
125
- {"role": "system", "content": system_prompt},
126
- {"role": "user", "content": "Make your decision."}
127
- ],
128
- response_format={"type": "json_object"},
129
- max_tokens=150,
130
- timeout=3.0
131
- )
132
- decision = json.loads(completion.choices[0].message.content)
133
-
134
- action = decision.get("action", "escape")
135
- if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
136
- action = "escape"
137
-
138
- if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
139
- action = "collect_water"
140
- elif agent.water_collected and dist_to_fire <= 350:
141
- action = "extinguish_fire"
142
-
143
- return {
144
- "action": action,
145
- "vote_for": decision.get("vote_for"),
146
- "message": decision.get("message", "Moving strategically."),
147
- "reasoning": decision.get("reasoning", "Survival and teamwork.")
148
- }
149
- except Exception as e:
150
- # If Groq fails (rate limits, network), try a HF fallback when possible
151
- print(f"Error calling groq for {agent.model_name}: {e}")
152
- err = str(e).lower()
153
- if _HF_API_TOKEN and ("rate limit" in err or "rate_limit" in err or "429" in err):
154
- fallback_hf = "mistralai/Mistral-7B-Instruct-v0.2"
155
  try:
156
- async with httpx.AsyncClient(timeout=8.0) as client:
157
- resp = await client.post(
158
- f"{_HF_API_BASE}/{fallback_hf}",
159
- headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
160
- json={"inputs": system_prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
161
- )
162
- resp.raise_for_status()
163
- data = resp.json()
164
- if isinstance(data, list) and len(data) > 0:
165
- text = data[0].get("generated_text", "")
166
- else:
167
- text = data.get("generated_text", "")
168
- text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
169
- try:
170
- js = text[text.find('{'):text.rfind('}')+1]
171
- decision = json.loads(js)
172
- except Exception:
173
- decision = {}
174
-
175
- action = decision.get("action", "escape")
176
- if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
177
- action = "escape"
178
-
179
- if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
180
- action = "collect_water"
181
- elif agent.water_collected and dist_to_fire <= 350:
182
- action = "extinguish_fire"
183
-
184
- return {
185
- "action": action,
186
- "vote_for": decision.get("vote_for"),
187
- "message": decision.get("message", "Moving strategically."),
188
- "reasoning": decision.get("reasoning", "Survival and teamwork.")
189
- }
190
- except Exception as e2:
191
- print(f"HF fallback failed: {e2}")
192
- return _fallback_escape(agent, fire)
193
  return _fallback_escape(agent, fire)
194
 
195
 
 
3
  import random
4
  import math
5
  import httpx
 
6
  from dotenv import load_dotenv
7
 
8
  load_dotenv()
9
 
 
10
  _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
 
11
  _HF_API_BASE = "https://api-inference.huggingface.co/models"
12
 
13
+ # Default HF fallback
14
+ DEFAULT_DECISION_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
15
  MAX_AGENT_SPEED = 80
16
 
17
 
18
  def is_ready():
19
+ return _HF_API_TOKEN is not None
20
 
21
 
22
  def _build_fire_state_summary(agent, fire, all_agents) -> str:
 
117
  What do you do?"""
118
 
119
  try:
120
+ # Use HF Inference API directly for the requested model (or default)
121
+ target_model = agent.model_name if agent.model_name else DEFAULT_DECISION_MODEL
122
+ async with httpx.AsyncClient(timeout=15.0) as client:
123
+ resp = await client.post(
124
+ f"{_HF_API_BASE}/{target_model}",
125
+ headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
126
+ json={"inputs": system_prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
127
+ )
128
+ resp.raise_for_status()
129
+ data = resp.json()
130
+ if isinstance(data, list) and len(data) > 0:
131
+ text = data[0].get("generated_text", "")
132
+ else:
133
+ text = data.get("generated_text", "")
134
+ text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  try:
136
+ js = text[text.find('{'):text.rfind('}')+1]
137
+ decision = json.loads(js)
138
+ except Exception:
139
+ decision = {}
140
+
141
+ action = decision.get("action", "escape")
142
+ if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
143
+ action = "escape"
144
+
145
+ if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
146
+ action = "collect_water"
147
+ elif agent.water_collected and dist_to_fire <= 350:
148
+ action = "extinguish_fire"
149
+
150
+ return {
151
+ "action": action,
152
+ "vote_for": decision.get("vote_for"),
153
+ "message": decision.get("message", "Moving strategically."),
154
+ "reasoning": decision.get("reasoning", "Survival and teamwork.")
155
+ }
156
+ except Exception as e:
157
+ print(f"HF inference failed for {agent.model_name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  return _fallback_escape(agent, fire)
159
 
160
 
app/hf_spaces.py CHANGED
@@ -5,102 +5,36 @@ import os
5
  import httpx
6
  from typing import Optional
7
 
8
- HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN", "")
9
-
10
- # Curated list of verified open-source models on HF Spaces that work reliably
11
- KNOWN_SPACES_MODELS = [
12
- {
13
- "id": "tiiuae/Falcon-7B",
14
- "name": "Falcon-7B",
15
- "space_url": "https://huggingface.co/spaces/tiiuae/falcon-chat",
16
- "description": "7B parameter open model",
17
- },
18
- {
19
- "id": "meta-llama/Llama-2-7b",
20
- "name": "Llama-2-7B",
21
- "space_url": "https://huggingface.co/spaces/meta-llama/Llama-2-7b-chat",
22
- "description": "Meta's 7B model",
23
- },
24
- {
25
- "id": "mistralai/Mistral-7B",
26
- "name": "Mistral-7B",
27
- "space_url": "https://huggingface.co/spaces/mistralai/Mistral-7B-Instruct-v0.1",
28
- "description": "Mistral's 7B model",
29
- },
30
- {
31
- "id": "HuggingFaceH4/zephyr-7b",
32
- "name": "Zephyr-7B",
33
- "space_url": "https://huggingface.co/spaces/HuggingFaceH4/zephyr-7b-beta",
34
- "description": "Zephyr 7B fine-tuned model",
35
- },
36
- {
37
- "id": "teknium/OpenHermes-2.5-Mistral-7B",
38
- "name": "OpenHermes-7B",
39
- "space_url": "https://huggingface.co/spaces/teknium/OpenHermes-2.5-Mistral-7B",
40
- "description": "OpenHermes instruction-tuned 7B",
41
- },
42
- ]
43
-
44
- # Groq models (built-in)
45
- GROQ_MODELS = [
46
- {"id": "mixtral-8x7b-32768", "name": "Mixtral 8x7B", "backend": "groq"},
47
- {"id": "llama2-70b-4096", "name": "Llama 2 70B", "backend": "groq"},
48
  ]
49
 
50
 
51
  async def get_available_models() -> dict:
52
- """
53
- Get list of available models from Groq and HF Spaces.
54
- Returns both for frontend model selector.
55
- """
56
- return {
57
- "groq_models": GROQ_MODELS,
58
- "hf_spaces_models": KNOWN_SPACES_MODELS,
59
- "total": len(GROQ_MODELS) + len(KNOWN_SPACES_MODELS),
60
- }
61
-
62
-
63
- async def query_hf_space_model(model_id: str, prompt: str) -> Optional[str]:
64
- """
65
- Query a model on HuggingFace Spaces.
66
- This is a fallback if we want to use HF spaces directly.
67
- Note: HF spaces may have rate limits and require authentication.
68
- """
69
- if not HF_API_TOKEN:
70
- return None
71
-
72
- # Try to find the space URL for this model
73
- space = next((m for m in KNOWN_SPACES_MODELS if m["id"] == model_id), None)
74
- if not space:
75
- return None
76
-
77
- try:
78
- # This would hit the HF inference API
79
- # For now, we focus on Groq which is more reliable
80
- async with httpx.AsyncClient(timeout=5.0) as client:
81
- headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
82
- response = await client.post(
83
- "https://api-inference.huggingface.co/models/" + model_id,
84
- json={"inputs": prompt},
85
- headers=headers,
86
- )
87
- if response.status_code == 200:
88
- result = response.json()
89
- # Extract generated text from response
90
- if isinstance(result, list) and len(result) > 0:
91
- return result[0].get("generated_text", "")
92
- except Exception as e:
93
- print(f"Error querying HF space {model_id}: {e}")
94
-
95
- return None
96
 
97
 
98
  def get_model_display_name(model_id: str) -> str:
99
- """Get a clean display name from model ID."""
100
- # Try to find in known models
101
- for model in GROQ_MODELS + KNOWN_SPACES_MODELS:
102
- if model["id"] == model_id:
103
- return model["name"]
104
-
105
- # Fallback: clean up the ID
106
  return model_id.split("/")[-1].split("-")[0].capitalize()
 
5
  import httpx
6
  from typing import Optional
7
 
8
+ HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or os.environ.get("HF_API_TOKEN")
9
+
10
+ # Unified HF-only list for the frontend (curated small→large)
11
+ ALL_MODELS = [
12
+ {"id": "google/flan-t5-small", "name": "FLAN-T5 Small", "size": "small"},
13
+ {"id": "google/flan-t5-base", "name": "FLAN-T5 Base", "size": "small"},
14
+ {"id": "google/flan-t5-large", "name": "FLAN-T5 Large", "size": "medium"},
15
+ {"id": "bigscience/bloom-3b", "name": "BLOOM 3B", "size": "medium"},
16
+ {"id": "EleutherAI/gpt-neo-2.7B", "name": "GPT-Neo 2.7B", "size": "medium"},
17
+ {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B Instruct v0.2", "size": "medium"},
18
+ {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B Instruct v0.1", "size": "medium"},
19
+ {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
20
+ {"id": "HuggingFaceH4/zephyr-7b", "name": "Zephyr 7B", "size": "medium"},
21
+ {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
22
+ {"id": "EleutherAI/gpt-j-6B", "name": "GPT-J 6B", "size": "medium"},
23
+ {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
24
+ {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
25
+ {"id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama 2 70B Chat", "size": "xlarge"},
26
+ {"id": "bigscience/bloom-176b", "name": "BLOOM 176B", "size": "xlarge"},
27
+ {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
28
+ {"id": "meta-llama/Llama-3-8b-Instruct", "name": "Llama 3 8B Instruct", "size": "large"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  ]
30
 
31
 
32
  async def get_available_models() -> dict:
33
+ return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
 
36
  def get_model_display_name(model_id: str) -> str:
37
+ for m in ALL_MODELS:
38
+ if m["id"] == model_id:
39
+ return m["name"]
 
 
 
 
40
  return model_id.split("/")[-1].split("-")[0].capitalize()
backend/app/groq_client.py CHANGED
@@ -3,69 +3,44 @@ import os
3
  import random
4
  import math
5
  import httpx
6
- from groq import AsyncGroq
7
  from dotenv import load_dotenv
8
 
9
  load_dotenv()
10
 
11
- _GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
- # Accept either HF_API_TOKEN or HUGGINGFACE_API_TOKEN for compatibility
13
  _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
14
- _groq_client = AsyncGroq(api_key=_GROQ_API_KEY) if _GROQ_API_KEY else None
15
  _HF_API_BASE = "https://api-inference.huggingface.co/models"
16
 
17
  MAX_AGENT_SPEED = 80
18
 
19
- # Premium Groq models (high-token limits, no rate limits for these)
20
- GROQ_PREMIUM_MODELS = [
21
- "mixtral-8x7b-32768",
22
- "llama2-70b-4096",
23
- ]
24
-
25
- # Open-source models available via HF Inference API (unlimited calls)
26
- # Expanded list of free HF models (add your API token to access)
27
  HF_MODELS = [
28
- # Fast, reliable models
 
 
 
 
29
  "mistralai/Mistral-7B-Instruct-v0.2",
30
  "mistralai/Mistral-7B-Instruct-v0.1",
31
- "HuggingFaceH4/zephyr-7b-beta",
32
- "HuggingFaceH4/zephyr-7b",
33
- # Quality-focused models
34
- "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
35
  "NousResearch/Nous-Hermes-2-7b",
36
- # Meta models
 
 
37
  "meta-llama/Llama-2-7b-chat-hf",
38
  "meta-llama/Llama-2-13b-chat-hf",
 
 
 
39
  "meta-llama/Llama-3-8b-Instruct",
40
- # Instruction-tuned models
41
- "google/flan-t5-large",
42
- "google/flan-t5-base",
43
- # Falcon models
44
- "tiiuae/falcon-7b-instruct",
45
- # Other strong models
46
- "EleutherAI/gpt-j-6B",
47
  ]
48
 
49
- # Mapping from premium Groq models to reasonable HF fallback model IDs
50
- # Used when Groq is unavailable but a HF token exists.
51
- GROQ_TO_HF_FALLBACK = {
52
- "mixtral-8x7b-32768": "mistralai/Mistral-7B-Instruct-v0.2",
53
- "llama2-70b-4096": "meta-llama/Llama-2-13b-chat-hf",
54
- }
55
-
56
 
57
  def is_ready():
58
- """Check if we have at least one backend available."""
59
- return _groq_client is not None or _HF_API_TOKEN is not None
60
-
61
-
62
- def _is_groq_model(model_id: str) -> bool:
63
- """Check if model is a Groq premium model."""
64
- return model_id in GROQ_PREMIUM_MODELS
65
 
66
 
67
  def _is_hf_model(model_id: str) -> bool:
68
- """Check if model is a HF model."""
69
  return model_id in HF_MODELS
70
 
71
 
@@ -166,96 +141,43 @@ Respond with ONLY valid JSON on a single line (no markdown, no code block):
166
  {{"action": "<search_water|collect_water|extinguish_fire|escape|vote_for_leader>", "vote_for": null, "message": "<sentence>", "reasoning": "<sentence>"}}"""
167
 
168
  try:
169
- if _is_groq_model(agent.model_name) and _groq_client:
170
- # Use Groq for premium models
171
- completion = await _groq_client.chat.completions.create(
172
- model=agent.model_name,
173
- messages=[
174
- {"role": "system", "content": system_prompt},
175
- {"role": "user", "content": "Make your decision."}
176
- ],
177
- response_format={"type": "json_object"},
178
- max_tokens=150,
179
- timeout=3.0
180
- )
181
- decision = json.loads(completion.choices[0].message.content)
182
- # If the agent requested a premium Groq model but Groq client is not configured,
183
- # try to route the decision to a HF fallback model when possible.
184
- elif _is_groq_model(agent.model_name) and not _groq_client and _HF_API_TOKEN:
185
- fallback_model = GROQ_TO_HF_FALLBACK.get(agent.model_name)
186
- if not fallback_model:
187
- return _fallback_escape(agent, fire)
188
-
189
- async with httpx.AsyncClient(timeout=10.0) as client:
190
- response = await client.post(
191
- f"{_HF_API_BASE}/{fallback_model}",
192
- headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
193
- json={
194
- "inputs": system_prompt,
195
- "parameters": {
196
- "max_new_tokens": 200,
197
- "temperature": 0.7,
198
- "top_p": 0.9,
199
- }
200
  }
201
- )
202
- response.raise_for_status()
203
- data = response.json()
204
-
205
- if isinstance(data, list) and len(data) > 0:
206
- text = data[0].get("generated_text", "")
207
- else:
208
- text = data.get("generated_text", "")
209
-
210
- text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
211
-
212
- try:
213
- json_start = text.find('{')
214
- json_end = text.rfind('}') + 1
215
- if json_start >= 0 and json_end > json_start:
216
- json_str = text[json_start:json_end]
217
- decision = json.loads(json_str)
218
- else:
219
- decision = {}
220
- except json.JSONDecodeError:
221
- decision = {}
222
- elif _is_hf_model(agent.model_name) and _HF_API_TOKEN:
223
- # Use HF Inference API for open-source models
224
- async with httpx.AsyncClient(timeout=10.0) as client:
225
- response = await client.post(
226
- f"{_HF_API_BASE}/{agent.model_name}",
227
- headers={"Authorization": f"Bearer {_HF_API_TOKEN}"},
228
- json={
229
- "inputs": system_prompt,
230
- "parameters": {
231
- "max_new_tokens": 200,
232
- "temperature": 0.7,
233
- "top_p": 0.9,
234
- }
235
- }
236
- )
237
- response.raise_for_status()
238
- data = response.json()
239
-
240
- if isinstance(data, list) and len(data) > 0:
241
- text = data[0].get("generated_text", "")
242
  else:
243
- text = data.get("generated_text", "")
244
-
245
- text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
246
-
247
- try:
248
- json_start = text.find('{')
249
- json_end = text.rfind('}') + 1
250
- if json_start >= 0 and json_end > json_start:
251
- json_str = text[json_start:json_end]
252
- decision = json.loads(json_str)
253
- else:
254
- decision = {}
255
- except json.JSONDecodeError:
256
  decision = {}
257
- else:
258
- return _fallback_escape(agent, fire)
259
 
260
  action = decision.get("action", "escape")
261
  if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
 
3
  import random
4
  import math
5
  import httpx
 
6
  from dotenv import load_dotenv
7
 
8
  load_dotenv()
9
 
10
+ # Use HF tokens only — Groq models removed from registry
 
11
  _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
 
12
  _HF_API_BASE = "https://api-inference.huggingface.co/models"
13
 
14
  MAX_AGENT_SPEED = 80
15
 
16
+ # Curated HF model ids (small → large)
 
 
 
 
 
 
 
17
  HF_MODELS = [
18
+ "google/flan-t5-small",
19
+ "google/flan-t5-base",
20
+ "google/flan-t5-large",
21
+ "bigscience/bloom-3b",
22
+ "EleutherAI/gpt-neo-2.7B",
23
  "mistralai/Mistral-7B-Instruct-v0.2",
24
  "mistralai/Mistral-7B-Instruct-v0.1",
 
 
 
 
25
  "NousResearch/Nous-Hermes-2-7b",
26
+ "HuggingFaceH4/zephyr-7b",
27
+ "tiiuae/falcon-7b-instruct",
28
+ "EleutherAI/gpt-j-6B",
29
  "meta-llama/Llama-2-7b-chat-hf",
30
  "meta-llama/Llama-2-13b-chat-hf",
31
+ "meta-llama/Llama-2-70b-chat-hf",
32
+ "bigscience/bloom-176b",
33
+ "stabilityai/stablelm-tuned-alpha-3b",
34
  "meta-llama/Llama-3-8b-Instruct",
 
 
 
 
 
 
 
35
  ]
36
 
 
 
 
 
 
 
 
37
 
38
  def is_ready():
39
+ """Check if HF inference token is available."""
40
+ return _HF_API_TOKEN is not None
 
 
 
 
 
41
 
42
 
43
  def _is_hf_model(model_id: str) -> bool:
 
44
  return model_id in HF_MODELS
45
 
46
 
 
141
  {{"action": "<search_water|collect_water|extinguish_fire|escape|vote_for_leader>", "vote_for": null, "message": "<sentence>", "reasoning": "<sentence>"}}"""
142
 
143
  try:
144
+ # Always prefer HF models — if agent requested a HF model use it, otherwise
145
+ # route to a default HF model from the list.
146
+ target_model = agent.model_name if _is_hf_model(agent.model_name) else HF_MODELS[0]
147
+
148
+ async with httpx.AsyncClient(timeout=15.0) as client:
149
+ response = await client.post(
150
+ f"{_HF_API_BASE}/{target_model}",
151
+ headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
152
+ json={
153
+ "inputs": system_prompt,
154
+ "parameters": {
155
+ "max_new_tokens": 200,
156
+ "temperature": 0.7,
157
+ "top_p": 0.9,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  }
159
+ }
160
+ )
161
+ response.raise_for_status()
162
+ data = response.json()
163
+
164
+ if isinstance(data, list) and len(data) > 0:
165
+ text = data[0].get("generated_text", "")
166
+ else:
167
+ text = data.get("generated_text", "")
168
+
169
+ text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
170
+
171
+ try:
172
+ json_start = text.find('{')
173
+ json_end = text.rfind('}') + 1
174
+ if json_start >= 0 and json_end > json_start:
175
+ json_str = text[json_start:json_end]
176
+ decision = json.loads(json_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  decision = {}
179
+ except json.JSONDecodeError:
180
+ decision = {}
181
 
182
  action = decision.get("action", "escape")
183
  if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
backend/app/hf_spaces.py CHANGED
@@ -1,114 +1,51 @@
1
  """
2
- Model registry for unified inference API (Groq + HF Spaces).
3
- All models are returned without backend categorization.
 
4
  """
5
  import os
6
- from . import groq_client
7
 
8
- HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
9
 
10
- # All available models from both backends (unified list)
 
 
11
  ALL_MODELS = [
12
- # Premium Groq models (unlimited calls, high-quality)
13
- {
14
- "id": "mixtral-8x7b-32768",
15
- "name": "Mixtral 8x7B",
16
- "description": "High-performance 8x7B mixture of experts model",
17
- },
18
- {
19
- "id": "llama2-70b-4096",
20
- "name": "Llama 2 70B",
21
- "description": "Meta's large 70B instruction-tuned model",
22
- },
23
- # Open-source HF models - Fast & Reliable
24
- {
25
- "id": "mistralai/Mistral-7B-Instruct-v0.2",
26
- "name": "Mistral 7B Instruct v0.2",
27
- "description": "Fast, reliable 7B instruction-tuned model",
28
- },
29
- {
30
- "id": "mistralai/Mistral-7B-Instruct-v0.1",
31
- "name": "Mistral 7B Instruct v0.1",
32
- "description": "Original Mistral 7B instruct version",
33
- },
34
- {
35
- "id": "HuggingFaceH4/zephyr-7b-beta",
36
- "name": "Zephyr 7B Beta",
37
- "description": "HF's high-quality 7B chat model",
38
- },
39
- {
40
- "id": "HuggingFaceH4/zephyr-7b",
41
- "name": "Zephyr 7B",
42
- "description": "Fast, well-aligned 7B model",
43
- },
44
- # Open-source HF models - Quality-Focused
45
- {
46
- "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
47
- "name": "Nous Hermes 2 Mistral",
48
- "description": "High-quality 7B with DPO training",
49
- },
50
- {
51
- "id": "NousResearch/Nous-Hermes-2-7b",
52
- "name": "Nous Hermes 2 7B",
53
- "description": "Quality-focused 7B model",
54
- },
55
- # Open-source HF models - Meta's Llama
56
- {
57
- "id": "meta-llama/Llama-2-7b-chat-hf",
58
- "name": "Llama 2 7B Chat",
59
- "description": "Meta's Llama 2 7B chat variant",
60
- },
61
- {
62
- "id": "meta-llama/Llama-2-13b-chat-hf",
63
- "name": "Llama 2 13B Chat",
64
- "description": "Meta's Llama 2 13B chat variant",
65
- },
66
- {
67
- "id": "meta-llama/Llama-3-8b-Instruct",
68
- "name": "Llama 3 8B Instruct",
69
- "description": "Meta's latest Llama 3 8B model",
70
- },
71
- # Open-source HF models - Google & Others
72
- {
73
- "id": "google/flan-t5-large",
74
- "name": "FLAN-T5 Large",
75
- "description": "Google's instruction-tuned T5 model",
76
- },
77
- {
78
- "id": "google/flan-t5-base",
79
- "name": "FLAN-T5 Base",
80
- "description": "Google's FLAN-T5 base variant",
81
- },
82
- {
83
- "id": "tiiuae/falcon-7b-instruct",
84
- "name": "Falcon 7B Instruct",
85
- "description": "TII's Falcon 7B instruction-tuned",
86
- },
87
- {
88
- "id": "EleutherAI/gpt-j-6B",
89
- "name": "GPT-J 6B",
90
- "description": "EleutherAI's 6B GPT model",
91
- },
92
  ]
93
 
94
 
95
-
96
  async def get_available_models() -> dict:
97
- """
98
- Get unified list of all available models (Groq + HF).
99
- Frontend receives models without backend categorization.
100
- """
101
- return {
102
- "models": ALL_MODELS,
103
- "total": len(ALL_MODELS),
104
- }
105
 
106
 
107
  def get_model_display_name(model_id: str) -> str:
108
- """Get clean display name from model ID."""
109
- for model in ALL_MODELS:
110
- if model["id"] == model_id:
111
- return model["name"]
112
- # Fallback
113
  return model_id.split("/")[-1].split("-")[0].capitalize()
114
 
 
1
  """
2
+ Model registry: return only Hugging Face models (no Groq entries).
3
+ This file lists a curated set of small, medium and large HF models
4
+ to populate the frontend model selector.
5
  """
6
  import os
 
7
 
8
+ HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
9
 
10
+ # Curated HF model list grouped by rough size/role. This list focuses on
11
+ # open-source models available via the HF Inference API. Availability
12
+ # depends on your HF account and token privileges.
13
  ALL_MODELS = [
14
+ # Small / efficient
15
+ {"id": "google/flan-t5-small", "name": "FLAN-T5 Small", "size": "small"},
16
+ {"id": "google/flan-t5-base", "name": "FLAN-T5 Base", "size": "small"},
17
+ {"id": "google/flan-t5-large", "name": "FLAN-T5 Large", "size": "medium"},
18
+ {"id": "bigscience/bloom-3b", "name": "BLOOM 3B", "size": "medium"},
19
+ {"id": "EleutherAI/gpt-neo-2.7B", "name": "GPT-Neo 2.7B", "size": "medium"},
20
+
21
+ # Mid-size / strong instruction-tuned
22
+ {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B Instruct v0.2", "size": "medium"},
23
+ {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B Instruct v0.1", "size": "medium"},
24
+ {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
25
+ {"id": "HuggingFaceH4/zephyr-7b", "name": "Zephyr 7B", "size": "medium"},
26
+ {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
27
+ {"id": "EleutherAI/gpt-j-6B", "name": "GPT-J 6B", "size": "medium"},
28
+
29
+ # Large / chat-capable
30
+ {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
31
+ {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
32
+ {"id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama 2 70B Chat", "size": "xlarge"},
33
+ {"id": "bigscience/bloom-176b", "name": "BLOOM 176B", "size": "xlarge"},
34
+
35
+ # Other notable models
36
+ {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
37
+ {"id": "meta-llama/Llama-3-8b-Instruct", "name": "Llama 3 8B Instruct", "size": "large"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ]
39
 
40
 
 
41
  async def get_available_models() -> dict:
42
+ """Return unified HF-only list for the frontend."""
43
+ return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 
 
 
 
44
 
45
 
46
  def get_model_display_name(model_id: str) -> str:
47
+ for m in ALL_MODELS:
48
+ if m["id"] == model_id:
49
+ return m["name"]
 
 
50
  return model_id.split("/")[-1].split("-")[0].capitalize()
51