billyaungmyint commited on
Commit
61edca4
·
verified ·
1 Parent(s): 7330843

Sync from GitHub via hub-sync

Browse files
Files changed (2) hide show
  1. VERSION +1 -1
  2. main.py +197 -14
VERSION CHANGED
@@ -1 +1 @@
1
- e53591dafabc331639f010ada7aeec3330d50800
 
1
+ 9e5bc2edbb4834010576ab5c6e6cc01eebd0e242
main.py CHANGED
@@ -32,6 +32,12 @@ def _env(name: str, default: str = "") -> str:
32
  return (os.getenv(name) or default).strip()
33
 
34
 
 
 
 
 
 
 
35
  HF_TOKEN = _env("HF_TOKEN")
36
  HF_MODEL = _env("HF_MODEL", "zai-org/GLM-5.1")
37
 
@@ -56,6 +62,74 @@ GOOGLE_API_KEY = _env("GOOGLE_API_KEY")
56
  GOOGLE_MODEL = _env("GOOGLE_MODEL", "gemini-2.0-flash")
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
60
  hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
61
 
@@ -73,6 +147,74 @@ def _runtime_label() -> str:
73
  return f"Backend: {backend_name} | Model: {model_text}"
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def _history_to_messages(history: list, user_message: str) -> list:
77
  messages = []
78
 
@@ -152,20 +294,22 @@ def _chat_openai_compatible(
152
  return _extract_content(message) or "(empty response)"
153
 
154
 
155
- def _chat_hf(messages: list) -> str:
 
156
  response = hf_client.chat_completion(
157
- model=HF_MODEL,
158
  messages=messages,
159
  max_tokens=AI_MAX_TOKENS,
160
  )
161
  return response.choices[0].message.content or "(empty response)"
162
 
163
 
164
- def _chat_github(messages: list) -> str:
 
165
  return _chat_openai_compatible(
166
  endpoint="https://models.github.ai/inference/chat/completions",
167
  api_key=GITHUB_TOKEN,
168
- model=GITHUB_MODEL,
169
  messages=messages,
170
  )
171
 
@@ -192,10 +336,11 @@ def _chat_fireworks(messages: list) -> str:
192
  )
193
 
194
 
195
- def _chat_google(messages: list) -> str:
 
196
  if not GOOGLE_API_KEY:
197
  raise ValueError("GOOGLE_API_KEY is missing.")
198
- if not GOOGLE_MODEL:
199
  raise ValueError("GOOGLE_MODEL is not configured.")
200
 
201
  contents = []
@@ -211,7 +356,7 @@ def _chat_google(messages: list) -> str:
211
  contents.append({"role": "user", "parts": [{"text": text}]})
212
 
213
  endpoint = (
214
- f"https://generativelanguage.googleapis.com/v1beta/models/{GOOGLE_MODEL}:generateContent"
215
  f"?key={GOOGLE_API_KEY}"
216
  )
217
  payload = {
@@ -242,13 +387,13 @@ def _chat_google(messages: list) -> str:
242
  return result or "(empty response)"
243
 
244
 
245
- def _chat_once(backend: str, messages: list) -> str:
246
  if backend == "hf":
247
- return _chat_hf(messages)
248
  if backend == "google":
249
- return _chat_google(messages)
250
  if backend == "github":
251
- return _chat_github(messages)
252
  if backend == "openrouter":
253
  return _chat_openrouter(messages)
254
  if backend == "fireworks":
@@ -258,15 +403,17 @@ def _chat_once(backend: str, messages: list) -> str:
258
  )
259
 
260
 
261
- def chat_response(message: str, history: list) -> str:
262
  """Send a user message using the configured backend and return assistant text."""
263
  if not message or not message.strip():
264
  return "Please enter a message."
265
 
266
  messages = _history_to_messages(history, message.strip())
 
 
267
 
268
  try:
269
- if AI_BACKEND == "auto":
270
  errors = []
271
  for backend in AI_FALLBACK_ORDER:
272
  try:
@@ -275,7 +422,7 @@ def chat_response(message: str, history: list) -> str:
275
  errors.append(f"{backend}: {exc}")
276
  return "All providers failed. " + " | ".join(errors)
277
 
278
- return _chat_once(AI_BACKEND, messages)
279
  except Exception as e:
280
  return f"Error: {str(e)}"
281
 
@@ -288,6 +435,41 @@ with gr.Blocks(title="GitHub + HuggingFace + AI Chat Demo") as demo:
288
  )
289
  gr.Markdown(f"**{_runtime_label()}**")
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  gr.ChatInterface(
292
  chat_response,
293
  examples=[
@@ -295,6 +477,7 @@ with gr.Blocks(title="GitHub + HuggingFace + AI Chat Demo") as demo:
295
  "Explain quantum computing in simple terms.",
296
  "Give me a low-cost model selection strategy for dev vs prod.",
297
  ],
 
298
  title=None,
299
  description="Ask me anything!",
300
  )
 
32
  return (os.getenv(name) or default).strip()
33
 
34
 
35
def _csv_env(name: str, default_csv: str) -> list:
    """Read env var *name* as a comma-separated list, falling back to *default_csv*.

    Blank entries and surrounding whitespace are discarded.
    """
    raw = _env(name, default_csv)
    return [entry.strip() for entry in raw.split(",") if entry.strip()]
39
+
40
+
41
  HF_TOKEN = _env("HF_TOKEN")
42
  HF_MODEL = _env("HF_MODEL", "zai-org/GLM-5.1")
43
 
 
62
  GOOGLE_MODEL = _env("GOOGLE_MODEL", "gemini-2.0-flash")
63
 
64
 
65
# Per-provider model pools. Each can be overridden with a comma-separated
# env var (HF_MODELS / GOOGLE_MODELS / GITHUB_MODELS); otherwise the single
# legacy *_MODEL value seeds the pool.
# NOTE(review): GOOGLE_MODEL is read via _env with a non-empty default
# ("gemini-2.0-flash"), so the multi-model fallback CSV after `or` below is
# effectively dead code — confirm whether the pool default was intended to win.
HF_MODELS = _csv_env("HF_MODELS", HF_MODEL or "zai-org/GLM-5.1")
GOOGLE_MODELS = _csv_env(
    "GOOGLE_MODELS",
    GOOGLE_MODEL or "gemini-2.0-flash,gemini-2.0-flash-lite,gemini-1.5-flash",
)
GITHUB_MODELS = _csv_env(
    "GITHUB_MODELS",
    GITHUB_MODEL or "openai/gpt-4.1-mini,meta/Llama-3.3-70B-Instruct,mistral-ai/Mistral-Nemo-Instruct-2407",
)

# Lookup table consumed by _default_model_for / _model_choices_for.
PROVIDER_MODELS = {
    "hf": HF_MODELS,
    "google": GOOGLE_MODELS,
    "github": GITHUB_MODELS,
}

# Human-readable labels for the provider dropdown in the UI.
PROVIDER_LABELS = {
    "hf": "Hugging Face (z.ai)",
    "google": "Google Gemini",
    "github": "GitHub Models",
}

# Providers selectable in the UI; openrouter/fireworks remain env-only backends.
UI_PROVIDERS = ["hf", "google", "github"]

# Quick-preset definitions: each maps to a provider plus an ordered list of
# preferred models (first available one wins — see _pick_model_for_preset).
PRESET_CONFIGS = {
    "ultra-cheap": {
        "label": "Ultra Cheap",
        "provider": "google",
        "preferred_models": [
            "gemini-2.0-flash-lite",
            "gemini-1.5-flash",
            "gemini-2.0-flash",
        ],
        "note": "Minimum-cost setup for rapid dev testing.",
    },
    "budget-dev": {
        "label": "Budget Dev",
        "provider": "github",
        "preferred_models": [
            "mistral-ai/Mistral-Nemo-Instruct-2407",
            "openai/gpt-4.1-mini",
            "meta/Llama-3.3-70B-Instruct",
        ],
        "note": "Lowest-cost first for rapid iteration.",
    },
    "balanced": {
        "label": "Balanced",
        "provider": "google",
        "preferred_models": [
            "gemini-2.0-flash",
            "gemini-1.5-flash",
            "gemini-2.0-flash-lite",
        ],
        "note": "Good quality and speed for day-to-day testing.",
    },
    "quality-check": {
        "label": "Quality Check",
        "provider": "hf",
        "preferred_models": [
            "zai-org/GLM-5.1",
            "zai-org/GLM-5",
            "zai-org/GLM-4.7",
        ],
        "note": "Higher-quality pass for final validation.",
    },
}
131
+
132
+
133
  # Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
134
  hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
135
 
 
147
  return f"Backend: {backend_name} | Model: {model_text}"
148
 
149
 
150
def _default_model_for(provider: str) -> str:
    """Return the first pooled model for *provider*, else its legacy env value.

    Returns "" when the provider is unknown or unconfigured.
    """
    key = (provider or "").lower()
    pool = PROVIDER_MODELS.get(key, [])
    if pool:
        return pool[0]
    # No pool configured: fall back to the single-model env settings.
    legacy = {"hf": HF_MODEL, "google": GOOGLE_MODEL, "github": GITHUB_MODEL}
    return legacy.get(key, "")
163
+
164
+
165
def _model_choices_for(provider: str) -> tuple:
    """Build (choices, selected) for the model dropdown of *provider*."""
    key = (provider or "").lower()
    available = PROVIDER_MODELS.get(key, [])
    default = _default_model_for(key)
    # Guarantee the selected model is always present among the choices.
    if default and default not in available:
        available = [default] + available
    return available, default
172
+
173
+
174
def _on_provider_change(provider: str):
    """Gradio callback: refresh the model dropdown when the provider changes."""
    models, default = _model_choices_for(provider)
    return gr.update(choices=models, value=default)
177
+
178
+
179
def _pick_model_for_preset(provider: str, preferred_models: list) -> str:
    """Pick the first preferred model available for *provider*.

    Falls back to the first dropdown choice, then to the provider default.
    """
    choices, fallback = _model_choices_for(provider)
    for candidate in preferred_models:
        if candidate in choices:
            return candidate
    return choices[0] if choices else fallback
187
+
188
+
189
def _apply_preset(preset_key: str):
    """Apply a named preset.

    Returns gr.update objects for the provider and model dropdowns plus a
    status string for the preset Markdown area.
    """
    preset = PRESET_CONFIGS.get(preset_key)
    if preset is None:
        return gr.update(), gr.update(), "Preset not found."

    provider = preset["provider"]
    choices, _ = _model_choices_for(provider)
    chosen = _pick_model_for_preset(provider, preset["preferred_models"])
    # Keep the dropdown consistent if the picked model isn't listed yet.
    if chosen and chosen not in choices:
        choices = [chosen] + choices

    status = (
        f"Applied preset: {preset['label']} | "
        f"Provider: {PROVIDER_LABELS.get(provider, provider)} | "
        f"Model: {chosen or 'not-set'} | "
        f"{preset['note']}"
    )
    return (
        gr.update(value=provider),
        gr.update(choices=choices, value=chosen),
        status,
    )
216
+
217
+
218
  def _history_to_messages(history: list, user_message: str) -> list:
219
  messages = []
220
 
 
294
  return _extract_content(message) or "(empty response)"
295
 
296
 
297
def _chat_hf(messages: list, model_override: str = "") -> str:
    """Run a chat completion on the Hugging Face backend.

    *model_override* wins over the configured default when non-empty.
    """
    target = model_override or _default_model_for("hf")
    completion = hf_client.chat_completion(
        model=target,
        messages=messages,
        max_tokens=AI_MAX_TOKENS,
    )
    return completion.choices[0].message.content or "(empty response)"
305
 
306
 
307
def _chat_github(messages: list, model_override: str = "") -> str:
    """Run a chat completion against GitHub Models (OpenAI-compatible API).

    *model_override* wins over the configured default when non-empty.
    """
    return _chat_openai_compatible(
        endpoint="https://models.github.ai/inference/chat/completions",
        api_key=GITHUB_TOKEN,
        model=model_override or _default_model_for("github"),
        messages=messages,
    )
315
 
 
336
  )
337
 
338
 
339
+ def _chat_google(messages: list, model_override: str = "") -> str:
340
+ model = model_override or _default_model_for("google")
341
  if not GOOGLE_API_KEY:
342
  raise ValueError("GOOGLE_API_KEY is missing.")
343
+ if not model:
344
  raise ValueError("GOOGLE_MODEL is not configured.")
345
 
346
  contents = []
 
356
  contents.append({"role": "user", "parts": [{"text": text}]})
357
 
358
  endpoint = (
359
+ f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
360
  f"?key={GOOGLE_API_KEY}"
361
  )
362
  payload = {
 
387
  return result or "(empty response)"
388
 
389
 
390
+ def _chat_once(backend: str, messages: list, model_override: str = "") -> str:
391
  if backend == "hf":
392
+ return _chat_hf(messages, model_override)
393
  if backend == "google":
394
+ return _chat_google(messages, model_override)
395
  if backend == "github":
396
+ return _chat_github(messages, model_override)
397
  if backend == "openrouter":
398
  return _chat_openrouter(messages)
399
  if backend == "fireworks":
 
403
  )
404
 
405
 
406
+ def chat_response(message: str, history: list, selected_provider: str, selected_model: str) -> str:
407
  """Send a user message using the configured backend and return assistant text."""
408
  if not message or not message.strip():
409
  return "Please enter a message."
410
 
411
  messages = _history_to_messages(history, message.strip())
412
+ provider = (selected_provider or AI_BACKEND or "hf").lower().strip()
413
+ selected_model = (selected_model or "").strip()
414
 
415
  try:
416
+ if provider == "auto":
417
  errors = []
418
  for backend in AI_FALLBACK_ORDER:
419
  try:
 
422
  errors.append(f"{backend}: {exc}")
423
  return "All providers failed. " + " | ".join(errors)
424
 
425
+ return _chat_once(provider, messages, model_override=selected_model)
426
  except Exception as e:
427
  return f"Error: {str(e)}"
428
 
 
435
  )
436
  gr.Markdown(f"**{_runtime_label()}**")
437
 
438
+ initial_provider = AI_BACKEND if AI_BACKEND in UI_PROVIDERS else "hf"
439
+ initial_model_choices, initial_model = _model_choices_for(initial_provider)
440
+
441
+ preset_dropdown = gr.Dropdown(
442
+ label="Quick Preset",
443
+ choices=[(v["label"], k) for k, v in PRESET_CONFIGS.items()],
444
+ value="ultra-cheap",
445
+ )
446
+ preset_apply_btn = gr.Button("Apply Preset")
447
+ preset_status = gr.Markdown("Preset tip: start with Ultra Cheap while iterating.")
448
+
449
+ provider_dropdown = gr.Dropdown(
450
+ label="Provider",
451
+ choices=[(PROVIDER_LABELS[p], p) for p in UI_PROVIDERS],
452
+ value=initial_provider,
453
+ )
454
+ model_dropdown = gr.Dropdown(
455
+ label="Model",
456
+ choices=initial_model_choices,
457
+ value=initial_model,
458
+ allow_custom_value=True,
459
+ )
460
+
461
+ provider_dropdown.change(
462
+ fn=_on_provider_change,
463
+ inputs=[provider_dropdown],
464
+ outputs=[model_dropdown],
465
+ )
466
+
467
+ preset_apply_btn.click(
468
+ fn=_apply_preset,
469
+ inputs=[preset_dropdown],
470
+ outputs=[provider_dropdown, model_dropdown, preset_status],
471
+ )
472
+
473
  gr.ChatInterface(
474
  chat_response,
475
  examples=[
 
477
  "Explain quantum computing in simple terms.",
478
  "Give me a low-cost model selection strategy for dev vs prod.",
479
  ],
480
+ additional_inputs=[provider_dropdown, model_dropdown],
481
  title=None,
482
  description="Ask me anything!",
483
  )