Kai Izumoto committed on
Commit 774ea65 · verified · 1 Parent(s): 7df3a4b

Update app.py

Files changed (1)
  1. app.py +211 -550
app.py CHANGED
@@ -1,600 +1,261 @@
1
- ## 🚨 Hugging Face Space 503 Error - Need Code Fix
2
-
3
- I have a persistent 503 "Something went wrong when restarting this Space" error with my Hugging Face Space. The local server works perfectly, but the HF deployment fails. Here's my complete setup:
4
-
5
- ### 📋 Project Overview
6
- **SuperCoder** - AI coding assistant running locally with Ollama, accessible via HF Space web interface
7
- - **Local Backend:** Python server using Ollama models
8
- - **Web Frontend:** Gradio interface on Hugging Face Space
9
- - **Connection:** ngrok tunnel from HF to local server
10
-
11
- ### 🏠 Complete Local Setup (All Working)
12
-
13
- #### 1. requirements.txt (Local)
14
- ```txt
15
- gradio>=4.0.0
16
- requests>=2.25.0
17
- 2. config.py (Local - 3,798 bytes)
18
- python
19
- """
20
- Configuration and constants for SuperCoder.
21
- Centralized settings for easy maintenance and deployment.
22
  """
23
- import os
24
- from pathlib import Path
25
-
26
- # ============================================================================
27
- # Model Configuration (llama.cpp Server)
28
- # ============================================================================
29
- LLAMA_SERVER_PATH = "/Users/izumotofam/llama.cpp/build/bin/llama-server"
30
- LLAMA_MODEL = "llama2:latest"
31
- LLAMA_SERVER_HOST = "127.0.0.1"
32
- LLAMA_SERVER_PORT = 8080
33
- LLAMA_SERVER_URL = "http://localhost:11434"
34
-
35
- # Server startup settings
36
- SERVER_STARTUP_TIMEOUT = 30 # seconds to wait for server to start
37
- SERVER_HEALTH_CHECK_INTERVAL = 0.5 # seconds between health checks
38
-
39
- # Model parameters
40
- MODEL_THREADS = 1
41
- MODEL_CONTEXT_WINDOW = 1024
42
- MODEL_GPU_LAYERS = 0
43
- MODEL_BATCH_SIZE = 64
44
-
45
- # Gradio UI settings
46
- SERVER_NAME = "127.0.0.1"
47
- SERVER_PORT = 7860
48
- APP_TITLE = "SuperCoder Pro"
49
- APP_DESCRIPTION = "AI-Powered Coding Assistant"
50
- CHAT_HEIGHT = 500
51
-
52
- # Model settings
53
- DEFAULT_TEMPERATURE = 0.1
54
- DEFAULT_MAX_TOKENS = 512
55
- DEFAULT_TOP_P = 0.9
56
- MIN_TOKENS = 128
57
- SAFE_MAX_TOKENS_CAP = 2048
58
- SYSTEM_OVERHEAD_TOKENS = 100
59
- 3. supercoder.py (Local Backend - 413 lines - DON'T upload to HF)
60
- python
61
- """
62
- SuperCoder - Unified Application
63
- All-in-one file containing Gradio UI, API server, tunnel support, and AI logic.
64
- """
65
- import os
66
- import sys
67
- import time
68
- import uuid
69
- import argparse
70
- import subprocess
71
- import traceback
72
- import requests
73
- import json
74
- from pathlib import Path
75
- from typing import Optional, List, Dict, Any, Generator, Tuple
76
- from collections import defaultdict
77
- from functools import partial
78
- from multiprocessing import Process
79
-
80
- import gradio as gr
81
- from fastapi import FastAPI, HTTPException
82
- from fastapi.middleware.cors import CORSMiddleware
83
- from pydantic import BaseModel
84
- import uvicorn
85
-
86
- # Import config (only external dependency)
87
- from config import *
88
-
89
- # ============================================================================
90
- # SERVER MANAGER - llama.cpp server lifecycle
91
- # ============================================================================
92
- _server_process = None
93
- _server_info = {}
94
-
95
- def check_server_health() -> bool:
96
- try:
97
- # Check if Ollama is responding
98
- response = requests.get(f"{LLAMA_SERVER_URL}/api/tags", timeout=2)
99
- return response.status_code == 200 and len(response.json().get("models", [])) > 0
100
- except:
101
- return False
102
-
103
- def start_llama_server() -> bool:
104
- global _server_process, _server_info
105
-
106
- if _server_process and check_server_health():
107
- return True
108
-
109
- print(f"\nπŸš€ Starting llama.cpp server on {LLAMA_SERVER_URL}")
110
-
111
- try:
112
- cmd = [
113
- LLAMA_SERVER_PATH, "-hf", LLAMA_MODEL,
114
- "-c", str(MODEL_CONTEXT_WINDOW),
115
- "-t", str(MODEL_THREADS),
116
- "-ngl", str(MODEL_GPU_LAYERS),
117
- "--host", LLAMA_SERVER_HOST, "--port", str(LLAMA_SERVER_PORT)
118
- ]
119
-
120
- _server_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
121
- _server_info = {'pid': _server_process.pid, 'url': LLAMA_SERVER_URL}
122
-
123
- # Wait for ready
124
- for _ in range(SERVER_STARTUP_TIMEOUT * 2):
125
- if check_server_health():
126
- print(f"βœ… Server ready (PID: {_server_process.pid})")
127
- return True
128
- time.sleep(0.5)
129
-
130
- return False
131
- except Exception as e:
132
- print(f"❌ Server start failed: {e}")
133
- return False
134
-
135
- def stop_llama_server():
136
- global _server_process
137
- if _server_process:
138
- _server_process.terminate()
139
- _server_process.wait()
140
- _server_process = None
141
-
142
- def get_llm():
143
- return True if check_server_health() else None
144
-
145
- def get_model_info():
146
- return _server_info.copy()
147
-
148
- # ============================================================================
149
- # SESSION MANAGER - Chat history
150
- # ============================================================================
151
- SESSION_STORE = {}
152
- SESSION_METADATA = defaultdict(dict)
153
-
154
- def get_session_id(request: gr.Request) -> str:
155
- return request.session_hash
156
-
157
- def get_history(session_id: str, create_if_missing: bool = False) -> List[Dict]:
158
- if session_id not in SESSION_STORE and create_if_missing:
159
- SESSION_STORE[session_id] = []
160
- return SESSION_STORE.get(session_id, [])
161
-
162
- def add_to_history(session_id: str, role: str, text: str):
163
- history = get_history(session_id, create_if_missing=True)
164
- history.append({"role": role, "text": text, "timestamp": time.time()})
165
-
166
- def clear_history(session_id: str):
167
- if session_id in SESSION_STORE:
168
- SESSION_STORE[session_id] = []
169
-
170
- def convert_history_to_gradio_messages(history: List[Dict]) -> List[Dict]:
171
- return [{"role": msg["role"], "content": msg["text"]} for msg in history]
172
-
173
- def calculate_safe_max_tokens(history: List[Dict], requested: int, max_context: int) -> int:
174
- history_chars = sum(len(msg["text"]) for msg in history)
175
- estimated_tokens = history_chars // 4
176
- available = max_context - estimated_tokens - SYSTEM_OVERHEAD_TOKENS
177
- return max(min(requested, available, SAFE_MAX_TOKENS_CAP), MIN_TOKENS)
178
-
179
- def get_recent_history(session_id: str, max_messages: int = 10) -> List[Dict]:
180
- history = get_history(session_id)
181
- return history[-max_messages:] if len(history) > max_messages else history
182
-
183
- def update_session_activity(session_id: str):
184
- SESSION_METADATA[session_id]['last_activity'] = time.time()
185
-
186
- # ============================================================================
187
- # GENERATION - AI response generation
188
- # ============================================================================
189
- def generate_response_stream(session_id: str, user_message: str, max_tokens: int,
190
- temperature: float, stream: bool = True) -> Generator[str, None, None]:
191
- if not get_llm():
192
- yield "⚠️ Server not running"
193
- return
194
-
195
- update_session_activity(session_id)
196
- recent_history = get_recent_history(session_id, max_messages=6)
197
- safe_tokens = calculate_safe_max_tokens(recent_history, max_tokens, MODEL_CONTEXT_WINDOW)
198
-
199
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
200
- for msg in recent_history:
201
- messages.append({"role": msg["role"], "content": msg["text"]})
202
- messages.append({"role": "user", "content": user_message})
203
-
204
- try:
205
- payload = {
206
- "messages": messages, "max_tokens": safe_tokens,
207
- "temperature": max(0.01, temperature),
208
- "top_p": DEFAULT_TOP_P, "stream": stream
209
- }
210
-
211
- if stream:
212
- response = requests.post(f"{LLAMA_SERVER_URL}/v1/chat/completions",
213
- json=payload, stream=True, timeout=300)
214
- full_response = ""
215
- for line in response.iter_lines():
216
- if line:
217
- line_text = line.decode('utf-8')
218
- if line_text.startswith('data: '):
219
- line_text = line_text[6:]
220
- if line_text.strip() == '[DONE]':
221
- break
222
- try:
223
- chunk = json.loads(line_text)
224
- content = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
225
- if content:
226
- full_response += content
227
- yield full_response.strip()
228
- except:
229
- continue
230
- else:
231
- # Use Ollama API format instead of OpenAI format
232
- ollama_payload = {
233
- "model": LLAMA_MODEL,
234
- "messages": messages,
235
- "stream": False
236
- }
237
- response = requests.post(f"{LLAMA_SERVER_URL}/api/chat",
238
- json=ollama_payload, timeout=300)
239
- yield response.json()["message"]["content"].strip()
240
-
241
- except Exception as e:
242
- yield f"⚠️ Error: {str(e)}"
243
-
244
- # ============================================================================
245
- # GRADIO UI COMPONENTS
246
- # ============================================================================
247
- def create_gradio_interface(error_msg: Optional[str] = None):
248
- with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft(primary_hue=PRIMARY_HUE)) as demo:
249
- gr.Markdown(f"# πŸ€– {APP_TITLE}\n### {APP_DESCRIPTION}\n---")
250
-
251
- if error_msg:
252
- gr.Markdown(f"⚠️ {error_msg}")
253
-
254
- with gr.Row():
255
- with gr.Column(scale=3):
256
- chatbot = gr.Chatbot(label="💬 Conversation", height=CHAT_HEIGHT,
257
- type="messages", show_copy_button=True)
258
- with gr.Row():
259
- txt_input = gr.Textbox(placeholder="Ask me about code...",
260
- show_label=False, scale=5, lines=2)
261
- send_btn = gr.Button("Send πŸš€", scale=1, variant="primary")
262
-
263
- with gr.Column(scale=1):
264
- gr.Markdown("### βš™οΈ Settings")
265
- temp_slider = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05,
266
- label="🌑️ Temperature")
267
- tokens_slider = gr.Slider(MIN_TOKENS, SAFE_MAX_TOKENS_CAP,
268
- value=DEFAULT_MAX_TOKENS, step=128, label="πŸ“ Max Tokens")
269
- stream_checkbox = gr.Checkbox(label="⚑ Stream", value=True)
270
- clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="stop", size="sm")
271
-
272
- session_state = gr.State()
273
-
274
- # Event handlers
275
- def handle_message(session_id, msg, temp, tokens, stream, request: gr.Request):
276
- session_id = session_id or get_session_id(request)
277
- if not msg.strip():
278
- return session_id, convert_history_to_gradio_messages(get_history(session_id)), ""
279
-
280
- add_to_history(session_id, "user", msg)
281
- yield session_id, convert_history_to_gradio_messages(get_history(session_id)), ""
282
-
283
- full_response = ""
284
- for partial in generate_response_stream(session_id, msg, tokens, temp, stream):
285
- full_response = partial
286
- temp_hist = get_history(session_id).copy()
287
- temp_hist.append({"role": "assistant", "text": full_response})
288
- yield session_id, convert_history_to_gradio_messages(temp_hist), ""
289
-
290
- add_to_history(session_id, "assistant", full_response)
291
- yield session_id, convert_history_to_gradio_messages(get_history(session_id)), ""
292
-
293
- def handle_clear(session_id, request: gr.Request):
294
- session_id = session_id or get_session_id(request)
295
- clear_history(session_id)
296
- return session_id, [], ""
297
-
298
- txt_input.submit(handle_message, [session_state, txt_input, temp_slider, tokens_slider, stream_checkbox],
299
- [session_state, chatbot, txt_input])
300
- send_btn.click(handle_message, [session_state, txt_input, temp_slider, tokens_slider, stream_checkbox],
301
- [session_state, chatbot, txt_input])
302
- clear_btn.click(handle_clear, [session_state], [session_state, chatbot, txt_input])
303
-
304
- return demo
305
-
306
- # ============================================================================
307
- # FASTAPI SERVER
308
- # ============================================================================
309
- api_app = FastAPI(title="SuperCoder API")
310
- api_app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
311
-
312
- api_sessions = {}
313
-
314
- class ChatMessage(BaseModel):
315
- role: str
316
- content: str
317
-
318
- class ChatRequest(BaseModel):
319
- messages: List[ChatMessage]
320
- temperature: Optional[float] = 0.1
321
- max_tokens: Optional[int] = 512
322
-
323
- class ChatResponse(BaseModel):
324
- response: str
325
- session_id: str
326
-
327
- @api_app.get("/health")
328
- async def health():
329
- return {"status": "ok" if get_llm() else "model_not_loaded"}
330
-
331
- @api_app.post("/api/chat", response_model=ChatResponse)
332
- async def chat(request: ChatRequest):
333
- if not get_llm():
334
- raise HTTPException(503, "Model not loaded")
335
-
336
- session_id = str(uuid.uuid4())
337
- api_sessions[session_id] = []
338
-
339
- user_message = request.messages[-1].content
340
- api_sessions[session_id].append({"role": "user", "text": user_message})
341
-
342
- full_response = ""
343
- for partial in generate_response_stream(session_id, user_message, request.max_tokens,
344
- request.temperature, False):
345
- full_response = partial
346
-
347
- api_sessions[session_id].append({"role": "assistant", "text": full_response})
348
- return ChatResponse(response=full_response, session_id=session_id)
349
-
350
- def run_api_server():
351
- uvicorn.run(api_app, host="0.0.0.0", port=8000, log_level="info")
352
-
353
- # ============================================================================
354
- # TUNNEL SUPPORT
355
- # ============================================================================
356
- def start_ngrok_tunnel(port: int = 8000) -> Optional[str]:
357
- try:
358
- subprocess.run(["which", "ngrok"], capture_output=True, check=True)
359
- subprocess.Popen(["ngrok", "http", str(port)], stdout=subprocess.PIPE)
360
- time.sleep(3)
361
-
362
- response = requests.get("http://127.0.0.1:4040/api/tunnels", timeout=5)
363
- tunnels = response.json()
364
- if tunnels.get("tunnels"):
365
- url = tunnels["tunnels"][0]["public_url"]
366
- print(f"βœ… Tunnel: {url}")
367
- return url
368
- except:
369
- print("❌ ngrok not found. Install: brew install ngrok")
370
- return None
371
-
372
- def start_cloudflare_tunnel(port: int = 8000) -> Optional[str]:
373
- try:
374
- subprocess.run(["which", "cloudflared"], capture_output=True, check=True)
375
- proc = subprocess.Popen(["cloudflared", "tunnel", "--url", f"http://localhost:{port}"],
376
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
377
- time.sleep(3)
378
-
379
- for _ in range(30):
380
- line = proc.stdout.readline()
381
- if "trycloudflare.com" in line:
382
- import re
383
- urls = re.findall(r'https://[^\s]+\.trycloudflare\.com', line)
384
- if urls:
385
- print(f"βœ… Tunnel: {urls[0]}")
386
- return urls[0]
387
- time.sleep(1)
388
- except:
389
- print("❌ cloudflared not found. Install: brew install cloudflared")
390
- return None
391
-
392
- # ============================================================================
393
- # MAIN LAUNCHER
394
- # ============================================================================
395
- def main():
396
- parser = argparse.ArgumentParser(description="SuperCoder - All-in-One AI Coding Assistant")
397
- parser.add_argument("--mode", choices=["gradio", "api", "both"], default="gradio",
398
- help="Run mode: gradio (UI), api (server), or both")
399
- parser.add_argument("--tunnel", choices=["ngrok", "cloudflare"],
400
- help="Start tunnel for public access")
401
- parser.add_argument("--no-server", action="store_true",
402
- help="Don't start llama.cpp server (assume already running)")
403
-
404
- args = parser.parse_args()
405
-
406
- print("╔════════════════════════════════════════════════╗")
407
- print("β•‘ SuperCoder - Unified Launcher β•‘")
408
- print("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•")
409
-
410
- # Start llama.cpp server
411
- if not args.no_server:
412
- success = start_llama_server()
413
- error_msg = None if success else "Failed to start llama.cpp server"
414
- else:
415
- error_msg = None
416
-
417
- # Run selected mode
418
- if args.mode == "gradio":
419
- print(f"\nπŸ“Œ Mode: Gradio UI\n🌐 Access: http://localhost:{SERVER_PORT}\n")
420
- demo = create_gradio_interface(error_msg)
421
- demo.launch(server_name=SERVER_NAME, server_port=SERVER_PORT)
422
-
423
- elif args.mode == "api":
424
- print(f"\nπŸ“Œ Mode: API Server\nπŸ“‘ API: http://localhost:8000/api/chat\n")
425
-
426
- if args.tunnel:
427
- api_proc = Process(target=run_api_server)
428
- api_proc.start()
429
- time.sleep(3)
430
-
431
- if args.tunnel == "ngrok":
432
- start_ngrok_tunnel(8000)
433
- else:
434
- start_cloudflare_tunnel(8000)
435
-
436
- try:
437
- api_proc.join()
438
- except KeyboardInterrupt:
439
- api_proc.terminate()
440
- else:
441
- run_api_server()
442
-
443
- elif args.mode == "both":
444
- print(f"\nπŸ“Œ Mode: Both Gradio + API\n🎨 UI: http://localhost:{SERVER_PORT}\nπŸ“‘ API: http://localhost:8000\n")
445
-
446
- gradio_proc = Process(target=lambda: create_gradio_interface(error_msg).launch(
447
- server_name=SERVER_NAME, server_port=SERVER_PORT))
448
- api_proc = Process(target=run_api_server)
449
-
450
- gradio_proc.start()
451
- api_proc.start()
452
-
453
- if args.tunnel:
454
- time.sleep(3)
455
- if args.tunnel == "ngrok":
456
- start_ngrok_tunnel(8000)
457
- else:
458
- start_cloudflare_tunnel(8000)
459
-
460
- try:
461
- gradio_proc.join()
462
- api_proc.join()
463
- except KeyboardInterrupt:
464
- gradio_proc.terminate()
465
- api_proc.terminate()
466
-
467
- if __name__ == "__main__":
468
- try:
469
- main()
470
- except KeyboardInterrupt:
471
- print("\nπŸ‘‹ Shutting down...")
472
- stop_llama_server()
473
- 🌐 Hugging Face Space Setup (Currently Broken)
474
- Current HF Space Files:
475
- Space: https://huggingface.co/spaces/SeccondDefense/Advanced_AI_Coder
476
- Error: 503 Something went wrong when restarting this Space
477
- Request ID: Root=1-68e635bc-446c06181ed65dee7e15e4f5
478
- 4. app.py (HF Frontend - Currently uploaded to HF)
479
- python
480
- """
481
- SuperCoder - Hugging Face Spaces Frontend
482
  Connects to your local API server via tunnel
483
  """
484
  import gradio as gr
485
  import requests
486
- from typing import List, Tuple
 
487
 
488
  # ============================================================================
489
  # Configuration - EDIT THIS WITH YOUR TUNNEL URL
490
  # ============================================================================
491
- API_URL = "https://inge-chalcographic-helene.ngrok-free.dev"
 
 
 
492
 
 
493
  # API Client Functions
 
494
  def call_api(message: str, temperature: float = 0.1, max_tokens: int = 512) -> str:
 
 
 
 
495
  try:
496
  response = requests.post(
497
  f"{API_URL}/api/chat",
498
  json={
499
  "messages": [{"role": "user", "content": message}],
500
  "temperature": temperature,
501
- "max_tokens": max_tokens,
502
- "stream": False
503
  },
504
- timeout=60
 
 
 
 
505
  )
 
506
  if response.status_code == 200:
507
  result = response.json()
508
  return result.get("response", "No response from API")
 
 
509
  else:
510
- return f"❌ API Error ({response.status_code}): {response.text}"
 
511
  except requests.exceptions.Timeout:
512
- return "⏱️ Request timed out. The model might be processing a complex request."
513
  except requests.exceptions.ConnectionError:
514
- return "πŸ”Œ Connection failed. Please ensure your local API server is running."
515
  except Exception as e:
516
- return f"⚠️ Error: {str(e)}"
517
 
518
  def check_api_status() -> str:
 
519
  try:
520
- response = requests.get(f"{API_URL}/health", timeout=5)
521
  if response.status_code == 200:
522
  data = response.json()
523
- if data.get("model_loaded"):
524
- return "βœ… Connected - Model Ready"
 
525
  else:
526
- return "⚠️ Connected but model not loaded"
527
  else:
528
  return f"❌ API returned status {response.status_code}"
529
- except:
530
- return "πŸ”΄ Not connected to API"
531
 
532
- # Gradio Interface (Complete UI)
533
- with gr.Blocks(title="SuperCoder Pro", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
534
- gr.Markdown("# πŸ€– SuperCoder Pro\n### AI-Powered Coding Assistant")
535
536
  with gr.Row():
537
- status_text = gr.Textbox(value=check_api_status(), label="🔌 API Status")
538
- refresh_btn = gr.Button("🔄 Refresh Status", size="sm")
539
 
 
540
  with gr.Row():
541
  with gr.Column(scale=3):
542
- chatbot = gr.Chatbot(label="💬 Conversation", height=500)
543
  with gr.Row():
544
- msg_input = gr.Textbox(placeholder="Ask me to write, explain, debug, or review code...", scale=5, lines=2)
545
  send_btn = gr.Button("Send πŸš€", scale=1, variant="primary")
546
 
 
547
  with gr.Column(scale=1):
548
- gr.Markdown("### βš™οΈ Settings")
549
- temperature = gr.Slider(0.0, 1.0, value=0.1, step=0.05, label="🌑️ Temperature")
550
- max_tokens = gr.Slider(128, 2048, value=512, step=128, label="πŸ“ Max Tokens")
551
- gr.Markdown("### 🎯 Quick Actions")
552
- template_dropdown = gr.Dropdown(choices=["Explain Code", "Debug Code", "Write Function", "Optimize Code", "Add Comments"], label="Select Template")
553
- use_template_btn = gr.Button("Use Template", size="sm")
554
  clear_btn = gr.Button("🗑️ Clear Chat", variant="stop", size="sm")
555
 
556
  # Event handlers
557
- msg_input.submit(lambda m, h, t, mt: (h + [(m, call_api(m, t, mt))], ""),
558
- [msg_input, chatbot, temperature, max_tokens], [chatbot, msg_input])
559
- send_btn.click(lambda m, h, t, mt: (h + [(m, call_api(m, t, mt))], ""),
560
- [msg_input, chatbot, temperature, max_tokens], [chatbot, msg_input])
561
- refresh_btn.click(lambda: check_api_status(), outputs=status_text)
562
 
 
 
 
563
  if __name__ == "__main__":
564
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, share=False)
565
- 🔧 Current Status
566
- ✅ Working:
567
- Local server: python supercoder.py --mode api runs successfully
568
- Ollama: llama2:latest model loaded and responding
569
- ngrok tunnel: https://inge-chalcographic-helene.ngrok-free.dev active
570
- File syntax: All Python files parse correctly
571
- ❌ Broken:
572
- HF Space: 503 Something went wrong when restarting this Space
573
- File uploads: May not be working properly via CLI
574
- Space deployment: Failing to start properly
575
- 🎯 Expected vs Actual
576
- Expected Behavior:
577
-
578
- HF Space loads Gradio interface
579
- Connects to local server via tunnel
580
- Users can chat with AI assistant
581
- Settings and templates work
582
- Actual Behavior:
583
-
584
- HF Space shows 503 error
585
- Cannot access the web interface
586
- Files may not be properly deployed
587
- ❓ Request for Fix
588
- Please analyze all the provided code and tell me:
589
-
590
- What's causing the 503 error in the HF Space?
591
- How to fix the code so it works on HF Spaces?
592
- Are there missing dependencies or configuration issues?
593
- Should I modify the launch configuration for HF compatibility?
594
- Is there a better way to structure this for HF deployment?
595
- Provide the corrected, working code for both local and HF files.
596
-
597
- The local setup works perfectly - the issue is specifically with HF Space deployment.
598
-
599
-
600
- ---
1
  """
2
+ SuperCoder - Hugging Face Spaces Frontend (FIXED)
3
  Connects to your local API server via tunnel
4
  """
5
  import gradio as gr
6
  import requests
7
+ from typing import List, Tuple, Optional
8
+ import os
9
 
10
  # ============================================================================
11
  # Configuration - EDIT THIS WITH YOUR TUNNEL URL
12
  # ============================================================================
13
+ API_URL = os.getenv("API_URL", "https://inge-chalcographic-helene.ngrok-free.dev")
14
+
15
+ # Remove trailing slash if present
16
+ API_URL = API_URL.rstrip('/')
17
 
18
+ # ============================================================================
19
  # API Client Functions
20
+ # ============================================================================
21
  def call_api(message: str, temperature: float = 0.1, max_tokens: int = 512) -> str:
22
+ """Call the remote API with error handling"""
23
+ if not message or not message.strip():
24
+ return "⚠️ Please enter a message"
25
+
26
  try:
27
  response = requests.post(
28
  f"{API_URL}/api/chat",
29
  json={
30
  "messages": [{"role": "user", "content": message}],
31
  "temperature": temperature,
32
+ "max_tokens": max_tokens
 
33
  },
34
+ timeout=90,
35
+ headers={
36
+ "Content-Type": "application/json",
37
+ "ngrok-skip-browser-warning": "true" # Skip ngrok warning page
38
+ }
39
  )
40
+
41
  if response.status_code == 200:
42
  result = response.json()
43
  return result.get("response", "No response from API")
44
+ elif response.status_code == 503:
45
+ return "πŸ”§ Backend service unavailable. Please ensure your local server is running."
46
  else:
47
+ return f"❌ API Error ({response.status_code}): {response.text[:200]}"
48
+
49
  except requests.exceptions.Timeout:
50
+ return "⏱️ Request timed out. The model might be processing a complex request or the server is down."
51
  except requests.exceptions.ConnectionError:
52
+ return f"πŸ”Œ Cannot connect to API at {API_URL}. Please verify:\n1. Local server is running\n2. Tunnel (ngrok/cloudflare) is active\n3. API_URL is correct"
53
  except Exception as e:
54
+ return f"⚠️ Unexpected error: {str(e)}"
55
 
56
  def check_api_status() -> str:
57
+ """Check if the API is reachable and healthy"""
58
  try:
59
+ response = requests.get(
60
+ f"{API_URL}/health",
61
+ timeout=5,
62
+ headers={"ngrok-skip-browser-warning": "true"}
63
+ )
64
+
65
  if response.status_code == 200:
66
  data = response.json()
67
+ status = data.get("status", "unknown")
68
+ if status == "ok":
69
+ return "βœ… Connected - Backend Ready"
70
  else:
71
+ return f"⚠️ Connected but status: {status}"
72
  else:
73
  return f"❌ API returned status {response.status_code}"
74
+
75
+ except requests.exceptions.ConnectionError:
76
+ return f"πŸ”΄ Cannot reach {API_URL} - Check tunnel status"
77
+ except requests.exceptions.Timeout:
78
+ return "⏱️ Health check timed out"
79
+ except Exception as e:
80
+ return f"❌ Error: {str(e)}"
81
 
82
+ # ============================================================================
83
+ # Gradio Interface
84
+ # ============================================================================
85
+ def respond(message: str, history: List[Tuple[str, str]], temperature: float, max_tokens: int):
86
+ """Handle chat responses"""
87
+ if not message.strip():
88
+ return history
89
+
90
+ # Add user message
91
+ history.append((message, None))
92
+
93
+ # Get bot response
94
+ bot_response = call_api(message, temperature, max_tokens)
95
+
96
+ # Update with bot response
97
+ history[-1] = (message, bot_response)
98
 
99
+ return history
100
+
101
+ def apply_template(template: str, history: List[Tuple[str, str]]) -> tuple:
102
+ """Apply a code template"""
103
+ templates = {
104
+ "Explain Code": "Please explain the following code in detail:\n```\n# Paste your code here\n```",
105
+ "Debug Code": "I have a bug in my code. Can you help me debug it?\n```\n# Paste your buggy code here\n```",
106
+ "Write Function": "Please write a function that: [describe what you need]",
107
+ "Optimize Code": "Can you optimize this code for better performance?\n```\n# Paste your code here\n```",
108
+ "Add Comments": "Please add clear comments to this code:\n```\n# Paste your code here\n```"
109
+ }
110
+ return templates.get(template, ""), history
111
+
112
+ # Create the Gradio interface
113
+ with gr.Blocks(
114
+ title="SuperCoder Pro",
115
+ theme=gr.themes.Soft(primary_hue="indigo"),
116
+ css=".gradio-container {max-width: 1200px !important}"
117
+ ) as demo:
118
+
119
+ gr.Markdown(
120
+ """
121
+ # 🤖 SuperCoder Pro
122
+ ### AI-Powered Coding Assistant
123
+
124
+ > **Note:** This interface connects to a local backend via tunnel.
125
+ > Make sure your local server is running with `python supercoder.py --mode api --tunnel ngrok`
126
+ """
127
+ )
128
+
129
+ # Status bar
130
  with gr.Row():
131
+ with gr.Column(scale=4):
132
+ status_display = gr.Textbox(
133
+ value=check_api_status(),
134
+ label="πŸ”Œ Backend Status",
135
+ interactive=False,
136
+ show_copy_button=True
137
+ )
138
+ with gr.Column(scale=1):
139
+ refresh_btn = gr.Button("🔄 Refresh", size="sm", variant="secondary")
140
 
141
+ # Main chat interface
142
  with gr.Row():
143
  with gr.Column(scale=3):
144
+ chatbot = gr.Chatbot(
145
+ label="πŸ’¬ Conversation",
146
+ height=500,
147
+ show_copy_button=True,
148
+ avatar_images=(None, "🤖")
149
+ )
150
+
151
  with gr.Row():
152
+ msg_input = gr.Textbox(
153
+ placeholder="Ask me to write, explain, debug, or review code...",
154
+ scale=5,
155
+ lines=2,
156
+ show_label=False,
157
+ autofocus=True
158
+ )
159
  send_btn = gr.Button("Send πŸš€", scale=1, variant="primary")
160
 
161
+ # Settings sidebar
162
  with gr.Column(scale=1):
163
+ gr.Markdown("### βš™οΈ Model Settings")
164
+
165
+ temperature = gr.Slider(
166
+ minimum=0.0,
167
+ maximum=1.0,
168
+ value=0.1,
169
+ step=0.05,
170
+ label="🌑️ Temperature",
171
+ info="Lower = more focused, Higher = more creative"
172
+ )
173
+
174
+ max_tokens = gr.Slider(
175
+ minimum=128,
176
+ maximum=2048,
177
+ value=512,
178
+ step=128,
179
+ label="πŸ“ Max Tokens",
180
+ info="Maximum response length"
181
+ )
182
+
183
+ gr.Markdown("---")
184
+ gr.Markdown("### 🎯 Quick Templates")
185
+
186
+ template_dropdown = gr.Dropdown(
187
+ choices=[
188
+ "Explain Code",
189
+ "Debug Code",
190
+ "Write Function",
191
+ "Optimize Code",
192
+ "Add Comments"
193
+ ],
194
+ label="Select Template",
195
+ value="Explain Code"
196
+ )
197
+
198
+ use_template_btn = gr.Button("📝 Use Template", size="sm", variant="secondary")
199
+
200
+ gr.Markdown("---")
201
+
202
  clear_btn = gr.Button("🗑️ Clear Chat", variant="stop", size="sm")
203
+
204
+ gr.Markdown("---")
205
+ gr.Markdown(
206
+ f"""
207
+ ### 📡 Connection Info
208
+ **API Endpoint:**
209
+ `{API_URL}`
210
+
211
+ **Tunnel Status:**
212
+ Check status above ⬆️
213
+ """
214
+ )
215
 
216
  # Event handlers
217
+ msg_input.submit(
218
+ respond,
219
+ inputs=[msg_input, chatbot, temperature, max_tokens],
220
+ outputs=[chatbot]
221
+ ).then(
222
+ lambda: "",
223
+ outputs=[msg_input]
224
+ )
225
+
226
+ send_btn.click(
227
+ respond,
228
+ inputs=[msg_input, chatbot, temperature, max_tokens],
229
+ outputs=[chatbot]
230
+ ).then(
231
+ lambda: "",
232
+ outputs=[msg_input]
233
+ )
234
+
235
+ refresh_btn.click(
236
+ check_api_status,
237
+ outputs=[status_display]
238
+ )
239
+
240
+ use_template_btn.click(
241
+ apply_template,
242
+ inputs=[template_dropdown, chatbot],
243
+ outputs=[msg_input, chatbot]
244
+ )
245
+
246
+ clear_btn.click(
247
+ lambda: [],
248
+ outputs=[chatbot]
249
+ )
250
 
251
+ # ============================================================================
252
+ # Launch Configuration for HF Spaces
253
+ # ============================================================================
254
  if __name__ == "__main__":
255
+ demo.launch(
256
+ server_name="0.0.0.0", # Required for HF Spaces
257
+ server_port=7860, # Required for HF Spaces
258
+ show_error=True,
259
+ show_api=False,
260
+ share=False # Don't create gradio.live share link
261
+ )