Kai Izumoto committed on
Commit
7df3a4b
·
verified ·
1 Parent(s): 7e77368

Update app.py

Files changed (1)
  1. app.py +535 -191
app.py CHANGED
@@ -1,3 +1,482 @@
1
  """
2
  SuperCoder - Hugging Face Spaces Frontend
3
  Connects to your local API server via tunnel
@@ -11,17 +490,8 @@ from typing import List, Tuple
11
  # ============================================================================
12
  API_URL = "https://inge-chalcographic-helene.ngrok-free.dev"
13
 
14
- # Example URLs:
15
- # ngrok: https://abc123.ngrok-free.app
16
- # cloudflare: https://abc123.trycloudflare.com
17
-
18
- # ============================================================================
19
  # API Client Functions
20
- # ============================================================================
21
  def call_api(message: str, temperature: float = 0.1, max_tokens: int = 512) -> str:
22
- """
23
- Call the SuperCoder API running on your local machine.
24
- """
25
  try:
26
  response = requests.post(
27
  f"{API_URL}/api/chat",
@@ -33,25 +503,19 @@ def call_api(message: str, temperature: float = 0.1, max_tokens: int = 512) -> s
33
  },
34
  timeout=60
35
  )
36
-
37
  if response.status_code == 200:
38
  result = response.json()
39
  return result.get("response", "No response from API")
40
  else:
41
  return f"❌ API Error ({response.status_code}): {response.text}"
42
-
43
  except requests.exceptions.Timeout:
44
  return "⏱️ Request timed out. The model might be processing a complex request."
45
-
46
  except requests.exceptions.ConnectionError:
47
  return "πŸ”Œ Connection failed. Please ensure your local API server is running."
48
-
49
  except Exception as e:
50
  return f"⚠️ Error: {str(e)}"
51
 
52
-
53
  def check_api_status() -> str:
54
- """Check if the API is reachable."""
55
  try:
56
  response = requests.get(f"{API_URL}/health", timeout=5)
57
  if response.status_code == 200:
@@ -65,192 +529,72 @@ def check_api_status() -> str:
65
  except:
66
  return "πŸ”΄ Not connected to API"
67
 
68
-
69
- # ============================================================================
70
- # Gradio Interface
71
- # ============================================================================
72
- def chat_interface(message: str, history: List[Tuple[str, str]],
73
- temperature: float, max_tokens: int) -> Tuple[List[Tuple[str, str]], str]:
74
- """Handle chat interaction."""
75
- if not message.strip():
76
- return history, ""
77
-
78
- # Add user message to history
79
- history = history + [(message, None)]
80
-
81
- # Get AI response
82
- response = call_api(message, temperature, max_tokens)
83
-
84
- # Update history with response
85
- history[-1] = (message, response)
86
-
87
- return history, ""
88
-
89
-
90
- # ============================================================================
91
- # Quick Action Templates
92
- # ============================================================================
93
- QUICK_ACTIONS = {
94
- "Explain Code": "Explain the following code:\n\n```python\n# PASTE YOUR CODE HERE\n```",
95
- "Debug Code": "Help me debug this code:\n\n```python\n# PASTE YOUR CODE HERE\n```",
96
- "Write Function": "Write a Python function that:",
97
- "Optimize Code": "Optimize this code for better performance:\n\n```python\n# PASTE YOUR CODE HERE\n```",
98
- "Add Comments": "Add detailed comments to this code:\n\n```python\n# PASTE YOUR CODE HERE\n```",
99
- }
100
-
101
- def use_template(template_name: str) -> str:
102
- """Return the selected template."""
103
- return QUICK_ACTIONS.get(template_name, "")
104
-
105
-
106
- # ============================================================================
107
- # Build Gradio UI
108
- # ============================================================================
109
- with gr.Blocks(
110
- title="SuperCoder Pro",
111
- theme=gr.themes.Soft(primary_hue="indigo"),
112
- css="""
113
- .container { max-width: 1200px; margin: auto; }
114
- .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
115
- .status-connected { background-color: #d4edda; }
116
- .status-disconnected { background-color: #f8d7da; }
117
- """
118
- ) as demo:
119
-
120
- # Header
121
- gr.Markdown("""
122
- # πŸ€– SuperCoder Pro
123
- ### AI-Powered Coding Assistant
124
-
125
- Your personal AI coding assistant powered by local hardware. Ask me to write,
126
- explain, debug, or optimize code!
127
-
128
- ---
129
- """)
130
-
131
- # API Status
132
  with gr.Row():
133
- status_text = gr.Textbox(
134
- value=check_api_status(),
135
- label="πŸ”Œ API Status",
136
- interactive=False,
137
- show_label=True
138
- )
139
  refresh_btn = gr.Button("πŸ”„ Refresh Status", size="sm")
140
-
141
- refresh_btn.click(
142
- fn=check_api_status,
143
- outputs=status_text
144
- )
145
-
146
- # Main Interface
147
  with gr.Row():
148
- # Left Column: Chat
149
  with gr.Column(scale=3):
150
- chatbot = gr.Chatbot(
151
- label="πŸ’¬ Conversation",
152
- height=500,
153
- show_copy_button=True,
154
- avatar_images=(None, "πŸ€–")
155
- )
156
-
157
  with gr.Row():
158
- msg_input = gr.Textbox(
159
- placeholder="Ask me to write, explain, debug, or review code...",
160
- show_label=False,
161
- scale=5,
162
- lines=2
163
- )
164
  send_btn = gr.Button("Send πŸš€", scale=1, variant="primary")
165
-
166
- # Right Column: Settings & Actions
167
  with gr.Column(scale=1):
168
  gr.Markdown("### βš™οΈ Settings")
169
-
170
- temperature = gr.Slider(
171
- 0.0, 1.0,
172
- value=0.1,
173
- step=0.05,
174
- label="🌑️ Temperature",
175
- info="Lower = precise, Higher = creative"
176
- )
177
-
178
- max_tokens = gr.Slider(
179
- 128, 2048,
180
- value=512,
181
- step=128,
182
- label="πŸ“ Max Tokens",
183
- info="Response length limit"
184
- )
185
-
186
  gr.Markdown("### 🎯 Quick Actions")
187
-
188
- template_dropdown = gr.Dropdown(
189
- choices=list(QUICK_ACTIONS.keys()),
190
- label="Select Template",
191
- value=None
192
- )
193
-
194
  use_template_btn = gr.Button("Use Template", size="sm")
195
-
196
  clear_btn = gr.Button("πŸ—‘οΈ Clear Chat", variant="stop", size="sm")
197
 
198
- # Event Handlers
199
- msg_input.submit(
200
- fn=chat_interface,
201
- inputs=[msg_input, chatbot, temperature, max_tokens],
202
- outputs=[chatbot, msg_input]
203
- )
204
-
205
- send_btn.click(
206
- fn=chat_interface,
207
- inputs=[msg_input, chatbot, temperature, max_tokens],
208
- outputs=[chatbot, msg_input]
209
- )
210
-
211
- use_template_btn.click(
212
- fn=use_template,
213
- inputs=[template_dropdown],
214
- outputs=[msg_input]
215
- )
216
-
217
- clear_btn.click(
218
- fn=lambda: ([], ""),
219
- outputs=[chatbot, msg_input]
220
- )
221
-
222
- # Footer
223
- gr.Markdown("""
224
- ---
225
-
226
- ### πŸ’‘ Tips
227
- - **Be specific** in your requests for better results
228
- - **Paste code** directly in your messages
229
- - Use **templates** for common tasks
230
- - Adjust **temperature** for more creative or precise outputs
231
-
232
- ### ⚠️ Important
233
- This Space connects to a **locally-running** AI model via tunnel.
234
- If you see connection errors, the local server may be offline.
235
-
236
- ### πŸ”’ Privacy
237
- - All processing happens on the owner's local machine
238
- - No data is stored by Hugging Face
239
- - Each chat session is independent
240
-
241
- ---
242
-
243
- **Built with ❀️ using llama.cpp and Gradio**
244
- """)
245
-
246
-
247
- # ============================================================================
248
- # HF Spaces Launch Configuration
249
- # ============================================================================
250
  if __name__ == "__main__":
251
- demo.launch(
252
- server_name="0.0.0.0",
253
- server_port=7860,
254
- show_error=True,
255
- share=False # Important for HF Spaces compatibility
256
- )
1
+ ## 🚨 Hugging Face Space 503 Error - Need Code Fix
2
+
3
+ I have a persistent 503 "Something went wrong when restarting this Space" error with my Hugging Face Space. The local server works perfectly, but the HF deployment fails. Here's my complete setup:
4
+
5
+ ### πŸ“‹ Project Overview
6
+ **SuperCoder** - AI coding assistant running locally with Ollama, accessible via HF Space web interface
7
+ - **Local Backend:** Python server using Ollama models
8
+ - **Web Frontend:** Gradio interface on Hugging Face Space
9
+ - **Connection:** ngrok tunnel from HF to local server (request-flow sketch below)
10
+
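+ To make the connection path above concrete, here is a minimal request-flow sketch. The tunnel URL and the `/api/chat` route are taken from the code in this post, and the payload shape follows the `ChatRequest` model shown in supercoder.py; treat it as an illustration, not the app's actual client code.
+
+ ```python
+ # Sketch of the HF Space -> ngrok tunnel -> local FastAPI -> Ollama path.
+ # URL and route come from the files below; payload mirrors the ChatRequest model.
+ import requests
+
+ TUNNEL_URL = "https://inge-chalcographic-helene.ngrok-free.dev"
+
+ resp = requests.post(
+     f"{TUNNEL_URL}/api/chat",
+     json={
+         "messages": [{"role": "user", "content": "Write a hello-world function"}],
+         "temperature": 0.1,
+         "max_tokens": 256,
+     },
+     timeout=60,
+ )
+ print(resp.json().get("response"))  # generated by the local supercoder.py backend
+ ```
+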
11
+ ### 🏠 Complete Local Setup (All Working)
12
+
13
+ #### 1. requirements.txt (Local)
14
+ ```txt
15
+ gradio>=4.0.0
16
+ requests>=2.25.0
17
+ ```
+
+ #### 2. config.py (Local - 3,798 bytes)
+ ```python
19
+ """
20
+ Configuration and constants for SuperCoder.
21
+ Centralized settings for easy maintenance and deployment.
22
+ """
23
+ import os
24
+ from pathlib import Path
25
+
26
+ # ============================================================================
27
+ # Model Configuration (llama.cpp Server)
28
+ # ============================================================================
29
+ LLAMA_SERVER_PATH = "/Users/izumotofam/llama.cpp/build/bin/llama-server"
30
+ LLAMA_MODEL = "llama2:latest"
31
+ LLAMA_SERVER_HOST = "127.0.0.1"
32
+ LLAMA_SERVER_PORT = 8080
33
+ LLAMA_SERVER_URL = "http://localhost:11434"
34
+
35
+ # Server startup settings
36
+ SERVER_STARTUP_TIMEOUT = 30 # seconds to wait for server to start
37
+ SERVER_HEALTH_CHECK_INTERVAL = 0.5 # seconds between health checks
38
+
39
+ # Model parameters
40
+ MODEL_THREADS = 1
41
+ MODEL_CONTEXT_WINDOW = 1024
42
+ MODEL_GPU_LAYERS = 0
43
+ MODEL_BATCH_SIZE = 64
44
+
45
+ # Gradio UI settings
46
+ SERVER_NAME = "127.0.0.1"
47
+ SERVER_PORT = 7860
48
+ APP_TITLE = "SuperCoder Pro"
49
+ APP_DESCRIPTION = "AI-Powered Coding Assistant"
50
+ CHAT_HEIGHT = 500
51
+
52
+ # Model settings
53
+ DEFAULT_TEMPERATURE = 0.1
54
+ DEFAULT_MAX_TOKENS = 512
55
+ DEFAULT_TOP_P = 0.9
56
+ MIN_TOKENS = 128
57
+ SAFE_MAX_TOKENS_CAP = 2048
58
+ SYSTEM_OVERHEAD_TOKENS = 100
59
+ ```
+
+ #### 3. supercoder.py (Local Backend - 413 lines - DON'T upload to HF)
+ ```python
61
+ """
62
+ SuperCoder - Unified Application
63
+ All-in-one file containing Gradio UI, API server, tunnel support, and AI logic.
64
+ """
65
+ import os
66
+ import sys
67
+ import time
68
+ import uuid
69
+ import argparse
70
+ import subprocess
71
+ import traceback
72
+ import requests
73
+ import json
74
+ from pathlib import Path
75
+ from typing import Optional, List, Dict, Any, Generator, Tuple
76
+ from collections import defaultdict
77
+ from functools import partial
78
+ from multiprocessing import Process
79
+
80
+ import gradio as gr
81
+ from fastapi import FastAPI, HTTPException
82
+ from fastapi.middleware.cors import CORSMiddleware
83
+ from pydantic import BaseModel
84
+ import uvicorn
85
+
86
+ # Import config (only external dependency)
87
+ from config import *
88
+
89
+ # ============================================================================
90
+ # SERVER MANAGER - llama.cpp server lifecycle
91
+ # ============================================================================
92
+ _server_process = None
93
+ _server_info = {}
94
+
95
+ def check_server_health() -> bool:
96
+ try:
97
+ # Check if Ollama is responding
98
+ response = requests.get(f"{LLAMA_SERVER_URL}/api/tags", timeout=2)
99
+ return response.status_code == 200 and len(response.json().get("models", [])) > 0
100
+ except:
101
+ return False
102
+
103
+ def start_llama_server() -> bool:
104
+ global _server_process, _server_info
105
+
106
+ if _server_process and check_server_health():
107
+ return True
108
+
109
+ print(f"\nπŸš€ Starting llama.cpp server on {LLAMA_SERVER_URL}")
110
+
111
+ try:
112
+ cmd = [
113
+ LLAMA_SERVER_PATH, "-hf", LLAMA_MODEL,
114
+ "-c", str(MODEL_CONTEXT_WINDOW),
115
+ "-t", str(MODEL_THREADS),
116
+ "-ngl", str(MODEL_GPU_LAYERS),
117
+ "--host", LLAMA_SERVER_HOST, "--port", str(LLAMA_SERVER_PORT)
118
+ ]
119
+
120
+ _server_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
121
+ _server_info = {'pid': _server_process.pid, 'url': LLAMA_SERVER_URL}
122
+
123
+ # Wait for ready
124
+ for _ in range(SERVER_STARTUP_TIMEOUT * 2):
125
+ if check_server_health():
126
+ print(f"βœ… Server ready (PID: {_server_process.pid})")
127
+ return True
128
+ time.sleep(0.5)
129
+
130
+ return False
131
+ except Exception as e:
132
+ print(f"❌ Server start failed: {e}")
133
+ return False
134
+
135
+ def stop_llama_server():
136
+ global _server_process
137
+ if _server_process:
138
+ _server_process.terminate()
139
+ _server_process.wait()
140
+ _server_process = None
141
+
142
+ def get_llm():
143
+ return True if check_server_health() else None
144
+
145
+ def get_model_info():
146
+ return _server_info.copy()
147
+
148
+ # ============================================================================
149
+ # SESSION MANAGER - Chat history
150
+ # ============================================================================
151
+ SESSION_STORE = {}
152
+ SESSION_METADATA = defaultdict(dict)
153
+
154
+ def get_session_id(request: gr.Request) -> str:
155
+ return request.session_hash
156
+
157
+ def get_history(session_id: str, create_if_missing: bool = False) -> List[Dict]:
158
+ if session_id not in SESSION_STORE and create_if_missing:
159
+ SESSION_STORE[session_id] = []
160
+ return SESSION_STORE.get(session_id, [])
161
+
162
+ def add_to_history(session_id: str, role: str, text: str):
163
+ history = get_history(session_id, create_if_missing=True)
164
+ history.append({"role": role, "text": text, "timestamp": time.time()})
165
+
166
+ def clear_history(session_id: str):
167
+ if session_id in SESSION_STORE:
168
+ SESSION_STORE[session_id] = []
169
+
170
+ def convert_history_to_gradio_messages(history: List[Dict]) -> List[Dict]:
171
+ return [{"role": msg["role"], "content": msg["text"]} for msg in history]
172
+
173
+ def calculate_safe_max_tokens(history: List[Dict], requested: int, max_context: int) -> int:
174
+ history_chars = sum(len(msg["text"]) for msg in history)
175
+ estimated_tokens = history_chars // 4
176
+ available = max_context - estimated_tokens - SYSTEM_OVERHEAD_TOKENS
177
+ return max(min(requested, available, SAFE_MAX_TOKENS_CAP), MIN_TOKENS)
178
+
179
+ def get_recent_history(session_id: str, max_messages: int = 10) -> List[Dict]:
180
+ history = get_history(session_id)
181
+ return history[-max_messages:] if len(history) > max_messages else history
182
+
183
+ def update_session_activity(session_id: str):
184
+ SESSION_METADATA[session_id]['last_activity'] = time.time()
185
+
186
+ # ============================================================================
187
+ # GENERATION - AI response generation
188
+ # ============================================================================
189
+ def generate_response_stream(session_id: str, user_message: str, max_tokens: int,
190
+ temperature: float, stream: bool = True) -> Generator[str, None, None]:
191
+ if not get_llm():
192
+ yield "⚠️ Server not running"
193
+ return
194
+
195
+ update_session_activity(session_id)
196
+ recent_history = get_recent_history(session_id, max_messages=6)
197
+ safe_tokens = calculate_safe_max_tokens(recent_history, max_tokens, MODEL_CONTEXT_WINDOW)
198
+
199
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
200
+ for msg in recent_history:
201
+ messages.append({"role": msg["role"], "content": msg["text"]})
202
+ messages.append({"role": "user", "content": user_message})
203
+
204
+ try:
205
+ payload = {
206
+ "messages": messages, "max_tokens": safe_tokens,
207
+ "temperature": max(0.01, temperature),
208
+ "top_p": DEFAULT_TOP_P, "stream": stream
209
+ }
210
+
211
+ if stream:
212
+ response = requests.post(f"{LLAMA_SERVER_URL}/v1/chat/completions",
213
+ json=payload, stream=True, timeout=300)
214
+ full_response = ""
215
+ for line in response.iter_lines():
216
+ if line:
217
+ line_text = line.decode('utf-8')
218
+ if line_text.startswith('data: '):
219
+ line_text = line_text[6:]
220
+ if line_text.strip() == '[DONE]':
221
+ break
222
+ try:
223
+ chunk = json.loads(line_text)
224
+ content = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
225
+ if content:
226
+ full_response += content
227
+ yield full_response.strip()
228
+ except:
229
+ continue
230
+ else:
231
+ # Use Ollama API format instead of OpenAI format
232
+ ollama_payload = {
233
+ "model": LLAMA_MODEL,
234
+ "messages": messages,
235
+ "stream": False
236
+ }
237
+ response = requests.post(f"{LLAMA_SERVER_URL}/api/chat",
238
+ json=ollama_payload, timeout=300)
239
+ yield response.json()["message"]["content"].strip()
240
+
241
+ except Exception as e:
242
+ yield f"⚠️ Error: {str(e)}"
243
+
244
+ # ============================================================================
245
+ # GRADIO UI COMPONENTS
246
+ # ============================================================================
247
+ def create_gradio_interface(error_msg: Optional[str] = None):
248
+ with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft(primary_hue=PRIMARY_HUE)) as demo:
249
+ gr.Markdown(f"# πŸ€– {APP_TITLE}\n### {APP_DESCRIPTION}\n---")
250
+
251
+ if error_msg:
252
+ gr.Markdown(f"⚠️ {error_msg}")
253
+
254
+ with gr.Row():
255
+ with gr.Column(scale=3):
256
+ chatbot = gr.Chatbot(label="πŸ’¬ Conversation", height=CHAT_HEIGHT,
257
+ type="messages", show_copy_button=True)
258
+ with gr.Row():
259
+ txt_input = gr.Textbox(placeholder="Ask me about code...",
260
+ show_label=False, scale=5, lines=2)
261
+ send_btn = gr.Button("Send πŸš€", scale=1, variant="primary")
262
+
263
+ with gr.Column(scale=1):
264
+ gr.Markdown("### βš™οΈ Settings")
265
+ temp_slider = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05,
266
+ label="🌑️ Temperature")
267
+ tokens_slider = gr.Slider(MIN_TOKENS, SAFE_MAX_TOKENS_CAP,
268
+ value=DEFAULT_MAX_TOKENS, step=128, label="πŸ“ Max Tokens")
269
+ stream_checkbox = gr.Checkbox(label="⚑ Stream", value=True)
270
+ clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="stop", size="sm")
271
+
272
+ session_state = gr.State()
273
+
274
+ # Event handlers
275
+ def handle_message(session_id, msg, temp, tokens, stream, request: gr.Request):
276
+ session_id = session_id or get_session_id(request)
277
+ if not msg.strip():
278
+ return session_id, convert_history_to_gradio_messages(get_history(session_id)), ""
279
+
280
+ add_to_history(session_id, "user", msg)
281
+ yield session_id, convert_history_to_gradio_messages(get_history(session_id)), ""
282
+
283
+ full_response = ""
284
+ for partial in generate_response_stream(session_id, msg, tokens, temp, stream):
285
+ full_response = partial
286
+ temp_hist = get_history(session_id).copy()
287
+ temp_hist.append({"role": "assistant", "text": full_response})
288
+ yield session_id, convert_history_to_gradio_messages(temp_hist), ""
289
+
290
+ add_to_history(session_id, "assistant", full_response)
291
+ yield session_id, convert_history_to_gradio_messages(get_history(session_id)), ""
292
+
293
+ def handle_clear(session_id, request: gr.Request):
294
+ session_id = session_id or get_session_id(request)
295
+ clear_history(session_id)
296
+ return session_id, [], ""
297
+
298
+ txt_input.submit(handle_message, [session_state, txt_input, temp_slider, tokens_slider, stream_checkbox],
299
+ [session_state, chatbot, txt_input])
300
+ send_btn.click(handle_message, [session_state, txt_input, temp_slider, tokens_slider, stream_checkbox],
301
+ [session_state, chatbot, txt_input])
302
+ clear_btn.click(handle_clear, [session_state], [session_state, chatbot, txt_input])
303
+
304
+ return demo
305
+
306
+ # ============================================================================
307
+ # FASTAPI SERVER
308
+ # ============================================================================
309
+ api_app = FastAPI(title="SuperCoder API")
310
+ api_app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
311
+
312
+ api_sessions = {}
313
+
314
+ class ChatMessage(BaseModel):
315
+ role: str
316
+ content: str
317
+
318
+ class ChatRequest(BaseModel):
319
+ messages: List[ChatMessage]
320
+ temperature: Optional[float] = 0.1
321
+ max_tokens: Optional[int] = 512
322
+
323
+ class ChatResponse(BaseModel):
324
+ response: str
325
+ session_id: str
326
+
327
+ @api_app.get("/health")
328
+ async def health():
329
+ return {"status": "ok" if get_llm() else "model_not_loaded"}
330
+
331
+ @api_app.post("/api/chat", response_model=ChatResponse)
332
+ async def chat(request: ChatRequest):
333
+ if not get_llm():
334
+ raise HTTPException(503, "Model not loaded")
335
+
336
+ session_id = str(uuid.uuid4())
337
+ api_sessions[session_id] = []
338
+
339
+ user_message = request.messages[-1].content
340
+ api_sessions[session_id].append({"role": "user", "text": user_message})
341
+
342
+ full_response = ""
343
+ for partial in generate_response_stream(session_id, user_message, request.max_tokens,
344
+ request.temperature, False):
345
+ full_response = partial
346
+
347
+ api_sessions[session_id].append({"role": "assistant", "text": full_response})
348
+ return ChatResponse(response=full_response, session_id=session_id)
349
+
350
+ def run_api_server():
351
+ uvicorn.run(api_app, host="0.0.0.0", port=8000, log_level="info")
352
+
353
+ # ============================================================================
354
+ # TUNNEL SUPPORT
355
+ # ============================================================================
356
+ def start_ngrok_tunnel(port: int = 8000) -> Optional[str]:
357
+ try:
358
+ subprocess.run(["which", "ngrok"], capture_output=True, check=True)
359
+ subprocess.Popen(["ngrok", "http", str(port)], stdout=subprocess.PIPE)
360
+ time.sleep(3)
361
+
362
+ response = requests.get("http://127.0.0.1:4040/api/tunnels", timeout=5)
363
+ tunnels = response.json()
364
+ if tunnels.get("tunnels"):
365
+ url = tunnels["tunnels"][0]["public_url"]
366
+ print(f"βœ… Tunnel: {url}")
367
+ return url
368
+ except:
369
+ print("❌ ngrok not found. Install: brew install ngrok")
370
+ return None
371
+
372
+ def start_cloudflare_tunnel(port: int = 8000) -> Optional[str]:
373
+ try:
374
+ subprocess.run(["which", "cloudflared"], capture_output=True, check=True)
375
+ proc = subprocess.Popen(["cloudflared", "tunnel", "--url", f"http://localhost:{port}"],
376
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
377
+ time.sleep(3)
378
+
379
+ for _ in range(30):
380
+ line = proc.stdout.readline()
381
+ if "trycloudflare.com" in line:
382
+ import re
383
+ urls = re.findall(r'https://[^\s]+\.trycloudflare\.com', line)
384
+ if urls:
385
+ print(f"βœ… Tunnel: {urls[0]}")
386
+ return urls[0]
387
+ time.sleep(1)
388
+ except:
389
+ print("❌ cloudflared not found. Install: brew install cloudflared")
390
+ return None
391
+
392
+ # ============================================================================
393
+ # MAIN LAUNCHER
394
+ # ============================================================================
395
+ def main():
396
+ parser = argparse.ArgumentParser(description="SuperCoder - All-in-One AI Coding Assistant")
397
+ parser.add_argument("--mode", choices=["gradio", "api", "both"], default="gradio",
398
+ help="Run mode: gradio (UI), api (server), or both")
399
+ parser.add_argument("--tunnel", choices=["ngrok", "cloudflare"],
400
+ help="Start tunnel for public access")
401
+ parser.add_argument("--no-server", action="store_true",
402
+ help="Don't start llama.cpp server (assume already running)")
403
+
404
+ args = parser.parse_args()
405
+
406
+ print("╔════════════════════════════════════════════════╗")
407
+ print("β•‘ SuperCoder - Unified Launcher β•‘")
408
+ print("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•")
409
+
410
+ # Start llama.cpp server
411
+ if not args.no_server:
412
+ success = start_llama_server()
413
+ error_msg = None if success else "Failed to start llama.cpp server"
414
+ else:
415
+ error_msg = None
416
+
417
+ # Run selected mode
418
+ if args.mode == "gradio":
419
+ print(f"\nπŸ“Œ Mode: Gradio UI\n🌐 Access: http://localhost:{SERVER_PORT}\n")
420
+ demo = create_gradio_interface(error_msg)
421
+ demo.launch(server_name=SERVER_NAME, server_port=SERVER_PORT)
422
+
423
+ elif args.mode == "api":
424
+ print(f"\nπŸ“Œ Mode: API Server\nπŸ“‘ API: http://localhost:8000/api/chat\n")
425
+
426
+ if args.tunnel:
427
+ api_proc = Process(target=run_api_server)
428
+ api_proc.start()
429
+ time.sleep(3)
430
+
431
+ if args.tunnel == "ngrok":
432
+ start_ngrok_tunnel(8000)
433
+ else:
434
+ start_cloudflare_tunnel(8000)
435
+
436
+ try:
437
+ api_proc.join()
438
+ except KeyboardInterrupt:
439
+ api_proc.terminate()
440
+ else:
441
+ run_api_server()
442
+
443
+ elif args.mode == "both":
444
+ print(f"\nπŸ“Œ Mode: Both Gradio + API\n🎨 UI: http://localhost:{SERVER_PORT}\nπŸ“‘ API: http://localhost:8000\n")
445
+
446
+ gradio_proc = Process(target=lambda: create_gradio_interface(error_msg).launch(
447
+ server_name=SERVER_NAME, server_port=SERVER_PORT))
448
+ api_proc = Process(target=run_api_server)
449
+
450
+ gradio_proc.start()
451
+ api_proc.start()
452
+
453
+ if args.tunnel:
454
+ time.sleep(3)
455
+ if args.tunnel == "ngrok":
456
+ start_ngrok_tunnel(8000)
457
+ else:
458
+ start_cloudflare_tunnel(8000)
459
+
460
+ try:
461
+ gradio_proc.join()
462
+ api_proc.join()
463
+ except KeyboardInterrupt:
464
+ gradio_proc.terminate()
465
+ api_proc.terminate()
466
+
467
+ if __name__ == "__main__":
468
+ try:
469
+ main()
470
+ except KeyboardInterrupt:
471
+ print("\nπŸ‘‹ Shutting down...")
472
+ stop_llama_server()
473
+ ```
+
+ ### 🌐 Hugging Face Space Setup (Currently Broken)
+
+ **Current HF Space files:**
+ - Space: https://huggingface.co/spaces/SeccondDefense/Advanced_AI_Coder
+ - Error: 503 "Something went wrong when restarting this Space"
+ - Request ID: Root=1-68e635bc-446c06181ed65dee7e15e4f5
+
+ #### 4. app.py (HF Frontend - Currently uploaded to HF)
+ ```python
480
  """
481
  SuperCoder - Hugging Face Spaces Frontend
482
  Connects to your local API server via tunnel
 
490
  # ============================================================================
491
  API_URL = "https://inge-chalcographic-helene.ngrok-free.dev"
492
 
 
 
 
 
 
493
  # API Client Functions
 
494
  def call_api(message: str, temperature: float = 0.1, max_tokens: int = 512) -> str:
 
 
 
495
  try:
496
  response = requests.post(
497
  f"{API_URL}/api/chat",
 
503
  },
504
  timeout=60
505
  )
 
506
  if response.status_code == 200:
507
  result = response.json()
508
  return result.get("response", "No response from API")
509
  else:
510
  return f"❌ API Error ({response.status_code}): {response.text}"
 
511
  except requests.exceptions.Timeout:
512
  return "⏱️ Request timed out. The model might be processing a complex request."
 
513
  except requests.exceptions.ConnectionError:
514
  return "πŸ”Œ Connection failed. Please ensure your local API server is running."
 
515
  except Exception as e:
516
  return f"⚠️ Error: {str(e)}"
517
 
 
518
  def check_api_status() -> str:
 
519
  try:
520
  response = requests.get(f"{API_URL}/health", timeout=5)
521
  if response.status_code == 200:
 
529
  except:
530
  return "πŸ”΄ Not connected to API"
531
 
532
+ # Gradio Interface (Complete UI)
533
+ with gr.Blocks(title="SuperCoder Pro", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
534
+ gr.Markdown("# πŸ€– SuperCoder Pro\n### AI-Powered Coding Assistant")
535
+
536
  with gr.Row():
537
+ status_text = gr.Textbox(value=check_api_status(), label="πŸ”Œ API Status")
538
  refresh_btn = gr.Button("πŸ”„ Refresh Status", size="sm")
539
+
540
  with gr.Row():
 
541
  with gr.Column(scale=3):
542
+ chatbot = gr.Chatbot(label="πŸ’¬ Conversation", height=500)
543
  with gr.Row():
544
+ msg_input = gr.Textbox(placeholder="Ask me to write, explain, debug, or review code...", scale=5, lines=2)
545
  send_btn = gr.Button("Send πŸš€", scale=1, variant="primary")
546
+
 
547
  with gr.Column(scale=1):
548
  gr.Markdown("### βš™οΈ Settings")
549
+ temperature = gr.Slider(0.0, 1.0, value=0.1, step=0.05, label="🌑️ Temperature")
550
+ max_tokens = gr.Slider(128, 2048, value=512, step=128, label="πŸ“ Max Tokens")
551
  gr.Markdown("### 🎯 Quick Actions")
552
+ template_dropdown = gr.Dropdown(choices=["Explain Code", "Debug Code", "Write Function", "Optimize Code", "Add Comments"], label="Select Template")
553
  use_template_btn = gr.Button("Use Template", size="sm")
 
554
  clear_btn = gr.Button("πŸ—‘οΈ Clear Chat", variant="stop", size="sm")
555
+
556
+ # Event handlers
557
+ msg_input.submit(lambda m, h, t, mt: (h + [(m, call_api(m, t, mt))], ""),
558
+ [msg_input, chatbot, temperature, max_tokens], [chatbot, msg_input])
559
+ send_btn.click(lambda m, h, t, mt: (h + [(m, call_api(m, t, mt))], ""),
560
+ [msg_input, chatbot, temperature, max_tokens], [chatbot, msg_input])
561
+ refresh_btn.click(lambda: check_api_status(), outputs=status_text)
562
563
  if __name__ == "__main__":
564
+ demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, share=False)
565
+ ```
+
+ ### 🔧 Current Status
+
+ **✅ Working:**
+ - Local server: `python supercoder.py --mode api` runs successfully
+ - Ollama: `llama2:latest` model loaded and responding
+ - ngrok tunnel: https://inge-chalcographic-helene.ngrok-free.dev is active (probe sketch below)
+ - File syntax: all Python files parse correctly
+
+ **❌ Broken:**
+ - HF Space: 503 "Something went wrong when restarting this Space"
+ - File uploads: may not be working properly via the CLI
+ - Space deployment: fails to start
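+
+ As a quick sanity check of the "ngrok tunnel active" item above, the `/health` route defined in supercoder.py can be probed through the tunnel from any machine; a minimal sketch using the URL from this post:
+
+ ```python
+ # Probe the tunneled backend's /health route (defined in supercoder.py above).
+ import requests
+
+ r = requests.get("https://inge-chalcographic-helene.ngrok-free.dev/health", timeout=5)
+ print(r.status_code, r.json())  # expect 200 and {"status": "ok"} once Ollama reports a model
+ ```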
575
+ ### 🎯 Expected vs Actual
+
+ **Expected behavior:**
+ - HF Space loads the Gradio interface
+ - Connects to the local server via the tunnel
+ - Users can chat with the AI assistant
+ - Settings and templates work
+
+ **Actual behavior:**
+ - HF Space shows a 503 error
+ - The web interface cannot be accessed
+ - Files may not be properly deployed
+
+ ### ❓ Request for Fix
+ Please analyze all the provided code and tell me:
+
+ 1. What is causing the 503 error in the HF Space?
+ 2. How can the code be fixed so it works on HF Spaces?
+ 3. Are there missing dependencies or configuration issues?
+ 4. Should I modify the launch configuration for HF compatibility? (see the sketch below)
+ 5. Is there a better way to structure this for HF deployment?
+ 6. Please provide corrected, working code for both the local and HF files.
596
+
597
+ The local setup works perfectly - the issue is specifically with HF Space deployment.
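+
+ On question 4, here is a hedged sketch of the launch pattern usually recommended for Gradio SDK Spaces (the Space runtime supplies the host and port, so hard-coding them is optional, and `share=True` should not be used there); this is an illustration, not a confirmed fix for this particular 503:
+
+ ```python
+ # Sketch: minimal entry point for a Gradio SDK Space.
+ # The Space runtime sets the server name/port via environment, so a bare launch() is enough.
+ if __name__ == "__main__":
+     demo.launch(show_error=True)
+ ```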
598
+
599
+
600
+ ---