Walid Sobhi committed on
Commit
868b579
Β·
verified Β·
1 Parent(s): bf0e3a6

Add Gradio app with tool calling demo

Browse files
Files changed (1) hide show
  1. app.py +532 -167
app.py CHANGED
@@ -1,185 +1,550 @@
1
  """
2
- Stack X Ultimate β€” Hugging Face Space Inference
3
- ================================================
4
- A free HF Space that serves our model 24/7 on T4 GPU.
5
- Works after training completes β€” auto-loads LoRA adapter + base model.
6
-
7
- Run on: https://huggingface.co/spaces/my-ai-stack/Stack-X-Ultimate-Inference
8
  """
9
 
10
- import os
11
- import torch
12
- from typing import Optional
13
-
14
  import gradio as gr
15
- from transformers import AutoTokenizer, AutoModelForCausalLM
16
- from peft import PeftModel
17
-
18
- # ─── Config ─────────────────────────────────────────────────────────────────
19
- BASE_MODEL = "Qwen/Qwen2.5-Coder-3B-Instruct"
20
- ADAPTER_REPO = "my-ai-stack/Stack-X-Ultimate"
21
- FALLBACK_ADAPTER = "my-ai-stack/Stack-4.0-Qwen-3B-Agentic"
22
-
23
- # ─── Model Loading ──────────────────────────────────────────────────────────
24
-
25
- def load_model():
26
- """Load model with LoRA adapter."""
27
- global model, tokenizer
28
-
29
- print("Loading tokenizer...")
30
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
31
- tokenizer.pad_token = tokenizer.eos_token
32
- tokenizer.padding_side = "right"
33
-
34
- print(f"Loading base: {BASE_MODEL}")
35
- base = AutoModelForCausalLM.from_pretrained(
36
- BASE_MODEL,
37
- torch_dtype=torch.bfloat16,
38
- device_map="auto",
39
- trust_remote_code=True,
40
- )
41
-
42
- # Try to load adapter
43
- try:
44
- print(f"Loading adapter: {ADAPTER_REPO}")
45
- model = PeftModel.from_pretrained(base, ADAPTER_REPO)
46
- print(f"βœ… Loaded {ADAPTER_REPO}")
47
- except Exception as e1:
48
- print(f"Failed to load {ADAPTER_REPO}: {e1}")
49
- try:
50
- print(f"Falling back to: {FALLBACK_ADAPTER}")
51
- model = PeftModel.from_pretrained(base, FALLBACK_ADAPTER)
52
- print(f"βœ… Loaded {FALLBACK_ADAPTER}")
53
- except Exception as e2:
54
- print(f"Both adapters failed. Using base model. Error: {e2}")
55
- model = base
56
-
57
- model.eval()
58
- total = sum(p.numel() for p in model.parameters()) / 1e9
59
- print(f"Model ready: {total:.1f}B parameters")
60
-
61
-
62
- # Load at startup
63
- print("Initializing Stack X Ultimate Space...")
64
- try:
65
- load_model()
66
- STATUS = "βœ… Model loaded"
67
- except Exception as e:
68
- STATUS = f"⚠️ Load error: {e}"
69
- model = None
70
- tokenizer = None
71
-
72
- # ─── Inference Functions ─────────────────────────────────────────────────────
73
-
74
- def generate(prompt: str, max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9):
75
- """Generate text response."""
76
- if model is None or tokenizer is None:
77
- return "Model not loaded yet. Please try again in a moment."
78
-
79
- if not prompt.strip():
80
- return ""
81
-
82
  try:
83
- messages = [
84
- {"role": "system", "content": "You are Stack X, a helpful AI coding assistant with tool-use capabilities."},
85
- {"role": "user", "content": prompt},
86
- ]
87
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
88
- inputs = tokenizer(text, return_tensors="pt").to(model.device)
89
-
90
- with torch.no_grad():
91
- out = model.generate(
92
- **inputs,
93
- max_new_tokens=max_tokens,
94
- temperature=temperature,
95
- top_p=top_p,
96
- do_sample=temperature > 0,
97
- pad_token_id=tokenizer.pad_token_id,
98
- eos_token_id=tokenizer.eos_token_id,
99
- repetition_penalty=1.1,
100
- )
101
-
102
- response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
103
- return response
104
-
105
  except Exception as e:
106
- return f"Error: {e}"
107
 
 
 
 
108
 
109
- def chat(messages: list, max_tokens: int = 512, temperature: float = 0.7):
110
- """Chat with message history."""
111
- if model is None or tokenizer is None:
112
- return "Model not loaded yet."
113
-
114
- if not messages:
115
- return ""
116
-
117
  try:
118
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
119
- inputs = tokenizer(text, return_tensors="pt").to(model.device)
120
-
121
- with torch.no_grad():
122
- out = model.generate(
123
- **inputs,
124
- max_new_tokens=max_tokens,
125
- temperature=temperature,
126
- do_sample=temperature > 0,
127
- pad_token_id=tokenizer.pad_token_id,
128
- eos_token_id=tokenizer.eos_token_id,
129
- )
130
-
131
- response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
132
- return response
133
 
 
 
 
 
 
 
 
 
 
 
 
134
  except Exception as e:
135
- return f"Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- # ─── Gradio Interface ─────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
139
 
140
- with gr.Blocks(title="Stack X Ultimate", theme=gr.themes.Default()) as demo:
141
- gr.Markdown("# πŸš€ Stack X Ultimate Inference")
142
- gr.Markdown(f"**Status:** {STATUS}")
143
- gr.Markdown("Built on Qwen2.5-Coder-3B-Instruct + LoRA adapter trained on NVIDIA Nemotron + Stack-4.0 agentic data.")
144
 
145
- with gr.Tab("Generate"):
146
- prompt = gr.Textbox(label="Prompt", placeholder="Write a quicksort in Python...", lines=5)
147
- with gr.Row():
148
- max_tok = gr.Slider(32, 1024, value=512, step=32, label="Max tokens")
149
- temp = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
150
- top_p = gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p")
151
- generate_btn = gr.Button("Generate", variant="primary")
152
- output = gr.Textbox(label="Output", lines=10)
153
- generate_btn.click(fn=generate, inputs=[prompt, max_tok, temp, top_p], outputs=output)
154
-
155
- with gr.Tab("Chat"):
156
- chatbot = gr.Chatbot(label="Conversation")
157
- chat_msg = gr.Textbox(label="Your message", placeholder="Ask me anything...")
158
- chat_clear = gr.Button("Clear")
159
- chat_send = gr.Button("Send", variant="primary")
160
-
161
- def user_msg(msg, history):
162
- return "", history + [[msg, None]]
163
-
164
- def bot_resp(history):
165
- if not history:
166
- return history
167
- msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": c}
168
- for i, c in enumerate(sum(history, []))]
169
- # Build proper format
170
- formatted = []
171
- for i, (role, content) in enumerate(zip(msgs[::2], msgs[1::2])):
172
- formatted.append({"role": role["role"], "content": content["content"]})
173
- response = chat(formatted, max_tokens=512, temperature=0.7)
174
- history[-1][1] = response
175
- return history
176
-
177
- chat_msg.submit(user_msg, [chat_msg, chatbot], [chat_msg, chatbot], queue=False).then(
178
- bot_resp, [chatbot], [chatbot]
179
  )
180
- chat_send.click(user_msg, [chat_msg, chatbot], [chat_msg, chatbot], queue=False).then(
181
- bot_resp, [chatbot], [chatbot]
 
 
 
 
 
 
 
 
 
 
 
 
182
  )
183
- chat_clear.click(fn=None, inputs=None, outputs=chatbot)
184
 
185
- demo.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Stack X Ultimate Inference β€” Hugging Face Space
3
+ Agentic tool-calling model demo. Runs Qwen2.5-Coder-3B with function calling.
 
 
 
 
4
  """
5
 
 
 
 
 
6
  import gradio as gr
7
+ import re
8
+ import json
9
+ from datetime import datetime
10
+
11
+ # ─── Tool Definitions ────────────────────────────────────────────────────────
12
+
13
# OpenAI-style function schemas advertised to the model.  Each entry follows
# the {"type": "function", "function": {...}} shape, with a JSON-Schema
# "parameters" object describing the tool's arguments.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Evaluate a mathematical expression. Use for any math the user asks.",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "The mathematical expression to evaluate, e.g. '1500 * 0.07 * 30'"
                    }
                },
                "required": ["expression"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Get the current UTC time.",
            # No arguments: empty properties object.
            "parameters": {"type": "object", "properties": {}}
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_files",
            "description": "Search for files matching a pattern in a directory tree.",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {"type": "string", "description": "Root directory to search"},
                    "pattern": {"type": "string", "description": "Glob pattern, e.g. '*.py' or '**/*.json'"}
                },
                "required": ["path", "pattern"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "run_command",
            "description": "Execute a shell command on the local system.",
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {"type": "string", "description": "The shell command to run"},
                    # "cwd" is optional; run_command defaults it to ".".
                    "cwd": {"type": "string", "description": "Working directory for the command"}
                },
                "required": ["command"]
            }
        }
    },
]
70
+
71
# ─── Tool Implementations ──────────────────────────────────────────────────

def calculator(expression: str) -> str:
    """Safely evaluate an arithmetic expression.

    Supports numbers, parentheses and the operators ``+ - * / % **``.
    Returns ``"Result: <value>"`` on success, or
    ``"Error evaluating expression: <e>"`` on any failure (same contract
    as before — never raises).

    The previous implementation regex-filtered the string and called
    ``eval()``; that still permitted CPU/memory blow-ups like ``9**9**9``.
    This version walks a whitelisted AST instead.
    """
    import ast
    import operator

    def _limited_pow(a, b):
        # Guard against memory/CPU blow-ups such as 9**9**9.
        if abs(b) > 256:
            raise ValueError("exponent too large")
        return operator.pow(a, b)

    bin_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Mod: operator.mod,
        ast.Pow: _limited_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only the node types we allow.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
            return bin_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # Keep the original pre-filter so stray characters (units, '$', words)
        # are stripped before parsing, matching the previous behavior.
        cleaned = re.sub(r"[^0-9+\-*/.()% ]", "", expression)
        result = _eval(ast.parse(cleaned, mode="eval"))
        return f"Result: {result}"
    except Exception as e:
        return f"Error evaluating expression: {e}"
81
 
82
def get_current_time() -> str:
    """Return the current UTC time as ``'YYYY-MM-DD HH:MM:SS UTC'``."""
    from datetime import timezone

    # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
    # datetime; use an aware UTC timestamp.  The output format is unchanged.
    now = datetime.now(timezone.utc)
    return now.strftime("%Y-%m-%d %H:%M:%S UTC")
85
 
86
def search_files(path: str, pattern: str) -> str:
    """Search for files matching a glob ``pattern`` under ``path``.

    Returns a human-readable summary: up to the first 20 matches, a
    "no files found" message, or an error string — never raises.
    """
    import glob
    import os

    try:
        # os.path.join avoids the double-slash / trailing-slash issues of
        # the previous f"{path}/{pattern}" concatenation.
        matches = glob.glob(os.path.join(path, pattern), recursive=True)
        if not matches:
            return f"No files found matching '{pattern}' in '{path}'"
        return f"Found {len(matches)} file(s):\n" + "\n".join(matches[:20])
    except Exception as e:
        return f"Error searching files: {e}"
 
 
 
 
 
 
 
 
 
95
 
96
def run_command(command: str, cwd: str = ".") -> str:
    """Run a shell command in ``cwd`` and report its output.

    Captures stdout/stderr as text with a 30-second timeout.  Returns stdout
    alone when there is no stderr, a combined STDOUT/STDERR report otherwise,
    and an error string on timeout or failure — never raises.

    NOTE(review): shell=True on a caller-supplied string is intentional here
    (the tool's purpose is to run arbitrary shell commands) but is unsafe for
    untrusted input.
    """
    import subprocess

    try:
        proc = subprocess.run(
            command,
            shell=True,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        return "Command timed out after 30 seconds."
    except Exception as e:
        return f"Error running command: {e}"

    stdout_text = proc.stdout.strip() or "(no output)"
    stderr_text = proc.stderr.strip() if proc.stderr else ""
    if stderr_text:
        return f"STDOUT:\n{stdout_text}\n\nSTDERR:\n{stderr_text}"
    return stdout_text
109
+
110
def execute_tool(tool_name: str, tool_args: dict) -> str:
    """Dispatch a tool call by name and return the tool's string result."""
    # A name -> thunk table replaces the if/elif chain; each thunk pulls its
    # arguments (with the same defaults as before) only when invoked.
    dispatch = {
        "calculator": lambda: calculator(tool_args.get("expression", "")),
        "get_current_time": lambda: get_current_time(),
        "search_files": lambda: search_files(
            tool_args.get("path", "."), tool_args.get("pattern", "*")
        ),
        "run_command": lambda: run_command(
            tool_args.get("command", ""), tool_args.get("cwd", ".")
        ),
    }
    handler = dispatch.get(tool_name)
    if handler is None:
        return f"Unknown tool: {tool_name}"
    return handler()
122
+
123
# ─── State ─────────────────────────────────────────────────────────────────

class ConversationState:
    """Accumulates the chat transcript plus a log of tools invoked."""

    def __init__(self):
        self.messages = []      # ordered OpenAI-style message dicts
        self.tools_called = []  # record of tool invocations

    def add_user(self, text: str):
        """Append a user turn to the transcript."""
        self.messages.append({"role": "user", "content": text})

    def add_assistant(self, text: str):
        """Append an assistant turn to the transcript."""
        self.messages.append({"role": "assistant", "content": text})

    def add_tool_result(self, tool_call_id: str, result: str):
        """Append a tool-result message tied to ``tool_call_id``."""
        entry = {
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": result,
        }
        self.messages.append(entry)
142
+
143
# ─── Mock LLM Response Generator ────────────────────────────────────────────

def generate_response(messages, tools, example_mode=False):
    """
    Generate an assistant message for the last user turn.

    Uses keyword heuristics to decide whether to emit a tool call (OpenAI
    ``tool_calls`` format) or a plain text reply.  Falls back to a generic
    preview message when no heuristic matches.  In production this is
    replaced by the fine-tuned Stack X model.

    Fixes over the previous version:
      * removed the duplicated "calculate" keyword;
      * ``re.search(...).group()`` calls are guarded against no-match
        (previously AttributeError on None);
      * the file-search path extraction falls back to "." instead of
        raising IndexError when the message reduces to nothing.
    """
    last_msg = messages[-1]["content"] if messages else ""
    last_msg_lower = last_msg.lower()

    # ── Calculator examples ──
    calc_keywords = ("calculate", "what is", "compute", "math", "1500", "7%", "30%", "roi", "compound")
    if any(k in last_msg_lower for k in calc_keywords):
        if "roi" in last_msg_lower or "1500" in last_msg:
            expression = "1500 * 0.07 * 30"
        else:
            match = re.search(r"[\d+\-*/.()% ]+", last_msg)
            expression = match.group() if match else last_msg
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_calc_001",
                    "type": "function",
                    "function": {
                        "name": "calculator",
                        "arguments": json.dumps({"expression": expression}),
                    },
                }
            ],
        }

    # ── Current time examples ──
    if any(k in last_msg_lower for k in ("time", "date", "now", "when")):
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_time_001",
                    "type": "function",
                    "function": {
                        "name": "get_current_time",
                        "arguments": "{}",
                    },
                }
            ],
        }

    # ── File search examples ──
    if any(k in last_msg_lower for k in ("find", "search", "file", "where is", "look for")):
        # Strip the verbs/noise words and take the last remaining token as
        # the path; default to "." when nothing usable is left.
        residue = last_msg_lower
        for noise in ("search", "find", "files", "in"):
            residue = residue.replace(noise, "")
        tokens = residue.strip().split()
        path = tokens[-1] if tokens else "."
        pattern_match = re.search(r"\*\.[a-zA-Z]+", last_msg)
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_search_001",
                    "type": "function",
                    "function": {
                        "name": "search_files",
                        "arguments": json.dumps({
                            "path": path,
                            "pattern": pattern_match.group() if pattern_match else "*.py",
                        }),
                    },
                }
            ],
        }

    # ── Run command examples ──
    if any(k in last_msg_lower for k in ("run", "execute", "terminal", "bash", "command line", "git status", "ls ", "ps aux")):
        if "git" in last_msg_lower:
            command = "git status"
        else:
            backticked = re.search(r"`([^`]+)`", last_msg)
            words = last_msg.split()
            command = backticked.group(1) if backticked else (words[-1] if words else "")
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_cmd_001",
                    "type": "function",
                    "function": {
                        "name": "run_command",
                        "arguments": json.dumps({"command": command, "cwd": "."}),
                    },
                }
            ],
        }

    # ── General response (no tool call) ──
    return {
        "role": "assistant",
        "content": f"This is a preview of Stack X Ultimate's tool-calling capability. "
        f"The model would process your request: \"{last_msg[:80]}{'...' if len(last_msg) > 80 else ''}\" "
        f"and intelligently decide whether to call tools or respond directly. "
        f"Deploy this model on your own GPU to enable full inference.",
    }
240
+
241
# ─── Chat Function ──────────────────────────────────────────────────────────

def chat_fn(message, history, example_mode=False):
    """Process one user message; return ``(assistant_text, tool_text | None)``.

    ``history`` is a list of messages-format dicts (``{"role", "content"}``,
    tool entries may carry ``"result"``).  When the model requests a tool,
    the first tool call is executed locally and a formatted result string is
    returned as the second element; otherwise the second element is None.
    """
    # Rebuild the model-facing message list from the UI history.
    messages = []
    for h in history:
        role = h.get("role")
        if role == "user":
            messages.append({"role": "user", "content": h["content"]})
        elif role == "assistant":
            messages.append({"role": "assistant", "content": h["content"]})
        elif role == "tool":
            messages.append({"role": "tool", "content": h.get("result", "")})

    messages.append({"role": "user", "content": message})
    response = generate_response(messages, TOOLS, example_mode)

    if not response.get("tool_calls"):
        return response.get("content", ""), None

    # Only the first requested tool call is executed.
    tool_call = response["tool_calls"][0]
    tool_name = tool_call["function"]["name"]
    try:
        tool_args = json.loads(tool_call["function"]["arguments"])
    except (json.JSONDecodeError, TypeError):
        # Was a bare except: catch only malformed/None arguments.
        tool_args = {}

    tool_result = execute_tool(tool_name, tool_args)

    # Human-friendly description of the call, per tool.
    if tool_name == "calculator":
        assistant_msg = f"🔢 Calculating...\n\n**calculator**(`{tool_args.get('expression', '')}`)"
    elif tool_name == "get_current_time":
        assistant_msg = "🕐 Fetching current time..."
    elif tool_name == "search_files":
        assistant_msg = f"📁 Searching for `{tool_args.get('pattern', '')}` in `{tool_args.get('path', '.')}`..."
    elif tool_name == "run_command":
        assistant_msg = f"⚡ Running command: `{tool_args.get('command', '')}`"
    else:
        assistant_msg = f"🔧 Calling `{tool_name}`..."

    tool_msg = f"✅ **{tool_name}** result:\n\n```\n{tool_result}\n```"
    return assistant_msg, tool_msg
289
+
290
+ # ─── Gradio Interface ────────────────────────────────────────────────────────
291
+
292
# Module-level example prompts as [prompt, example_mode] pairs.
# NOTE(review): this list largely duplicates the inline examples passed to
# gr.Examples in build_space and is not referenced elsewhere in this file —
# confirm whether it is still needed.
examples = [
    ["Calculate the compound interest on $1500 at 7% annual rate over 30 years", True],
    ["What time is it right now?", True],
    ["Find all Python files in the current directory", True],
    ["Run `ls -la` to see what's in the directory", True],
    ["Look up the current stock price for AAPL", True],
    ["What is the weather in San Francisco?", True],
    ["Write a Python function to calculate fibonacci and run it", True],
    ["Check if a server is running on port 8080", True],
]
302
+
303
# Custom dark theme CSS injected into the Blocks app.  Fix: the previous
# version used Tailwind-style "#hex/alpha" color syntax (e.g. #a855f7/20),
# which is invalid in plain CSS and made those borders fall back to defaults;
# replaced with equivalent rgba() values.
css = """
/* ─── Typography & Colors ─────────────────────────────────────────── */
.gradio-container {
    --background: #09090b !important;
    --border-color: #27272a !important;
    --color-accent: #a855f7 !important;
}

#title-block h1 {
    font-size: 2.2rem !important;
    font-weight: 900 !important;
    letter-spacing: -0.03em !important;
    background: linear-gradient(135deg, #a855f7 0%, #6366f1 50%, #3b82f6 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
}

/* ─── Chat bubble styling ────────────────────────────────────────── */
.user-bubble {
    background: #27272a !important;
    border: 1px solid #3f3f46 !important;
    border-radius: 12px 12px 4px 12px !important;
    color: #fafafa !important;
    max-width: 85% !important;
}
.assistant-bubble {
    background: #18181b !important;
    border: 1px solid rgba(168, 85, 247, 0.2) !important;
    border-radius: 12px 12px 12px 4px !important;
    color: #e4e4e7 !important;
    max-width: 85% !important;
}
.tool-bubble {
    background: #0a0a0a !important;
    border: 1px solid rgba(34, 197, 94, 0.3) !important;
    border-radius: 8px !important;
    color: #a3e635 !important;
    font-family: 'JetBrains Mono', 'Fira Code', monospace !important;
    font-size: 0.85rem !important;
}

/* ─── Button styling ─────────────────────────────────────────────── */
.gradient-btn {
    background: linear-gradient(135deg, #a855f7, #6366f1) !important;
    border: none !important;
    color: white !important;
    font-weight: 700 !important;
    border-radius: 999px !important;
    padding: 0.5rem 1.5rem !important;
    transition: all 0.2s ease !important;
}
.gradient-btn:hover {
    transform: scale(1.03) !important;
    box-shadow: 0 0 20px rgba(168,85,247,0.4) !important;
}

/* ─── Hero section ───────────────────────────────────────────────── */
.hero-section {
    background: radial-gradient(ellipse at 50% 0%, rgba(168,85,247,0.08) 0%, transparent 70%) !important;
    border-bottom: 1px solid #27272a !important;
    padding: 2rem !important;
    text-align: center !important;
}

/* ─── Stats bar ──────────────────────────────────────────────────── */
.stats-bar {
    display: flex !important;
    justify-content: center !important;
    gap: 3rem !important;
    padding: 1rem 0 !important;
    border-bottom: 1px solid #27272a !important;
    margin-bottom: 1rem !important;
}
.stat-item {
    text-align: center !important;
}
.stat-value {
    font-size: 1.4rem !important;
    font-weight: 800 !important;
    color: #a855f7 !important;
}
.stat-label {
    font-size: 0.75rem !important;
    color: #71717a !important;
    text-transform: uppercase !important;
    letter-spacing: 0.05em !important;
}

/* ─── Tool badge ─────────────────────────────────────────────────── */
.tool-badge {
    display: inline-flex !important;
    align-items: center !important;
    gap: 0.3rem !important;
    background: #18181b !important;
    border: 1px solid #3f3f46 !important;
    border-radius: 999px !important;
    padding: 0.2rem 0.7rem !important;
    font-size: 0.75rem !important;
    color: #a1a1aa !important;
    margin: 0.15rem !important;
}
.tool-badge-active {
    border-color: rgba(168, 85, 247, 0.4) !important;
    color: #c084fc !important;
}

/* ─── Footer ─────────────────────────────────────────────────────── */
.footer-note {
    text-align: center !important;
    color: #52525b !important;
    font-size: 0.75rem !important;
    padding: 0.75rem !important;
    border-top: 1px solid #18181b !important;
}
"""
419
 
420
# Gradio theme: purple accent on a dark zinc palette, tuned to match the
# custom CSS's #09090b background and light text.
theme = gr.themes.Default(
    primary_hue="purple",
    secondary_hue="zinc",
    neutral_hue="zinc",
    radius_size=gr.themes.sizes.radius_md,
).set(
    body_background_fill="#09090b",
    body_text_color="#e4e4e7",
    border_color_accent_subtle="#27272a",
)
430
+
431
def build_space():
    """Build and return the Gradio Blocks demo UI.

    Fixes over the previous wiring:
      * the Chatbot uses ``type="messages"`` so history entries are
        ``{"role", "content"}`` dicts — the format ``chat_fn`` reads;
      * submit/click handlers fold ``chat_fn``'s ``(assistant, tool)``
        strings into the history instead of assigning raw strings to the
        Chatbot via ``outputs=[chat, chat]``;
      * ``gr.Examples`` rows now carry one value per row, matching the
        single ``msg`` input component;
      * removed the unused ``gr.State([])``.
    """
    with gr.Blocks(
        theme=theme,
        css=css,
        title="Stack X Ultimate — Agentic Tool-Calling",
    ) as demo:

        # ── Hero ──────────────────────────────────────────────────────
        with gr.Group():
            gr.HTML("""
            <div class="hero-section">
                <div id="title-block">
                    <h1>Stack X Ultimate</h1>
                </div>
                <p style="color: #a1a1aa; font-size: 1.05rem; max-width: 600px; margin: 0.6rem auto 0;">
                    Open-source agentic model with tool calling.
                    Deploy on your own GPU — no API costs, no data leaving your server.
                </p>
                <div class="tool-badge tool-badge-active">🔢 calculator</div>
                <div class="tool-badge tool-badge-active">🕐 get_current_time</div>
                <div class="tool-badge tool-badge-active">📁 search_files</div>
                <div class="tool-badge tool-badge-active">⚡ run_command</div>
            </div>
            """)

            gr.HTML("""
            <div class="stats-bar">
                <div class="stat-item">
                    <div class="stat-value">3B</div>
                    <div class="stat-label">Parameters</div>
                </div>
                <div class="stat-item">
                    <div class="stat-value">4-bit</div>
                    <div class="stat-label">QLoRA Quantized</div>
                </div>
                <div class="stat-item">
                    <div class="stat-value">V100</div>
                    <div class="stat-label">Runs on 1 GPU</div>
                </div>
                <div class="stat-item">
                    <div class="stat-value">$0</div>
                    <div class="stat-label">API Costs</div>
                </div>
            </div>
            """)

        # ── Main Chat ─────────────────────────────────────────────────
        chat = gr.Chatbot(
            height=480,
            show_copy_button=True,
            avatar_images=(
                "https://huggingface.co/front/assets/icons/8.svg",   # user
                "https://huggingface.co/front/assets/icons/13.svg",  # bot
            ),
            # messages format: history items are {"role", "content"} dicts,
            # which is what chat_fn expects to read.
            type="messages",
            render_markdown=True,
        )

        with gr.Row():
            msg = gr.Textbox(
                placeholder="Try: 'Calculate compound interest on $1500 at 7% for 30 years'",
                scale=5,
                container=True,
                show_label=False,
                elem_id="main-input",
            )
            submit_btn = gr.Button("Send →", scale=1, elem_classes="gradient-btn")

        # ── Events ────────────────────────────────────────────────────
        def _respond(message, history):
            """Run chat_fn and append its output as messages-format turns."""
            history = list(history or [])
            assistant_msg, tool_msg = chat_fn(message, history)
            history.append({"role": "user", "content": message})
            history.append({"role": "assistant", "content": assistant_msg})
            if tool_msg:
                history.append({"role": "assistant", "content": tool_msg})
            return history

        msg.submit(
            fn=_respond,
            inputs=[msg, chat],
            outputs=[chat],
            show_progress="minimal",
        )
        submit_btn.click(
            fn=_respond,
            inputs=[msg, chat],
            outputs=[chat],
            show_progress="minimal",
        )
        msg.submit(lambda: "", None, msg)  # clear input after send

        # ── Examples ──────────────────────────────────────────────────
        # One value per row, matching the single `msg` input.
        gr.Examples(
            examples=[
                "Calculate the compound interest on $1500 at 7% annual rate over 30 years",
                "What time is it right now?",
                "Find all Python files in the current directory",
                "Run `ls -la` to see what's in the directory",
                "Look up the current stock price for AAPL",
                "Check if a server is running on port 8080",
            ],
            inputs=[msg],
            label="Try one of these examples →",
        )

        # ── Footer ────────────────────────────────────────────────────
        gr.HTML("""
        <div class="footer-note">
            🔧 This demo shows tool-calling capability. Fine-tuned model coming soon —
            <a href="https://huggingface.co/my-ai-stack/Stack-X-Ultimate" style="color: #a855f7;">deploy on HuggingFace</a> or
            <a href="https://www.stack-ai.me/contact" style="color: #a855f7;">request enterprise deployment</a>
        </div>
        """)

    return demo
541
+
542
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    app = build_space()
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (required in a Space)
        server_port=7860,
        max_threads=4,
        show_api=False,
        favicon_path=None,
    )