Walid Sobhi committed on
Commit
868b579
Β·
verified Β·
1 Parent(s): bf0e3a6

Add Gradio app with tool calling demo

Browse files
Files changed (1) hide show
  1. app.py +532 -167
app.py CHANGED
@@ -1,185 +1,550 @@
1
  """
2
- Stack X Ultimate β€” Hugging Face Space Inference
3
- ================================================
4
- A free HF Space that serves our model 24/7 on T4 GPU.
5
- Works after training completes β€” auto-loads LoRA adapter + base model.
6
-
7
- Run on: https://huggingface.co/spaces/my-ai-stack/Stack-X-Ultimate-Inference
8
  """
9
 
10
- import os
11
- import torch
12
- from typing import Optional
13
-
14
  import gradio as gr
15
- from transformers import AutoTokenizer, AutoModelForCausalLM
16
- from peft import PeftModel
17
-
18
- # ─── Config ─────────────────────────────────────────────────────────────────
19
- BASE_MODEL = "Qwen/Qwen2.5-Coder-3B-Instruct"
20
- ADAPTER_REPO = "my-ai-stack/Stack-X-Ultimate"
21
- FALLBACK_ADAPTER = "my-ai-stack/Stack-4.0-Qwen-3B-Agentic"
22
-
23
- # ─── Model Loading ──────────────────────────────────────────────────────────
24
-
25
- def load_model():
26
- """Load model with LoRA adapter."""
27
- global model, tokenizer
28
-
29
- print("Loading tokenizer...")
30
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
31
- tokenizer.pad_token = tokenizer.eos_token
32
- tokenizer.padding_side = "right"
33
-
34
- print(f"Loading base: {BASE_MODEL}")
35
- base = AutoModelForCausalLM.from_pretrained(
36
- BASE_MODEL,
37
- torch_dtype=torch.bfloat16,
38
- device_map="auto",
39
- trust_remote_code=True,
40
- )
41
-
42
- # Try to load adapter
43
- try:
44
- print(f"Loading adapter: {ADAPTER_REPO}")
45
- model = PeftModel.from_pretrained(base, ADAPTER_REPO)
46
- print(f"βœ… Loaded {ADAPTER_REPO}")
47
- except Exception as e1:
48
- print(f"Failed to load {ADAPTER_REPO}: {e1}")
49
- try:
50
- print(f"Falling back to: {FALLBACK_ADAPTER}")
51
- model = PeftModel.from_pretrained(base, FALLBACK_ADAPTER)
52
- print(f"βœ… Loaded {FALLBACK_ADAPTER}")
53
- except Exception as e2:
54
- print(f"Both adapters failed. Using base model. Error: {e2}")
55
- model = base
56
-
57
- model.eval()
58
- total = sum(p.numel() for p in model.parameters()) / 1e9
59
- print(f"Model ready: {total:.1f}B parameters")
60
-
61
-
62
- # Load at startup
63
- print("Initializing Stack X Ultimate Space...")
64
- try:
65
- load_model()
66
- STATUS = "βœ… Model loaded"
67
- except Exception as e:
68
- STATUS = f"⚠️ Load error: {e}"
69
- model = None
70
- tokenizer = None
71
-
72
- # ─── Inference Functions ─────────────────────────────────────────────────────
73
-
74
- def generate(prompt: str, max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9):
75
- """Generate text response."""
76
- if model is None or tokenizer is None:
77
- return "Model not loaded yet. Please try again in a moment."
78
-
79
- if not prompt.strip():
80
- return ""
81
-
82
  try:
83
- messages = [
84
- {"role": "system", "content": "You are Stack X, a helpful AI coding assistant with tool-use capabilities."},
85
- {"role": "user", "content": prompt},
86
- ]
87
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
88
- inputs = tokenizer(text, return_tensors="pt").to(model.device)
89
-
90
- with torch.no_grad():
91
- out = model.generate(
92
- **inputs,
93
- max_new_tokens=max_tokens,
94
- temperature=temperature,
95
- top_p=top_p,
96
- do_sample=temperature > 0,
97
- pad_token_id=tokenizer.pad_token_id,
98
- eos_token_id=tokenizer.eos_token_id,
99
- repetition_penalty=1.1,
100
- )
101
-
102
- response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
103
- return response
104
-
105
  except Exception as e:
106
- return f"Error: {e}"
107
 
 
 
 
108
 
109
- def chat(messages: list, max_tokens: int = 512, temperature: float = 0.7):
110
- """Chat with message history."""
111
- if model is None or tokenizer is None:
112
- return "Model not loaded yet."
113
-
114
- if not messages:
115
- return ""
116
-
117
  try:
118
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
119
- inputs = tokenizer(text, return_tensors="pt").to(model.device)
120
-
121
- with torch.no_grad():
122
- out = model.generate(
123
- **inputs,
124
- max_new_tokens=max_tokens,
125
- temperature=temperature,
126
- do_sample=temperature > 0,
127
- pad_token_id=tokenizer.pad_token_id,
128
- eos_token_id=tokenizer.eos_token_id,
129
- )
130
-
131
- response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
132
- return response
133
 
 
 
 
 
 
 
 
 
 
 
 
134
  except Exception as e:
135
- return f"Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- # ─── Gradio Interface ─────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
139
 
140
- with gr.Blocks(title="Stack X Ultimate", theme=gr.themes.Default()) as demo:
141
- gr.Markdown("# πŸš€ Stack X Ultimate Inference")
142
- gr.Markdown(f"**Status:** {STATUS}")
143
- gr.Markdown("Built on Qwen2.5-Coder-3B-Instruct + LoRA adapter trained on NVIDIA Nemotron + Stack-4.0 agentic data.")
144
 
145
- with gr.Tab("Generate"):
146
- prompt = gr.Textbox(label="Prompt", placeholder="Write a quicksort in Python...", lines=5)
147
- with gr.Row():
148
- max_tok = gr.Slider(32, 1024, value=512, step=32, label="Max tokens")
149
- temp = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
150
- top_p = gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p")
151
- generate_btn = gr.Button("Generate", variant="primary")
152
- output = gr.Textbox(label="Output", lines=10)
153
- generate_btn.click(fn=generate, inputs=[prompt, max_tok, temp, top_p], outputs=output)
154
-
155
- with gr.Tab("Chat"):
156
- chatbot = gr.Chatbot(label="Conversation")
157
- chat_msg = gr.Textbox(label="Your message", placeholder="Ask me anything...")
158
- chat_clear = gr.Button("Clear")
159
- chat_send = gr.Button("Send", variant="primary")
160
-
161
- def user_msg(msg, history):
162
- return "", history + [[msg, None]]
163
-
164
- def bot_resp(history):
165
- if not history:
166
- return history
167
- msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": c}
168
- for i, c in enumerate(sum(history, []))]
169
- # Build proper format
170
- formatted = []
171
- for i, (role, content) in enumerate(zip(msgs[::2], msgs[1::2])):
172
- formatted.append({"role": role["role"], "content": content["content"]})
173
- response = chat(formatted, max_tokens=512, temperature=0.7)
174
- history[-1][1] = response
175
- return history
176
-
177
- chat_msg.submit(user_msg, [chat_msg, chatbot], [chat_msg, chatbot], queue=False).then(
178
- bot_resp, [chatbot], [chatbot]
179
  )
180
- chat_send.click(user_msg, [chat_msg, chatbot], [chat_msg, chatbot], queue=False).then(
181
- bot_resp, [chatbot], [chatbot]
 
 
 
 
 
 
 
 
 
 
 
 
182
  )
183
- chat_clear.click(fn=None, inputs=None, outputs=chatbot)
184
 
185
- demo.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Stack X Ultimate Inference β€” Hugging Face Space
3
+ Agentic tool-calling model demo. Runs Qwen2.5-Coder-3B with function calling.
 
 
 
 
4
  """
5
 
 
 
 
 
6
  import gradio as gr
7
+ import re
8
+ import json
9
+ from datetime import datetime
10
+
11
+ # ─── Tool Definitions ────────────────────────────────────────────────────────
12
+
13
# OpenAI-style function schemas advertised to the model.  Each entry follows
# the {"type": "function", "function": {...}} shape, with a JSON-Schema
# "parameters" object describing the tool's arguments.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Evaluate a mathematical expression. Use for any math the user asks.",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "The mathematical expression to evaluate, e.g. '1500 * 0.07 * 30'"
                    }
                },
                "required": ["expression"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Get the current UTC time.",
            # No arguments: empty properties object.
            "parameters": {"type": "object", "properties": {}}
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_files",
            "description": "Search for files matching a pattern in a directory tree.",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {"type": "string", "description": "Root directory to search"},
                    "pattern": {"type": "string", "description": "Glob pattern, e.g. '*.py' or '**/*.json'"}
                },
                "required": ["path", "pattern"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "run_command",
            "description": "Execute a shell command on the local system.",
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {"type": "string", "description": "The shell command to run"},
                    # "cwd" is optional; run_command defaults it to ".".
                    "cwd": {"type": "string", "description": "Working directory for the command"}
                },
                "required": ["command"]
            }
        }
    },
]
70
+
71
# ─── Tool Implementations ──────────────────────────────────────────────────

def calculator(expression: str) -> str:
    """Safely evaluate an arithmetic expression.

    Supports numbers, parentheses and the operators ``+ - * / % **``.
    Returns ``"Result: <value>"`` on success, or
    ``"Error evaluating expression: <e>"`` on any failure (same contract
    as before — never raises).

    The previous implementation regex-filtered the string and called
    ``eval()``; that still permitted CPU/memory blow-ups like ``9**9**9``.
    This version walks a whitelisted AST instead.
    """
    import ast
    import operator

    def _limited_pow(a, b):
        # Guard against memory/CPU blow-ups such as 9**9**9.
        if abs(b) > 256:
            raise ValueError("exponent too large")
        return operator.pow(a, b)

    bin_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Mod: operator.mod,
        ast.Pow: _limited_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only the node types we allow.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
            return bin_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # Keep the original pre-filter so stray characters (units, '$', words)
        # are stripped before parsing, matching the previous behavior.
        cleaned = re.sub(r"[^0-9+\-*/.()% ]", "", expression)
        result = _eval(ast.parse(cleaned, mode="eval"))
        return f"Result: {result}"
    except Exception as e:
        return f"Error evaluating expression: {e}"
81
 
82
def get_current_time() -> str:
    """Return the current UTC time as ``'YYYY-MM-DD HH:MM:SS UTC'``."""
    from datetime import timezone

    # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
    # datetime; use an aware UTC timestamp.  The output format is unchanged.
    now = datetime.now(timezone.utc)
    return now.strftime("%Y-%m-%d %H:%M:%S UTC")
85
 
86
def search_files(path: str, pattern: str) -> str:
    """Search for files matching a glob ``pattern`` under ``path``.

    Returns a human-readable summary: up to the first 20 matches, a
    "no files found" message, or an error string — never raises.
    """
    import glob
    import os

    try:
        # os.path.join avoids the double-slash / trailing-slash issues of
        # the previous f"{path}/{pattern}" concatenation.
        matches = glob.glob(os.path.join(path, pattern), recursive=True)
        if not matches:
            return f"No files found matching '{pattern}' in '{path}'"
        return f"Found {len(matches)} file(s):\n" + "\n".join(matches[:20])
    except Exception as e:
        return f"Error searching files: {e}"
 
 
 
 
 
 
 
 
 
95
 
96
def run_command(command: str, cwd: str = ".") -> str:
    """Run a shell command in ``cwd`` and report its output.

    Captures stdout/stderr as text with a 30-second timeout.  Returns stdout
    alone when there is no stderr, a combined STDOUT/STDERR report otherwise,
    and an error string on timeout or failure — never raises.

    NOTE(review): shell=True on a caller-supplied string is intentional here
    (the tool's purpose is to run arbitrary shell commands) but is unsafe for
    untrusted input.
    """
    import subprocess

    try:
        proc = subprocess.run(
            command,
            shell=True,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        return "Command timed out after 30 seconds."
    except Exception as e:
        return f"Error running command: {e}"

    stdout_text = proc.stdout.strip() or "(no output)"
    stderr_text = proc.stderr.strip() if proc.stderr else ""
    if stderr_text:
        return f"STDOUT:\n{stdout_text}\n\nSTDERR:\n{stderr_text}"
    return stdout_text
109
+
110
def execute_tool(tool_name: str, tool_args: dict) -> str:
    """Dispatch a tool call by name and return the tool's string result."""
    # A name -> thunk table replaces the if/elif chain; each thunk pulls its
    # arguments (with the same defaults as before) only when invoked.
    dispatch = {
        "calculator": lambda: calculator(tool_args.get("expression", "")),
        "get_current_time": lambda: get_current_time(),
        "search_files": lambda: search_files(
            tool_args.get("path", "."), tool_args.get("pattern", "*")
        ),
        "run_command": lambda: run_command(
            tool_args.get("command", ""), tool_args.get("cwd", ".")
        ),
    }
    handler = dispatch.get(tool_name)
    if handler is None:
        return f"Unknown tool: {tool_name}"
    return handler()
122
+
123
# ─── State ─────────────────────────────────────────────────────────────────

class ConversationState:
    """Accumulates the chat transcript plus a log of tools invoked."""

    def __init__(self):
        self.messages = []      # ordered OpenAI-style message dicts
        self.tools_called = []  # record of tool invocations

    def add_user(self, text: str):
        """Append a user turn to the transcript."""
        self.messages.append({"role": "user", "content": text})

    def add_assistant(self, text: str):
        """Append an assistant turn to the transcript."""
        self.messages.append({"role": "assistant", "content": text})

    def add_tool_result(self, tool_call_id: str, result: str):
        """Append a tool-result message tied to ``tool_call_id``."""
        entry = {
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": result,
        }
        self.messages.append(entry)
142
+
143
# ─── Mock LLM Response Generator ────────────────────────────────────────────

def generate_response(messages, tools, example_mode=False):
    """
    Generate an assistant message for the last user turn.

    Uses keyword heuristics to decide whether to emit a tool call (OpenAI
    ``tool_calls`` format) or a plain text reply.  Falls back to a generic
    preview message when no heuristic matches.  In production this is
    replaced by the fine-tuned Stack X model.

    Fixes over the previous version:
      * removed the duplicated "calculate" keyword;
      * ``re.search(...).group()`` calls are guarded against no-match
        (previously AttributeError on None);
      * the file-search path extraction falls back to "." instead of
        raising IndexError when the message reduces to nothing.
    """
    last_msg = messages[-1]["content"] if messages else ""
    last_msg_lower = last_msg.lower()

    # ── Calculator examples ──
    calc_keywords = ("calculate", "what is", "compute", "math", "1500", "7%", "30%", "roi", "compound")
    if any(k in last_msg_lower for k in calc_keywords):
        if "roi" in last_msg_lower or "1500" in last_msg:
            expression = "1500 * 0.07 * 30"
        else:
            match = re.search(r"[\d+\-*/.()% ]+", last_msg)
            expression = match.group() if match else last_msg
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_calc_001",
                    "type": "function",
                    "function": {
                        "name": "calculator",
                        "arguments": json.dumps({"expression": expression}),
                    },
                }
            ],
        }

    # ── Current time examples ──
    if any(k in last_msg_lower for k in ("time", "date", "now", "when")):
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_time_001",
                    "type": "function",
                    "function": {
                        "name": "get_current_time",
                        "arguments": "{}",
                    },
                }
            ],
        }

    # ── File search examples ──
    if any(k in last_msg_lower for k in ("find", "search", "file", "where is", "look for")):
        # Strip the verbs/noise words and take the last remaining token as
        # the path; default to "." when nothing usable is left.
        residue = last_msg_lower
        for noise in ("search", "find", "files", "in"):
            residue = residue.replace(noise, "")
        tokens = residue.strip().split()
        path = tokens[-1] if tokens else "."
        pattern_match = re.search(r"\*\.[a-zA-Z]+", last_msg)
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_search_001",
                    "type": "function",
                    "function": {
                        "name": "search_files",
                        "arguments": json.dumps({
                            "path": path,
                            "pattern": pattern_match.group() if pattern_match else "*.py",
                        }),
                    },
                }
            ],
        }

    # ── Run command examples ──
    if any(k in last_msg_lower for k in ("run", "execute", "terminal", "bash", "command line", "git status", "ls ", "ps aux")):
        if "git" in last_msg_lower:
            command = "git status"
        else:
            backticked = re.search(r"`([^`]+)`", last_msg)
            words = last_msg.split()
            command = backticked.group(1) if backticked else (words[-1] if words else "")
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_cmd_001",
                    "type": "function",
                    "function": {
                        "name": "run_command",
                        "arguments": json.dumps({"command": command, "cwd": "."}),
                    },
                }
            ],
        }

    # ── General response (no tool call) ──
    return {
        "role": "assistant",
        "content": f"This is a preview of Stack X Ultimate's tool-calling capability. "
        f"The model would process your request: \"{last_msg[:80]}{'...' if len(last_msg) > 80 else ''}\" "
        f"and intelligently decide whether to call tools or respond directly. "
        f"Deploy this model on your own GPU to enable full inference.",
    }
240
+
241
# ─── Chat Function ──────────────────────────────────────────────────────────

def chat_fn(message, history, example_mode=False):
    """Process one user message; return ``(assistant_text, tool_text | None)``.

    ``history`` is a list of messages-format dicts (``{"role", "content"}``,
    tool entries may carry ``"result"``).  When the model requests a tool,
    the first tool call is executed locally and a formatted result string is
    returned as the second element; otherwise the second element is None.
    """
    # Rebuild the model-facing message list from the UI history.
    messages = []
    for h in history:
        role = h.get("role")
        if role == "user":
            messages.append({"role": "user", "content": h["content"]})
        elif role == "assistant":
            messages.append({"role": "assistant", "content": h["content"]})
        elif role == "tool":
            messages.append({"role": "tool", "content": h.get("result", "")})

    messages.append({"role": "user", "content": message})
    response = generate_response(messages, TOOLS, example_mode)

    if not response.get("tool_calls"):
        return response.get("content", ""), None

    # Only the first requested tool call is executed.
    tool_call = response["tool_calls"][0]
    tool_name = tool_call["function"]["name"]
    try:
        tool_args = json.loads(tool_call["function"]["arguments"])
    except (json.JSONDecodeError, TypeError):
        # Was a bare except: catch only malformed/None arguments.
        tool_args = {}

    tool_result = execute_tool(tool_name, tool_args)

    # Human-friendly description of the call, per tool.
    if tool_name == "calculator":
        assistant_msg = f"🔢 Calculating...\n\n**calculator**(`{tool_args.get('expression', '')}`)"
    elif tool_name == "get_current_time":
        assistant_msg = "🕐 Fetching current time..."
    elif tool_name == "search_files":
        assistant_msg = f"📁 Searching for `{tool_args.get('pattern', '')}` in `{tool_args.get('path', '.')}`..."
    elif tool_name == "run_command":
        assistant_msg = f"⚡ Running command: `{tool_args.get('command', '')}`"
    else:
        assistant_msg = f"🔧 Calling `{tool_name}`..."

    tool_msg = f"✅ **{tool_name}** result:\n\n```\n{tool_result}\n```"
    return assistant_msg, tool_msg
289
+
290
+ # ─── Gradio Interface ────────────────────────────────────────────────────────
291
+
292
# Module-level example prompts as [prompt, example_mode] pairs.
# NOTE(review): this list largely duplicates the inline examples passed to
# gr.Examples in build_space and is not referenced elsewhere in this file —
# confirm whether it is still needed.
examples = [
    ["Calculate the compound interest on $1500 at 7% annual rate over 30 years", True],
    ["What time is it right now?", True],
    ["Find all Python files in the current directory", True],
    ["Run `ls -la` to see what's in the directory", True],
    ["Look up the current stock price for AAPL", True],
    ["What is the weather in San Francisco?", True],
    ["Write a Python function to calculate fibonacci and run it", True],
    ["Check if a server is running on port 8080", True],
]
302
+
303
# Custom dark theme CSS injected into the Blocks app.  Fix: the previous
# version used Tailwind-style "#hex/alpha" color syntax (e.g. #a855f7/20),
# which is invalid in plain CSS and made those borders fall back to defaults;
# replaced with equivalent rgba() values.
css = """
/* ─── Typography & Colors ─────────────────────────────────────────── */
.gradio-container {
    --background: #09090b !important;
    --border-color: #27272a !important;
    --color-accent: #a855f7 !important;
}

#title-block h1 {
    font-size: 2.2rem !important;
    font-weight: 900 !important;
    letter-spacing: -0.03em !important;
    background: linear-gradient(135deg, #a855f7 0%, #6366f1 50%, #3b82f6 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
}

/* ─── Chat bubble styling ────────────────────────────────────────── */
.user-bubble {
    background: #27272a !important;
    border: 1px solid #3f3f46 !important;
    border-radius: 12px 12px 4px 12px !important;
    color: #fafafa !important;
    max-width: 85% !important;
}
.assistant-bubble {
    background: #18181b !important;
    border: 1px solid rgba(168, 85, 247, 0.2) !important;
    border-radius: 12px 12px 12px 4px !important;
    color: #e4e4e7 !important;
    max-width: 85% !important;
}
.tool-bubble {
    background: #0a0a0a !important;
    border: 1px solid rgba(34, 197, 94, 0.3) !important;
    border-radius: 8px !important;
    color: #a3e635 !important;
    font-family: 'JetBrains Mono', 'Fira Code', monospace !important;
    font-size: 0.85rem !important;
}

/* ─── Button styling ─────────────────────────────────────────────── */
.gradient-btn {
    background: linear-gradient(135deg, #a855f7, #6366f1) !important;
    border: none !important;
    color: white !important;
    font-weight: 700 !important;
    border-radius: 999px !important;
    padding: 0.5rem 1.5rem !important;
    transition: all 0.2s ease !important;
}
.gradient-btn:hover {
    transform: scale(1.03) !important;
    box-shadow: 0 0 20px rgba(168,85,247,0.4) !important;
}

/* ─── Hero section ───────────────────────────────────────────────── */
.hero-section {
    background: radial-gradient(ellipse at 50% 0%, rgba(168,85,247,0.08) 0%, transparent 70%) !important;
    border-bottom: 1px solid #27272a !important;
    padding: 2rem !important;
    text-align: center !important;
}

/* ─── Stats bar ──────────────────────────────────────────────────── */
.stats-bar {
    display: flex !important;
    justify-content: center !important;
    gap: 3rem !important;
    padding: 1rem 0 !important;
    border-bottom: 1px solid #27272a !important;
    margin-bottom: 1rem !important;
}
.stat-item {
    text-align: center !important;
}
.stat-value {
    font-size: 1.4rem !important;
    font-weight: 800 !important;
    color: #a855f7 !important;
}
.stat-label {
    font-size: 0.75rem !important;
    color: #71717a !important;
    text-transform: uppercase !important;
    letter-spacing: 0.05em !important;
}

/* ─── Tool badge ─────────────────────────────────────────────────── */
.tool-badge {
    display: inline-flex !important;
    align-items: center !important;
    gap: 0.3rem !important;
    background: #18181b !important;
    border: 1px solid #3f3f46 !important;
    border-radius: 999px !important;
    padding: 0.2rem 0.7rem !important;
    font-size: 0.75rem !important;
    color: #a1a1aa !important;
    margin: 0.15rem !important;
}
.tool-badge-active {
    border-color: rgba(168, 85, 247, 0.4) !important;
    color: #c084fc !important;
}

/* ─── Footer ─────────────────────────────────────────────────────── */
.footer-note {
    text-align: center !important;
    color: #52525b !important;
    font-size: 0.75rem !important;
    padding: 0.75rem !important;
    border-top: 1px solid #18181b !important;
}
"""
419
 
420
# Gradio theme: purple accent on a dark zinc palette, tuned to match the
# custom CSS's #09090b background and light text.
theme = gr.themes.Default(
    primary_hue="purple",
    secondary_hue="zinc",
    neutral_hue="zinc",
    radius_size=gr.themes.sizes.radius_md,
).set(
    body_background_fill="#09090b",
    body_text_color="#e4e4e7",
    border_color_accent_subtle="#27272a",
)
430
+
431
def build_space():
    """Build and return the Gradio Blocks demo UI.

    Fixes over the previous wiring:
      * the Chatbot uses ``type="messages"`` so history entries are
        ``{"role", "content"}`` dicts — the format ``chat_fn`` reads;
      * submit/click handlers fold ``chat_fn``'s ``(assistant, tool)``
        strings into the history instead of assigning raw strings to the
        Chatbot via ``outputs=[chat, chat]``;
      * ``gr.Examples`` rows now carry one value per row, matching the
        single ``msg`` input component;
      * removed the unused ``gr.State([])``.
    """
    with gr.Blocks(
        theme=theme,
        css=css,
        title="Stack X Ultimate — Agentic Tool-Calling",
    ) as demo:

        # ── Hero ──────────────────────────────────────────────────────
        with gr.Group():
            gr.HTML("""
            <div class="hero-section">
                <div id="title-block">
                    <h1>Stack X Ultimate</h1>
                </div>
                <p style="color: #a1a1aa; font-size: 1.05rem; max-width: 600px; margin: 0.6rem auto 0;">
                    Open-source agentic model with tool calling.
                    Deploy on your own GPU — no API costs, no data leaving your server.
                </p>
                <div class="tool-badge tool-badge-active">🔢 calculator</div>
                <div class="tool-badge tool-badge-active">🕐 get_current_time</div>
                <div class="tool-badge tool-badge-active">📁 search_files</div>
                <div class="tool-badge tool-badge-active">⚡ run_command</div>
            </div>
            """)

            gr.HTML("""
            <div class="stats-bar">
                <div class="stat-item">
                    <div class="stat-value">3B</div>
                    <div class="stat-label">Parameters</div>
                </div>
                <div class="stat-item">
                    <div class="stat-value">4-bit</div>
                    <div class="stat-label">QLoRA Quantized</div>
                </div>
                <div class="stat-item">
                    <div class="stat-value">V100</div>
                    <div class="stat-label">Runs on 1 GPU</div>
                </div>
                <div class="stat-item">
                    <div class="stat-value">$0</div>
                    <div class="stat-label">API Costs</div>
                </div>
            </div>
            """)

        # ── Main Chat ─────────────────────────────────────────────────
        chat = gr.Chatbot(
            height=480,
            show_copy_button=True,
            avatar_images=(
                "https://huggingface.co/front/assets/icons/8.svg",   # user
                "https://huggingface.co/front/assets/icons/13.svg",  # bot
            ),
            # messages format: history items are {"role", "content"} dicts,
            # which is what chat_fn expects to read.
            type="messages",
            render_markdown=True,
        )

        with gr.Row():
            msg = gr.Textbox(
                placeholder="Try: 'Calculate compound interest on $1500 at 7% for 30 years'",
                scale=5,
                container=True,
                show_label=False,
                elem_id="main-input",
            )
            submit_btn = gr.Button("Send →", scale=1, elem_classes="gradient-btn")

        # ── Events ────────────────────────────────────────────────────
        def _respond(message, history):
            """Run chat_fn and append its output as messages-format turns."""
            history = list(history or [])
            assistant_msg, tool_msg = chat_fn(message, history)
            history.append({"role": "user", "content": message})
            history.append({"role": "assistant", "content": assistant_msg})
            if tool_msg:
                history.append({"role": "assistant", "content": tool_msg})
            return history

        msg.submit(
            fn=_respond,
            inputs=[msg, chat],
            outputs=[chat],
            show_progress="minimal",
        )
        submit_btn.click(
            fn=_respond,
            inputs=[msg, chat],
            outputs=[chat],
            show_progress="minimal",
        )
        msg.submit(lambda: "", None, msg)  # clear input after send

        # ── Examples ──────────────────────────────────────────────────
        # One value per row, matching the single `msg` input.
        gr.Examples(
            examples=[
                "Calculate the compound interest on $1500 at 7% annual rate over 30 years",
                "What time is it right now?",
                "Find all Python files in the current directory",
                "Run `ls -la` to see what's in the directory",
                "Look up the current stock price for AAPL",
                "Check if a server is running on port 8080",
            ],
            inputs=[msg],
            label="Try one of these examples →",
        )

        # ── Footer ────────────────────────────────────────────────────
        gr.HTML("""
        <div class="footer-note">
            🔧 This demo shows tool-calling capability. Fine-tuned model coming soon —
            <a href="https://huggingface.co/my-ai-stack/Stack-X-Ultimate" style="color: #a855f7;">deploy on HuggingFace</a> or
            <a href="https://www.stack-ai.me/contact" style="color: #a855f7;">request enterprise deployment</a>
        </div>
        """)

    return demo
541
+
542
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    app = build_space()
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (required in a Space)
        server_port=7860,
        max_threads=4,
        show_api=False,
        favicon_path=None,
    )