Spaces:
Running on Zero
| """DEX Evolution Outpost v2.2 — FastAPI + Gradio""" | |
| import gradio as gr | |
| import spaces | |
| import json, re, logging | |
| from typing import Generator | |
| from fastapi import FastAPI | |
| from fastapi.responses import JSONResponse | |
# Module-wide logging at INFO so model-load and tool-dispatch progress is
# visible in the Space logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# FastAPI app that the Gradio UI is mounted onto at the bottom of the file.
api_app = FastAPI()
# ── Model Config ──────────────────────────────────────────────
MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"  # chat LLM loaded via transformers
MAX_NEW_TOKENS = 1024  # generation cap per LLM call
# Lazily-initialized singletons, populated on the first _gpu_llm_chat call.
_tok = None       # transformers tokenizer for MODEL_ID
_pipeline = None  # transformers text-generation pipeline
| TOOLS = [ | |
| {"name": "web_search", "description": "Search the web via DuckDuckGo.", "params": {"query": "string", "max_results": "int (default 5)"}}, | |
| {"name": "web_extract", "description": "Download and extract text from a URL.", "params": {"url": "string"}}, | |
| {"name": "run_code", "description": "Execute Python code in sandbox.", "params": {"code": "string"}}, | |
| {"name": "generate_image", "description": "Generate image via Stable Diffusion.", "params": {"prompt": "string", "steps": "int (default 20)"}}, | |
| ] | |
| TOOL_DESC = "\n".join(f"• {t['name']}: {t['description']} (params: {json.dumps(t['params'])})" for t in TOOLS) | |
# Default system prompt: instructs the model to emit raw-JSON tool calls in
# exactly the shape that _extract_tool_calls parses.
SYSTEM = (
    "You are DEX, a local AI assistant with access to live tools. ALWAYS use tools for factual/real-time questions.\n"
    "To call a tool, write EXACTLY this format:\n"
    '{"name":"tool_name","arguments":{"param":"value"}}\n'
    f"Available tools:\n{TOOL_DESC}\n"
    "RULES:\n"
    "- For ANY factual question: use web_search FIRST, then answer from results\n"
    "- For code questions: use run_code to test your answer\n"
    "- Never fabricate information — if unsure, search\n"
    "After tool results, continue your analysis."
)
# ── GPU Functions ───────────────────────────────────────────────
def gpu_warmup() -> str:
    """Probe CUDA availability and return a human-readable status line."""
    import torch

    cuda_ok = torch.cuda.is_available()
    return f"GPU ready at init: {cuda_ok}"
# Probe CUDA once at import time so availability is logged during startup.
gpu_message = gpu_warmup()  # Ensure detection during init
logger.info(gpu_message)
def _gpu_llm_chat(messages: list) -> str:
    """Run one chat completion on the local LLM and return the new text.

    Lazily loads the tokenizer and model into the module-level singletons
    (_tok, _pipeline) on first call, then renders `messages` (a list of
    {"role": ..., "content": ...} dicts) with the model's chat template.

    Args:
        messages: chat messages in transformers chat-template format.
    Returns:
        The model's completion text, stripped of surrounding whitespace.
    Raises:
        Whatever transformers raises on load/generation; callers catch it.
    """
    global _tok, _pipeline
    if _pipeline is None:
        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline as _p
        logger.info("Loading model...")
        _tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", trust_remote_code=True)
        # return_full_text=False makes the pipeline return only the completion.
        # This replaces the previous `out[len(prompt):]` slicing, which is
        # fragile: prompt text does not always round-trip exactly through
        # tokenize/detokenize, so the slice could cut mid-token or mid-answer.
        _pipeline = _p(
            "text-generation",
            model=_model,
            tokenizer=_tok,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            return_full_text=False,
        )
        logger.info("Model loaded!")
    prompt = _tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return _pipeline(prompt)[0]["generated_text"].strip()
def _gpu_generate_image(prompt: str, steps: int = 20):
    """Generate one image with Stable Diffusion v1.5 on CUDA.

    The pipeline is rebuilt on every call and torn down afterwards so GPU
    memory is released between requests.

    Args:
        prompt: text description of the image.
        steps: number of denoising steps (default 20).
    Returns:
        A PIL image (first element of the pipeline's `.images`).
    """
    from diffusers import StableDiffusionPipeline
    import torch
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    try:
        return pipe(prompt, num_inference_steps=steps).images[0]
    finally:
        # Release VRAM even when generation raises — the original only freed
        # the pipeline on success, leaking GPU memory on any error.
        del pipe
        torch.cuda.empty_cache()
# ── CPU Tools ───────────────────────────────────────────────────
def _web_search(query: str, max_results: int = 5):
    """DuckDuckGo text search; returns the raw result list as a JSON string."""
    from duckduckgo_search import DDGS

    hits = DDGS().text(query, max_results=max_results)
    return json.dumps(hits, ensure_ascii=False)
def _web_extract(url: str):
    """Fetch `url` and return its readable text, or an error message string."""
    import trafilatura

    page = trafilatura.fetch_url(url)
    if not page:
        return "Failed to download."
    return trafilatura.extract(page)
| def _run_code(code: str): | |
| import io, contextlib | |
| stdout = io.StringIO() | |
| try: | |
| with contextlib.redirect_stdout(stdout): | |
| exec(code, {"__builtins__": __builtins__}) | |
| return stdout.getvalue() or "Executed successfully." | |
| except Exception as e: | |
| return f"Error: {type(e).__name__}: {e}" | |
| def _execute_tool(name: str, args: dict) -> str: | |
| try: | |
| if name == "web_search": return _web_search(**args) | |
| elif name == "web_extract": return _web_extract(**args) | |
| elif name == "run_code": return _run_code(**args) | |
| elif name == "generate_image": return _gpu_generate_image(**args) | |
| return f"Unknown tool: {name}" | |
| except Exception as e: | |
| return f"Tool error: {type(e).__name__}: {e}" | |
| def _extract_tool_calls(text: str): | |
| """Extract JSON tool calls from LLM output. | |
| Handles: raw JSON, blocks, embedded JSON objects.""" | |
| calls = [] | |
| # Method 1: blocks (legacy format) | |
| for m in re.finditer(r'', text, re.DOTALL): | |
| try: calls.append(json.loads(m.group(1).strip())) | |
| except: pass | |
| if calls: | |
| return calls | |
| # Method 2: Raw JSON objects anywhere in text (Qwen3 outputs plain JSON) | |
| decoder = json.JSONDecoder() | |
| pos = 0 | |
| while pos < len(text): | |
| try: | |
| obj, end = decoder.raw_decode(text, pos) | |
| if isinstance(obj, dict) and "name" in obj and "arguments" in obj: | |
| calls.append(obj) | |
| pos = end | |
| except json.JSONDecodeError: | |
| pos += 1 | |
| return calls | |
| def _dispatch_tool_calls(calls: list): | |
| results = [] | |
| for c in calls: | |
| name, args = c.get("name",""), c.get("arguments",{}) | |
| logger.info(f"Tool: {name}({args})") | |
| results.append({"name": name, "arguments": args, "result": _execute_tool(name, args)}) | |
| return results | |
# ── Agent Loop ──────────────────────────────────────────────────
def run_agent(user_msg: str, agent_name: str, chat_history: list) -> Generator:
    """Single-round agent loop, yielding UI history snapshots for Gradio.

    Flow: echo the user message, run one LLM turn, and — if the reply
    contained tool calls — execute them, show truncated results, then run
    exactly ONE follow-up LLM turn. There is no multi-round tool loop.

    Args:
        user_msg: user input (handlers filter out blank input before calling).
        agent_name: persona key ("command", "code", "research", "image",
            "web"); any other value falls back to the default SYSTEM prompt.
        chat_history: prior messages in Gradio "messages" dict format, or None.

    Yields:
        The growing history list after each UI-visible step.
    """
    # Persona-specific system prompts. NOTE(review): the "code" example call
    # is malformed JSON ({name:"..."} — unquoted key), so the model may copy
    # an unparseable format; confirm and fix the template text.
    agent_system = {
        "command": SYSTEM,
        "code": "You are a code-focused assistant. Write, debug, and explain code. When asked to run code, use the run_code tool:\n{name:\"run_code\",arguments:{\"code\":\"your code here\"}}",
        "research": """You are a research assistant. IMPORTANT: You MUST search the web using tools before answering any factual question. Do NOT rely on training data — always verify current info with live search.
To search, write: {"name":"web_search","arguments":{"query":"search query here","max_results":5}}
After getting results, summarize them. Always cite sources. If no results found, say so.""",
        "image": "You are an image generation assistant. When asked to create an image, use the generate_image tool:\n{\"name\":\"generate_image\",\"arguments\":{\"prompt\":\"detailed image description\",\"steps\":20}}",
        "web": """You are a web agent. To extract content from a URL, use the web_extract tool:
{"name":"web_extract","arguments":{"url":"https://..."}}
To search the web first, use web_search:
{"name":"web_search","arguments":{"query":"search terms","max_results":5}}
Always use tools — do not make up content.""",
    }.get(agent_name, SYSTEM)
    # Copy so the caller's history list is never mutated in place.
    h = list(chat_history) if chat_history else []
    h.append({"role": "user", "content": user_msg})
    yield h
    # The LLM conversation is rebuilt fresh each call: prior UI history is
    # displayed but NOT fed back to the model.
    messages = [{"role": "system", "content": agent_system}, {"role": "user", "content": user_msg}]
    try:
        resp = _gpu_llm_chat(messages)
    except Exception as e:
        h.append({"role": "assistant", "content": f"❌ Error: {type(e).__name__}: {e}"})
        yield h
        return
    tool_calls = _extract_tool_calls(resp)
    if tool_calls:
        h.append({"role": "assistant", "content": f"🔧 Using: {', '.join(c.get('name','?') for c in tool_calls)}"})
        yield h
        results = _dispatch_tool_calls(tool_calls)
        # UI preview is truncated to 500 chars per tool result.
        obs = "\n".join(f"**{r['name']}**: {r['result'][:500]}" for r in results)
        h.append({"role": "assistant", "content": f"📋 Results:\n{obs}"})
        yield h
        # Feed tool output (capped at 2000 chars each) back for one
        # follow-up turn; results are injected as user-role messages.
        messages.append({"role": "assistant", "content": resp})
        for r in results:
            messages.append({"role": "user", "content": f"Tool result for {r['name']}: {r['result'][:2000]}"})
        try:
            resp = _gpu_llm_chat(messages)
        except Exception as e:
            resp = f"(LLM error: {e})"
    h.append({"role": "assistant", "content": resp})
    yield h
# ── Handlers ────────────────────────────────────────────────────
def cmd_handler(msg, history):
    """Command tab: blank input just re-echoes the history, else run agent."""
    if msg and msg.strip():
        yield from run_agent(msg, "command", history)
    else:
        yield history or []
def code_handler(msg, history):
    """Code tab: blank input just re-echoes the history, else run agent."""
    if msg and msg.strip():
        yield from run_agent(msg, "code", history)
    else:
        yield history or []
def research_handler(msg, history):
    """Research tab: blank input just re-echoes the history, else run agent."""
    if msg and msg.strip():
        yield from run_agent(msg, "research", history)
    else:
        yield history or []
def image_handler(msg, history):
    """Image tab: blank input just re-echoes the history, else run agent."""
    if msg and msg.strip():
        yield from run_agent(msg, "image", history)
    else:
        yield history or []
def web_handler(msg, history):
    """Web tab: blank input just re-echoes the history, else run agent."""
    if msg and msg.strip():
        yield from run_agent(msg, "web", history)
    else:
        yield history or []
# ── REST API (standalone) ───────────────────────────────────────
@api_app.post("/api/agent")
async def api_agent(req: dict):
    """POST /api/agent — run one agent turn and return the final reply.

    Request JSON: {"agent": <persona, default "command">, "message": <str>}.
    Returns 400 when "message" is missing/empty.

    NOTE(review): this handler (advertised in the UI footer as /api/agent)
    was defined but never registered on api_app, so the route returned 404 —
    added the route decorator. Registration happens before the Gradio mount,
    so the API route takes precedence over the UI catch-all.
    """
    agent = req.get("agent", "command")
    msg = req.get("message", "")
    if not msg:
        return JSONResponse({"error": "message required"}, status_code=400)
    last = []
    # Drain the streaming generator; the final yield holds the full history.
    for h in run_agent(msg, agent, []):
        last = h
    return {"agent": agent, "response": last[-1]["content"] if last else "No response"}
@api_app.get("/api/health")
async def api_health():
    """GET /api/health — liveness probe reporting model id and app version.

    NOTE(review): the original never registered this handler on api_app, so
    it was unreachable — added the route decorator (path chosen to match the
    /api/* convention; confirm the intended path).
    """
    return {"status": "ok", "model": MODEL_ID, "version": "2.2"}
# ── Gradio UI ───────────────────────────────────────────────────
# Five tabs share one pattern: Chatbot + Textbox + Button wired to the
# matching streaming handler. Each tab keeps its own independent history.
# NOTE(review): the textbox is not cleared after submit and Enter-to-send
# (Textbox.submit) is not wired — confirm whether that is intentional.
with gr.Blocks(title="DEX Evolution Outpost ⚡", theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# ⚡ DEX Evolution Outpost\n*Qwen3-4B + Stable Diffusion on H200 — no API keys needed*")
    with gr.Tab("⚡ Command"):
        gr.Markdown("*Route tasks — the LLM picks the agent.*")
        c1 = gr.Chatbot(height=500)
        i1 = gr.Textbox(label="Input", placeholder="Ask anything…")
        b1 = gr.Button("⚡ Go", variant="primary")
        b1.click(cmd_handler, [i1, c1], [c1])
    with gr.Tab("💻 Code"):
        gr.Markdown("*Write, debug, and explain code.*")
        c2 = gr.Chatbot(height=500)
        i2 = gr.Textbox(label="Input")
        b2 = gr.Button("💻 Go", variant="primary")
        b2.click(code_handler, [i2, c2], [c2])
    with gr.Tab("🔬 Research"):
        gr.Markdown("*Search the web and summarize findings.*")
        c3 = gr.Chatbot(height=500)
        i3 = gr.Textbox(label="Input")
        b3 = gr.Button("🔬 Go", variant="primary")
        b3.click(research_handler, [i3, c3], [c3])
    with gr.Tab("🎨 Image"):
        gr.Markdown("*Generate images with Stable Diffusion.*")
        c4 = gr.Chatbot(height=500)
        i4 = gr.Textbox(label="Input", placeholder="Describe your image…")
        b4 = gr.Button("🎨 Go", variant="primary")
        b4.click(image_handler, [i4, c4], [c4])
    with gr.Tab("🌐 Web"):
        gr.Markdown("*Browse and extract web content.*")
        c5 = gr.Chatbot(height=500)
        i5 = gr.Textbox(label="Input", placeholder="Enter URL or search query…")
        b5 = gr.Button("🌐 Go", variant="primary")
        b5.click(web_handler, [i5, c5], [c5])
    gr.Markdown("---\n*DEX Evolution Outpost v2.2 — all local, no API keys | [API](/api/agent)*")
# Mount Gradio on FastAPI: the UI serves at "/" on top of api_app, and `app`
# is the ASGI application uvicorn runs (HF Spaces default port 7860).
app = gr.mount_gradio_app(api_app, demo, path="/")
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)