# dexifried @ 71a75b4 — Fix tool extraction: raw JSON decode for Qwen3 plain output
"""DEX Evolution Outpost v2.2 — FastAPI + Gradio"""
import gradio as gr
import spaces
import json, re, logging
from typing import Generator
from fastapi import FastAPI
from fastapi.responses import JSONResponse
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# FastAPI app
api_app = FastAPI()
# ── Model Config ──────────────────────────────────────────────
# Chat LLM served through the transformers text-generation pipeline.
MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"
# Hard cap on generated tokens per LLM call.
MAX_NEW_TOKENS = 1024
# Lazily-initialized singletons, populated on first _gpu_llm_chat call.
_tok = None
_pipeline = None
# Tool registry: names/params here must match the dispatch in _execute_tool.
TOOLS = [
    {"name": "web_search", "description": "Search the web via DuckDuckGo.", "params": {"query": "string", "max_results": "int (default 5)"}},
    {"name": "web_extract", "description": "Download and extract text from a URL.", "params": {"url": "string"}},
    {"name": "run_code", "description": "Execute Python code in sandbox.", "params": {"code": "string"}},
    {"name": "generate_image", "description": "Generate image via Stable Diffusion.", "params": {"prompt": "string", "steps": "int (default 20)"}},
]
# Human-readable tool list injected into the system prompt below.
TOOL_DESC = "\n".join(f"• {t['name']}: {t['description']} (params: {json.dumps(t['params'])})" for t in TOOLS)
# Default system prompt ("command" agent); the JSON format shown here is what
# _extract_tool_calls parses back out of the model's output.
SYSTEM = (
    "You are DEX, a local AI assistant with access to live tools. ALWAYS use tools for factual/real-time questions.\n"
    "To call a tool, write EXACTLY this format:\n"
    '{"name":"tool_name","arguments":{"param":"value"}}\n'
    f"Available tools:\n{TOOL_DESC}\n"
    "RULES:\n"
    "- For ANY factual question: use web_search FIRST, then answer from results\n"
    "- For code questions: use run_code to test your answer\n"
    "- Never fabricate information — if unsure, search\n"
    "After tool results, continue your analysis."
)
# ── GPU Functions ─────────────────────────────────────────────
@spaces.GPU(duration=60)
def gpu_warmup() -> str:
    """Probe CUDA availability from inside a ZeroGPU context."""
    import torch
    available = torch.cuda.is_available()
    return f"GPU ready at init: {available}"


gpu_message = gpu_warmup()  # Ensure detection during init
logger.info(gpu_message)
@spaces.GPU(duration=120)
def _gpu_llm_chat(messages: list) -> str:
    """Run one chat completion on GPU, lazily loading the model once.

    messages: chat-style list of {"role": ..., "content": ...} dicts.
    Returns only the newly generated text (the prompt prefix is stripped).
    """
    global _tok, _pipeline
    if _pipeline is None:
        # First call: load tokenizer + model and cache the pipeline in globals.
        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline as _p
        logger.info("Loading model...")
        _tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", trust_remote_code=True)
        _pipeline = _p("text-generation", model=_model, tokenizer=_tok, max_new_tokens=MAX_NEW_TOKENS, do_sample=True, temperature=0.7, top_p=0.9)
        logger.info("Model loaded!")
    prompt = _tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    out = _pipeline(prompt)[0]["generated_text"]
    # The pipeline returns prompt + completion; slice off the prompt prefix.
    return out[len(prompt):].strip()
@spaces.GPU(duration=60)
def _gpu_generate_image(prompt: str, steps: int = 20):
    """Render *prompt* with Stable Diffusion v1.5 on CUDA; return a PIL image."""
    import torch
    from diffusers import StableDiffusionPipeline
    sd = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
    image = sd(prompt, num_inference_steps=steps).images[0]
    # Free the pipeline's GPU memory before returning.
    del sd
    torch.cuda.empty_cache()
    return image
# ── CPU Tools ─────────────────────────────────────────────────
def _web_search(query: str, max_results: int = 5):
    """Run a DuckDuckGo text search; return the hit list as a JSON string."""
    from duckduckgo_search import DDGS
    hits = DDGS().text(query, max_results=max_results)
    return json.dumps(hits, ensure_ascii=False)
def _web_extract(url: str):
    """Fetch *url* and return its extracted main text, or a failure notice."""
    import trafilatura
    page = trafilatura.fetch_url(url)
    if not page:
        return "Failed to download."
    return trafilatura.extract(page)
def _run_code(code: str):
import io, contextlib
stdout = io.StringIO()
try:
with contextlib.redirect_stdout(stdout):
exec(code, {"__builtins__": __builtins__})
return stdout.getvalue() or "Executed successfully."
except Exception as e:
return f"Error: {type(e).__name__}: {e}"
def _execute_tool(name: str, args: dict) -> str:
try:
if name == "web_search": return _web_search(**args)
elif name == "web_extract": return _web_extract(**args)
elif name == "run_code": return _run_code(**args)
elif name == "generate_image": return _gpu_generate_image(**args)
return f"Unknown tool: {name}"
except Exception as e:
return f"Tool error: {type(e).__name__}: {e}"
def _extract_tool_calls(text: str):
"""Extract JSON tool calls from LLM output.
Handles: raw JSON, blocks, embedded JSON objects."""
calls = []
# Method 1: blocks (legacy format)
for m in re.finditer(r'', text, re.DOTALL):
try: calls.append(json.loads(m.group(1).strip()))
except: pass
if calls:
return calls
# Method 2: Raw JSON objects anywhere in text (Qwen3 outputs plain JSON)
decoder = json.JSONDecoder()
pos = 0
while pos < len(text):
try:
obj, end = decoder.raw_decode(text, pos)
if isinstance(obj, dict) and "name" in obj and "arguments" in obj:
calls.append(obj)
pos = end
except json.JSONDecodeError:
pos += 1
return calls
def _dispatch_tool_calls(calls: list):
results = []
for c in calls:
name, args = c.get("name",""), c.get("arguments",{})
logger.info(f"Tool: {name}({args})")
results.append({"name": name, "arguments": args, "result": _execute_tool(name, args)})
return results
# ── Agent Loop ────────────────────────────────────────────────
def run_agent(user_msg: str, agent_name: str, chat_history: list) -> Generator:
    """Single-round agent loop: prompt → optional tool pass → final answer.

    Yields successive chat-history snapshots (lists of role/content dicts)
    so a streaming Gradio Chatbot can render progress.

    NOTE(review): chat_history is echoed in the UI but is NOT forwarded to
    the LLM — `messages` is rebuilt from scratch every call, so the model
    sees no prior turns. Confirm whether multi-turn context is intended.
    """
    # Per-agent system prompts; unknown agent names fall back to SYSTEM.
    agent_system = {
        "command": SYSTEM,
        "code": "You are a code-focused assistant. Write, debug, and explain code. When asked to run code, use the run_code tool:\n{name:\"run_code\",arguments:{\"code\":\"your code here\"}}",
        "research": """You are a research assistant. IMPORTANT: You MUST search the web using tools before answering any factual question. Do NOT rely on training data — always verify current info with live search.
To search, write: {"name":"web_search","arguments":{"query":"search query here","max_results":5}}
After getting results, summarize them. Always cite sources. If no results found, say so.""",
        "image": "You are an image generation assistant. When asked to create an image, use the generate_image tool:\n{\"name\":\"generate_image\",\"arguments\":{\"prompt\":\"detailed image description\",\"steps\":20}}",
        "web": """You are a web agent. To extract content from a URL, use the web_extract tool:
{"name":"web_extract","arguments":{"url":"https://..."}}
To search the web first, use web_search:
{"name":"web_search","arguments":{"query":"search terms","max_results":5}}
Always use tools — do not make up content."""
    }.get(agent_name, SYSTEM)
    # Show the user's message in the UI immediately.
    h = list(chat_history) if chat_history else []
    h.append({"role": "user", "content": user_msg})
    yield h
    messages = [{"role": "system", "content": agent_system}, {"role": "user", "content": user_msg}]
    try:
        resp = _gpu_llm_chat(messages)
    except Exception as e:
        # Surface the failure in-chat rather than crashing the stream.
        h.append({"role": "assistant", "content": f"❌ Error: {type(e).__name__}: {e}"})
        yield h
        return
    tool_calls = _extract_tool_calls(resp)
    if tool_calls:
        # Announce the tools, run them, then show truncated results in the UI.
        h.append({"role": "assistant", "content": f"🔧 Using: {', '.join(c.get('name','?') for c in tool_calls)}"})
        yield h
        results = _dispatch_tool_calls(tool_calls)
        obs = "\n".join(f"**{r['name']}**: {r['result'][:500]}" for r in results)
        h.append({"role": "assistant", "content": f"📋 Results:\n{obs}"})
        yield h
        # Feed tool outputs back (as user turns) for one follow-up LLM pass.
        messages.append({"role": "assistant", "content": resp})
        for r in results:
            messages.append({"role": "user", "content": f"Tool result for {r['name']}: {r['result'][:2000]}"})
        try:
            resp = _gpu_llm_chat(messages)
        except Exception as e:
            resp = f"(LLM error: {e})"
    h.append({"role": "assistant", "content": resp})
    yield h
# ── Handlers ──────────────────────────────────────────────────
def _make_handler(agent: str):
    """Build a Gradio streaming handler bound to one agent persona.

    Replaces five copy-pasted handler defs with one factory; each returned
    handler keeps the original (msg, history) signature and behavior.
    """
    def handler(msg, history):
        # Blank/whitespace input: echo the history unchanged and stop.
        if not msg or not msg.strip():
            yield history or []
            return
        yield from run_agent(msg, agent, history)
    return handler


# One handler per tab — same behavior as the previous five identical defs.
cmd_handler = _make_handler("command")
code_handler = _make_handler("code")
research_handler = _make_handler("research")
image_handler = _make_handler("image")
web_handler = _make_handler("web")
# ── REST API (standalone) ─────────────────────────────────────
@api_app.post("/api/agent")
async def api_agent(req: dict):
    """Standalone REST entry: run one agent turn, return the final reply."""
    agent = req.get("agent", "command")
    msg = req.get("message", "")
    if not msg:
        return JSONResponse({"error": "message required"}, status_code=400)
    # Drain the streaming generator; keep only the final history snapshot.
    final_history = []
    for snapshot in run_agent(msg, agent, []):
        final_history = snapshot
    if not final_history:
        return {"agent": agent, "response": "No response"}
    return {"agent": agent, "response": final_history[-1]["content"]}
@api_app.get("/api/health")
async def api_health():
    """Liveness probe exposing model id and app version."""
    return {"status": "ok", "model": MODEL_ID, "version": "2.2"}
# ── Gradio UI ─────────────────────────────────────────────────
# UI: five tabs share one pattern — Chatbot + Textbox + Button wired to the
# streaming handler for that tab's agent persona.
with gr.Blocks(title="DEX Evolution Outpost ⚡", theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# ⚡ DEX Evolution Outpost\n*Qwen3-4B + Stable Diffusion on H200 — no API keys needed*")
    with gr.Tab("⚡ Command"):
        gr.Markdown("*Route tasks — the LLM picks the agent.*")
        c1 = gr.Chatbot(height=500)
        i1 = gr.Textbox(label="Input", placeholder="Ask anything…")
        b1 = gr.Button("⚡ Go", variant="primary")
        b1.click(cmd_handler, [i1, c1], [c1])
    with gr.Tab("💻 Code"):
        gr.Markdown("*Write, debug, and explain code.*")
        c2 = gr.Chatbot(height=500)
        i2 = gr.Textbox(label="Input")
        b2 = gr.Button("💻 Go", variant="primary")
        b2.click(code_handler, [i2, c2], [c2])
    with gr.Tab("🔬 Research"):
        gr.Markdown("*Search the web and summarize findings.*")
        c3 = gr.Chatbot(height=500)
        i3 = gr.Textbox(label="Input")
        b3 = gr.Button("🔬 Go", variant="primary")
        b3.click(research_handler, [i3, c3], [c3])
    with gr.Tab("🎨 Image"):
        gr.Markdown("*Generate images with Stable Diffusion.*")
        c4 = gr.Chatbot(height=500)
        i4 = gr.Textbox(label="Input", placeholder="Describe your image…")
        b4 = gr.Button("🎨 Go", variant="primary")
        b4.click(image_handler, [i4, c4], [c4])
    with gr.Tab("🌐 Web"):
        gr.Markdown("*Browse and extract web content.*")
        c5 = gr.Chatbot(height=500)
        i5 = gr.Textbox(label="Input", placeholder="Enter URL or search query…")
        b5 = gr.Button("🌐 Go", variant="primary")
        b5.click(web_handler, [i5, c5], [c5])
    gr.Markdown("---\n*DEX Evolution Outpost v2.2 — all local, no API keys | [API](/api/agent)*")
# Mount Gradio on FastAPI so the UI and the JSON API share one server/port.
app = gr.mount_gradio_app(api_app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)