# dexifried @ 71a75b4 — Fix tool extraction: raw JSON decode for Qwen3 plain output
"""DEX Evolution Outpost v2.2 — FastAPI + Gradio"""
import gradio as gr
import spaces
import json, re, logging
from typing import Generator
from fastapi import FastAPI
from fastapi.responses import JSONResponse
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# FastAPI app
api_app = FastAPI()
# ── Model Config ──────────────────────────────────────────────
# Chat LLM served through the transformers text-generation pipeline.
MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"
# Hard cap on generated tokens per LLM call.
MAX_NEW_TOKENS = 1024
# Lazily-initialized singletons, populated on first _gpu_llm_chat call.
_tok = None
_pipeline = None
# Tool registry: names/params here must match the dispatch in _execute_tool.
TOOLS = [
    {"name": "web_search", "description": "Search the web via DuckDuckGo.", "params": {"query": "string", "max_results": "int (default 5)"}},
    {"name": "web_extract", "description": "Download and extract text from a URL.", "params": {"url": "string"}},
    {"name": "run_code", "description": "Execute Python code in sandbox.", "params": {"code": "string"}},
    {"name": "generate_image", "description": "Generate image via Stable Diffusion.", "params": {"prompt": "string", "steps": "int (default 20)"}},
]
# Human-readable tool list injected into the system prompt below.
TOOL_DESC = "\n".join(f"• {t['name']}: {t['description']} (params: {json.dumps(t['params'])})" for t in TOOLS)
# Default system prompt ("command" agent); the JSON format shown here is what
# _extract_tool_calls parses back out of the model's output.
SYSTEM = (
    "You are DEX, a local AI assistant with access to live tools. ALWAYS use tools for factual/real-time questions.\n"
    "To call a tool, write EXACTLY this format:\n"
    '{"name":"tool_name","arguments":{"param":"value"}}\n'
    f"Available tools:\n{TOOL_DESC}\n"
    "RULES:\n"
    "- For ANY factual question: use web_search FIRST, then answer from results\n"
    "- For code questions: use run_code to test your answer\n"
    "- Never fabricate information — if unsure, search\n"
    "After tool results, continue your analysis."
)
# ── GPU Functions ─────────────────────────────────────────────
@spaces.GPU(duration=60)
def gpu_warmup() -> str:
    """Probe CUDA availability from inside a ZeroGPU context."""
    import torch
    available = torch.cuda.is_available()
    return f"GPU ready at init: {available}"


gpu_message = gpu_warmup()  # Ensure detection during init
logger.info(gpu_message)
@spaces.GPU(duration=120)
def _gpu_llm_chat(messages: list) -> str:
    """Run one chat completion on GPU, lazily loading the model once.

    messages: chat-style list of {"role": ..., "content": ...} dicts.
    Returns only the newly generated text (the prompt prefix is stripped).
    """
    global _tok, _pipeline
    if _pipeline is None:
        # First call: load tokenizer + model and cache the pipeline in globals.
        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline as _p
        logger.info("Loading model...")
        _tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", trust_remote_code=True)
        _pipeline = _p("text-generation", model=_model, tokenizer=_tok, max_new_tokens=MAX_NEW_TOKENS, do_sample=True, temperature=0.7, top_p=0.9)
        logger.info("Model loaded!")
    prompt = _tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    out = _pipeline(prompt)[0]["generated_text"]
    # The pipeline returns prompt + completion; slice off the prompt prefix.
    return out[len(prompt):].strip()
@spaces.GPU(duration=60)
def _gpu_generate_image(prompt: str, steps: int = 20):
    """Render *prompt* with Stable Diffusion v1.5 on CUDA; return a PIL image."""
    import torch
    from diffusers import StableDiffusionPipeline
    sd = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
    image = sd(prompt, num_inference_steps=steps).images[0]
    # Free the pipeline's GPU memory before returning.
    del sd
    torch.cuda.empty_cache()
    return image
# ── CPU Tools ─────────────────────────────────────────────────
def _web_search(query: str, max_results: int = 5):
    """Run a DuckDuckGo text search; return the hit list as a JSON string."""
    from duckduckgo_search import DDGS
    hits = DDGS().text(query, max_results=max_results)
    return json.dumps(hits, ensure_ascii=False)
def _web_extract(url: str):
    """Fetch *url* and return its extracted main text, or a failure notice."""
    import trafilatura
    page = trafilatura.fetch_url(url)
    if not page:
        return "Failed to download."
    return trafilatura.extract(page)
def _run_code(code: str):
import io, contextlib
stdout = io.StringIO()
try:
with contextlib.redirect_stdout(stdout):
exec(code, {"__builtins__": __builtins__})
return stdout.getvalue() or "Executed successfully."
except Exception as e:
return f"Error: {type(e).__name__}: {e}"
def _execute_tool(name: str, args: dict) -> str:
try:
if name == "web_search": return _web_search(**args)
elif name == "web_extract": return _web_extract(**args)
elif name == "run_code": return _run_code(**args)
elif name == "generate_image": return _gpu_generate_image(**args)
return f"Unknown tool: {name}"
except Exception as e:
return f"Tool error: {type(e).__name__}: {e}"
def _extract_tool_calls(text: str):
"""Extract JSON tool calls from LLM output.
Handles: raw JSON, blocks, embedded JSON objects."""
calls = []
# Method 1: blocks (legacy format)
for m in re.finditer(r'', text, re.DOTALL):
try: calls.append(json.loads(m.group(1).strip()))
except: pass
if calls:
return calls
# Method 2: Raw JSON objects anywhere in text (Qwen3 outputs plain JSON)
decoder = json.JSONDecoder()
pos = 0
while pos < len(text):
try:
obj, end = decoder.raw_decode(text, pos)
if isinstance(obj, dict) and "name" in obj and "arguments" in obj:
calls.append(obj)
pos = end
except json.JSONDecodeError:
pos += 1
return calls
def _dispatch_tool_calls(calls: list):
results = []
for c in calls:
name, args = c.get("name",""), c.get("arguments",{})
logger.info(f"Tool: {name}({args})")
results.append({"name": name, "arguments": args, "result": _execute_tool(name, args)})
return results
# ── Agent Loop ────────────────────────────────────────────────
def run_agent(user_msg: str, agent_name: str, chat_history: list) -> Generator:
    """Single-round agent loop: prompt → optional tool pass → final answer.

    Yields successive chat-history snapshots (lists of role/content dicts)
    so a streaming Gradio Chatbot can render progress.

    NOTE(review): chat_history is echoed in the UI but is NOT forwarded to
    the LLM — `messages` is rebuilt from scratch every call, so the model
    sees no prior turns. Confirm whether multi-turn context is intended.
    """
    # Per-agent system prompts; unknown agent names fall back to SYSTEM.
    agent_system = {
        "command": SYSTEM,
        "code": "You are a code-focused assistant. Write, debug, and explain code. When asked to run code, use the run_code tool:\n{name:\"run_code\",arguments:{\"code\":\"your code here\"}}",
        "research": """You are a research assistant. IMPORTANT: You MUST search the web using tools before answering any factual question. Do NOT rely on training data — always verify current info with live search.
To search, write: {"name":"web_search","arguments":{"query":"search query here","max_results":5}}
After getting results, summarize them. Always cite sources. If no results found, say so.""",
        "image": "You are an image generation assistant. When asked to create an image, use the generate_image tool:\n{\"name\":\"generate_image\",\"arguments\":{\"prompt\":\"detailed image description\",\"steps\":20}}",
        "web": """You are a web agent. To extract content from a URL, use the web_extract tool:
{"name":"web_extract","arguments":{"url":"https://..."}}
To search the web first, use web_search:
{"name":"web_search","arguments":{"query":"search terms","max_results":5}}
Always use tools — do not make up content."""
    }.get(agent_name, SYSTEM)
    # Show the user's message in the UI immediately.
    h = list(chat_history) if chat_history else []
    h.append({"role": "user", "content": user_msg})
    yield h
    messages = [{"role": "system", "content": agent_system}, {"role": "user", "content": user_msg}]
    try:
        resp = _gpu_llm_chat(messages)
    except Exception as e:
        # Surface the failure in-chat rather than crashing the stream.
        h.append({"role": "assistant", "content": f"❌ Error: {type(e).__name__}: {e}"})
        yield h
        return
    tool_calls = _extract_tool_calls(resp)
    if tool_calls:
        # Announce the tools, run them, then show truncated results in the UI.
        h.append({"role": "assistant", "content": f"🔧 Using: {', '.join(c.get('name','?') for c in tool_calls)}"})
        yield h
        results = _dispatch_tool_calls(tool_calls)
        obs = "\n".join(f"**{r['name']}**: {r['result'][:500]}" for r in results)
        h.append({"role": "assistant", "content": f"📋 Results:\n{obs}"})
        yield h
        # Feed tool outputs back (as user turns) for one follow-up LLM pass.
        messages.append({"role": "assistant", "content": resp})
        for r in results:
            messages.append({"role": "user", "content": f"Tool result for {r['name']}: {r['result'][:2000]}"})
        try:
            resp = _gpu_llm_chat(messages)
        except Exception as e:
            resp = f"(LLM error: {e})"
    h.append({"role": "assistant", "content": resp})
    yield h
# ── Handlers ──────────────────────────────────────────────────
def _make_handler(agent: str):
    """Build a Gradio streaming handler bound to one agent persona.

    Replaces five copy-pasted handler defs with one factory; each returned
    handler keeps the original (msg, history) signature and behavior.
    """
    def handler(msg, history):
        # Blank/whitespace input: echo the history unchanged and stop.
        if not msg or not msg.strip():
            yield history or []
            return
        yield from run_agent(msg, agent, history)
    return handler


# One handler per tab — same behavior as the previous five identical defs.
cmd_handler = _make_handler("command")
code_handler = _make_handler("code")
research_handler = _make_handler("research")
image_handler = _make_handler("image")
web_handler = _make_handler("web")
# ── REST API (standalone) ─────────────────────────────────────
@api_app.post("/api/agent")
async def api_agent(req: dict):
    """Standalone REST entry: run one agent turn, return the final reply."""
    agent = req.get("agent", "command")
    msg = req.get("message", "")
    if not msg:
        return JSONResponse({"error": "message required"}, status_code=400)
    # Drain the streaming generator; keep only the final history snapshot.
    final_history = []
    for snapshot in run_agent(msg, agent, []):
        final_history = snapshot
    if not final_history:
        return {"agent": agent, "response": "No response"}
    return {"agent": agent, "response": final_history[-1]["content"]}
@api_app.get("/api/health")
async def api_health():
    """Liveness probe exposing model id and app version."""
    return {"status": "ok", "model": MODEL_ID, "version": "2.2"}
# ── Gradio UI ─────────────────────────────────────────────────
# UI: five tabs share one pattern — Chatbot + Textbox + Button wired to the
# streaming handler for that tab's agent persona.
with gr.Blocks(title="DEX Evolution Outpost ⚡", theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# ⚡ DEX Evolution Outpost\n*Qwen3-4B + Stable Diffusion on H200 — no API keys needed*")
    with gr.Tab("⚡ Command"):
        gr.Markdown("*Route tasks — the LLM picks the agent.*")
        c1 = gr.Chatbot(height=500)
        i1 = gr.Textbox(label="Input", placeholder="Ask anything…")
        b1 = gr.Button("⚡ Go", variant="primary")
        b1.click(cmd_handler, [i1, c1], [c1])
    with gr.Tab("💻 Code"):
        gr.Markdown("*Write, debug, and explain code.*")
        c2 = gr.Chatbot(height=500)
        i2 = gr.Textbox(label="Input")
        b2 = gr.Button("💻 Go", variant="primary")
        b2.click(code_handler, [i2, c2], [c2])
    with gr.Tab("🔬 Research"):
        gr.Markdown("*Search the web and summarize findings.*")
        c3 = gr.Chatbot(height=500)
        i3 = gr.Textbox(label="Input")
        b3 = gr.Button("🔬 Go", variant="primary")
        b3.click(research_handler, [i3, c3], [c3])
    with gr.Tab("🎨 Image"):
        gr.Markdown("*Generate images with Stable Diffusion.*")
        c4 = gr.Chatbot(height=500)
        i4 = gr.Textbox(label="Input", placeholder="Describe your image…")
        b4 = gr.Button("🎨 Go", variant="primary")
        b4.click(image_handler, [i4, c4], [c4])
    with gr.Tab("🌐 Web"):
        gr.Markdown("*Browse and extract web content.*")
        c5 = gr.Chatbot(height=500)
        i5 = gr.Textbox(label="Input", placeholder="Enter URL or search query…")
        b5 = gr.Button("🌐 Go", variant="primary")
        b5.click(web_handler, [i5, c5], [c5])
    gr.Markdown("---\n*DEX Evolution Outpost v2.2 — all local, no API keys | [API](/api/agent)*")
# Mount Gradio on FastAPI so the UI and the JSON API share one server/port.
app = gr.mount_gradio_app(api_app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)