Spaces:

forkjoin-ai
/

aether

Running

aether / app.py

Taylor

chore: add void attention footer links

fd5d225 15 days ago

10.5 kB

	"""
	Aether -- Pure Engine Speed Comparison
	Same model. Same prompt. Same tokens. Different engine.
	PyTorch CPU vs Aether WASM-SIMD. Let the ms/tok speak.
	"""

	import atexit
	import json
	import os
	import select
	import subprocess
	import threading
	import time
	import urllib.error
	import urllib.request
	from concurrent.futures import ThreadPoolExecutor, as_completed

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# --- Engine startup ---------------------------------------------------------
	# The Node sidecar is launched first so it can start up while the PyTorch
	# model loads in this process.
	print("[Aether] Starting Aether sidecar...", flush=True)
	aether_proc = subprocess.Popen(
	    ["node", "aether-server.mjs"],
	    env={**os.environ, "AETHER_PORT": "7861"},
	    stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
	)
	# Stop the sidecar when this interpreter exits so it does not linger and keep
	# port 7861 bound. terminate() is a no-op if the child has already exited.
	atexit.register(aether_proc.terminate)


	def _forward_sidecar_output(stream):
	    """Echo every sidecar log line to our stdout until the pipe closes."""
	    for raw in iter(stream.readline, b""):
	        print(f" {raw.decode(errors='replace').strip()}", flush=True)


	# Drain the sidecar's stdout for the whole lifetime of the process. Nothing
	# else reads this pipe, and an undrained PIPE eventually blocks the child once
	# the OS pipe buffer fills up.
	threading.Thread(
	    target=_forward_sidecar_output, args=(aether_proc.stdout,), daemon=True,
	).start()

	print("[Aether] Loading PyTorch model...", flush=True)
	tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
	pytorch_model = AutoModelForCausalLM.from_pretrained(
	    "HuggingFaceTB/SmolLM2-360M-Instruct",
	    torch_dtype=torch.float32, device_map="cpu",
	)
	print("[Aether] PyTorch ready.", flush=True)

	# Poll the sidecar's /health endpoint (up to 180 checks, one second apart)
	# until it reports status "ok" with the model loaded.
	print("[Aether] Waiting for Aether engine...", flush=True)
	last_error = None
	for _ in range(180):
	    if aether_proc.poll() is not None:
	        # The child is gone; further polling cannot succeed.
	        print(
	            f"[Aether] Sidecar exited with code {aether_proc.returncode} before becoming ready.",
	            flush=True,
	        )
	        break
	    try:
	        with urllib.request.urlopen("http://127.0.0.1:7861/health", timeout=2) as resp:
	            health = json.loads(resp.read())
	        if health.get("status") == "ok" and health.get("model") == "loaded":
	            print(f"[Aether] Engine ready ({health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True)
	            break
	    except Exception as exc:
	        # Deliberately broad: any failure here (connection refused, timeout,
	        # malformed payload) just means "not ready yet". The last one is kept
	        # so it can be reported if the engine never comes up.
	        last_error = exc
	    time.sleep(1)
	else:
	    # Startup continues regardless; gen_aether reports request failures inline.
	    print(
	        f"[Aether] Engine did not become ready after 180 health checks (last error: {last_error}).",
	        flush=True,
	    )


	def gen_pytorch(prompt, max_tokens):
	    """Generate a sampled reply with the in-process PyTorch model and time it.

	    The prompt is wrapped as a single user message, rendered through the
	    tokenizer's chat template, and passed to ``pytorch_model.generate``.
	    Only the ``generate`` call itself is timed.

	    Returns:
	        A tuple ``(text, seconds, new_tokens, ms_per_token)`` where
	        ``ms_per_token`` is 0 when no new tokens were produced.
	    """
	    chat = [{"role": "user", "content": prompt}]
	    rendered = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
	    encoded = tokenizer(rendered, return_tensors="pt")
	    prompt_len = encoded["input_ids"].shape[1]

	    started = time.perf_counter()
	    with torch.no_grad():
	        generated = pytorch_model.generate(
	            **encoded,
	            max_new_tokens=max_tokens,
	            temperature=0.7,
	            top_p=0.9,
	            do_sample=True,
	            pad_token_id=tokenizer.eos_token_id,
	        )
	    elapsed = time.perf_counter() - started

	    # Everything past the prompt length is newly generated.
	    new_tokens = generated.shape[1] - prompt_len
	    reply = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True).strip()
	    if new_tokens > 0:
	        ms_per_token = elapsed * 1000 / new_tokens
	    else:
	        ms_per_token = 0
	    return reply, elapsed, new_tokens, ms_per_token


	def gen_aether(prompt, max_tokens, url="http://127.0.0.1:7861/generate", timeout=600):
	    """Request a completion from the Aether sidecar over HTTP.

	    Posts ``{"prompt": ..., "max_tokens": ...}`` as JSON to ``url`` and reads
	    the ``text``, ``totalTimeMs``, ``tokens`` and ``avgTokenMs`` fields of the
	    JSON reply. This function never raises: every failure is reported in the
	    returned text instead.

	    Args:
	        prompt: Prompt text to send.
	        max_tokens: Value sent as ``max_tokens`` in the request body.
	        url: Endpoint to post to; defaults to the local sidecar.
	        timeout: Timeout in seconds passed to ``urlopen``.

	    Returns:
	        ``(text, seconds, tokens, ms_per_token)`` on success, or
	        ``("[Error: ...]", 0, 0, 0)`` on any failure.
	    """
	    try:
	        payload = json.dumps({"prompt": prompt, "max_tokens": max_tokens}).encode()
	        req = urllib.request.Request(
	            url, data=payload, headers={"Content-Type": "application/json"},
	        )
	        # The context manager closes the response instead of leaving it to GC.
	        with urllib.request.urlopen(req, timeout=timeout) as resp:
	            r = json.loads(resp.read())
	        return r["text"], r["totalTimeMs"] / 1000, r["tokens"], r["avgTokenMs"]
	    except urllib.error.HTTPError as e:
	        body = str(e)
	        if e.fp:
	            try:
	                # errors="replace" keeps a non-UTF-8 error body from raising
	                # out of this handler.
	                body = e.read().decode("utf-8", errors="replace")
	            except OSError:
	                pass  # fall back to the status line captured above
	            finally:
	                e.close()
	        # Prefer the server's JSON "error" field; otherwise show the raw body.
	        try:
	            detail = json.loads(body).get("error", body[:300])
	        except (ValueError, AttributeError):
	            detail = body[:300]
	        return f"[Error: {detail}]", 0, 0, 0
	    except Exception as e:
	        return f"[Error: {e}]", 0, 0, 0


	def compare(prompt, max_tokens):
	    """Run both engines in parallel and yield UI snapshots as they finish.

	    Each yielded value is a 4-tuple
	    ``(pytorch_text, aether_text, pytorch_stats, aether_stats)``. An engine
	    that has not finished yet is shown as ``"generating..."`` with stats
	    ``"running..."``.

	    For a blank prompt a single tuple of empty strings is yielded. Otherwise
	    one snapshot is yielded immediately (both engines pending) and one more
	    after each engine completes, so the last snapshot holds both results.

	    An engine that raises is reported inline as ``"[Error: ...]"`` with zeroed
	    stats, so one engine failing never discards the other's result.
	    """
	    if not prompt or not prompt.strip():
	        yield ("", "", "", "")
	        return

	    max_tokens = int(max_tokens)
	    # Only the generator's own thread writes here; workers hand their results
	    # back through the futures.
	    results = {"pt": None, "ae": None}

	    def run_engine(key):
	        # The engine is resolved inside the try so that any failure at all is
	        # turned into the same error tuple shape gen_aether uses.
	        try:
	            engine = gen_pytorch if key == "pt" else gen_aether
	            return engine(prompt, max_tokens)
	        except Exception as exc:
	            return f"[Error: {exc}]", 0, 0, 0

	    def describe(result):
	        if result is None:
	            return "running..."
	        return f"{result[2]} tokens in {result[1]:.1f}s ({result[3]:.0f}ms/tok)"

	    def snapshot():
	        pt, ae = results["pt"], results["ae"]
	        return (
	            pt[0] if pt else "generating...",
	            ae[0] if ae else "generating...",
	            describe(pt), describe(ae),
	        )

	    with ThreadPoolExecutor(max_workers=2) as pool:
	        futures = {pool.submit(run_engine, key): key for key in results}
	        # Show the pending state right away instead of staying blank until
	        # the faster engine finishes.
	        yield snapshot()
	        for future in as_completed(futures):
	            results[futures[future]] = future.result()
	            yield snapshot()


	# Custom stylesheet handed to gr.Blocks(css=CSS): dark page background, the
	# #hero and #footer sections, the two response cards with their labels and
	# stats line, the prompt box, the generate button and the example-prompt chips.
	# The last two rules hide elements matching `footer.svelte-1ax1toq` and
	# `.built-with`.
	# NOTE(review): presumably these target Gradio's built-in footer; the
	# `svelte-*` class looks build-specific — confirm it still matches the
	# deployed Gradio version.
	CSS = """
	.gradio-container { max-width: 1060px !important; margin: 0 auto !important; }
	.gradio-container, .dark { background: #09090b !important; }
	#hero { text-align: center; padding: 2rem 0 1rem; }
	#hero h1 { font-size: 2.5rem; font-weight: 300; letter-spacing: -0.02em; color: #fafafa; margin: 0; }
	#hero .accent { color: #06b6d4; }
	#hero .subtitle { color: #71717a; font-size: 0.95rem; margin-top: 0.5rem; }
	.response-card { background: #0c0c0f !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; }
	.response-card textarea { background: #0c0c0f !important; border: none !important; color: #e4e4e7 !important; font-size: 0.95rem !important; line-height: 1.6 !important; }
	.pt-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
	.ae-label { color: #06b6d4 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
	.stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.85rem !important; color: #52525b !important; }
	#prompt-input > label > span { display: none !important; }
	#prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
	#prompt-input textarea:focus { border-color: #06b6d4 !important; }
	#gen-btn { background: #06b6d4 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; color: #09090b !important; }
	.prompt-chip { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 6px !important; color: #a1a1aa !important; font-size: 0.85rem !important; }
	.prompt-chip:hover { border-color: #06b6d4 !important; color: #fafafa !important; }
	#footer { text-align: center; padding: 2rem 0; border-top: 1px solid #1f1f23; margin-top: 2rem; }
	#footer p { color: #52525b; font-size: 0.8rem; }
	#footer a { color: #06b6d4; text-decoration: none; }
	footer.svelte-1ax1toq { display: none !important; }
	.built-with { display: none !important; }
	"""

	# Page layout and event wiring. Components are created in display order:
	# hero banner, prompt row, generate button, the two result columns, the
	# example-prompt chips, and the footer.
	with gr.Blocks(
	    css=CSS,
	    theme=gr.themes.Base(primary_hue="cyan", neutral_hue="zinc"),
	    title="Aether",
	) as demo:

	    def _stats_html(text):
	        """Wrap a stats string in the styled paragraph shown under each output."""
	        return f'<p class="stats-text">{text}</p>'

	    gr.HTML("""
	<div id="hero">
	<h1><span class="accent">Aether</span></h1>
	<p class="subtitle">Pure engine speed comparison. Same model (SmolLM2-360M-Instruct). Same prompt. Same tokens.<br/>
	Left: PyTorch CPU (2.8GB runtime, CUDA/MKL optimized).<br/>
	Right: Aether (14KB WASM binary, pure JS + SIMD128, zero ML dependencies).<br/>
	Both generate in parallel. Whichever finishes first shows first.</p>
	</div>
	""")

	    with gr.Row():
	        prompt = gr.Textbox(
	            elem_id="prompt-input",
	            placeholder="What is the shape of failure?",
	            lines=2,
	            label="Prompt",
	            show_label=False,
	            interactive=True,
	            scale=4,
	        )
	        max_tok = gr.Slider(
	            minimum=8,
	            maximum=8192,
	            value=64,
	            step=1,
	            label="Max tokens",
	            scale=1,
	        )

	    btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")

	    with gr.Row(equal_height=True):
	        # Left column: PyTorch output and stats.
	        with gr.Column():
	            gr.HTML('<p class="pt-label">PyTorch CPU (standard)</p>')
	            pt_out = gr.Textbox(
	                lines=10, show_label=False, interactive=False,
	                elem_classes=["response-card"],
	            )
	            pt_stats = gr.HTML(_stats_html("--"))
	        # Narrow divider column.
	        with gr.Column(min_width=30):
	            gr.HTML('<p style="color:#27272a; text-align:center; padding-top:4rem; font-size:0.75rem; letter-spacing:0.1em;">VS</p>')
	        # Right column: Aether output and stats.
	        with gr.Column():
	            gr.HTML('<p class="ae-label">Aether WASM-SIMD (14KB)</p>')
	            ae_out = gr.Textbox(
	                lines=10, show_label=False, interactive=False,
	                elem_classes=["response-card"],
	            )
	            ae_stats = gr.HTML(_stats_html("--"))

	    request_fields = [prompt, max_tok]
	    result_fields = [pt_out, ae_out, pt_stats, ae_stats]

	    def run(p, mt):
	        """Stream compare() snapshots, wrapping the two stats strings in HTML."""
	        for pt_text, ae_text, pt_line, ae_line in compare(p, mt):
	            yield pt_text, ae_text, _stats_html(pt_line), _stats_html(ae_line)

	    # Both the button and pressing Enter in the prompt box start a comparison.
	    btn.click(run, request_fields, result_fields)
	    prompt.submit(run, request_fields, result_fields)

	    gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
	    example_prompts = [
	        "hello",
	        "What is the shape of failure?",
	        "Write a haiku about parallel universes.",
	        "Explain entropy to a five-year-old.",
	    ]
	    with gr.Row():
	        for example in example_prompts:
	            chip = gr.Button(example, size="sm", elem_classes=["prompt-chip"])
	            # The default argument pins this chip's own text; the chained step
	            # then runs the comparison with the freshly filled prompt box.
	            filled = chip.click(fn=lambda x=example: x, outputs=[prompt])
	            filled.then(fn=run, inputs=request_fields, outputs=result_fields)

	    gr.HTML("""
	<div id="footer">
	<p style="color:#a1a1aa; font-size:0.85rem; margin-bottom:0.5rem;">
	SmolLM2-360M-Instruct · Q8_0 GGUF · Same weights, different engines
	</p>
	<p>
	PyTorch: ~2.8GB runtime, C++/CUDA/MKL optimized, Python bindings<br/>
	Aether: 14KB WASM + JS, SIMD128 vectorized, zero dependencies, runs anywhere
	</p>
	<p style="margin-top:1rem;">
	<a href="https://forkracefold.com/">Whitepaper</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/aether">Aether</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/aether-browser">Edge Mesh</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/the-void">The Void</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/buleyean-rl">Buleyean RL</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/metacog">Metacog</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/five-bules">Five Bules</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/void-attention">Void Attention</a> ·
	<a href="https://huggingface.co/spaces/forkjoin-ai/quark-personality">Quark Personality</a>
	</p>
	<p style="margin-top:1rem;">Copyright 2026 forkjoin.ai</p>
	</div>
	""")

	# Serve on all interfaces, port 7860, only when executed as a script.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860)