Spaces:

anthonym21
/

slipcore

Sleeping

App Files Files Community

slipcore / app.py

anthonym21

Deploy Slipstream paper Space with Live Quantizer

a9a21f4 verified 8 days ago

raw

history blame contribute delete

20.4 kB

	import json
	import os
	import re
	from dataclasses import dataclass
	from typing import Dict, List, Tuple, Optional

	import gradio as gr

	# Lazy-loaded quantizer
	_quantizer = None
	_quantizer_error = None

	def _get_quantizer():
	"""Lazy load the embedding quantizer."""
	global _quantizer, _quantizer_error
	if _quantizer is not None:
	return _quantizer
	if _quantizer_error is not None:
	return None
	try:
	from quantizer import EmbeddingQuantizer
	_quantizer = EmbeddingQuantizer(fallback_threshold=0.3)
	return _quantizer
	except Exception as e:
	_quantizer_error = str(e)
	return None

	def _get_keyword_quantizer():
	"""Fallback to keyword quantizer."""
	try:
	from quantizer import KeywordQuantizer
	return KeywordQuantizer()
	except Exception:
	return None


	# ---------------------------
	# Loaded at build time
	# ---------------------------
	PAPER_TITLE = "Slipstream: Semantic Quantization for Efficient Multi-Agent Coordination"
	PAPER_AUTHORS = "Anthony Maio"
	PAPER_ABSTRACT = "As multi-agent LLM systems scale,coordination bandwidthbecomes a primary cost\ndriver: every token spent on routing, intent framing, and redundant context is paid repeat-\nedly across agents and turns. Current approaches waste 40\u201360% of compute on coordination\noverhead, with communication costs scalingO(n2)as agent counts increase.\nThis paper introducesSlipstream, a protocol that performssemantic quantization:\nmapping free-form messages onto a sharedUniversal Concept Reference (UCR)and\ntransmitting compactmnemonic anchorsthat identify structured intents. Unlike syn-\ntactic compression (which fails due to BPE tokenizer fragmentation), Slipstream transmits\nnatural-language mnemonics that tokenize efficiently across model architectures.\nSlipstream combines (1) a symbolic4D semantic manifold\u2014Action, Polarity, Domain,\nUrgency\u2014with (2) a data-drivenvector engine(embeddings + nearest-centroid retrieval)\nplus anevolutionary extension layerthat learns new anchors from low-confidence traf-\nfic. Results show82% token reduction(41.9\u21927.4 tokens average) while maintaining\nsemantic fidelity, making large-scale multi-agent deployments economically viable."
	PAPER_TAGS = "semantic-quantization, multi-agent-systems, protocol-standards, token-ef-"
	DEFAULT_LLM_MODEL = None
	DETECTED_MODELS = []


	# ---------------------------
	# Robust file loading
	# ---------------------------
	def _load_chunks(path: str = "paper_chunks.jsonl") -> List[str]:
	chunks: List[str] = []
	try:
	with open(path, "r", encoding="utf-8") as f:
	for line in f:
	line = line.strip()
	if not line:
	continue
	try:
	rec = json.loads(line)
	txt = (rec.get("text") or "").strip()
	if txt:
	chunks.append(txt)
	except Exception:
	continue
	except FileNotFoundError:
	return []
	except Exception:
	return []
	return chunks


	# ---------------------------
	# Tiny BM25-ish retrieval (pure Python)
	# ---------------------------
	def _tokenize(s: str) -> List[str]:
	return re.findall(r"[A-Za-z0-9][A-Za-z0-9_-]{1,}", s.lower())


	@dataclass
	class Retriever:
	chunks: List[str]
	doc_tokens: List[List[str]]
	df: Dict[str, int]
	idf: Dict[str, float]

	@staticmethod
	def build(chunks: List[str]) -> "Retriever":
	doc_tokens = [_tokenize(c) for c in chunks]
	df: Dict[str, int] = {}
	for toks in doc_tokens:
	for t in set(toks):
	df[t] = df.get(t, 0) + 1
	n = max(1, len(doc_tokens))
	idf = {}
	for t, d in df.items():
	idf[t] = float((n - d + 0.5) / (d + 0.5))
	return Retriever(chunks=chunks, doc_tokens=doc_tokens, df=df, idf=idf)

	def topk(self, query: str, k: int = 4) -> List[Tuple[int, float]]:
	q = _tokenize(query)
	if not q:
	return []
	scores: List[Tuple[int, float]] = []
	qset = set(q)
	for i, toks in enumerate(self.doc_tokens):
	if not toks:
	continue
	overlap = qset.intersection(toks)
	if not overlap:
	continue
	score = 0.0
	for t in overlap:
	score += self.idf.get(t, 0.0)
	score = score / (1.0 + (len(toks) / 200.0))
	scores.append((i, score))
	scores.sort(key=lambda x: x[1], reverse=True)
	return scores[:k]


	CHUNKS = _load_chunks()
	RETRIEVER = Retriever.build(CHUNKS) if CHUNKS else None


	def retrieve_context(query: str, k: int = 4, max_chars: int = 6000) -> str:
	if not RETRIEVER:
	return ""
	hits = RETRIEVER.topk(query, k=k)
	parts: List[str] = []
	for idx, _score in hits:
	txt = CHUNKS[idx].strip()
	if txt:
	parts.append(txt)
	ctx = "\n\n".join(parts).strip()
	return ctx[:max_chars]


	# ---------------------------
	# HF Inference helpers (optional)
	# ---------------------------
	def _get_hf_client(model_id: str):
	try:
	from huggingface_hub import InferenceClient
	except Exception as e:
	raise RuntimeError("huggingface_hub is not installed. Add it to requirements.txt") from e

	token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
	if not token:
	raise RuntimeError("HF_TOKEN is not set as a Space secret.")
	return InferenceClient(model=model_id, token=token)


	def _llm_chat(model_id: str, messages: List[dict], max_tokens: int = 512) -> str:
	client = _get_hf_client(model_id)
	out = client.chat_completion(messages=messages, max_tokens=max_tokens)
	return out.choices[0].message.content


	# ---------------------------
	# Chat with paper (RAG-lite)
	# ---------------------------
	def paper_chat(message: str, history: List[Tuple[str, str]]) -> str:
	message = (message or "").strip()
	if not message:
	return "Ask a question about the paper."

	ctx = retrieve_context(message, k=4, max_chars=6000)

	model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
	if not model_id:
	if not ctx:
	return "No indexed context found. (paper_chunks.jsonl missing?)"
	return "Top matches in the paper:\n\n" + ctx[:1200]

	trimmed_history = history[-4:] if history else []
	messages: List[dict] = [
	{
	"role": "system",
	"content": (
	"You are a precise research assistant. Answer using ONLY the provided paper context. "
	"If the context is insufficient, say what is missing and point to what section would help."
	),
	},
	]
	if ctx:
	messages.append({"role": "system", "content": "PAPER CONTEXT:\n\n" + ctx})

	for u, a in trimmed_history:
	messages.append({"role": "user", "content": u})
	messages.append({"role": "assistant", "content": a})

	messages.append({"role": "user", "content": message})

	try:
	return _llm_chat(model_id, messages, max_tokens=512).strip()
	except Exception as e:
	if ctx:
	return f"(LLM unavailable: {e})\n\nTop matches in the paper:\n\n" + ctx[:1200]
	return f"LLM unavailable: {e}"


	# ---------------------------
	# Share Kit (generators)
	# ---------------------------
	def _fallback_tweet_thread(title: str, abstract: str) -> str:
	abs_one = re.sub(r"\s+", " ", abstract).strip()
	bullets = [
	f"1/ {title}",
	"2/ TL;DR: " + (abs_one[:220] + ("…" if len(abs_one) > 220 else "")),
	"3/ Key idea: (open the Space → Chat tab and ask for the method overview)",
	"4/ Try it: use the Share Kit tab to generate a talk outline / FAQ.",
	"5/ Links: add your paper + code links in the README.",
	]
	return "\n\n".join(bullets)

	def generate_share(kind: str) -> str:
	kind = (kind or "").strip().lower()
	base_title = PAPER_TITLE or "Paper"
	base_abs = PAPER_ABSTRACT or ""

	model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
	if not model_id:
	if kind == "tweet thread":
	return _fallback_tweet_thread(base_title, base_abs)
	if kind == "talk outline":
	return "\n".join([
	f"Title: {base_title}",
	"- Motivation",
	"- Problem setup",
	"- Method",
	"- Results",
	"- Limitations",
	"- Q&A",
	])
	if kind == "faq":
	return "\n".join([
	"Q: What problem does this address?\nA: " + (base_abs[:220] + ("…" if len(base_abs) > 220 else "")),
	"",
	"Q: What is the main contribution?\nA: Ask in the Chat tab.",
	"",
	"Q: How do I reproduce it?\nA: Link code + add steps in README.",
	])
	return "Select an item to generate."

	prompt = {
	"tweet thread": "Write a concise 6-tweet thread summarizing the paper for the ML community.",
	"talk outline": "Create a 10-minute talk outline with section headers and bullet points.",
	"faq": "Write an FAQ with 6 Q/A pairs focused on method, results, limitations, and usage.",
	}.get(kind, "Summarize the paper in 8 bullet points.")

	ctx = (PAPER_ABSTRACT or "").strip()
	messages = [
	{"role": "system", "content": "You are an expert technical writer for ML research audiences."},
	{"role": "user", "content": f"Paper title: {base_title}\nAuthors: {PAPER_AUTHORS}\n\nAbstract/context:\n{ctx}\n\nTask: {prompt}"},
	]
	try:
	return _llm_chat(model_id, messages, max_tokens=600).strip()
	except Exception as e:
	if kind == "tweet thread":
	return _fallback_tweet_thread(base_title, base_abs) + f"\n\n(LLM unavailable: {e})"
	return f"LLM unavailable: {e}"


	# ---------------------------
	# Model Playground (chat)
	# ---------------------------
	def model_chat(model_id: str, message: str, history: List[Tuple[str, str]]) -> str:
	model_id = (model_id or "").strip()
	message = (message or "").strip()
	if not model_id:
	return "Provide a model id."
	if not message:
	return "Send a message."

	messages: List[dict] = [{"role": "system", "content": "You are a helpful assistant."}]
	for u, a in (history[-4:] if history else []):
	messages.append({"role": "user", "content": u})
	messages.append({"role": "assistant", "content": a})
	messages.append({"role": "user", "content": message})

	try:
	return _llm_chat(model_id, messages, max_tokens=512).strip()
	except Exception as e:
	return f"Model call failed: {e}"


	# ---------------------------
	# UI helpers
	# ---------------------------
	def quantize_intent(intent: str) -> Tuple[str, str, str]:
	"""
	Quantize a natural language intent to UCR anchor.
	Returns: (primary_result_md, alternatives_md, wire_format)
	"""
	intent = (intent or "").strip()
	if not intent:
	return "Enter an intent to quantize.", "", ""

	# Try embedding quantizer first, fall back to keyword
	quantizer = _get_quantizer()
	method = "embedding"
	if quantizer is None:
	quantizer = _get_keyword_quantizer()
	method = "keyword"

	if quantizer is None:
	return "Quantizer unavailable. Check logs.", "", ""

	try:
	result = quantizer.quantize(intent)
	except Exception as e:
	return f"Quantization error: {e}", "", ""

	# Confidence color
	conf = result.confidence
	if conf >= 0.7:
	color = "green"
	conf_label = "High"
	elif conf >= 0.5:
	color = "orange"
	conf_label = "Medium"
	else:
	color = "red"
	conf_label = "Low"

	# Primary result
	primary_md = f"""
	### {result.anchor.mnemonic}

	Confidence: <span style="color:{color}; font-weight:bold">{conf:.0%}</span> ({conf_label})

	Canonical meaning: {result.anchor.canonical}

	Method: {method} {'(fallback)' if result.is_fallback else ''}

	Coordinates: `{result.anchor.coords}` (Action, Polarity, Domain, Urgency)
	"""

	# Alternatives
	if result.alternatives:
	alt_lines = ["\| Anchor \| Similarity \|", "\|--------\|------------\|"]
	for alt_anchor, alt_score in result.alternatives[:3]:
	bar_len = int(alt_score * 10)
	bar = "█" * bar_len + "░" * (10 - bar_len)
	alt_lines.append(f"\| {alt_anchor.mnemonic} \| {bar} {alt_score:.0%} \|")
	alternatives_md = "\n".join(alt_lines)
	else:
	alternatives_md = "No alternatives"

	# Wire format
	wire = f"SLIP v1 user agent {result.anchor.mnemonic}"
	if result.is_fallback:
	# Truncate long intents for fallback payload
	payload = intent[:100].replace('"', "'")
	wire = f'SLIP v1 user agent Fallback "{payload}"'

	return primary_md, alternatives_md, wire


	EXAMPLE_INTENTS = [
	("Review my code", "RequestReview"),
	("Task complete!", "InformComplete"),
	("System down!", "ObserveError"),
	("Can you help?", "RequestHelp"),
	("Looks good to me", "EvalApprove"),
	]


	def start_here(choice: str) -> str:
	choice = (choice or "").strip().lower()
	if choice == "quick summary":
	return f"### {PAPER_TITLE}\n\nAuthors: {PAPER_AUTHORS}\n\nAbstract:\n\n{PAPER_ABSTRACT}"
	if choice == "how does it work?":
	return "Go to Chat and ask: Give me a method overview with the key steps."
	if choice == "what are the limitations?":
	return "Go to Chat and ask: List limitations and failure modes discussed in the paper."
	if choice == "generate a tweet thread":
	return generate_share("tweet thread")
	return "Pick an option."


	def _load_gallery_items() -> List[Tuple[str, str]]:
	items: List[Tuple[str, str]] = []
	if os.path.isdir("assets/images"):
	for fn in sorted(os.listdir("assets/images"))[:48]:
	path = os.path.join("assets/images", fn)
	if os.path.isfile(path):
	items.append((path, fn))
	if not items and os.path.isdir("assets/pages"):
	for fn in sorted(os.listdir("assets/pages"))[:24]:
	path = os.path.join("assets/pages", fn)
	if os.path.isfile(path):
	items.append((path, fn))
	return items


	CSS = '''
	.paper-hero h1 { margin-bottom: 0.2rem; }
	.paper-hero p { margin-top: 0.2rem; opacity: 0.9; }
	.hint { opacity: 0.85; }
	'''

	with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
	gr.Markdown(f"# {PAPER_TITLE}", elem_classes=["paper-hero"])
	if PAPER_AUTHORS:
	gr.Markdown(f"Authors: {PAPER_AUTHORS}", elem_classes=["paper-hero"])
	if PAPER_TAGS:
	gr.Markdown(f"Tags: {PAPER_TAGS}", elem_classes=["paper-hero"])

	with gr.Tabs():
	with gr.Tab("Start here"):
	gr.Markdown("Pick an interaction to explore the paper quickly.", elem_classes=["hint"])
	choice = gr.Radio(
	["Quick summary", "How does it work?", "What are the limitations?", "Generate a tweet thread"],
	value="Quick summary",
	label="What do you want?",
	)
	out = gr.Markdown()
	choice.change(start_here, inputs=choice, outputs=out)
	demo.load(start_here, inputs=choice, outputs=out)

	with gr.Tab("Overview"):
	gr.Markdown("## Abstract")
	gr.Markdown(PAPER_ABSTRACT)

	gr.Markdown("---")
	gr.Markdown("### Text search (snippet)")
	q = gr.Textbox(label="Find a phrase", placeholder="e.g., scalable oversight", lines=1)
	snippet = gr.Textbox(label="Top matching context", lines=10)

	def _snippet(query: str) -> str:
	query = (query or "").strip()
	if not query:
	return ""
	ctx = retrieve_context(query, k=4, max_chars=1600)
	return ctx or "No matches."

	q.change(_snippet, inputs=q, outputs=snippet)

	with gr.Tab("Gallery"):
	gr.Markdown("Extracted images / rendered page previews (if included at build time).", elem_classes=["hint"])
	gallery = gr.Gallery(label="Figures / pages", columns=2, rows=2, height=520)

	def _gallery():
	return _load_gallery_items()

	demo.load(_gallery, outputs=gallery)

	with gr.Tab("Chat"):
	gr.Markdown(
	"Ask questions. If you set `HF_TOKEN` + `PAPER_LLM_MODEL` as Space secrets, answers become generative; "
	"otherwise it returns top-matching snippets.",
	elem_classes=["hint"],
	)
	gr.ChatInterface(fn=paper_chat, title="Chat with the Paper")

	with gr.Tab("Share Kit"):
	gr.Markdown("Generate shareable assets. Works without secrets (deterministic fallback).", elem_classes=["hint"])
	kind = gr.Dropdown(["Tweet thread", "Talk outline", "FAQ"], value="Tweet thread", label="Generate")
	btn = gr.Button("Create")
	share_out = gr.Textbox(lines=14, label="Output")
	btn.click(lambda k: generate_share(k), inputs=kind, outputs=share_out)

	with gr.Tab("Model Playground"):
	gr.Markdown("Chat with a referenced Hub model (if any) or provide your own. Requires `HF_TOKEN` secret.", elem_classes=["hint"])
	model_id = gr.Dropdown(
	choices=(DETECTED_MODELS if DETECTED_MODELS else []),
	value=(DETECTED_MODELS[0] if DETECTED_MODELS else None),
	label="Model id",
	allow_custom_value=True,
	)

	def _model_chat_fn(message: str, history: List[Tuple[str, str]], mid: str) -> str:
	return model_chat(mid, message, history)

	gr.ChatInterface(fn=_model_chat_fn, additional_inputs=[model_id], title="Model Playground")

	with gr.Tab("Live Quantizer"):
	gr.Markdown("""
	## Think → Quantize → Transmit

	Type a messy, natural-language intent and watch it get quantized to a UCR anchor.
	This demonstrates the core Slipstream innovation: mapping free-form language onto a shared semantic manifold.
	""")

	with gr.Row():
	with gr.Column(scale=2):
	intent_input = gr.Textbox(
	label="Your intent (natural language)",
	placeholder="Hey, I'm kinda stuck on this auth bug, can you take a look?",
	lines=2,
	)
	quantize_btn = gr.Button("Quantize", variant="primary")

	gr.Markdown("Try these examples:")
	with gr.Row():
	for ex_text, ex_anchor in EXAMPLE_INTENTS:
	ex_btn = gr.Button(ex_text, size="sm")
	ex_btn.click(lambda t=ex_text: t, outputs=intent_input)

	with gr.Column(scale=3):
	primary_out = gr.Markdown(label="Result")
	with gr.Accordion("Nearby Anchors", open=True):
	alternatives_out = gr.Markdown()
	wire_out = gr.Code(label="SLIP Wire Format", language=None)

	quantize_btn.click(
	quantize_intent,
	inputs=intent_input,
	outputs=[primary_out, alternatives_out, wire_out],
	)
	intent_input.submit(
	quantize_intent,
	inputs=intent_input,
	outputs=[primary_out, alternatives_out, wire_out],
	)

	gr.Markdown("---\nBuilt with Gradio on Hugging Face Spaces.")

	if __name__ == "__main__":
	demo.launch()