import json
import os
import re
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import gradio as gr
# Lazy-loaded quantizer
_quantizer = None
_quantizer_error = None


def _get_quantizer():
    """Lazy load the embedding quantizer."""
    global _quantizer, _quantizer_error
    if _quantizer is not None:
        return _quantizer
    if _quantizer_error is not None:
        return None
    try:
        from quantizer import EmbeddingQuantizer
        _quantizer = EmbeddingQuantizer(fallback_threshold=0.3)
        return _quantizer
    except Exception as e:
        _quantizer_error = str(e)
        return None


def _get_keyword_quantizer():
    """Fallback to keyword quantizer."""
    try:
        from quantizer import KeywordQuantizer
        return KeywordQuantizer()
    except Exception:
        return None
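
# NOTE (assumption, inferred from usage in quantize_intent() below): quantizer.py
# is expected to ship alongside this app, and quantize(text) should return a
# result exposing .anchor (with .mnemonic, .canonical, .coords), .confidence,
# .alternatives, and .is_fallback. Adjust if the local quantizer module differs.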
# ---------------------------
# Loaded at build time
# ---------------------------
PAPER_TITLE = "Slipstream: Semantic Quantization for Efficient Multi-Agent Coordination"
PAPER_AUTHORS = "Anthony Maio"
PAPER_ABSTRACT = (
    "As multi-agent LLM systems scale, coordination bandwidth becomes a primary cost driver: "
    "every token spent on routing, intent framing, and redundant context is paid repeatedly "
    "across agents and turns. Current approaches waste 40–60% of compute on coordination "
    "overhead, with communication costs scaling O(n²) as agent counts increase.\n\n"
    "This paper introduces Slipstream, a protocol that performs semantic quantization: mapping "
    "free-form messages onto a shared Universal Concept Reference (UCR) and transmitting compact "
    "mnemonic anchors that identify structured intents. Unlike syntactic compression (which fails "
    "due to BPE tokenizer fragmentation), Slipstream transmits natural-language mnemonics that "
    "tokenize efficiently across model architectures.\n\n"
    "Slipstream combines (1) a symbolic 4D semantic manifold (Action, Polarity, Domain, Urgency) "
    "with (2) a data-driven vector engine (embeddings + nearest-centroid retrieval) plus an "
    "evolutionary extension layer that learns new anchors from low-confidence traffic. Results "
    "show 82% token reduction (41.9 → 7.4 tokens average) while maintaining semantic fidelity, "
    "making large-scale multi-agent deployments economically viable."
)
PAPER_TAGS = "semantic-quantization, multi-agent-systems, protocol-standards, token-efficiency"
DEFAULT_LLM_MODEL = None
DETECTED_MODELS = []


# ---------------------------
# Robust file loading
# ---------------------------
def _load_chunks(path: str = "paper_chunks.jsonl") -> List[str]:
    chunks: List[str] = []
    try:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                    txt = (rec.get("text") or "").strip()
                    if txt:
                        chunks.append(txt)
                except Exception:
                    continue
    except FileNotFoundError:
        return []
    except Exception:
        return []
    return chunks
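
# Each line of paper_chunks.jsonl is expected to be a standalone JSON object
# with a "text" field, e.g. {"text": "Section 3.1 ..."}; other keys are ignored
# and blank or malformed lines are skipped.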
# ---------------------------
# Tiny BM25-ish retrieval (pure Python)
# ---------------------------
def _tokenize(s: str) -> List[str]:
    return re.findall(r"[A-Za-z0-9][A-Za-z0-9_-]{1,}", s.lower())
@dataclass
class Retriever:
    chunks: List[str]
    doc_tokens: List[List[str]]
    df: Dict[str, int]
    idf: Dict[str, float]

    @staticmethod
    def build(chunks: List[str]) -> "Retriever":
        doc_tokens = [_tokenize(c) for c in chunks]
        df: Dict[str, int] = {}
        for toks in doc_tokens:
            for t in set(toks):
                df[t] = df.get(t, 0) + 1
        n = max(1, len(doc_tokens))
        idf = {}
        for t, d in df.items():
            # Simplified BM25-style inverse document frequency (no log dampening).
            idf[t] = float((n - d + 0.5) / (d + 0.5))
        return Retriever(chunks=chunks, doc_tokens=doc_tokens, df=df, idf=idf)

    def topk(self, query: str, k: int = 4) -> List[Tuple[int, float]]:
        q = _tokenize(query)
        if not q:
            return []
        scores: List[Tuple[int, float]] = []
        qset = set(q)
        for i, toks in enumerate(self.doc_tokens):
            if not toks:
                continue
            overlap = qset.intersection(toks)
            if not overlap:
                continue
            score = 0.0
            for t in overlap:
                score += self.idf.get(t, 0.0)
            score = score / (1.0 + (len(toks) / 200.0))
            scores.append((i, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores[:k]
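
# Illustrative usage (assuming at least one chunk is loaded):
#   r = Retriever.build(["semantic quantization of intents", "gradio ui layout"])
#   r.topk("semantic quantization")  # -> [(0, <idf-weighted overlap score>)]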
CHUNKS = _load_chunks()
RETRIEVER = Retriever.build(CHUNKS) if CHUNKS else None


def retrieve_context(query: str, k: int = 4, max_chars: int = 6000) -> str:
    if not RETRIEVER:
        return ""
    hits = RETRIEVER.topk(query, k=k)
    parts: List[str] = []
    for idx, _score in hits:
        txt = CHUNKS[idx].strip()
        if txt:
            parts.append(txt)
    ctx = "\n\n".join(parts).strip()
    return ctx[:max_chars]


# ---------------------------
# HF Inference helpers (optional)
# ---------------------------
def _get_hf_client(model_id: str):
    try:
        from huggingface_hub import InferenceClient
    except Exception as e:
        raise RuntimeError("huggingface_hub is not installed. Add it to requirements.txt") from e
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN is not set as a Space secret.")
    return InferenceClient(model=model_id, token=token)


def _llm_chat(model_id: str, messages: List[dict], max_tokens: int = 512) -> str:
    client = _get_hf_client(model_id)
    out = client.chat_completion(messages=messages, max_tokens=max_tokens)
    return out.choices[0].message.content
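
# Messages passed to InferenceClient.chat_completion use the OpenAI-style chat
# schema built by the callers below, e.g.:
#   [{"role": "system", "content": "..."}, {"role": "user", "content": "Hi"}]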
# ---------------------------
# Chat with paper (RAG-lite)
# ---------------------------
def paper_chat(message: str, history: List[Tuple[str, str]]) -> str:
    message = (message or "").strip()
    if not message:
        return "Ask a question about the paper."
    ctx = retrieve_context(message, k=4, max_chars=6000)
    model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not model_id:
        if not ctx:
            return "No indexed context found. (paper_chunks.jsonl missing?)"
        return "Top matches in the paper:\n\n" + ctx[:1200]
    trimmed_history = history[-4:] if history else []
    messages: List[dict] = [
        {
            "role": "system",
            "content": (
                "You are a precise research assistant. Answer using ONLY the provided paper context. "
                "If the context is insufficient, say what is missing and point to what section would help."
            ),
        },
    ]
    if ctx:
        messages.append({"role": "system", "content": "PAPER CONTEXT:\n\n" + ctx})
    for u, a in trimmed_history:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    try:
        return _llm_chat(model_id, messages, max_tokens=512).strip()
    except Exception as e:
        if ctx:
            return f"(LLM unavailable: {e})\n\nTop matches in the paper:\n\n" + ctx[:1200]
        return f"LLM unavailable: {e}"
# ---------------------------
# Share Kit (generators)
# ---------------------------
def _fallback_tweet_thread(title: str, abstract: str) -> str:
    abs_one = re.sub(r"\s+", " ", abstract).strip()
    bullets = [
        f"1/ {title}",
        "2/ TL;DR: " + (abs_one[:220] + ("…" if len(abs_one) > 220 else "")),
        "3/ Key idea: (open the Space → Chat tab and ask for the method overview)",
        "4/ Try it: use the Share Kit tab to generate a talk outline / FAQ.",
        "5/ Links: add your paper + code links in the README.",
    ]
    return "\n\n".join(bullets)

def generate_share(kind: str) -> str:
    kind = (kind or "").strip().lower()
    base_title = PAPER_TITLE or "Paper"
    base_abs = PAPER_ABSTRACT or ""
    model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not model_id:
        if kind == "tweet thread":
            return _fallback_tweet_thread(base_title, base_abs)
        if kind == "talk outline":
            return "\n".join([
                f"Title: {base_title}",
                "- Motivation",
                "- Problem setup",
                "- Method",
                "- Results",
                "- Limitations",
                "- Q&A",
            ])
        if kind == "faq":
            return "\n".join([
                "Q: What problem does this address?\nA: " + (base_abs[:220] + ("…" if len(base_abs) > 220 else "")),
                "",
                "Q: What is the main contribution?\nA: Ask in the Chat tab.",
                "",
                "Q: How do I reproduce it?\nA: Link code + add steps in README.",
            ])
        return "Select an item to generate."
    prompt = {
        "tweet thread": "Write a concise 6-tweet thread summarizing the paper for the ML community.",
        "talk outline": "Create a 10-minute talk outline with section headers and bullet points.",
        "faq": "Write an FAQ with 6 Q/A pairs focused on method, results, limitations, and usage.",
    }.get(kind, "Summarize the paper in 8 bullet points.")
    ctx = (PAPER_ABSTRACT or "").strip()
    messages = [
        {"role": "system", "content": "You are an expert technical writer for ML research audiences."},
        {"role": "user", "content": f"Paper title: {base_title}\nAuthors: {PAPER_AUTHORS}\n\nAbstract/context:\n{ctx}\n\nTask: {prompt}"},
    ]
    try:
        return _llm_chat(model_id, messages, max_tokens=600).strip()
    except Exception as e:
        if kind == "tweet thread":
            return _fallback_tweet_thread(base_title, base_abs) + f"\n\n(LLM unavailable: {e})"
        return f"LLM unavailable: {e}"
# ---------------------------
# Model Playground (chat)
# ---------------------------
def model_chat(model_id: str, message: str, history: List[Tuple[str, str]]) -> str:
    model_id = (model_id or "").strip()
    message = (message or "").strip()
    if not model_id:
        return "Provide a model id."
    if not message:
        return "Send a message."
    messages: List[dict] = [{"role": "system", "content": "You are a helpful assistant."}]
    for u, a in (history[-4:] if history else []):
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    try:
        return _llm_chat(model_id, messages, max_tokens=512).strip()
    except Exception as e:
        return f"Model call failed: {e}"
# ---------------------------
# UI helpers
# ---------------------------
def quantize_intent(intent: str) -> Tuple[str, str, str]:
    """
    Quantize a natural language intent to UCR anchor.
    Returns: (primary_result_md, alternatives_md, wire_format)
    """
    intent = (intent or "").strip()
    if not intent:
        return "Enter an intent to quantize.", "", ""
    # Try embedding quantizer first, fall back to keyword
    quantizer = _get_quantizer()
    method = "embedding"
    if quantizer is None:
        quantizer = _get_keyword_quantizer()
        method = "keyword"
    if quantizer is None:
        return "Quantizer unavailable. Check logs.", "", ""
    try:
        result = quantizer.quantize(intent)
    except Exception as e:
        return f"Quantization error: {e}", "", ""
    # Confidence color
    conf = result.confidence
    if conf >= 0.7:
        color = "green"
        conf_label = "High"
    elif conf >= 0.5:
        color = "orange"
        conf_label = "Medium"
    else:
        color = "red"
        conf_label = "Low"
    # Primary result
    primary_md = f"""
### {result.anchor.mnemonic}
**Confidence:** <span style="color:{color}; font-weight:bold">{conf:.0%}</span> ({conf_label})
**Canonical meaning:** {result.anchor.canonical}
**Method:** {method} {'(fallback)' if result.is_fallback else ''}
**Coordinates:** `{result.anchor.coords}` (Action, Polarity, Domain, Urgency)
"""
    # Alternatives
    if result.alternatives:
        alt_lines = ["| Anchor | Similarity |", "|--------|------------|"]
        for alt_anchor, alt_score in result.alternatives[:3]:
            bar_len = int(alt_score * 10)
            bar = "█" * bar_len + "░" * (10 - bar_len)
            alt_lines.append(f"| {alt_anchor.mnemonic} | {bar} {alt_score:.0%} |")
        alternatives_md = "\n".join(alt_lines)
    else:
        alternatives_md = "*No alternatives*"
    # Wire format
    wire = f"SLIP v1 user agent {result.anchor.mnemonic}"
    if result.is_fallback:
        # Truncate long intents for fallback payload
        payload = intent[:100].replace('"', "'")
        wire = f'SLIP v1 user agent Fallback "{payload}"'
    return primary_md, alternatives_md, wire
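
# Illustrative wire strings produced above:
#   SLIP v1 user agent RequestReview
#   SLIP v1 user agent Fallback "free-form intent, truncated to 100 chars"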
EXAMPLE_INTENTS = [
    ("Review my code", "RequestReview"),
    ("Task complete!", "InformComplete"),
    ("System down!", "ObserveError"),
    ("Can you help?", "RequestHelp"),
    ("Looks good to me", "EvalApprove"),
]


def start_here(choice: str) -> str:
    choice = (choice or "").strip().lower()
    if choice == "quick summary":
        return f"### {PAPER_TITLE}\n\n**Authors:** {PAPER_AUTHORS}\n\n**Abstract:**\n\n{PAPER_ABSTRACT}"
    if choice == "how does it work?":
        return "Go to **Chat** and ask: *Give me a method overview with the key steps.*"
    if choice == "what are the limitations?":
        return "Go to **Chat** and ask: *List limitations and failure modes discussed in the paper.*"
    if choice == "generate a tweet thread":
        return generate_share("tweet thread")
    return "Pick an option."

def _load_gallery_items() -> List[Tuple[str, str]]:
    items: List[Tuple[str, str]] = []
    if os.path.isdir("assets/images"):
        for fn in sorted(os.listdir("assets/images"))[:48]:
            path = os.path.join("assets/images", fn)
            if os.path.isfile(path):
                items.append((path, fn))
    if not items and os.path.isdir("assets/pages"):
        for fn in sorted(os.listdir("assets/pages"))[:24]:
            path = os.path.join("assets/pages", fn)
            if os.path.isfile(path):
                items.append((path, fn))
    return items


CSS = '''
.paper-hero h1 { margin-bottom: 0.2rem; }
.paper-hero p { margin-top: 0.2rem; opacity: 0.9; }
.hint { opacity: 0.85; }
'''

with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    gr.Markdown(f"# {PAPER_TITLE}", elem_classes=["paper-hero"])
    if PAPER_AUTHORS:
        gr.Markdown(f"**Authors:** {PAPER_AUTHORS}", elem_classes=["paper-hero"])
    if PAPER_TAGS:
        gr.Markdown(f"**Tags:** {PAPER_TAGS}", elem_classes=["paper-hero"])

    with gr.Tabs():
        with gr.Tab("Start here"):
            gr.Markdown("Pick an interaction to explore the paper quickly.", elem_classes=["hint"])
            choice = gr.Radio(
                ["Quick summary", "How does it work?", "What are the limitations?", "Generate a tweet thread"],
                value="Quick summary",
                label="What do you want?",
            )
            out = gr.Markdown()
            choice.change(start_here, inputs=choice, outputs=out)
            demo.load(start_here, inputs=choice, outputs=out)
| with gr.Tab("Overview"): | |
| gr.Markdown("## Abstract") | |
| gr.Markdown(PAPER_ABSTRACT) | |
| gr.Markdown("---") | |
| gr.Markdown("### Text search (snippet)") | |
| q = gr.Textbox(label="Find a phrase", placeholder="e.g., scalable oversight", lines=1) | |
| snippet = gr.Textbox(label="Top matching context", lines=10) | |
| def _snippet(query: str) -> str: | |
| query = (query or "").strip() | |
| if not query: | |
| return "" | |
| ctx = retrieve_context(query, k=4, max_chars=1600) | |
| return ctx or "No matches." | |
| q.change(_snippet, inputs=q, outputs=snippet) | |
| with gr.Tab("Gallery"): | |
| gr.Markdown("Extracted images / rendered page previews (if included at build time).", elem_classes=["hint"]) | |
| gallery = gr.Gallery(label="Figures / pages", columns=2, rows=2, height=520) | |
| def _gallery(): | |
| return _load_gallery_items() | |
| demo.load(_gallery, outputs=gallery) | |
| with gr.Tab("Chat"): | |
| gr.Markdown( | |
| "Ask questions. If you set `HF_TOKEN` + `PAPER_LLM_MODEL` as Space secrets, answers become generative; " | |
| "otherwise it returns top-matching snippets.", | |
| elem_classes=["hint"], | |
| ) | |
| gr.ChatInterface(fn=paper_chat, title="Chat with the Paper") | |
| with gr.Tab("Share Kit"): | |
| gr.Markdown("Generate shareable assets. Works without secrets (deterministic fallback).", elem_classes=["hint"]) | |
| kind = gr.Dropdown(["Tweet thread", "Talk outline", "FAQ"], value="Tweet thread", label="Generate") | |
| btn = gr.Button("Create") | |
| share_out = gr.Textbox(lines=14, label="Output") | |
| btn.click(lambda k: generate_share(k), inputs=kind, outputs=share_out) | |
| with gr.Tab("Model Playground"): | |
| gr.Markdown("Chat with a referenced Hub model (if any) or provide your own. Requires `HF_TOKEN` secret.", elem_classes=["hint"]) | |
| model_id = gr.Dropdown( | |
| choices=(DETECTED_MODELS if DETECTED_MODELS else []), | |
| value=(DETECTED_MODELS[0] if DETECTED_MODELS else None), | |
| label="Model id", | |
| allow_custom_value=True, | |
| ) | |
| def _model_chat_fn(message: str, history: List[Tuple[str, str]], mid: str) -> str: | |
| return model_chat(mid, message, history) | |
| gr.ChatInterface(fn=_model_chat_fn, additional_inputs=[model_id], title="Model Playground") | |
| with gr.Tab("Live Quantizer"): | |
| gr.Markdown(""" | |
| ## Think → Quantize → Transmit | |
| Type a messy, natural-language intent and watch it get quantized to a UCR anchor. | |
| This demonstrates the core Slipstream innovation: mapping free-form language onto a shared semantic manifold. | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| intent_input = gr.Textbox( | |
| label="Your intent (natural language)", | |
| placeholder="Hey, I'm kinda stuck on this auth bug, can you take a look?", | |
| lines=2, | |
| ) | |
| quantize_btn = gr.Button("Quantize", variant="primary") | |
| gr.Markdown("**Try these examples:**") | |
| with gr.Row(): | |
| for ex_text, ex_anchor in EXAMPLE_INTENTS: | |
| ex_btn = gr.Button(ex_text, size="sm") | |
| ex_btn.click(lambda t=ex_text: t, outputs=intent_input) | |
| with gr.Column(scale=3): | |
| primary_out = gr.Markdown(label="Result") | |
| with gr.Accordion("Nearby Anchors", open=True): | |
| alternatives_out = gr.Markdown() | |
| wire_out = gr.Code(label="SLIP Wire Format", language=None) | |
| quantize_btn.click( | |
| quantize_intent, | |
| inputs=intent_input, | |
| outputs=[primary_out, alternatives_out, wire_out], | |
| ) | |
| intent_input.submit( | |
| quantize_intent, | |
| inputs=intent_input, | |
| outputs=[primary_out, alternatives_out, wire_out], | |
| ) | |
| gr.Markdown("---\nBuilt with Gradio on Hugging Face Spaces.") | |
| if __name__ == "__main__": | |
| demo.launch() | |