# NOTE: The "Spaces: Sleeping" lines here were HuggingFace Spaces page-status
# residue from the export, not part of the program source.
| """ | |
| Learning Sequential Monte Carlo (SMC) Through the Plain-English Translator | |
| An interactive educational space that teaches Sequential Monte Carlo methods | |
| using a practical application: helping professionals explain complex concepts | |
| without using industry jargon. | |
| """ | |
import json
import math
import os
import random
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# The real `spaces` package supplies the @spaces.GPU decorator on HuggingFace
# Spaces hardware. For local development it is absent, so install a no-op
# stand-in exposing the same attribute.
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(func):
            # No GPU to reserve locally: hand the function back untouched.
            return func
# Load benchmark data
# Gold-standard translations and per-model scores shipped next to this file;
# loaded once at import time and read by the "Our Experiments" tab.
BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
with open(BENCHMARK_DATA_PATH, "r") as f:
    BENCHMARK_DATA = json.load(f)
# Path to infographic
# PNG shown at the top of the "Learn SMC" tab.
INFOGRAPHIC_PATH = os.path.join(os.path.dirname(__file__), "Sequential_monte_carlo.png")
# ============================================================================
# MODEL SETUP
# ============================================================================
# Display name -> HuggingFace model id. The display names populate the model
# dropdown in the UI; ids are resolved by load_model().
AVAILABLE_MODELS = {
    "TinyLlama-1.1B (Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen2-0.5B (Fastest)": "Qwen/Qwen2-0.5B-Instruct",
    "Qwen2.5-7B (Best Quality)": "Qwen/Qwen2.5-7B-Instruct",
    "Qwen3-8B (Latest)": "Qwen/Qwen3-8B",
    "Gemma-2-2B (Requires HF Login)": "google/gemma-2-2b-it",
}
# Process-wide caches keyed by model id so each model/tokenizer pair is
# downloaded and materialized at most once per session.
loaded_models = {}
loaded_tokenizers = {}
def load_model(model_name: str):
    """Resolve a dropdown display name to a (tokenizer, model) pair.

    Unrecognized names fall back to TinyLlama. Both objects are cached in the
    module-level dicts, so repeated calls with the same model reuse the
    already-loaded weights instead of hitting the hub again.
    """
    fallback = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    model_id = AVAILABLE_MODELS.get(model_name, fallback)
    if model_id not in loaded_tokenizers:
        loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)
    if model_id not in loaded_models:
        # fp16 + device_map="auto" keeps the footprint small and lets
        # accelerate place layers on whatever hardware is available.
        loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16,
        )
    return loaded_tokenizers[model_id], loaded_models[model_id]
| # ============================================================================ | |
| # JARGON DICTIONARIES | |
| # ============================================================================ | |
# Per-domain banned vocabulary. Keys must match the "Professional Domain"
# dropdown choices in the UI; values are the jargon terms the SMC filter
# penalizes (soft mode) or prunes (hard mode). Matching is case-insensitive.
JARGON_DICTIONARIES = {
    "Legal": [
        "liability", "liable", "indemnify", "indemnification", "breach",
        "statute", "damages", "negligence", "herein", "aforementioned",
        "plaintiff", "defendant", "jurisdiction", "arbitration", "tort",
        "fiduciary", "escrow", "lien", "deposition", "stipulation",
        "injunction", "subpoena", "affidavit", "adjudicate", "appellant"
    ],
    "Medical": [
        "prognosis", "diagnosis", "etiology", "pathology", "contraindicated",
        "idiopathic", "nosocomial", "comorbidity", "prophylactic", "benign",
        "malignant", "metastasis", "hemorrhage", "ischemia", "infarction",
        "edema", "necrosis", "lesion", "syndrome", "acute", "chronic",
        "bilateral", "unilateral", "subcutaneous", "intravenous"
    ],
    "Financial": [
        "amortization", "liquidity", "collateral", "derivative", "equity",
        "fiduciary", "hedge", "leverage", "portfolio", "securities",
        "dividend", "depreciation", "liability", "asset", "accrual",
        "arbitrage", "capitalization", "yield", "maturity", "principal",
        "compound", "annuity", "underwriting", "insolvency", "solvency"
    ],
    "Technical/Engineering": [
        "algorithm", "bandwidth", "latency", "throughput", "scalability",
        "deprecated", "refactor", "polymorphism", "encapsulation", "abstraction",
        "iteration", "recursion", "synchronous", "asynchronous", "protocol",
        "middleware", "backend", "frontend", "deployment", "infrastructure",
        "microservices", "containerization", "orchestration", "API", "SDK"
    ]
}
| # ============================================================================ | |
| # SMC CORE FUNCTIONS | |
| # ============================================================================ | |
def is_safe(text: str, banned_words: list) -> bool:
    """Return True if *text* contains none of *banned_words* as whole words.

    Matching is case-insensitive and uses regex word boundaries, so a banned
    term is detected regardless of surrounding punctuation. The previous
    hand-rolled checks only looked for space/./,/?/! neighbors and missed
    jargon next to colons, semicolons, quotes, parentheses, possessives, or
    newlines (e.g. "(liability)" or "liability's" slipped through).

    Args:
        text: Candidate generation to screen.
        banned_words: Jargon terms that must not appear.

    Returns:
        True when the text is jargon-free, False otherwise.
    """
    text_lower = text.lower()
    for word in banned_words:
        # \b enforces whole-word matching: "liable" does not match "reliable".
        if re.search(r"\b" + re.escape(word.lower()) + r"\b", text_lower):
            return False
    return True
def find_jargon_used(text: str, banned_words: list) -> list:
    """Return the banned terms that appear in *text* as whole words.

    Case-insensitive, word-boundary matching via regex. This fixes the old
    punctuation-list approach, which missed jargon adjacent to colons,
    quotes, parentheses, etc. Results keep the original casing and order of
    *banned_words*; each term is reported at most once.

    Args:
        text: Text to scan.
        banned_words: Jargon terms to look for.

    Returns:
        List of banned terms found (possibly empty).
    """
    text_lower = text.lower()
    found = []
    for word in banned_words:
        if re.search(r"\b" + re.escape(word.lower()) + r"\b", text_lower):
            found.append(word)
    return found


def count_jargon(text: str, banned_words: list) -> int:
    """Return how many distinct banned terms appear in *text*."""
    return len(find_jargon_used(text, banned_words))


def compute_weight(text: str, banned_words: list, penalty_factor: float = 0.3) -> float:
    """SMC particle weight: penalty_factor ** (number of jargon terms).

    A jargon-free particle keeps weight 1.0; each distinct banned term
    multiplies the weight by *penalty_factor*, making the particle
    exponentially less likely to survive resampling.
    """
    return penalty_factor ** count_jargon(text, banned_words)
def weighted_resample(particles: list, weights: list, num_samples: int) -> list:
    """Draw *num_samples* particles in proportion to their weights.

    When every weight is zero (all particles penalized away), sampling falls
    back to a uniform distribution rather than failing. The draw is taken
    with replacement and then deduplicated preserving first-seen order, so
    the returned list may contain fewer than *num_samples* entries.
    """
    if not particles or not weights:
        return []
    total = sum(weights)
    probs = (
        [w / total for w in weights]
        if total != 0
        else [1.0 / len(particles)] * len(particles)
    )
    drawn = random.choices(particles, weights=probs, k=num_samples)
    # dict.fromkeys keeps insertion order while collapsing duplicates.
    deduped = list(dict.fromkeys(drawn))
    return deduped[:num_samples]
def smc_translate(
    concept: str,
    profession: str,
    custom_banned_words: str = "",
    model_name: str = "TinyLlama-1.1B (Fast)",
    num_particles: int = 5,
    max_steps: int = 20,
    tokens_per_step: int = 15,
    constraint_mode: str = "Soft (Penalize)",
    progress=gr.Progress()
) -> tuple:
    """Run the SMC explore/filter/resample loop to produce a jargon-free explanation.

    Each iteration: (1) every surviving particle generates 3 continuations of
    `tokens_per_step` tokens; (2) candidates are either weighted by jargon
    count (soft mode) or pruned outright (hard mode); (3) survivors are
    resampled down to `num_particles`. Stops early on natural completion
    (sentence-ending punctuation and >80 chars) or total particle death.

    Args:
        concept: What to explain.
        profession: Key into JARGON_DICTIONARIES selecting the banned list.
        custom_banned_words: Comma-separated extra banned terms.
        model_name: Display name resolved via AVAILABLE_MODELS.
        num_particles: Population size kept after each resampling step.
        max_steps: Maximum SMC iterations.
        tokens_per_step: New tokens generated per particle per iteration.
        constraint_mode: "Soft (Penalize)" or "Hard (Prune)"; any string
            containing "Soft" selects soft mode.
        progress: Gradio progress tracker (drives the UI progress bar).

    Returns:
        Tuple of (final explanation text, trace log string, banned-word list
        joined with commas).
    """
    tokenizer, model_inst = load_model(model_name)
    use_soft_constraints = "Soft" in constraint_mode
    # Copy so appending custom terms never mutates the shared dictionary.
    banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
    if custom_banned_words.strip():
        custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
        banned_words.extend(custom_list)
    prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.
Rules:
- Explain as if talking to a curious 10-year-old
- Use a concrete, relatable real-world example to illustrate the concept
- Avoid technical jargon - use everyday words instead
- Keep it concise: 2-3 sentences max
Concept to explain: {concept}
Simple explanation with example:"""
    # Every particle is the full prompt + generated text so far; the prompt
    # marker is stripped back off when extracting the final answer.
    particles = [prompt]
    trace_log = []
    trace_log.append(f"{'='*60}")
    trace_log.append(f"SMC PLAIN-ENGLISH TRANSLATOR - TRACE LOG")
    trace_log.append(f"{'='*60}")
    trace_log.append(f"Model: {model_name}")
    trace_log.append(f"Constraint Mode: {constraint_mode}")
    trace_log.append(f"Concept: {concept}")
    trace_log.append(f"Domain: {profession}")
    trace_log.append(f"Banned words: {len(banned_words)} terms")
    trace_log.append(f"Particles: {num_particles} | Steps: {max_steps} | Tokens/step: {tokens_per_step}")
    trace_log.append(f"{'='*60}")
    trace_log.append("")
    for step in progress.tqdm(range(max_steps), desc="SMC Iteration"):
        candidates = []
        # STEP 1: EXPLORE - Generate multiple continuations
        for particle in particles:
            inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
            with torch.no_grad():
                # 3 sampled continuations per particle; soft mode samples a
                # little hotter since bad draws are only down-weighted.
                outputs = model_inst.generate(
                    **inputs,
                    max_new_tokens=tokens_per_step,
                    num_return_sequences=3,
                    do_sample=True,
                    temperature=0.9 if use_soft_constraints else 0.8,
                    top_p=0.95 if use_soft_constraints else 0.9,
                    pad_token_id=tokenizer.eos_token_id
                )
            for out in outputs:
                # Decoded text includes the prompt prefix, not just new tokens.
                decoded = tokenizer.decode(out, skip_special_tokens=True)
                candidates.append(decoded)
        if not candidates:
            trace_log.append(f"Step {step+1}: No candidates generated - stopping")
            break
        # STEP 2: FILTER/WEIGHT - Apply constraints
        if use_soft_constraints:
            weights = [compute_weight(c, banned_words, penalty_factor=0.3) for c in candidates]
            jargon_counts = [count_jargon(c, banned_words) for c in candidates]
            clean_count = sum(1 for c in jargon_counts if c == 0)
            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f" {clean_count} jargon-free | Weights: [{min(weights):.2f} - {max(weights):.2f}]")
            # STEP 3: RESAMPLE - Weighted selection
            particles = weighted_resample(candidates, weights, num_particles)
            if not particles:
                trace_log.append(f" Resampling failed - stopping")
                break
            trace_log.append(f" Resampled to {len(particles)} particles")
        else:
            # Hard mode: any candidate containing jargon dies outright.
            valid_candidates = []
            pruned_count = 0
            for candidate in candidates:
                if is_safe(candidate, banned_words):
                    valid_candidates.append(candidate)
                else:
                    pruned_count += 1
            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f" {len(valid_candidates)} survived | {pruned_count} pruned (contained jargon)")
            if valid_candidates:
                # Survivors are equally fit: dedupe, shuffle, keep a subset.
                unique_candidates = list(set(valid_candidates))
                random.shuffle(unique_candidates)
                particles = unique_candidates[:num_particles]
            else:
                trace_log.append(f" ALL PARTICLES DIED - jargon unavoidable!")
                break
        # Check for completion
        # Heuristic early stop: look only at the first particle's answer text.
        current_text = particles[0].split("Simple explanation with example:")[-1].strip()
        if current_text.endswith(('.', '!', '?')) and len(current_text) > 80:
            trace_log.append(f"\nNatural completion reached at step {step+1}")
            break
        trace_log.append("")
    trace_log.append(f"{'='*60}")
    # Get best result
    if particles:
        if use_soft_constraints:
            # Soft mode can keep jargon-carrying particles alive, so pick the
            # survivor with the fewest banned terms.
            best_idx = 0
            best_jargon_count = float('inf')
            for i, p in enumerate(particles):
                jc = count_jargon(p, banned_words)
                if jc < best_jargon_count:
                    best_jargon_count = jc
                    best_idx = i
            final_text = particles[best_idx].split("Simple explanation with example:")[-1].strip()
        else:
            # Hard mode: all survivors are jargon-free, any one will do.
            final_text = particles[0].split("Simple explanation with example:")[-1].strip()
    else:
        final_text = "(All generation paths used jargon - try soft constraints!)"
    final_jargon = find_jargon_used(final_text, banned_words)
    if final_jargon:
        trace_log.append(f"RESULT: Contains jargon: {final_jargon}")
    else:
        trace_log.append(f"RESULT: Jargon-free output achieved!")
    trace_log.append(f"{'='*60}")
    return final_text, "\n".join(trace_log), ", ".join(banned_words)
| # ============================================================================ | |
| # EXAMPLES | |
| # ============================================================================ | |
# Each row is (concept, professional domain, extra banned words) and feeds the
# (concept_input, profession_dropdown, custom_words) inputs of the demo tab.
EXAMPLES = [
    ["Force Majeure clause and why it might void our contract", "Legal", ""],
    ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
    ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
    ["The etiology of your chronic fatigue syndrome", "Medical", ""],
    ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
    ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
    ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
    ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
]
| # ============================================================================ | |
| # GRADIO INTERFACE | |
| # ============================================================================ | |
# Three-tab Gradio app: theory (Learn SMC), interactive demo (Translator),
# and the recorded benchmark results (Our Experiments).
with gr.Blocks(title="Learn SMC: The Plain-English Translator") as demo:
    # ==================== HEADER ====================
    gr.Markdown("""
# Learning Sequential Monte Carlo (SMC)
## An Interactive Guide Using the Plain-English Translator
Welcome! This space teaches you about **Sequential Monte Carlo** methods through a practical application:
helping professionals explain complex concepts without using jargon.
Navigate through the tabs to learn the theory, try the interactive demo, and see our experimental results.
""")
    with gr.Tabs():
        # ==================== TAB 1: LEARN SMC ====================
        with gr.TabItem("1. Learn SMC"):
            gr.Markdown("""
# Understanding Sequential Monte Carlo
Sequential Monte Carlo (SMC) is a powerful technique for solving problems where you need to
navigate through a space of possibilities while satisfying constraints. Let's understand it
through both theory and our practical application.
""")
            # Infographic
            gr.Image(INFOGRAPHIC_PATH, label="How AI Learns to See the Future: An Introduction to SMC", show_label=True)
            gr.Markdown("## The Big Picture")
            gr.Markdown("---")
            # Section 1: The Problem
            gr.Markdown("""
## 1. The Problem: Standard AI's "Greedy" Trap
### What's Wrong with Normal Text Generation?
Most AI language models work **greedily** - they pick the best next word based on immediate probability,
without considering long-term consequences. This creates a fundamental problem:
**The Greedy Trap:**
- The model chooses what seems best *right now*
- It can't "see" that this choice leads to a dead end
- Once committed, it can't backtrack
### Our Example: The Curse of Knowledge
When a lawyer tries to explain "Force Majeure" to a client, a standard AI naturally reaches for
legal terminology because those words are statistically most likely in that context:
```
Standard AI: "Force Majeure is a contractual provision that excuses liability
when extraordinary circumstances prevent fulfillment..."
```
The AI picked "liability," "contractual," and "provision" because they're the most probable
next words - but now it's stuck using jargon the client won't understand!
**This is like choosing the path in a maze that looks shortest, only to hit a dead end.**
""")
            gr.Markdown("---")
            # Section 2: The Breakthrough
            gr.Markdown("""
## 2. The Breakthrough: Introducing SMC
### The Key Insight: Explore Multiple Futures Simultaneously
Instead of committing to one path, SMC maintains **thousands of "particles"** - each representing
a different possible future. Think of it as sending out scouts in every direction.
### How It Works in Our Translator:
```
Standard AI: One path → "Force Majeure is a contractual..." → STUCK WITH JARGON
SMC Approach: Path A → "Imagine you promised your friend..." ✓ Keep exploring
Path B → "This is a liability clause..." ✗ Contains jargon
Path C → "Think of it like a 'nobody's fault'..." ✓ Keep exploring
Path D → "The contractual provision states..." ✗ Contains jargon
Path E → "It's like when a big storm..." ✓ Keep exploring
```
**We explore multiple possibilities in parallel, keeping the promising ones and discarding the rest.**
""")
            gr.Markdown("---")
            # Section 3: The Process
            gr.Markdown("""
## 3. The Process: How SMC Finds the Optimal Path
SMC follows a three-step cycle that repeats until we reach our goal:
### Step 1: EXPLORE (Expand)
Each surviving particle generates multiple possible continuations.
If we have 5 particles and each generates 3 continuations, we now have 15 candidates.
### Step 2: FILTER (Evaluate)
We evaluate each candidate against our constraint (no jargon).
This is "survival of the fittest" - unpromising paths fade out.
**Two Filtering Strategies:**
| Strategy | How It Works | Pros | Cons |
|----------|--------------|------|------|
| **Hard Constraints** | Completely eliminate any particle with jargon | Guarantees jargon-free output | Can kill ALL particles if jargon is unavoidable |
| **Soft Constraints** | Reduce weight of particles with jargon (but let them survive) | More robust, allows gradual steering | May have occasional jargon slip through |
### Step 3: RESAMPLE (Select)
We select particles for the next round based on their fitness:
- **Hard mode:** Random selection from survivors
- **Soft mode:** Weighted random selection (better particles more likely to be chosen)
### The Math Behind Soft Constraints:
```
Weight = 0.3 ^ (number of jargon words)
0 jargon words → Weight = 1.0 (100% chance)
1 jargon word → Weight = 0.3 (30% chance)
2 jargon words → Weight = 0.09 (9% chance)
3 jargon words → Weight = 0.027 (2.7% chance)
```
""")
            gr.Markdown("---")
            # Section 4: The Impact
            gr.Markdown("""
## 4. The Impact: From Prediction to Strategy
SMC transforms AI from a **reactive predictor** to a **strategic planner**.
### What This Means for Our Translator:
| Approach | Can Plan Ahead? | Handles Constraints? | Success Rate |
|----------|-----------------|---------------------|--------------|
| Standard Greedy | No - commits immediately | No - uses probable words | N/A (always uses jargon) |
| SMC Hard | Yes - explores multiple paths | Yes - prunes violations | 25% (particles often die) |
| SMC Soft | Yes - explores multiple paths | Yes - penalizes violations | **100%** |
### Beyond Translation: Where Else Is SMC Used?
- **Robotics:** Planning movements while avoiding obstacles
- **Autonomous Vehicles:** Predicting traffic and planning routes
- **Finance:** Portfolio optimization with risk constraints
- **Drug Discovery:** Exploring molecular structures with safety constraints
### The Fundamental Shift:
> *"If your AI could plan 10 steps ahead instead of 1, what impossible problem would you have it solve first?"*
SMC represents moving from **simple prediction** to **true strategic foresight**.
""")
            gr.Markdown("---")
            # Connection to Next Tab
            gr.Markdown("""
## Ready to Try It Yourself?
Now that you understand how SMC works, head to the **"2. Try It: Translator"** tab
to see it in action! You can:
- Watch particles explore and get filtered in real-time
- Compare hard vs soft constraints
- Try different professional domains (Legal, Medical, Financial, Technical)
""")
        # ==================== TAB 2: TRY IT ====================
        with gr.TabItem("2. Try It: Translator"):
            gr.Markdown("""
# The Plain-English Translator
## The Problem We're Solving
**The Curse of Knowledge:** Experts often struggle to explain concepts without jargon.
A standard AI naturally uses technical terms because they're statistically probable.
**Our Solution:** Use SMC to explore multiple explanations simultaneously,
filtering out any path that uses forbidden terminology. This forces the model
to find creative, plain-language alternatives.
---
""")
            with gr.Row():
                # Left column: all the SMC inputs and tuning knobs.
                with gr.Column(scale=2):
                    concept_input = gr.Textbox(
                        label="Concept to Explain",
                        placeholder="e.g., 'Force Majeure clause and why it might void our contract'",
                        lines=2
                    )
                    profession_dropdown = gr.Dropdown(
                        choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                        value="Legal",
                        label="Professional Domain",
                        info="Each domain has its own set of banned jargon terms"
                    )
                    custom_words = gr.Textbox(
                        label="Additional Banned Words (optional)",
                        placeholder="e.g., contract, clause, party",
                        lines=1
                    )
                    model_dropdown = gr.Dropdown(
                        choices=list(AVAILABLE_MODELS.keys()),
                        value="TinyLlama-1.1B (Fast)",
                        label="Model"
                    )
                    constraint_mode = gr.Radio(
                        choices=["Hard (Prune)", "Soft (Penalize)"],
                        value="Soft (Penalize)",
                        label="Constraint Mode",
                        info="Soft constraints are more robust - see the Learn tab for explanation"
                    )
                    with gr.Row():
                        num_particles = gr.Slider(
                            minimum=2, maximum=10, value=5, step=1,
                            label="Particles",
                            info="More = more exploration"
                        )
                        max_steps = gr.Slider(
                            minimum=5, maximum=30, value=15, step=5,
                            label="Max Steps",
                            info="SMC iterations"
                        )
                        tokens_per_step = gr.Slider(
                            minimum=5, maximum=30, value=15, step=5,
                            label="Tokens/Step",
                            info="Generation length per iteration"
                        )
                    translate_btn = gr.Button("Translate to Plain English", variant="primary", size="lg")
                # Right column: quick explanation of what the trace log shows.
                with gr.Column(scale=1):
                    gr.Markdown("""
### SMC in Action
When you click translate, watch the trace log to see:
1. **Particles explored** - Multiple paths generated
2. **Filtering** - Jargon paths penalized/pruned
3. **Resampling** - Best particles selected
4. **Convergence** - Final jargon-free output
**Tip:** Try the same concept with Hard vs Soft constraints
to see the difference!
""")
            gr.Markdown("---")
            gr.Markdown("### Output")
            smc_output = gr.Textbox(label="Plain-English Explanation", lines=5, show_label=True)
            with gr.Accordion("SMC Trace Log (See the algorithm in action)", open=True):
                trace_output = gr.Textbox(label="", lines=20, show_label=False)
            with gr.Accordion("Banned Words for This Domain", open=False):
                banned_words_display = gr.Textbox(label="", lines=3, show_label=False)
            gr.Markdown("---")
            gr.Markdown("### Example Scenarios")
            gr.Examples(examples=EXAMPLES, inputs=[concept_input, profession_dropdown, custom_words], label="")
        # ==================== TAB 3: EXPERIMENTS ====================
        with gr.TabItem("3. Our Experiments"):
            gr.Markdown("""
# What We Learned: An Experimental Journey
This tab documents our experimental journey in applying SMC to constrained text generation.
We tested multiple approaches and models to understand what works and what doesn't.
---
""")
            gr.Markdown("""
## The Experimental Setup
### Goal
Generate plain-English explanations of professional concepts (Legal, Medical, Financial, Technical)
that a 10-year-old could understand - **without using any domain-specific jargon**.
### Benchmark
We created 12 test cases (3 per domain) with gold-standard translations from Claude Opus 4.5.
Each output was scored on:
| Criterion | Points | Description |
|-----------|--------|-------------|
| Jargon-Free | 25 | No banned terminology used |
| Has Example | 25 | Uses relatable analogy |
| Appropriate Length | 25 | 20-100 words |
| Coherence | 25 | Proper sentence structure |
---
""")
            # Experiment 1: Hard Constraints
            gr.Markdown("""
## Experiment 1: Hard Constraints (Prune All Jargon)
### Hypothesis
If we completely eliminate any generation path containing jargon, the model will be forced
to find jargon-free alternatives.
### Setup
- Models: TinyLlama-1.1B, Qwen2-0.5B, Gemma-2-2B
- Parameters: 5 particles, 25 max steps, 6 tokens per step
- Constraint: **Hard** - any particle with jargon is immediately pruned
### Results
""")
            # Build data from benchmark
            gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
            tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
            qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]
            gr.Markdown(f"""
| Model | Score | Success Rate | Outcome |
|-------|-------|--------------|---------|
| Gemma-2-2B | {gemma_data['total_score']}/{gemma_data['max_possible']} ({gemma_data['percentage']}%) | {gemma_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
| TinyLlama-1.1B | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} ({tinyllama_data['percentage']}%) | {tinyllama_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
| Qwen2-0.5B | {qwen_data['total_score']}/{qwen_data['max_possible']} ({qwen_data['percentage']}%) | {qwen_data.get('successful_outputs', 2)}/12 | 10 empty outputs |
### What Happened?
**75% of test cases produced empty outputs!**
The problem: When explaining medical concepts, the model naturally reaches for words like
"benign," "lesion," and "diagnosis." With hard constraints, EVERY generation path
contained at least one banned word, causing **total particle death**.
### Key Learning
Hard constraints are too aggressive. Domain-specific vocabulary is so deeply embedded
in model weights that it's nearly impossible to avoid entirely through pruning alone.
---
""")
            # Experiment 2: Soft Constraints
            gr.Markdown("""
## Experiment 2: Soft Constraints (Weighted Resampling)
### Hypothesis
Instead of killing particles with jargon, we should **penalize** them with lower weights.
This allows gradual steering toward jargon-free outputs while preventing particle death.
### The Key Insight
```
Weight = penalty_factor ^ (jargon_count)
With penalty_factor = 0.3:
- 0 jargon words → weight = 1.0
- 1 jargon word → weight = 0.3
- 2 jargon words → weight = 0.09
```
Particles with jargon can **survive** but are less likely to be selected for the next generation.
Over time, the population naturally shifts toward jargon-free outputs.
### Setup
- Model: Qwen2.5-7B (via Ollama)
- Parameters: 5 particles, 15 max steps, 25 tokens per step
- Constraint: **Soft** - penalty factor 0.3
### Results
""")
            qwen25_soft_data = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {})
            gr.Markdown(f"""
| Model | Score | Success Rate | Jargon Violations |
|-------|-------|--------------|-------------------|
| Qwen2.5-7B (Soft) | {qwen25_soft_data.get('total_score', 920)}/{qwen25_soft_data.get('max_possible', 1200)} ({qwen25_soft_data.get('percentage', 76.7)}%) | **{qwen25_soft_data.get('successful_outputs', 12)}/12** | 1/12 |
### The Transformation
| Metric | Hard Constraints | Soft Constraints |
|--------|------------------|------------------|
| Success Rate | 25% (3/12) | **100% (12/12)** |
| Average Score | ~44% | **76.7%** |
| Empty Outputs | 9/12 | **0/12** |
### What Changed?
- Particles with jargon no longer die instantly
- The population gradually evolves toward jargon-free outputs
- Even if early generations contain jargon, later generations learn to avoid it
- The one jargon violation ("synchronous") was unavoidable given the topic
---
""")
            # Comparison Browser
            gr.Markdown("## Compare Results Across Models")
            gr.Markdown("Select an example to see how different approaches performed:")
            # Dropdown labels are "<domain>: <concept truncated to 55 chars>...".
            all_examples = []
            for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                    all_examples.append(f"{domain}: {concept[:55]}...")
            example_dropdown = gr.Dropdown(choices=all_examples, value=all_examples[0], label="Select Example")
            # Pre-fill the comparison boxes with the first Legal example.
            first_domain = "Legal"
            first_concept = list(BENCHMARK_DATA["claude_opus_benchmarks"]["Legal"].keys())[0]
            initial_claude = BENCHMARK_DATA["claude_opus_benchmarks"][first_domain][first_concept]["translation"]
            initial_qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(first_domain, {}).get(first_concept, {}).get("output", "") or "(Not available)"
            initial_gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Claude Opus 4.5 (Gold Standard)**")
                    claude_output = gr.Textbox(value=initial_claude, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**Qwen2.5-7B (Soft Constraints)**")
                    qwen25_output = gr.Textbox(value=initial_qwen25, lines=4, interactive=False, show_label=False)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Gemma-2-2B (Hard Constraints)**")
                    gemma_output = gr.Textbox(value=initial_gemma, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**TinyLlama-1.1B (Hard Constraints)**")
                    initial_tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"
                    tinyllama_output = gr.Textbox(value=initial_tiny, lines=4, interactive=False, show_label=False)
            def update_example_outputs(selection):
                """Map a dropdown label back to benchmark entries for all four models.

                NOTE(review): parsing relies on the "<domain>: <concept>..." label
                format built above. `split(": ")[1]` and `replace("...", "")`
                would mis-handle a concept containing ": " or an internal "..."
                — acceptable for the current benchmark set, but fragile.
                """
                domain = selection.split(":")[0]
                concept_preview = selection.split(": ")[1].replace("...", "")
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                    # Labels were truncated to 55 chars, so match by prefix.
                    if concept.startswith(concept_preview.strip()):
                        claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
                        qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(domain, {}).get(concept, {}).get("output", "") or "(Not available)"
                        gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
                        tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
                        return claude, qwen25, gemma, tiny
                return "Not found", "Not found", "Not found", "Not found"
            example_dropdown.change(
                fn=update_example_outputs,
                inputs=[example_dropdown],
                outputs=[claude_output, qwen25_output, gemma_output, tinyllama_output]
            )
            gr.Markdown("---")
            # Key Takeaways
            gr.Markdown("""
## Key Takeaways
### What We Learned About SMC for Constrained Generation
1. **Soft constraints dramatically outperform hard constraints**
- Hard pruning causes particle death when constraints conflict with model priors
- Weighted resampling allows graceful degradation and recovery
2. **Penalty factor matters**
- 0.3 (70% reduction per jargon word) provided good balance
- Too aggressive (0.1) → still causes particle death
- Too lenient (0.5) → jargon persists too long
3. **Model size affects vocabulary diversity**
- Larger models (7B+) have more alternative phrasings available
- Smaller models get stuck more easily because they have fewer "escape routes"
4. **SMC enables strategic generation**
- Standard greedy generation commits immediately and can't backtrack
- SMC explores multiple futures and converges on the best path
### Broader Implications
This technique applies beyond jargon filtering:
- **Content moderation:** Generate text avoiding harmful content
- **Style transfer:** Guide generation toward specific writing styles
- **Factual grounding:** Penalize generations that contradict known facts
- **Length control:** Soft constraints on verbosity
---
*Experiments conducted December 2025. Models tested via HuggingFace Transformers and Ollama.*
""")
    # Event handlers
    translate_btn.click(
        fn=smc_translate,
        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps, tokens_per_step, constraint_mode],
        outputs=[smc_output, trace_output, banned_words_display]
    )
if __name__ == "__main__":
    # Bug fix: `Blocks.launch()` does not accept a `theme` keyword — themes
    # must be passed to the `gr.Blocks(...)` constructor instead. Passing it
    # here raised `TypeError: launch() got an unexpected keyword argument
    # 'theme'` at startup.
    demo.launch()