davanstrien (HF Staff) committed
Commit aaeefcc · verified · 1 Parent(s): 7685871

Update app.py

Files changed (1):
  1. app.py +189 -135
app.py CHANGED
@@ -1,140 +1,194 @@
- """
- app.py – Gradio demo for structured (constrained) generation with Outlines
- -----------------------------------------------------------------------
- Deploy this file (plus a requirements.txt) to a **Gradio** Space on
- Hugging Face. The UI is intentionally minimal so you can embed the Space
- in an `<iframe>` on a slide.
-
- **requirements.txt** (put this in the same repo):
- ```
- gradio>=4.28.0
- transformers>=4.40.0
- outlines>=0.0.36
- torch
- ```
-
- After pushing both files, Spaces will build the image automatically. The
- Space URL (e.g. `https://username-spacename.hf.space`) can be embedded
- with:
- ```html
- <iframe src="https://username-spacename.hf.space" width="640" height="480"></iframe>
- ```
- """
-
  import gradio as gr
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import outlines  # structured-generation library
-
- MODEL_NAME = "distilgpt2"  # small & free to download (≈ 300 MB)
-
- # Load model / tokenizer once at start-up
- print("Loading model – first launch may take ~20 s on CPU…")
-
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME,
-     torch_dtype=torch.float32,
- ).eval()
-
- # ---------------------------------------------------------------------------
- # 1️⃣ Helper: baseline generation (no constraints)
- # ---------------------------------------------------------------------------
-
- def generate_baseline(prompt: str, max_tokens: int = 64, temperature: float = 0.7):
-     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-     model.to(device)
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     output_ids = model.generate(
-         **inputs,
-         max_new_tokens=max_tokens,
-         temperature=temperature,
-         top_k=50,
-     )
-     return tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
- # ---------------------------------------------------------------------------
- # 2️⃣ Helper: constrained generation with Outlines
- # ---------------------------------------------------------------------------
- # For demo purposes we request that *at some point* the token string “OpenAI”
- # appears in the output. Any regex that the `re` module accepts will work.
- # You can expose it as an additional textbox if you want users to edit it.
-
- PATTERN = r".*OpenAI.*"
-
- # Build a generator bound to the regex once; it rewires the model’s logits
- # so forbidden tokens get probability −∞ (effectively zero).
-
- generator = outlines.generate.regex(model, PATTERN)
-
-
- def generate_constrained(prompt: str, max_tokens: int = 64, temperature: float = 0.7):
-     return generator(prompt, max_tokens=max_tokens, temperature=temperature)
-
- # ---------------------------------------------------------------------------
- # 3️⃣ Helper: show top-10 next-token probabilities *before* and *after*
- #     applying the regex constraint, to make the effect visible.
- # ---------------------------------------------------------------------------
-
- def _topk_probs(logits: torch.Tensor, k: int = 10):
-     """Return {token: prob} for the k most likely tokens."""
-     probs = torch.softmax(logits, dim=-1)
-     topk = torch.topk(probs, k)
-     tokens = [tokenizer.decode(idx) for idx in topk.indices[0]]
-     return {t.replace("\n", "\\n"): float(p) for t, p in zip(tokens, topk.values[0])}
-
-
- def compare(prompt: str):
-     # Baseline text
-     baseline_text = generate_baseline(prompt)
-
-     # Constrained text
-     constrained_text = generate_constrained(prompt)
-
-     # Get logits for next token after the *prompt* (not after full generation)
      with torch.no_grad():
-         inputs = tokenizer(prompt, return_tensors="pt")
-         base_logits = model(**inputs).logits[:, -1, :]
-
-     # Apply Outlines’ regex sampler to obtain constrained logits
-     regex_sampler = outlines.samplers.RegexSampler(PATTERN)
-     constrained_logits = regex_sampler(base_logits.clone(), inputs.input_ids)
-
-     baseline_topk = _topk_probs(base_logits)
-     constrained_topk = _topk_probs(constrained_logits)
-
-     return baseline_text, constrained_text, baseline_topk, constrained_topk
-
- # ---------------------------------------------------------------------------
- # 4️⃣ Gradio UI – minimal so it fits nicely inside slides
- # ---------------------------------------------------------------------------
-
- def build_interface():
-     with gr.Blocks() as demo:
-         gr.Markdown("## Structured Generation Demo (Outlines)")
-
-         prompt = gr.Textbox(lines=3, label="Prompt", placeholder="e.g. A short story about innovative AI")
-         generate_btn = gr.Button("Generate")
-
-         with gr.Row():
-             baseline_out = gr.Textbox(label="Baseline output (unconstrained)")
-             constrained_out = gr.Textbox(label="Constrained output (must contain 'OpenAI')")
-
-         with gr.Row():
-             baseline_probs = gr.JSON(label="Top-10 next-token probs (baseline)")
-             constrained_probs = gr.JSON(label="Top-10 next-token probs (constrained)")
-
-         generate_btn.click(compare, inputs=prompt, outputs=[
-             baseline_out,
-             constrained_out,
-             baseline_probs,
-             constrained_probs,
-         ])
-
-     return demo
-
-
- demo = build_interface()
-
  if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessor
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from matplotlib.colors import LinearSegmentedColormap
+
+ # Load a small model
+ model_name = "distilgpt2"  # Small model suitable for a demo
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ class OutlineLogitsProcessor(LogitsProcessor):
+     """
+     A logits processor that softly enforces an outline: at each generation
+     step it boosts the logit of the next outline token, so the outline
+     tokens are strongly favoured, in order (a soft bias, not a hard mask).
+     """
+     def __init__(self, outline_tokens, tokenizer, boost_factor=10.0):
+         self.outline_tokens = outline_tokens
+         self.tokenizer = tokenizer
+         self.boost_factor = boost_factor
+         self.current_outline_idx = 0
+
+     def __call__(self, input_ids, scores):
+         if self.current_outline_idx < len(self.outline_tokens):
+             # Get the next token from the outline
+             target_token_id = self.outline_tokens[self.current_outline_idx]
+
+             # Boost the logit of the target token; the ellipsis index works
+             # for both 1-D scores (custom loop below) and 2-D batched scores
+             scores[..., target_token_id] += self.boost_factor
+             self.current_outline_idx += 1
+
+         return scores
+
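+ # A minimal usage sketch (illustrative only; this prompt and outline are
+ # examples, not part of the app's flow): the same processor also plugs into
+ # transformers' standard generate API via LogitsProcessorList.
+ #
+ #     from transformers import LogitsProcessorList
+ #     demo_ids = tokenizer.encode("The plan covers", return_tensors="pt")
+ #     demo_outline = tokenizer.encode(" safety and testing")
+ #     out = model.generate(
+ #         demo_ids,
+ #         max_new_tokens=len(demo_outline) + 5,
+ #         do_sample=True,
+ #         logits_processor=LogitsProcessorList(
+ #             [OutlineLogitsProcessor(demo_outline, tokenizer)]
+ #         ),
+ #     )
+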
+ def generate_text(prompt, use_outline=False, outline_text=""):
+     """Generate text with or without an outline constraint."""
+
+     # Tokenize the prompt
+     input_ids = tokenizer.encode(prompt, return_tensors="pt")
+
+     logits_processor = None
+     if use_outline and outline_text.strip():
+         # Tokenize the outline (GPT-2's tokenizer adds no BOS token,
+         # so every encoded token is kept)
+         outline_tokens = tokenizer.encode(outline_text)
+         logits_processor = [OutlineLogitsProcessor(outline_tokens, tokenizer)]
+
+     # Store token probabilities for visualization
+     all_probs = []
+
+     # Capture the next-token probability distribution at each step
+     def capture_probs(logits):
+         probs = torch.softmax(logits[0, -1, :], dim=-1)
+         all_probs.append(probs.detach().cpu().numpy())
+
+     # Sampling settings; we sample by hand instead of calling model.generate
+     # so that per-step probabilities can be recorded
+     max_new_tokens = 30
+     temperature = 0.7
+
+     # Custom generation with probability capture
      with torch.no_grad():
+         for _ in range(max_new_tokens):
+             outputs = model(input_ids)
+             capture_probs(outputs.logits)
+
+             # Logits for the next-token position (1-D over the vocabulary)
+             logits = outputs.logits[0, -1, :]
+             if logits_processor:
+                 for processor in logits_processor:
+                     logits = processor(input_ids, logits)
+
+             next_token_probs = torch.softmax(logits / temperature, dim=-1)
+             next_token = torch.multinomial(next_token_probs, 1).unsqueeze(0)
+             input_ids = torch.cat([input_ids, next_token], dim=-1)
+
+             # Stop if EOS token is generated
+             if next_token.item() == tokenizer.eos_token_id:
+                 break
+
+     generated_text = tokenizer.decode(input_ids[0], skip_special_tokens=True)
+
+     # Get top tokens and their probabilities for visualization
+     top_tokens = []
+     for probs in all_probs:
+         top_indices = np.argsort(probs)[-5:][::-1]  # Top 5 tokens
+         top_tokens.append([(tokenizer.decode([idx]), float(probs[idx])) for idx in top_indices])
+
+     return generated_text, top_tokens
+
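+ # Illustrative comparison (not executed at import time): the same prompt
+ # with and without the outline constraint.
+ # plain_text, _ = generate_text("The most interesting thing about AI is")
+ # outlined_text, _ = generate_text(
+ #     "The most interesting thing about AI is",
+ #     use_outline=True,
+ #     outline_text="safety, creativity, and knowledge",
+ # )
+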
+ def create_probability_plot(top_tokens):
+     """Create a visualization of token probabilities."""
+     if not top_tokens:
+         return None
+
+     fig, ax = plt.subplots(figsize=(10, 6))
+
+     # Number of generation steps and top-k per step
+     n_tokens = len(top_tokens)
+     top_k = len(top_tokens[0])
+
+     # Create a custom colormap that goes from light blue to dark blue
+     colors = [(0.8, 0.9, 1.0), (0.0, 0.4, 0.8)]
+     cmap = LinearSegmentedColormap.from_list("blue_gradient", colors)
+
+     # Create the heatmap-style visualization
+     data = np.zeros((top_k, n_tokens))
+     token_labels = []
+
+     for i, token_probs in enumerate(top_tokens):
+         # Extract tokens and probabilities
+         tokens = [t[0] for t in token_probs]
+         probs = [t[1] for t in token_probs]
+
+         # Store probabilities for visualization
+         for j, prob in enumerate(probs):
+             data[j, i] = prob
+
+         # Label the y-axis with the top tokens of the *first* position only
+         # (the ranked tokens differ at later positions)
+         if i == 0:
+             token_labels = tokens
+
+     # Plot the heatmap
+     im = ax.imshow(data, aspect='auto', cmap=cmap)
+
+     # Add colorbar
+     cbar = fig.colorbar(im, ax=ax, label='Probability')
+
+     # Customize the plot
+     ax.set_yticks(range(top_k))
+     ax.set_yticklabels(token_labels)
+     ax.set_xlabel('Token Position in Generated Sequence')
+     ax.set_ylabel('Top Tokens')
+     ax.set_title('Token Probabilities During Generation')
+
+     # Adjust layout
+     plt.tight_layout()
+     return fig
+
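+ # Quick preview with made-up numbers (hypothetical values, illustration only):
+ # fig = create_probability_plot(
+ #     [[(" the", 0.31), (" a", 0.12), (" to", 0.08), (" of", 0.05), ("\n", 0.04)]]
+ # )
+ # fig.savefig("token_probs.png")
+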
+ def interface_fn(prompt, use_outline, outline_text):
+     """Main function for the Gradio interface."""
+     generated_text, top_tokens = generate_text(prompt, use_outline, outline_text)
+
+     # Create visualization of token probabilities
+     prob_plot = create_probability_plot(top_tokens)
+
+     # Format token probabilities as text for display
+     prob_text = ""
+     for i, tokens in enumerate(top_tokens):
+         prob_text += f"Position {i+1}:\n"
+         for token, prob in tokens:
+             prob_text += f"  '{token}': {prob:.4f}\n"
+         prob_text += "\n"
+
+     return generated_text, prob_plot, prob_text
+
+ # Create the Gradio interface
+ with gr.Blocks(title="Structured Generation Demo") as demo:
+     gr.Markdown("# Structured Generation Demo")
+     gr.Markdown("This demo shows how an outline constraint can steer language model generation to include specific tokens.")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Enter a prompt to start generation...",
+                 value="The most interesting thing about AI is"
+             )
+
+             use_outline = gr.Checkbox(label="Use Outline Constraint", value=False)
+
+             outline_text = gr.Textbox(
+                 label="Outline Text (tokens to enforce in order)",
+                 placeholder="Enter tokens to enforce in the generation...",
+                 value="safety, creativity, and knowledge"
+             )
+
+             generate_btn = gr.Button("Generate Text")
+
+         with gr.Column():
+             output_text = gr.Textbox(label="Generated Text")
+             prob_plot = gr.Plot(label="Token Probabilities")
+             prob_text = gr.Textbox(label="Detailed Token Probabilities", lines=10)
+
+     generate_btn.click(
+         interface_fn,
+         inputs=[prompt, use_outline, outline_text],
+         outputs=[output_text, prob_plot, prob_text]
+     )
+
+ # Launch the app
  if __name__ == "__main__":
+     demo.launch()