Update app.py
app.py
CHANGED
@@ -1,91 +1,95 @@
import gradio as gr
-generator = load_model()
-def generate_text(prompt, max_length=100, temperature=1.0, top_p=0.9):
    """
-        top_p (float): Nucleus sampling hyperparameter.
-    Returns:
-        str: Generated text from GPT-2.
    """
    )
-    """
-    # Educational GPT-2 Demo
-    This demo demonstrates how a smaller Large Language Model (GPT-2) predicts text.
-    Change the parameters below to see how the model's output is affected:
-    - **Max Length** controls the total number of tokens in the output.
-    - **Temperature** controls randomness (higher means more creative/chaotic).
-    - **Top-p** controls the diversity of tokens (lower means more conservative choices).
-    """
    )
    with gr.Row():
-        )
-            minimum=20,
-            maximum=200,
-            value=100,
-            step=1,
-            label="Max Length"
-        )
-        temp = gr.Slider(
-            minimum=0.1,
-            maximum=2.0,
-            value=1.0,
-            step=0.1,
-            label="Temperature"
-        )
-        top_p = gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.9,
-            step=0.05,
-            label="Top-p"
-        )
-        generate_button = gr.Button("Generate")
-    output_box = gr.Textbox(
-        label="Generated Text",
-        lines=10
-    )
    )
demo.launch()
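The removed Markdown above describes how **Max Length**, **Temperature**, and **Top-p** shape GPT-2's output, but the bodies of `load_model()` and `generate_text()` are not visible in this hunk. As a rough reference only, here is a minimal sketch of how those three parameters typically feed a `transformers` text-generation pipeline; the function bodies below are assumptions for illustration, not code recovered from the removed file:

```python
# Illustrative sketch (assumption, not the removed implementation):
# how max_length, temperature, and top_p typically plug into GPT-2 generation.
from transformers import pipeline

def load_model():
    # "gpt2" is the small 124M-parameter checkpoint on the Hugging Face Hub
    return pipeline("text-generation", model="gpt2")

generator = load_model()

def generate_text(prompt, max_length=100, temperature=1.0, top_p=0.9):
    result = generator(
        prompt,
        max_length=max_length,    # upper bound on total tokens (prompt + continuation)
        do_sample=True,           # sampling must be on for temperature/top_p to take effect
        temperature=temperature,  # higher => flatter distribution => more random output
        top_p=top_p,              # nucleus sampling: sample only from the top cumulative-probability mass
    )
    return result[0]["generated_text"]
```

The added version of `app.py` replaces this GPT-2 demo with a DistilBERT attention visualizer: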
+import torch
import gradio as gr
+import plotly.express as px
+from transformers import AutoModel, AutoTokenizer

+########################################
+# Load Transformer (DistilBERT) with attention
+########################################
+model_name = "distilbert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Note: output_attentions=True to extract attention matrices
+model = AutoModel.from_pretrained(model_name, output_attentions=True)
+model.eval()

+def visualize_attention(text, layer=5):
    """
+    1. Tokenize input text.
+    2. Run DistilBERT forward pass to get attention matrices.
+    3. Pick a layer (0..5) and average across attention heads.
+    4. Generate a heatmap (Plotly) of shape (seq_len x seq_len).
+    5. Label axes with tokens (Query vs. Key).
    """
+    with torch.no_grad():
+        inputs = tokenizer.encode_plus(text, return_tensors="pt")
+        outputs = model(**inputs)
+        # outputs.attentions: tuple of shape [num_layers] each => (batch=1, num_heads, seq_len, seq_len)
+        all_attentions = outputs.attentions
+        # DistilBERT has 6 layers => valid indices: 0..5
+        attn_layer = all_attentions[layer].mean(dim=1)  # average across heads => shape: (1, seq_len, seq_len)
+
+    # Convert to numpy for plotting
+    attn_matrix = attn_layer[0].cpu().numpy()
+
+    # Get tokens (including special tokens like [CLS], [SEP])
+    input_ids = inputs["input_ids"][0]
+    tokens = tokenizer.convert_ids_to_tokens(input_ids)
+
+    # Build a Plotly heatmap
+    fig = px.imshow(
+        attn_matrix,
+        x=tokens,
+        y=tokens,
+        labels={"x": "Key (Being Attended to)", "y": "Query (Focusing)"},
+        color_continuous_scale="Blues",
+        title=f"DistilBERT Attention (Layer {layer})"
    )
+    fig.update_xaxes(side="top")

+    # Add tooltip: shows row token, column token, and attention weight
+    fig.update_traces(
+        hovertemplate="Query: %{y}<br>Key: %{x}<br>Attention Weight: %{z:.3f}"
    )
+    return fig
+
+# Short explanation text for the UI
+description_text = """
+## Understanding Transformer Self-Attention
+
+- **Rows = "Query token"** (the token that is looking at other tokens)
+- **Columns = "Key token"** (the token being looked at)
+- Darker (or higher) color = stronger attention.
+
+**Transformers** process all tokens in **parallel**, not step-by-step like RNNs.
+Thus, **long-distance dependencies** are easier to capture: any token can directly
+attend to any other token, regardless of distance in the sentence.
+"""
+
+########################################
+# Gradio Interface
+########################################
+with gr.Blocks() as demo:
+    gr.Markdown("# Transformer Self-Attention Visualization (DistilBERT)")
+    gr.Markdown(description_text)

    with gr.Row():
+        text_input = gr.Textbox(
+            label="Enter a sentence",
+            value="Transformers handle long-range context in parallel."
+        )
+        layer_slider = gr.Slider(
+            minimum=0, maximum=5, step=1, value=5,
+            label="DistilBERT Layer (0=lowest, 5=highest)"
+        )

+    output_plot = gr.Plot(label="Attention Heatmap")

+    visualize_button = gr.Button("Visualize Attention")
+    visualize_button.click(
+        fn=visualize_attention,
+        inputs=[text_input, layer_slider],
+        outputs=output_plot
    )

demo.launch()
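As the `description_text` block explains, each row of the heatmap is a query token's attention distribution over the key tokens (columns). A quick sanity check of the shapes and row sums that `visualize_attention` relies on, using the same `distilbert-base-uncased` checkpoint (this snippet is illustrative and not part of the commit):

```python
# Illustrative check (not part of the commit): inspect the attention tensors
# that visualize_attention averages and plots.
import torch
from transformers import AutoModel, AutoTokenizer

name = "distilbert-base-uncased"
tok = AutoTokenizer.from_pretrained(name)
mdl = AutoModel.from_pretrained(name, output_attentions=True).eval()

with torch.no_grad():
    enc = tok("Transformers handle long-range context in parallel.", return_tensors="pt")
    out = mdl(**enc)

print(len(out.attentions))           # 6 layers in DistilBERT => slider range 0..5
print(out.attentions[5].shape)       # (1, 12, seq_len, seq_len): batch, heads, query, key
avg = out.attentions[5].mean(dim=1)  # head-averaged matrix, i.e. what the heatmap shows
print(avg[0].sum(dim=-1))            # each query row sums to ~1.0 (softmax over keys)
```

Running `python app.py` launches the Gradio app; the layer slider simply selects which of the six head-averaged matrices gets plotted.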