Spaces:

kevin1911
/

LLMdemo

Sleeping

App Files Files Community

kevin1911 commited on Feb 13

Commit

e93333c

verified ·

1 Parent(s): b7556c2

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -150

app.py CHANGED Viewed

@@ -1,198 +1,160 @@
 import torch
 import gradio as gr
 import plotly.express as px
-import numpy as np
 from transformers import AutoModel, AutoTokenizer
 ########################################
-# 1) Load DistilBERT with attention
 ########################################
 model_name = "distilbert-base-uncased"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, output_attentions=True)
 model.eval()
-########################################
-# 2) Generate attention analysis
-########################################
-def analyze_attention(text, layer=5, top_k=3, show_heatmap=True):
     """
-    1. Tokenize 'text'.
-    2. Forward pass DistilBERT (output_attentions=True).
-    3. Extract attention from chosen layer (0..5).
-    4. Average across heads => (seq_len, seq_len).
-    5. Optionally create Plotly heatmap (fig_dict).
-    6. Create text summary of top-k focuses for each token.
-    7. Generate an "interpretation" to highlight interesting patterns.
     """
     with torch.no_grad():
         inputs = tokenizer.encode_plus(text, return_tensors="pt")
         outputs = model(**inputs)
-        all_attentions = outputs.attentions  # tuple: (#layers) each => (1, #heads, seq_len, seq_len)
-        # DistilBERT has 6 layers => valid range: 0..5
-        att = all_attentions[layer].mean(dim=1)  # average across heads => shape: (1, seq_len, seq_len)
-    att_matrix = att[0].cpu().numpy()  # (seq_len, seq_len)
     input_ids = inputs["input_ids"][0]
     tokens = tokenizer.convert_ids_to_tokens(input_ids)
-    seq_len = len(tokens)
-    # (Optional) Heatmap
-    fig_dict = None
-    if show_heatmap:
-        fig = px.imshow(
-            att_matrix,
-            x=tokens,
-            y=tokens,
-            labels={"x": "Token Being Looked At", "y": "Token Doing the Looking"},
-            color_continuous_scale="Blues",
-            title=f"DistilBERT Self-Attention (Layer {layer})"
-        )
-        fig.update_xaxes(side="top")
-        fig.update_traces(
-            hovertemplate="Row token: %{y}<br>Column token: %{x}<br>Focus Weight: %{z:.3f}"
-        )
-        fig_dict = fig.to_dict()
-    # Top-K Summary for each row
-    summary_md = "## Top-K Focus for Each Token\n"
-    summary_md += f"Showing the **top {top_k}** tokens each token focuses on.\n\n"
-    for i in range(seq_len):
-        row_token = tokens[i]
-        row_weights = att_matrix[i]
-        sorted_idx = row_weights.argsort()[::-1]
-        top_indices = sorted_idx[:top_k]
-        summary_md += f"**Token '{row_token}'** focuses on:\n"
-        for j in top_indices:
-            col_token = tokens[j]
-            weight = row_weights[j]
-            summary_md += f" - `{col_token}` (weight={weight:.3f})\n"
-        summary_md += "\n"
-    # Generate an additional "interpretation" to highlight patterns
-    interpretation_md = interpret_attention(att_matrix, tokens)
-    # Combine summaries
-    combined_md = summary_md + "\n" + interpretation_md
-    return fig_dict, combined_md
-########################################
-# 3) Interpretation function
-########################################
-def interpret_attention(att_matrix: np.ndarray, tokens: list) -> str:
     """
-    Provide a short bullet-list interpretation of the attention matrix:
-    - Count how many tokens mostly attend to themselves (diagonal).
-    - Find the global max attention weight (which row->col?), mention tokens involved.
-    - Possibly mention if we see something interesting about distribution.
     """
-    seq_len = len(tokens)
-    diagonal_focus_count = 0
-    # We'll track the max weight overall
-    max_val = -1.0
-    max_rc = (0, 0)
-    # For each row, check if diagonal is the top focus
-    for i in range(seq_len):
-        row = att_matrix[i]
-        best_j = row.argmax()
-        if best_j == i:
-            diagonal_focus_count += 1
-        # Check global max
-        if row[best_j] > max_val:
-            max_val = row[best_j]
-            max_rc = (i, best_j)
-    # Summaries
-    # 1) Diagonal focus stat
-    diag_msg = f"- **{diagonal_focus_count}/{seq_len} tokens** focus most on themselves (the diagonal)."
-    # 2) Global max
-    i, j = max_rc
-    token_i = tokens[i]
-    token_j = tokens[j]
-    global_msg = f"- The **highest single focus** in the matrix is **{max_val:.3f}**, from token '{token_i}' onto '{token_j}'."
-    # 3) Possibly some quick ratio
-    # For each row, sum of row vs. sum of diagonal
-    # We'll keep it simpler for now
-    interpretation = "## Additional Interpretation\n\n"
-    interpretation += (
-        "Here are some overall patterns in the attention matrix that might help you:\n\n"
     )
-    interpretation += f"{diag_msg}\n"
-    interpretation += f"{global_msg}\n"
-    interpretation += "\n- A strong diagonal means tokens often reference themselves.\n"
-    interpretation += (
-        "- If a token's top focus is another token, that suggests it's referencing or depending on that other token.\n"
     )
-    return interpretation
-########################################
-# 4) Gradio UI
-########################################
-description_md = """
-# DistilBERT Self-Attention with Extra Interpretation
-**Instructions:**
-1. Type your text into the box.
-2. Choose which **layer** of DistilBERT to visualize. (Layers range 0..5).
-3. Decide how many top tokens you want listed for each token (Top-K).
-4. (Optional) Check "Show Heatmap" to see the matrix. If it's too overwhelming, uncheck and just see the summary.
-**Reading the Heatmap**:
-- **Rows** = tokens doing the looking (focus).
-- **Columns** = tokens being looked at.
-- **Color intensity** = how strongly the row token focuses on the column token.
-Below the heatmap, you'll see:
-- A **Top-K focus** summary for each token.
-- An **interpretation** bullet list that highlights interesting overall patterns.
-"""
-def run_demo(text, layer, top_k, show_heatmap):
-    fig_dict, summary_md = analyze_attention(text, layer, top_k, show_heatmap)
-    return fig_dict, summary_md
 with gr.Blocks() as demo:
-    gr.Markdown(description_md)
     with gr.Row():
-        text_in = gr.Textbox(
-            label="Enter text",
             value="Transformers handle long-range context in parallel."
         )
-        layer_in = gr.Slider(
             minimum=0, maximum=5, step=1, value=5,
-            label="DistilBERT Layer"
-        )
-        topk_in = gr.Slider(
-            minimum=1, maximum=6, step=1, value=3,
-            label="Top-K Focus"
         )
-    show_heatmap_check = gr.Checkbox(
-        label="Show Heatmap?",
-        value=True
     )
-    run_btn = gr.Button("Analyze Attention")
-    out_plot = gr.Plot(label="Attention Heatmap")
-    out_summary = gr.Markdown(label="Attention Summaries & Interpretation")
-    run_btn.click(
-        fn=run_demo,
-        inputs=[text_in, layer_in, topk_in, show_heatmap_check],
-        outputs=[out_plot, out_summary]
     )
 demo.launch()

 import torch
 import gradio as gr
 import plotly.express as px
 from transformers import AutoModel, AutoTokenizer
 ########################################
+# Load Transformer (DistilBERT) with attention
 ########################################
 model_name = "distilbert-base-uncased"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, output_attentions=True)
 model.eval()
+def visualize_attention(text, layer=5):
     """
+    1. Tokenize input text.
+    2. Run DistilBERT forward pass to get attention matrices.
+    3. Pick a layer (0..5) and average across attention heads.
+    4. Generate a heatmap (Plotly) of shape (seq_len x seq_len).
+    5. Label axes with tokens (Query vs. Key).
     """
     with torch.no_grad():
         inputs = tokenizer.encode_plus(text, return_tensors="pt")
         outputs = model(**inputs)
+        all_attentions = outputs.attentions
+        # DistilBERT has 6 layers => valid layer indices: 0..5
+        attn_layer = all_attentions[layer].mean(dim=1)  # shape: (1, seq_len, seq_len)
+    # Convert to numpy for plotting
+    attn_matrix = attn_layer[0].cpu().numpy()
+    # Get tokens (including special tokens)
     input_ids = inputs["input_ids"][0]
     tokens = tokenizer.convert_ids_to_tokens(input_ids)
+    # Build a Plotly heatmap
+    fig = px.imshow(
+        attn_matrix,
+        x=tokens,
+        y=tokens,
+        labels={"x": "Key (Being Attended to)", "y": "Query (Focusing)"},
+        color_continuous_scale="Blues",
+        title=f"DistilBERT Attention (Layer {layer})"
+    )
+    fig.update_xaxes(side="top")
+    # Add tooltip
+    fig.update_traces(
+        hovertemplate="Query: %{y}<br>Key: %{x}<br>Attention Weight: %{z:.3f}"
+    )
+    fig.update_layout(coloraxis_colorbar=dict(title="Attention Weight"))
+    return fig
+def interpret_token_attention(text, token_index=0, layer=5):
     """
+    Provides a textual explanation for why a particular token (Query) attends
+    to other tokens in the input, highlighting the top 2 or 3 tokens
+    it focuses on.
     """
+    with torch.no_grad():
+        inputs = tokenizer.encode_plus(text, return_tensors="pt")
+        outputs = model(**inputs)
+        all_attentions = outputs.attentions
+        attn_layer = all_attentions[layer].mean(dim=1)  # shape: (1, seq_len, seq_len)
+    # Get tokens
+    input_ids = inputs["input_ids"][0]
+    tokens = tokenizer.convert_ids_to_tokens(input_ids)
+    # Safety check for token_index
+    if token_index < 0 or token_index >= len(tokens):
+        return "Invalid token index. Please choose a valid token index."
+    # Extract the row corresponding to our Query token
+    query_attn = attn_layer[0, token_index, :].cpu().numpy()  # shape: (seq_len,)
+    # Sort tokens by attention weight (descending)
+    sorted_indices = query_attn.argsort()[::-1]
+    top_indices = sorted_indices[:3]  # Grab top 3
+    top_tokens = [tokens[i] for i in top_indices]
+    top_weights = [query_attn[i] for i in top_indices]
+    # Build an explanation
+    query_token_str = tokens[token_index]
+    explanation = (
+        f"**You chose token index {token_index}, which is '{query_token_str}'.**\n\n"
+        "In Transformers, each token is converted into Query, Key, and Value vectors:\n"
+        "- **Query** = What this token is looking for\n"
+        "- **Key**   = What another token has to offer\n"
+        "- **Value** = The actual information from that token\n\n"
+        f"As a Query, '{query_token_str}' attends most strongly to:\n"
     )
+    for t, w in zip(top_tokens, top_weights):
+        explanation += f"- **{t}** with attention weight ~ {w:.3f}\n"
+    explanation += (
+        "\nA higher attention weight indicates that this Query token is 'looking at' or "
+        "focusing on that Key token more strongly, likely because it finds the Key token "
+        "relevant to its meaning or context."
     )
+    return explanation
+# Short explanation text for the UI
+description_text = """
+## Understanding Transformer Self-Attention
+- **Rows = Query token** (the token doing the 'looking').
+- **Columns = Key token** (the token being 'looked at').
+- Darker color = stronger attention weight.
+**Transformers** process all tokens in **parallel**, allowing any token to attend to any other token in the sentence.
+This makes it easier for the model to capture long-distance relationships.
+"""
+########################################
+# Gradio Interface
+########################################
 with gr.Blocks() as demo:
+    gr.Markdown("# Transformer Self-Attention Visualization (DistilBERT)")
+    gr.Markdown(description_text)
     with gr.Row():
+        text_input = gr.Textbox(
+            label="Enter a sentence",
             value="Transformers handle long-range context in parallel."
         )
+        layer_slider = gr.Slider(
             minimum=0, maximum=5, step=1, value=5,
+            label="DistilBERT Layer (0=lowest, 5=highest)"
         )
+    output_plot = gr.Plot(label="Attention Heatmap")
+    # Visualization Button
+    visualize_button = gr.Button("Visualize Attention")
+    visualize_button.click(
+        fn=visualize_attention,
+        inputs=[text_input, layer_slider],
+        outputs=output_plot
     )
+    # Dropdown (or Slider) to choose a token index for interpretation
+    token_index = gr.Number(
+        label="Choose a token index to interpret (0-based)",
+        value=0
+    )
+    interpretation_output = gr.Markdown(label="Interpretation")
+    # Interpretation Button
+    interpret_button = gr.Button("Explain This Token's Attention")
+    interpret_button.click(
+        fn=interpret_token_attention,
+        inputs=[text_input, token_index, layer_slider],
+        outputs=interpretation_output
     )
 demo.launch()