Commit
·
dd850a7
1
Parent(s):
6df9665
initial
Browse files- .gitignore +104 -0
- README.md +72 -7
- api/__init__.py +0 -0
- app.py +656 -0
- claude.md +206 -0
- config.py +39 -0
- core/__init__.py +0 -0
- core/attention.py +279 -0
- core/cache.py +90 -0
- core/model_handler.py +187 -0
- requirements.txt +8 -0
- visualization/__init__.py +0 -0
- visualization/d3_viz.py +356 -0
- visualization/plotly_viz.py +443 -0
- visualization/simple_svg_viz.py +138 -0
- visualization/utils.py +223 -0
.gitignore
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.so
|
6 |
+
.Python
|
7 |
+
build/
|
8 |
+
develop-eggs/
|
9 |
+
dist/
|
10 |
+
downloads/
|
11 |
+
eggs/
|
12 |
+
.eggs/
|
13 |
+
lib/
|
14 |
+
lib64/
|
15 |
+
parts/
|
16 |
+
sdist/
|
17 |
+
var/
|
18 |
+
wheels/
|
19 |
+
pip-wheel-metadata/
|
20 |
+
share/python-wheels/
|
21 |
+
*.egg-info/
|
22 |
+
.installed.cfg
|
23 |
+
*.egg
|
24 |
+
MANIFEST
|
25 |
+
|
26 |
+
# Virtual environments
|
27 |
+
venv/
|
28 |
+
env/
|
29 |
+
ENV/
|
30 |
+
env.bak/
|
31 |
+
venv.bak/
|
32 |
+
.venv/
|
33 |
+
|
34 |
+
# IDE and editors
|
35 |
+
.vscode/
|
36 |
+
.idea/
|
37 |
+
*.swp
|
38 |
+
*.swo
|
39 |
+
*~
|
40 |
+
.claude/
|
41 |
+
|
42 |
+
# OS generated files
|
43 |
+
.DS_Store
|
44 |
+
.DS_Store?
|
45 |
+
._*
|
46 |
+
.Spotlight-V100
|
47 |
+
.Trashes
|
48 |
+
ehthumbs.db
|
49 |
+
Thumbs.db
|
50 |
+
|
51 |
+
# Logs and databases
|
52 |
+
*.log
|
53 |
+
*.sqlite3
|
54 |
+
*.db
|
55 |
+
|
56 |
+
# Model cache and downloads
|
57 |
+
models/
|
58 |
+
.cache/
|
59 |
+
huggingface_hub/
|
60 |
+
transformers_cache/
|
61 |
+
|
62 |
+
# Temporary files
|
63 |
+
*.tmp
|
64 |
+
*.temp
|
65 |
+
.tmp/
|
66 |
+
|
67 |
+
# Environment variables
|
68 |
+
.env
|
69 |
+
.env.local
|
70 |
+
.env.production
|
71 |
+
.env.staging
|
72 |
+
|
73 |
+
# Jupyter Notebook
|
74 |
+
.ipynb_checkpoints
|
75 |
+
|
76 |
+
# pytest
|
77 |
+
.pytest_cache/
|
78 |
+
.coverage
|
79 |
+
|
80 |
+
# mypy
|
81 |
+
.mypy_cache/
|
82 |
+
.dmypy.json
|
83 |
+
dmypy.json
|
84 |
+
|
85 |
+
# Local development
|
86 |
+
local_test.py
|
87 |
+
test_*.py
|
88 |
+
debug.py
|
89 |
+
|
90 |
+
# Gradio temporary files
|
91 |
+
.gradio/
|
92 |
+
gradio_cached_examples/
|
93 |
+
flagged/
|
94 |
+
|
95 |
+
# Large files that shouldn't be in git
|
96 |
+
*.bin
|
97 |
+
*.safetensors
|
98 |
+
*.pt
|
99 |
+
*.pth
|
100 |
+
*.ckpt
|
101 |
+
*.h5
|
102 |
+
|
103 |
+
# Documentation build
|
104 |
+
docs/_build/
|
README.md
CHANGED
@@ -1,14 +1,79 @@
|
|
1 |
---
|
2 |
title: Token Attention Visualizer
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license:
|
11 |
-
short_description: An interactive tool for visualizing attention patterns in Large Language Models
|
12 |
---
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
title: Token Attention Visualizer
|
3 |
+
emoji: 🔍
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.0.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: apache-2.0
|
|
|
11 |
---
|
12 |
|
13 |
+
# Token Attention Visualizer
|
14 |
+
|
15 |
+
An interactive tool for visualizing attention patterns in Large Language Models during text generation.
|
16 |
+
|
17 |
+
## Features
|
18 |
+
|
19 |
+
- 🚀 **Real-time Generation**: Generate text with any Hugging Face model
|
20 |
+
- 🔍 **Attention Visualization**: Explore attention patterns with clear visual representations
|
21 |
+
- 📊 **Dual Normalization**: Choose between separate or joint attention normalization
|
22 |
+
- ⚡ **Smart Caching**: Fast response with intelligent result caching
|
23 |
+
- 🎯 **Token Selection**: Use dropdown menus to select and filter token connections
|
24 |
+
- 📈 **Step Navigation**: Navigate through generation steps
|
25 |
+
- 🎨 **Customizable Threshold**: Filter weak attention connections
|
26 |
+
|
27 |
+
## How It Works
|
28 |
+
|
29 |
+
The visualizer shows how tokens attend to each other during text generation:
|
30 |
+
- **Blue lines**: Attention from input tokens to output tokens
|
31 |
+
- **Orange curves**: Attention between output tokens
|
32 |
+
- **Line thickness**: Represents attention weight strength
|
33 |
+
|
34 |
+
## Usage
|
35 |
+
|
36 |
+
1. **Load a Model**: Enter a Hugging Face model name (default: HuggingFaceTB/SmolLM-135M-Instruct)
|
37 |
+
2. **Enter Prompt**: Type your input text
|
38 |
+
3. **Configure Settings**: Adjust max tokens, temperature, and normalization
|
39 |
+
4. **Generate**: Click to generate text and visualize attention
|
40 |
+
5. **Explore**: Use dropdown menus to select tokens and view their attention patterns
|
41 |
+
|
42 |
+
## Technical Details
|
43 |
+
|
44 |
+
- Built with Gradio for the interface
|
45 |
+
- Visualization system with dropdown-based token selection
|
46 |
+
- Supports any Hugging Face causal language model
|
47 |
+
- Optimized for smaller models like SmolLM for efficient deployment
|
48 |
+
- Implements efficient attention processing and caching
|
49 |
+
|
50 |
+
## Local Development
|
51 |
+
|
52 |
+
```bash
|
53 |
+
# Clone the repository
|
54 |
+
git clone <repo-url>
|
55 |
+
cd token-attention-viz
|
56 |
+
|
57 |
+
# Install dependencies
|
58 |
+
pip install -r requirements.txt
|
59 |
+
|
60 |
+
# Run the app
|
61 |
+
python app.py
|
62 |
+
```
|
63 |
+
|
64 |
+
## Deployment
|
65 |
+
|
66 |
+
This app is designed for easy deployment on Hugging Face Spaces. Simply:
|
67 |
+
1. Create a new Space
|
68 |
+
2. Upload the project files
|
69 |
+
3. The app will automatically start
|
70 |
+
|
71 |
+
## Requirements
|
72 |
+
|
73 |
+
- Python 3.8+
|
74 |
+
- 4GB+ RAM (SmolLM models are lightweight)
|
75 |
+
- GPU acceleration optional (works well on CPU)
|
76 |
+
|
77 |
+
## License
|
78 |
+
|
79 |
+
Apache 2.0
|
api/__init__.py
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,656 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
import sys
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
# Add project root to path
|
9 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
10 |
+
|
11 |
+
from core.model_handler import ModelHandler
|
12 |
+
from core.attention import AttentionProcessor
|
13 |
+
from core.cache import AttentionCache
|
14 |
+
from config import Config
|
15 |
+
from visualization.d3_viz import create_d3_visualization
|
16 |
+
|
17 |
+
class TokenVisualizerApp:
    """Application state for the Gradio UI.

    Owns the model handler, the attention cache, and the most recent
    generation result, and converts raw attention data into the
    node/link JSON structure consumed by the D3.js visualization.
    """

    def __init__(self):
        self.config = Config()
        self.model_handler = ModelHandler(config=self.config)
        self.cache = AttentionCache(max_size=self.config.CACHE_SIZE)
        # Payload of the latest generation (tokens, matrices, text);
        # None until the first successful/cached generation.
        self.current_data = None
        self.model_loaded = False

    def load_model(self, model_name: str = None) -> str:
        """Load *model_name* (or the configured default when falsy).

        Returns a human-readable status string for the UI; also records
        success/failure in ``self.model_loaded``.
        """
        if not model_name:
            model_name = self.config.DEFAULT_MODEL

        success, message = self.model_handler.load_model(model_name)
        self.model_loaded = success

        if success:
            model_info = self.model_handler.get_model_info()
            return f"✅ Model loaded: {model_name}\n📊 Parameters: {model_info['num_parameters']:,}\n🖥️ Device: {model_info['device']}"
        else:
            return f"❌ Failed to load model: {message}"

    def generate_and_visualize(
        self,
        prompt: str,
        max_tokens: int,
        threshold: float,
        temperature: float,
        normalization: str,
        progress=gr.Progress()
    ):
        """Generate text with attention capture and populate ``self.current_data``.

        Returns a 1-tuple ``(info_text,)`` on every path so the caller's
        single-value unpack (``info, = ...``) always works.

        BUG FIX: the guard and error paths previously returned 3-tuples
        ``(None, message, None)`` while the success path returned a
        1-tuple; the UI handler unpacked exactly one value, so any error
        raised ``ValueError`` instead of surfacing the message.
        """
        if not self.model_loaded:
            return ("Please load a model first!",)

        if not prompt.strip():
            return ("Please enter a prompt!",)

        progress(0.2, desc="Checking cache...")

        # The key covers everything that affects the generated output.
        cache_key = self.cache.get_key(
            prompt, max_tokens,
            self.model_handler.model_name,
            temperature
        )
        cached = self.cache.get(cache_key)

        if cached:
            progress(0.5, desc="Using cached data...")
            self.current_data = cached
        else:
            progress(0.3, desc="Generating text...")

            attention_data, output_tokens, input_tokens, generated_text = \
                self.model_handler.generate_with_attention(
                    prompt, max_tokens, temperature
                )

            if attention_data is None:
                # On failure generated_text carries the error description.
                return (f"Generation failed: {generated_text}",)

            progress(0.6, desc="Processing attention...")

            # Process attention based on normalization method.
            if normalization == "separate":
                attention_matrices = AttentionProcessor.process_attention_separate(
                    attention_data, input_tokens, output_tokens
                )
            else:
                attention_matrices = AttentionProcessor.process_attention_joint(
                    attention_data, input_tokens, output_tokens
                )

            self.current_data = {
                'input_tokens': input_tokens,
                'output_tokens': output_tokens,
                'attention_matrices': attention_matrices,
                'generated_text': generated_text,
                'attention_data': attention_data  # Keep raw for step updates
            }

            # Cache it for identical future requests.
            self.cache.set(cache_key, self.current_data)

        progress(1.0, desc="Complete!")

        info_text = f"📝 Generated: {self.current_data['generated_text']}\n"
        info_text += f"🔤 Input tokens: {len(self.current_data['input_tokens'])}\n"
        info_text += f"🔤 Output tokens: {len(self.current_data['output_tokens'])}"

        return (info_text,)

    def update_step(self, step_idx: int, threshold: float):
        """No-op placeholder after removing visualization."""
        return None

    def update_threshold(self, threshold: float, normalization: str):
        """No-op placeholder after removing visualization."""
        return None

    def filter_token_connections(self, token_idx: int, token_type: str, threshold: float):
        """Removed visualization; keep placeholder."""
        return None

    def reset_view(self, threshold: float):
        """Removed visualization; keep placeholder."""
        return None

    def on_d3_token_click(self, click_data: str, threshold: float):
        """Removed visualization; keep placeholder for compatibility."""
        return None, gr.update()

    def on_input_token_select(self, token_label: str, threshold: float):
        """Removed visualization; keep placeholder for compatibility."""
        return None

    def prepare_d3_data(self, step_idx: int, threshold: float = 0.01, filter_token: str = None):
        """
        Convert attention data to D3.js-friendly JSON format.

        Args:
            step_idx: Generation step to visualize (0-based)
            threshold: Minimum attention weight to include
            filter_token: Token to filter by (format: "[IN] token" or "[OUT] token" or "All tokens")

        Returns:
            dict: JSON structure with nodes and links for D3.js
        """
        if not self.current_data:
            return {"nodes": [], "links": []}

        input_tokens = self.current_data['input_tokens']
        output_tokens = self.current_data['output_tokens']
        attention_matrices = self.current_data['attention_matrices']

        # Clamp step_idx into range rather than raising.
        if step_idx >= len(attention_matrices):
            step_idx = len(attention_matrices) - 1

        attention_matrix = attention_matrices[step_idx]

        # Build node list: every input token plus output tokens up to step_idx.
        nodes = []

        for i, token in enumerate(input_tokens):
            nodes.append({
                "id": f"input_{i}",
                "token": token,
                "type": "input",
                "index": i
            })

        for i in range(step_idx + 1):
            if i < len(output_tokens):
                nodes.append({
                    "id": f"output_{i}",
                    "token": output_tokens[i],
                    "type": "output",
                    "index": i
                })

        # Parse the "[IN] tok" / "[OUT] tok" filter selection.
        # NOTE(review): matching is by token *text*, so duplicate tokens
        # resolve to the first occurrence — confirm this is acceptable.
        filter_type = None
        filter_idx = None
        if filter_token and filter_token != "All tokens":
            if filter_token.startswith("[IN] "):
                filter_type = "input"
                filter_token_text = filter_token[5:]  # Remove "[IN] " prefix
                filter_idx = next((i for i, token in enumerate(input_tokens) if token == filter_token_text), None)
            elif filter_token.startswith("[OUT] "):
                filter_type = "output"
                filter_token_text = filter_token[6:]  # Remove "[OUT] " prefix
                filter_idx = next((i for i, token in enumerate(output_tokens) if token == filter_token_text), None)

        # Create links from attention matrices - show ALL steps up to current step.
        links = []

        for current_step in range(step_idx + 1):
            if current_step < len(attention_matrices):
                step_attention = attention_matrices[current_step]

                # Links from input tokens to this output token.
                input_attention = step_attention['input_attention']
                if input_attention is not None:
                    for input_idx in range(len(input_tokens)):
                        if input_idx < len(input_attention):  # Check bounds
                            weight = float(input_attention[input_idx])
                            if weight >= threshold:
                                show_link = True
                                if filter_type == "input" and filter_idx is not None:
                                    # Only show connections involving the selected input token.
                                    show_link = (input_idx == filter_idx)
                                elif filter_type == "output" and filter_idx is not None:
                                    # Only show connections involving the selected output token.
                                    show_link = (current_step == filter_idx)

                                if show_link:
                                    links.append({
                                        "source": f"input_{input_idx}",
                                        "target": f"output_{current_step}",
                                        "weight": weight,
                                        "type": "input_to_output"
                                    })

                # Links from previous output tokens to this output token.
                output_attention = step_attention['output_attention']
                if output_attention is not None and current_step > 0:
                    for prev_output_idx in range(current_step):
                        if prev_output_idx < len(output_attention):  # Check bounds
                            weight = float(output_attention[prev_output_idx])
                            if weight >= threshold:
                                show_link = True
                                if filter_type == "input" and filter_idx is not None:
                                    # Don't show output-to-output connections when filtering by input.
                                    show_link = False
                                elif filter_type == "output" and filter_idx is not None:
                                    # Only show connections involving the selected output token.
                                    show_link = (prev_output_idx == filter_idx or current_step == filter_idx)

                                if show_link:
                                    links.append({
                                        "source": f"output_{prev_output_idx}",
                                        "target": f"output_{current_step}",
                                        "weight": weight,
                                        "type": "output_to_output"
                                    })

        return {
            "nodes": nodes,
            "links": links,
            "step": step_idx,
            "total_steps": len(attention_matrices),
            "input_count": len(input_tokens),
            "output_count": step_idx + 1
        }

    def create_d3_visualization_html(self, step_idx: int = 0, threshold: float = 0.01, filter_token: str = None):
        """
        Create D3.js visualization HTML for the current data.

        Args:
            step_idx: Generation step to visualize (0-based)
            threshold: Minimum attention weight to include
            filter_token: Token to filter by (format: "[IN] token" or "[OUT] token")

        Returns:
            str: HTML string for D3.js visualization
        """
        if not self.current_data:
            return "<div>No data available. Generate text first!</div>"

        d3_data = self.prepare_d3_data(step_idx, threshold, filter_token)

        viz_html = create_d3_visualization(d3_data)
        return viz_html

    def get_token_choices(self):
        """
        Get list of token choices for dropdown.

        Returns:
            list: List of token strings for dropdown options
        """
        if not self.current_data:
            return []

        input_tokens = self.current_data['input_tokens']
        output_tokens = self.current_data['output_tokens']

        # Create choices with prefixes to distinguish input/output.
        choices = ["All tokens"]
        choices.extend([f"[IN] {token}" for token in input_tokens])
        choices.extend([f"[OUT] {token}" for token in output_tokens])

        return choices
303 |
+
|
304 |
+
|
305 |
+
def create_gradio_interface():
    """Create the Gradio interface.

    Builds the full Blocks UI (controls on the left, D3 visualization on
    the right), wires the event handlers, and returns the demo object.
    """
    app = TokenVisualizerApp()

    with gr.Blocks(
        title="Token Attention Visualizer",
        # Custom CSS: light/dark header styling plus the orange load button.
        css="""
        /* Default/Light mode styles */
        .main-header {
            text-align: center;
            padding: 2rem 0 3rem 0;
            background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
            border-radius: 1rem;
            margin-bottom: 2rem;
            border: 1px solid #e2e8f0;
        }

        .main-title {
            font-size: 2.5rem;
            font-weight: 700;
            color: #1e293b;
            margin-bottom: 0.5rem;
            background: linear-gradient(135deg, #1e293b 0%, #3b82f6 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }

        .main-subtitle {
            font-size: 1.125rem;
            color: #64748b;
            font-weight: 400;
        }

        .section-title {
            font-size: 1.25rem;
            font-weight: 600;
            color: #1e293b;
            margin-bottom: 1.5rem;
            padding-bottom: 0.5rem;
            border-bottom: 2px solid #e2e8f0;
        }

        /* Explicit light mode overrides */
        .light .main-header,
        [data-theme="light"] .main-header {
            background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
            border: 1px solid #e2e8f0;
        }

        .light .main-title,
        [data-theme="light"] .main-title {
            color: #1e293b;
            background: linear-gradient(135deg, #1e293b 0%, #3b82f6 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }

        .light .main-subtitle,
        [data-theme="light"] .main-subtitle {
            color: #64748b;
        }

        .light .section-title,
        [data-theme="light"] .section-title {
            color: #1e293b;
            border-bottom: 2px solid #e2e8f0;
        }

        /* Dark mode styles with higher specificity */
        .dark .main-header,
        [data-theme="dark"] .main-header {
            background: linear-gradient(135deg, #1e293b 0%, #334155 100%) !important;
            border: 1px solid #475569 !important;
        }

        .dark .main-title,
        [data-theme="dark"] .main-title {
            color: #f1f5f9 !important;
            background: linear-gradient(135deg, #f1f5f9 0%, #60a5fa 100%) !important;
            -webkit-background-clip: text !important;
            -webkit-text-fill-color: transparent !important;
            background-clip: text !important;
        }

        .dark .main-subtitle,
        [data-theme="dark"] .main-subtitle {
            color: #cbd5e1 !important;
        }

        .dark .section-title,
        [data-theme="dark"] .section-title {
            color: #f1f5f9 !important;
            border-bottom: 2px solid #475569 !important;
        }

        /* System dark mode - only apply when no explicit theme is set */
        @media (prefers-color-scheme: dark) {
            :root:not([data-theme="light"]) .main-header {
                background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
                border: 1px solid #475569;
            }

            :root:not([data-theme="light"]) .main-title {
                color: #f1f5f9;
                background: linear-gradient(135deg, #f1f5f9 0%, #60a5fa 100%);
                -webkit-background-clip: text;
                -webkit-text-fill-color: transparent;
                background-clip: text;
            }

            :root:not([data-theme="light"]) .main-subtitle {
                color: #cbd5e1;
            }

            :root:not([data-theme="light"]) .section-title {
                color: #f1f5f9;
                border-bottom: 2px solid #475569;
            }
        }

        .load-model-btn {
            background: linear-gradient(135deg, #f97316 0%, #ea580c 100%) !important;
            color: white !important;
            border: none !important;
            font-weight: 600 !important;
            padding: 0.75rem 2rem !important;
            border-radius: 0.5rem !important;
            box-shadow: 0 4px 6px -1px rgba(249, 115, 22, 0.25) !important;
            transition: all 0.2s ease !important;
        }

        .load-model-btn:hover {
            background: linear-gradient(135deg, #ea580c 0%, #dc2626 100%) !important;
            transform: translateY(-1px) !important;
            box-shadow: 0 6px 8px -1px rgba(249, 115, 22, 0.35) !important;
        }
        """
    ) as demo:
        gr.HTML("""
        <div class="main-header">
            <h1 class="main-title">Token Attention Visualizer</h1>
            <p class="main-subtitle">Interactive visualization of attention patterns in Large Language Models</p>
        </div>
        """)

        with gr.Row():
            # Left Panel - Controls
            with gr.Column(scale=1):
                gr.HTML('<h2 class="section-title">Model & Generation</h2>')

                # Model loading
                model_input = gr.Textbox(
                    label="Model Name",
                    value=app.config.DEFAULT_MODEL,
                    placeholder="Enter Hugging Face model name..."
                )
                load_model_btn = gr.Button("Load Model", variant="primary", elem_classes=["load-model-btn"])

                model_status = gr.Textbox(
                    label="Model Status",
                    value="No model loaded",
                    interactive=False,
                    lines=2
                )

                # Generation controls
                prompt_input = gr.Textbox(
                    label="Prompt",
                    value=app.config.DEFAULT_PROMPT,
                    lines=3,
                    placeholder="Enter your prompt here..."
                )

                max_tokens_input = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=app.config.DEFAULT_MAX_TOKENS,
                    step=1,
                    label="Max Tokens"
                )

                temperature_input = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=app.config.DEFAULT_TEMPERATURE,
                    step=0.1,
                    label="Temperature"
                )

                generate_btn = gr.Button("Generate", variant="primary", size="lg")

                generated_info = gr.Textbox(
                    label="Generation Info",
                    interactive=False,
                    lines=4
                )

                gr.HTML('<h2 class="section-title">Visualization Controls</h2>')

                # Slider maximum is rescaled after each generation (see _generate).
                step_slider = gr.Slider(
                    minimum=0,
                    maximum=10,
                    value=0,
                    step=1,
                    label="Generation Step",
                    info="Navigate through generation steps"
                )

                threshold_slider = gr.Slider(
                    minimum=0.001,
                    maximum=0.5,
                    value=0.01,
                    step=0.001,
                    label="Attention Threshold",
                    info="Filter weak connections"
                )

                # Choices are repopulated after each generation (see _generate).
                token_dropdown = gr.Dropdown(
                    choices=["All tokens"],
                    value="All tokens",
                    label="Filter by Token",
                    info="Select a token to highlight"
                )

            # Right Panel - Visualization
            with gr.Column(scale=2):
                gr.HTML('<h2 class="section-title">Attention Visualization</h2>')

                d3_visualization = gr.HTML(
                    value="""<div style='height: 700px; display: flex; align-items: center; justify-content: center; font-size: 16px;'>
                    <div style='text-align: center;'>
                    <div style='font-size: 3rem; margin-bottom: 16px; opacity: 0.5;'>⚪</div>
                    <div style='font-weight: 500; margin-bottom: 8px;'>Ready to visualize</div>
                    <div>Generate text to see attention patterns</div>
                    </div>
                    </div>"""
                )

                # (Visualization output and overlay removed)

        # Instructions
        # NOTE(review): the help text below mentions a default of
        # "Llama-3.2-1B" and a "Reset View" button; the README says the
        # default model is SmolLM and no Reset button exists in this UI —
        # confirm and update the copy.
        with gr.Accordion("📖 How to Use", open=False):
            gr.Markdown(
                """
                ### Instructions:
                1. **Load a model** from Hugging Face (default: Llama-3.2-1B)
                2. **Enter a prompt** and configure generation settings
                3. **Click Generate** to create text and visualize attention
                4. **Interact with the visualization:**
                   - Use the **step slider** to navigate through generation steps
                   - Adjust the **threshold** to filter weak connections
                   - Click on **tokens** in the plot to filter their connections
                   - Click **Reset View** to show all connections

                ### Understanding the Visualization:
                - **Blue lines**: Attention from input to output tokens
                - **Orange curves**: Attention between output tokens
                - **Line thickness**: Represents attention weight strength
                - **Node colors**: Blue = input tokens, Coral = generated tokens
                """
            )

        # Event handlers
        load_model_btn.click(
            fn=app.load_model,
            inputs=[model_input],
            outputs=[model_status]
        )

        def _generate(prompt, max_tokens, threshold, temperature):
            # NOTE(review): this single-value unpack requires
            # generate_and_visualize to return a 1-tuple on every path,
            # including its guard/error paths — verify.
            info, = app.generate_and_visualize(
                prompt, max_tokens, threshold, temperature, "separate"  # Always use separate normalization
            )

            # Update visualization and dropdown choices
            max_steps = len(app.current_data['attention_matrices']) - 1 if app.current_data else 0
            viz_html = app.create_d3_visualization_html(step_idx=max_steps, threshold=0.01)  # Start with last step
            token_choices = app.get_token_choices()

            return info, viz_html, gr.update(choices=token_choices, value="All tokens"), gr.update(maximum=max_steps, value=max_steps)

        generate_btn.click(
            fn=_generate,
            inputs=[
                prompt_input,
                max_tokens_input,
                gr.State(app.config.DEFAULT_THRESHOLD),  # keep threshold in call but unused
                temperature_input
            ],
            outputs=[generated_info, d3_visualization, token_dropdown, step_slider]
        )

        # Event handlers for visualization controls
        def _update_visualization(step_idx, threshold, filter_token="All tokens"):
            """Update visualization when step or threshold changes."""
            viz_html = app.create_d3_visualization_html(step_idx=int(step_idx), threshold=threshold, filter_token=filter_token)
            return viz_html

        def _filter_by_token(selected_token, step_idx, threshold):
            """Update visualization when token filter changes."""
            viz_html = app.create_d3_visualization_html(step_idx=int(step_idx), threshold=threshold, filter_token=selected_token)
            return viz_html

        # Connect visualization controls
        step_slider.change(
            fn=_update_visualization,
            inputs=[step_slider, threshold_slider, token_dropdown],
            outputs=[d3_visualization]
        )

        threshold_slider.change(
            fn=_update_visualization,
            inputs=[step_slider, threshold_slider, token_dropdown],
            outputs=[d3_visualization]
        )

        token_dropdown.change(
            fn=_filter_by_token,
            inputs=[token_dropdown, step_slider, threshold_slider],
            outputs=[d3_visualization]
        )

        # Load default model on startup
        demo.load(
            fn=app.load_model,
            inputs=[gr.State(app.config.DEFAULT_MODEL)],
            outputs=[model_status]
        )

    return demo
639 |
+
|
640 |
+
if __name__ == "__main__":
    # Report the compute device up front so startup logs show what will be used.
    if torch.cuda.is_available():
        print(f"✅ CUDA available: {torch.cuda.get_device_name(0)}")
    else:
        print("⚠️ CUDA not available, using CPU")

    # Create and launch the app
    demo = create_gradio_interface()
    # NOTE: explicit launch configuration kept below for reference (disabled).
    """ demo.launch(
        share=False, # Set to True for public URL
        server_name="0.0.0.0", # Allow external connections
        server_port=7860, # Default Gradio port
        inbrowser=False # Don't auto-open browser
    ) """

    demo.launch()
|
claude.md
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Claude Code Instructions - Token Attention Visualizer
|
2 |
+
|
3 |
+
## Project Overview
|
4 |
+
You are helping to build a Token Attention Visualizer - a web-based tool that visualizes attention weights in Large Language Models (LLMs) during text generation. The tool shows how input tokens influence the generation of output tokens through interactive visualizations.
|
5 |
+
|
6 |
+
## Core Functionality
|
7 |
+
1. Accept a text prompt and generate tokens using a Llama model
|
8 |
+
2. Extract and process attention matrices from the model
|
9 |
+
3. Create an interactive visualization showing token relationships
|
10 |
+
4. Allow users to click tokens to filter connections
|
11 |
+
5. Provide step-by-step navigation through the generation process
|
12 |
+
|
13 |
+
## Tech Stack
|
14 |
+
- **Backend**: FastAPI
|
15 |
+
- **Frontend**: Gradio (for easy Hugging Face Spaces deployment)
|
16 |
+
- **Visualization**: Plotly (interactive graphs)
|
17 |
+
- **ML**: Transformers, PyTorch
|
18 |
+
- **Models**: Llama models (1B-3B range)
|
19 |
+
|
20 |
+
## Project Structure
|
21 |
+
```
|
22 |
+
token-attention-viz/
|
23 |
+
├── app.py # Main Gradio application
|
24 |
+
├── api/
|
25 |
+
│ ├── __init__.py
|
26 |
+
│ ├── server.py # FastAPI endpoints (optional)
|
27 |
+
│ └── models.py # Pydantic models
|
28 |
+
├── core/
|
29 |
+
│ ├── __init__.py
|
30 |
+
│ ├── model_handler.py # Model loading and generation
|
31 |
+
│ ├── attention.py # Attention processing
|
32 |
+
│ └── cache.py # Caching logic
|
33 |
+
├── visualization/
|
34 |
+
│ ├── __init__.py
|
35 |
+
│ ├── plotly_viz.py # Plotly visualization
|
36 |
+
│ └── utils.py # Token cleaning utilities
|
37 |
+
├── requirements.txt
|
38 |
+
└── config.py # Configuration settings
|
39 |
+
```
|
40 |
+
|
41 |
+
## Implementation Guidelines
|
42 |
+
|
43 |
+
### Critical Code to Preserve from Original Implementation
|
44 |
+
|
45 |
+
1. **Model Loading Logic**:
|
46 |
+
- Device and dtype detection based on GPU capability
|
47 |
+
- Pad token handling for models without it
|
48 |
+
- Error handling for model loading
|
49 |
+
|
50 |
+
2. **Attention Extraction**:
|
51 |
+
- BOS token removal from visualization
|
52 |
+
- EOS token handling
|
53 |
+
- Attention matrix extraction with proper indexing
|
54 |
+
|
55 |
+
3. **Token Cleaning Function**:
|
56 |
+
```python
|
57 |
+
def clean_label(token):
|
58 |
+
label = str(token)
|
59 |
+
label = label.replace('Ġ', ' ')
|
60 |
+
label = label.replace('▁', ' ')
|
61 |
+
label = label.replace('Ċ', '\\n')
|
62 |
+
label = label.replace('</s>', '[EOS]')
|
63 |
+
label = label.replace('<unk>', '[UNK]')
|
64 |
+
label = label.replace('<|begin_of_text|>', '[BOS]')
|
65 |
+
label = label.replace('<|end_of_text|>', '[EOS]')
|
66 |
+
label = re.sub(r'<0x[0-9A-Fa-f]{2}>', '', label)
|
67 |
+
return label.strip() if label.strip() else "[EMPTY]"
|
68 |
+
```
|
69 |
+
|
70 |
+
4. **Attention Processing with Separate Normalization**:
|
71 |
+
- Layer averaging across heads and layers
|
72 |
+
- Separate normalization for input and output attention
|
73 |
+
- Epsilon handling (1e-8) to avoid division by zero
|
74 |
+
|
75 |
+
5. **Interactive Features**:
|
76 |
+
- Token click handling to show specific connections
|
77 |
+
- Reset selection functionality
|
78 |
+
- Step-by-step navigation
|
79 |
+
- "All Connections" view
|
80 |
+
|
81 |
+
### Key Implementation Details
|
82 |
+
|
83 |
+
#### Model Handler (`core/model_handler.py`)
|
84 |
+
- Use `unsloth/Llama-3.2-1B-Instruct` as default model
|
85 |
+
- Implement proper device detection (CUDA if available)
|
86 |
+
- Use bfloat16 for GPUs with compute capability >= 8.0
|
87 |
+
- Generate with `output_attentions=True` and `return_dict_in_generate=True`
|
88 |
+
|
89 |
+
#### Attention Processing (`core/attention.py`)
|
90 |
+
- Extract attention for each generation step
|
91 |
+
- Average across all layers and heads
|
92 |
+
- Apply separate normalization (input and output attention normalized independently)
|
93 |
+
- Handle edge cases (first token has no output-to-output attention)
|
94 |
+
|
95 |
+
#### Visualization (`visualization/plotly_viz.py`)
|
96 |
+
- **Layout**:
|
97 |
+
- Input tokens on left (x=0.1)
|
98 |
+
- Output tokens on right (x=0.9)
|
99 |
+
- Use linspace for y-coordinates
|
100 |
+
- **Connections**:
|
101 |
+
- Blue lines for input→output attention
|
102 |
+
- Orange curved lines for output→output attention
|
103 |
+
- Line thickness proportional to attention weight
|
104 |
+
- Only show connections above threshold
|
105 |
+
- **Interactivity**:
|
106 |
+
- Click on any token to filter connections
|
107 |
+
- Highlight selected token in yellow
|
108 |
+
- Show previously generated tokens in pink
|
109 |
+
- Current generating token in coral
|
110 |
+
|
111 |
+
#### Gradio Interface (`app.py`)
|
112 |
+
- **Input Controls**:
|
113 |
+
- Text area for prompt
|
114 |
+
- Slider for max tokens (1-50)
|
115 |
+
- Slider for attention threshold (0.0-0.2, step 0.001)
|
116 |
+
- **Visualization Controls**:
|
117 |
+
- Step slider for navigation
|
118 |
+
- Reset Selection button
|
119 |
+
- Show All Connections button
|
120 |
+
- **Display**:
|
121 |
+
- Generated text output
|
122 |
+
- Interactive Plotly graph
|
123 |
+
|
124 |
+
### Performance Optimizations
|
125 |
+
|
126 |
+
1. **Caching**:
|
127 |
+
- Cache generated attention matrices by prompt+max_tokens hash
|
128 |
+
- LRU cache with configurable size (default 10)
|
129 |
+
- Store processed attention, not raw tensors
|
130 |
+
|
131 |
+
2. **Lazy Updates**:
|
132 |
+
- Only update changed traces when stepping through
|
133 |
+
- Don't recreate entire plot on threshold change
|
134 |
+
- Use Plotly's batch_update for multiple changes
|
135 |
+
|
136 |
+
3. **Memory Management**:
|
137 |
+
- Clear raw attention tensors after processing
|
138 |
+
- Convert to CPU tensors for storage
|
139 |
+
- Use float32 instead of original dtype for visualization
|
140 |
+
|
141 |
+
### Configuration (`config.py`)
|
142 |
+
```python
|
143 |
+
DEFAULT_MODEL = "unsloth/Llama-3.2-1B-Instruct"
|
144 |
+
DEFAULT_PROMPT = "The old wizard walked through the forest"
|
145 |
+
DEFAULT_MAX_TOKENS = 20
|
146 |
+
DEFAULT_THRESHOLD = 0.05
|
147 |
+
MIN_LINE_WIDTH = 0.5
|
148 |
+
MAX_LINE_WIDTH = 3.0
|
149 |
+
PLOT_WIDTH = 1000
|
150 |
+
PLOT_HEIGHT = 600
|
151 |
+
```
|
152 |
+
|
153 |
+
### Deployment Preparation
|
154 |
+
|
155 |
+
For Hugging Face Spaces deployment:
|
156 |
+
1. Create proper `requirements.txt` with pinned versions
|
157 |
+
2. Add `README.md` with Spaces metadata
|
158 |
+
3. Ensure model downloads work in Spaces environment
|
159 |
+
4. Set appropriate memory/GPU requirements
|
160 |
+
|
161 |
+
## Testing Instructions
|
162 |
+
|
163 |
+
1. **Basic Functionality**:
|
164 |
+
- Test with default prompt
|
165 |
+
- Verify attention matrices are extracted correctly
|
166 |
+
- Check visualization renders properly
|
167 |
+
|
168 |
+
2. **Interactive Features**:
|
169 |
+
- Click on input tokens - should show only their connections to outputs
|
170 |
+
- Click on output tokens - should show incoming connections
|
171 |
+
- Reset button should clear selection
|
172 |
+
- Step slider should navigate through generation
|
173 |
+
|
174 |
+
3. **Edge Cases**:
|
175 |
+
- Empty prompt
|
176 |
+
- Single token generation
|
177 |
+
- Very long prompts (>100 tokens)
|
178 |
+
- High/low threshold values
|
179 |
+
|
180 |
+
## Development Workflow
|
181 |
+
|
182 |
+
1. Start by implementing the model handler and verify generation works
|
183 |
+
2. Add attention extraction and processing
|
184 |
+
3. Create basic visualization without interactivity
|
185 |
+
4. Add interactive features one by one
|
186 |
+
5. Implement caching
|
187 |
+
6. Create Gradio interface
|
188 |
+
7. Test and optimize performance
|
189 |
+
8. Prepare for deployment
|
190 |
+
|
191 |
+
## Important Notes
|
192 |
+
|
193 |
+
- Preserve the token cleaning logic exactly as it handles special tokens
|
194 |
+
- Keep the BOS token removal logic for cleaner visualization
|
195 |
+
- Maintain separate normalization (not joint) for attention weights
|
196 |
+
- Ensure CUDA memory is properly managed to avoid OOM errors
|
197 |
+
- Test with different model sizes based on available GPU memory
|
198 |
+
|
199 |
+
## Common Issues and Solutions
|
200 |
+
|
201 |
+
1. **CUDA OOM**: Reduce batch size or use smaller model
|
202 |
+
2. **Slow Generation**: Enable GPU, use smaller model, or implement streaming
|
203 |
+
3. **Visualization Lag**: Reduce number of traces, implement virtualization
|
204 |
+
4. **Cache Misses**: Normalize prompt formatting before hashing
|
205 |
+
|
206 |
+
When implementing, prioritize functionality over optimization initially. Get the core visualization working first, then add caching and performance improvements.
|
config.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
@dataclass
class Config:
    """Application-wide settings for the Token Attention Visualizer.

    Grouped into model, generation, visualization, cache and UI sections.
    A default ``Config()`` instance carries the shipped defaults.
    """

    # Model settings
    DEFAULT_MODEL: str = "HuggingFaceTB/SmolLM-135M-Instruct"  # HF hub id loaded on startup
    DEVICE: str = "cpu"  # Force CPU usage (ModelHandler honours this even when CUDA exists)

    # Generation settings
    DEFAULT_MAX_TOKENS: int = 20
    DEFAULT_PROMPT: str = "The old wizard walked through the forest when he"
    DEFAULT_TEMPERATURE: float = 0.7
    DEFAULT_TOP_P: float = 0.95

    # Visualization settings
    DEFAULT_THRESHOLD: float = 0.05  # minimum attention weight drawn as a connection
    MIN_LINE_WIDTH: float = 0.5
    MAX_LINE_WIDTH: float = 3.0

    # Colors
    INPUT_COLOR: str = "skyblue"   # prompt-token nodes
    OUTPUT_COLOR: str = "coral"    # generated-token nodes
    CONNECTION_COLOR: str = "rgba(128, 128, 128, 0.3)"

    # Cache settings
    CACHE_SIZE: int = 10  # Number of generations to cache

    # UI settings
    PLOT_WIDTH: int = 1000
    PLOT_HEIGHT: int = 600

    # Node settings
    NODE_SIZE: int = 15
    NODE_LINE_WIDTH: float = 2

    # Font settings
    FONT_SIZE: int = 10
    FONT_FAMILY: str = "Arial, sans-serif"
|
core/__init__.py
ADDED
File without changes
|
core/attention.py
ADDED
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
from typing import List, Dict, Any, Optional
|
4 |
+
|
5 |
+
class AttentionProcessor:
    """Converts raw per-step generation attentions into per-token weight vectors.

    All methods expect an ``attention_data`` dict with keys:
      - ``'attentions'``: one entry per generated token; each entry is a
        sequence of per-layer tensors shaped (batch, heads, 1, seq_len)
      - ``'input_len_for_attention'``: number of prompt tokens (BOS excluded)
      - ``'output_len'``: number of generated tokens to report

    The ``process_*`` methods return one dict per generation step with
    ``'input_attention'`` (weights over prompt tokens) and
    ``'output_attention'`` (weights over previously generated tokens;
    ``None`` at step 0).
    """

    @staticmethod
    def _empty_steps(input_len: int, output_len: int) -> List[Dict[str, torch.Tensor]]:
        """Zero-filled result used when no attentions were captured."""
        return [{'input_attention': torch.zeros(input_len),
                 'output_attention': None} for _ in range(output_len)]

    @staticmethod
    def _average_step_attention(step_attentions, step_idx: int, input_len: int):
        """Average one step's attention across all layers and heads.

        Shared by both process_* methods (previously duplicated in each).
        Defensive: a layer whose sequence dimension is too short, or that
        raises, contributes exactly one zero row per list — the earlier
        duplicated loops could append a second zero row for the input list
        when an exception fired after the input append, skewing the average.

        Returns ``(avg_input, avg_output)``; ``avg_output`` is None at step 0.
        """
        input_layers = []
        output_layers = []
        input_stop = 1 + input_len  # key position 0 is BOS and is skipped

        for layer_idx, layer_attn in enumerate(step_attentions):
            heads = layer_attn.shape[1]
            in_vec = None
            out_vec = None
            try:
                # Attention from the token being generated (query position 0).
                if layer_attn.shape[3] >= input_stop:
                    in_vec = layer_attn[0, :, 0, 1:input_stop]
                    # Attention to previously generated tokens.
                    if step_idx > 0:
                        output_stop = input_stop + step_idx
                        if layer_attn.shape[3] >= output_stop:
                            out_vec = layer_attn[0, :, 0, input_stop:output_stop]
            except Exception as e:
                print(f"Error processing attention at step {step_idx}, layer {layer_idx}: {e}")
                in_vec = None
                out_vec = None

            # Exactly one append per layer per list keeps stack shapes aligned.
            input_layers.append(
                in_vec if in_vec is not None else
                torch.zeros((heads, input_len), device=layer_attn.device))
            if step_idx > 0:
                output_layers.append(
                    out_vec if out_vec is not None else
                    torch.zeros((heads, step_idx), device=layer_attn.device))

        if input_layers:
            # Mean over layers (stack dim 0) and heads (dim 1).
            avg_input = torch.mean(torch.stack(input_layers).float(), dim=[0, 1])
        else:
            avg_input = torch.zeros(input_len)

        avg_output = None
        if step_idx > 0:
            if output_layers:
                avg_output = torch.mean(torch.stack(output_layers).float(), dim=[0, 1])
            else:
                avg_output = torch.zeros(step_idx)

        return avg_input, avg_output

    @staticmethod
    def process_attention_separate(
        attention_data: Dict[str, Any],
        input_tokens: List[str],
        output_tokens: List[str]
    ) -> List[Dict[str, torch.Tensor]]:
        """Process attention with separate normalization for input and output.

        Input and output attention are each normalized to sum to 1 on their
        own, preserving the relative importance *within* each group.  Raw
        (unnormalized) averages are also returned for analysis.
        """
        attentions = attention_data['attentions']
        input_len = attention_data['input_len_for_attention']
        output_len = attention_data['output_len']

        if not attentions:
            return AttentionProcessor._empty_steps(input_len, output_len)

        epsilon = 1e-8  # guards against division by zero
        matrices = []

        for i in range(min(len(attentions), output_len)):
            avg_in, avg_out = AttentionProcessor._average_step_attention(
                attentions[i], i, input_len)

            normalized_in = avg_in / (avg_in.sum() + epsilon)
            normalized_out = None
            if i > 0 and avg_out is not None:
                normalized_out = avg_out / (avg_out.sum() + epsilon)

            matrices.append({
                'input_attention': normalized_in.cpu(),
                'output_attention': normalized_out.cpu() if normalized_out is not None else None,
                'raw_input_attention': avg_in.cpu(),  # kept raw for analysis
                'raw_output_attention': avg_out.cpu() if avg_out is not None else None
            })

        # Pad with zeros if generation produced fewer steps than output_len.
        while len(matrices) < output_len:
            matrices.append({
                'input_attention': torch.zeros(input_len),
                'output_attention': None,
                'raw_input_attention': torch.zeros(input_len),
                'raw_output_attention': None
            })

        return matrices

    @staticmethod
    def process_attention_joint(
        attention_data: Dict[str, Any],
        input_tokens: List[str],
        output_tokens: List[str]
    ) -> List[Dict[str, torch.Tensor]]:
        """Process attention with joint normalization across input and output.

        Input and output attention are concatenated and normalized together,
        preserving the relative importance *across* all tokens.
        """
        attentions = attention_data['attentions']
        input_len = attention_data['input_len_for_attention']
        output_len = attention_data['output_len']

        if not attentions:
            return AttentionProcessor._empty_steps(input_len, output_len)

        epsilon = 1e-8
        matrices = []

        for i in range(min(len(attentions), output_len)):
            avg_in, avg_out = AttentionProcessor._average_step_attention(
                attentions[i], i, input_len)

            if i > 0 and avg_out is not None:
                # Normalize the concatenated vector, then split it back.
                combined = torch.cat([avg_in, avg_out])
                normalized = combined / (combined.sum() + epsilon)
                normalized_in = normalized[:input_len]
                normalized_out = normalized[input_len:]
            else:
                # Step 0: only input attention exists.
                normalized_in = avg_in / (avg_in.sum() + epsilon)
                normalized_out = None

            matrices.append({
                'input_attention': normalized_in.cpu(),
                'output_attention': normalized_out.cpu() if normalized_out is not None else None
            })

        while len(matrices) < output_len:
            matrices.append({
                'input_attention': torch.zeros(input_len),
                'output_attention': None
            })

        return matrices

    @staticmethod
    def extract_attention_for_step(
        attention_data: Dict[str, Any],
        step: int,
        input_len: int
    ) -> Dict[str, torch.Tensor]:
        """Extract normalized attention for a single generation step.

        Lighter-weight than the process_* methods: layers with too-short
        sequences are simply skipped (no zero padding, no error trapping),
        and no raw averages are returned.
        """
        attentions = attention_data['attentions']

        if step >= len(attentions):
            return {'input_attention': torch.zeros(input_len),
                    'output_attention': None}

        input_layers = []
        output_layers = []
        input_stop = 1 + input_len  # skip BOS at key position 0

        for layer_attn in attentions[step]:
            if layer_attn.shape[3] >= input_stop:
                input_layers.append(layer_attn[0, :, 0, 1:input_stop])

            if step > 0:
                output_stop = input_stop + step
                if layer_attn.shape[3] >= output_stop:
                    output_layers.append(layer_attn[0, :, 0, input_stop:output_stop])

        if input_layers:
            avg_in = torch.mean(torch.stack(input_layers).float(), dim=[0, 1])
            normalized_in = avg_in / (avg_in.sum() + 1e-8)
        else:
            normalized_in = torch.zeros(input_len)

        normalized_out = None
        if step > 0 and output_layers:
            avg_out = torch.mean(torch.stack(output_layers).float(), dim=[0, 1])
            normalized_out = avg_out / (avg_out.sum() + 1e-8)

        return {'input_attention': normalized_in.cpu(),
                'output_attention': normalized_out.cpu() if normalized_out is not None else None}
|
core/cache.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Any, Optional
|
2 |
+
import hashlib
|
3 |
+
import json
|
4 |
+
import torch
|
5 |
+
import pickle
|
6 |
+
import io
|
7 |
+
|
8 |
+
class AttentionCache:
    """In-memory LRU cache for processed generation/attention results.

    Values are stored pickled, with torch tensors inside attention-matrix
    lists converted to CPU numpy arrays, so cached entries do not pin GPU
    memory; they are rehydrated to torch tensors on ``get``.
    """

    def __init__(self, max_size: int = 10):
        self.cache = {}          # key -> pickled payload (bytes)
        self.access_order = []   # keys ordered least- to most-recently used
        self.max_size = max_size

    def get_key(self, prompt: str, max_tokens: int, model: str, temperature: float = 0.7) -> str:
        """Generate a deterministic cache key from the generation parameters."""
        data = f"{prompt}_{max_tokens}_{model}_{temperature}"
        return hashlib.md5(data.encode()).hexdigest()

    def get(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached entry for `key` (marking it recently used), or None."""
        if key not in self.cache:
            return None
        # Refresh LRU position.
        self.access_order.remove(key)
        self.access_order.append(key)
        return self._deserialize(self.cache[key])

    def set(self, key: str, data: Dict[str, Any]):
        """Store `data` under `key`, evicting the least-recently-used entry if full.

        Overwriting an existing key only refreshes its LRU position; it never
        triggers an eviction.  (The previous implementation evicted an
        unrelated entry on overwrite and left a duplicate key in
        ``access_order``, corrupting later LRU bookkeeping.)
        """
        if key in self.cache:
            self.access_order.remove(key)
        elif len(self.cache) >= self.max_size:
            oldest = self.access_order.pop(0)
            del self.cache[oldest]

        self.cache[key] = self._serialize(data)
        self.access_order.append(key)

    def _serialize(self, data: Dict[str, Any]) -> bytes:
        """Pickle `data`, converting torch tensors in attention lists to numpy.

        Detection mirrors the original heuristic: only lists whose *first*
        dict element contains a tensor are converted.
        """
        serialized = {}
        for key, value in data.items():
            if (isinstance(value, list) and value
                    and isinstance(value[0], dict)
                    and any(isinstance(v, torch.Tensor) for v in value[0].values())):
                serialized[key] = [
                    {k: (v.cpu().numpy() if isinstance(v, torch.Tensor) else v)
                     for k, v in item.items()}
                    for item in value
                ]
            else:
                serialized[key] = value

        buffer = io.BytesIO()
        pickle.dump(serialized, buffer)
        return buffer.getvalue()

    def _deserialize(self, data: bytes) -> Dict[str, Any]:
        """Unpickle a cached payload, converting numpy arrays back to tensors."""
        import numpy as np
        deserialized = pickle.load(io.BytesIO(data))

        for value in deserialized.values():
            if isinstance(value, list) and value and isinstance(value[0], dict):
                for item in value:
                    for k, v in item.items():
                        if isinstance(v, np.ndarray):
                            item[k] = torch.from_numpy(v)

        return deserialized

    def clear(self):
        """Drop all cached entries."""
        self.cache.clear()
        self.access_order.clear()

    def size(self) -> int:
        """Number of entries currently cached."""
        return len(self.cache)
|
core/model_handler.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
+
from typing import Tuple, Optional, List, Dict, Any
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
warnings.filterwarnings("ignore", category=UserWarning, module='transformers.generation')
|
7 |
+
|
8 |
+
class ModelHandler:
    """Wraps loading of a Hugging Face causal LM and attention-capturing generation.

    Responsibilities visible in this class:
    - lazy model/tokenizer loading with device and dtype selection
      (``load_model``),
    - text generation that also returns per-step attention tensors
      (``generate_with_attention``),
    - lightweight introspection of the loaded model (``get_model_info``).
    """

    def __init__(self, model_name: Optional[str] = None, config=None):
        # Model and tokenizer stay None until load_model() succeeds.
        self.model = None
        self.tokenizer = None
        # Torch device string ("cuda" / "cpu"); resolved in load_model().
        self.device = None
        self.model_name = model_name
        # Optional config object; only its DEVICE attribute is read here.
        self.config = config

    def load_model(self, model_name: Optional[str] = None) -> Tuple[bool, str]:
        """Load model with optimized settings.

        Args:
            model_name: Hub id or local path; overrides the one given to
                ``__init__`` when provided.

        Returns:
            ``(success, message)`` — never raises; all failures are folded
            into a ``(False, "Error ...")`` tuple.
        """
        if model_name:
            self.model_name = model_name

        if not self.model_name:
            return False, "No model name provided"

        try:
            print(f"Loading model: {self.model_name}...")

            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            # Determine device and dtype
            if self.config and hasattr(self.config, 'DEVICE'):
                self.device = self.config.DEVICE
                # If config specifies CPU, force it even if CUDA is available
                if self.device == "cpu":
                    print("Forcing CPU usage as specified in config")
                elif self.device == "cuda" and not torch.cuda.is_available():
                    print("CUDA requested but not available, falling back to CPU")
                    self.device = "cpu"
            else:
                # Fallback to auto-detection if no config provided
                self.device = "cuda" if torch.cuda.is_available() else "cpu"

            # Use bfloat16 for Ampere GPUs (compute capability >= 8.0), otherwise float32
            if self.device == "cuda" and torch.cuda.is_available():
                capability = torch.cuda.get_device_capability()
                if capability[0] >= 8:
                    dtype = torch.bfloat16
                else:
                    dtype = torch.float32
            else:
                dtype = torch.float32

            # Load model. Eager attention is required because SDPA/flash
            # implementations do not return attention weights.
            # Fallback chain: (dtype + eager) -> (eager only) -> defaults.
            try:
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    torch_dtype=dtype,
                    attn_implementation="eager"  # Force eager attention for attention extraction
                ).to(self.device)
                print(f"Model loaded on {self.device} with dtype {dtype} (eager attention)")
            except Exception as e:
                print(f"Error loading model with specific dtype: {e}")
                print("Attempting to load without specific dtype...")
                try:
                    self.model = AutoModelForCausalLM.from_pretrained(
                        self.model_name,
                        attn_implementation="eager"
                    ).to(self.device)
                    print(f"Model loaded on {self.device} (default dtype, eager attention)")
                except Exception as e2:
                    print(f"Error with eager attention: {e2}")
                    print("Loading with default settings...")
                    # NOTE(review): with a non-eager implementation,
                    # generate_with_attention will likely get no attentions.
                    self.model = AutoModelForCausalLM.from_pretrained(self.model_name).to(self.device)
                    print(f"Model loaded on {self.device} (default settings)")

            # Handle pad token: many causal LMs ship without one, and
            # generate() needs a pad id for batching/warnings.
            if self.tokenizer.pad_token is None:
                if self.tokenizer.eos_token:
                    print("Setting pad_token to eos_token")
                    self.tokenizer.pad_token = self.tokenizer.eos_token
                    if hasattr(self.model.config, 'pad_token_id') and self.model.config.pad_token_id is None:
                        self.model.config.pad_token_id = self.tokenizer.eos_token_id
                else:
                    print("Warning: No eos_token found to set as pad_token.")

            return True, f"Model loaded successfully on {self.device}"

        except Exception as e:
            return False, f"Error loading model: {str(e)}"

    def generate_with_attention(
        self,
        prompt: str,
        max_tokens: int = 30,
        temperature: float = 0.7,
        top_p: float = 0.95
    ) -> Tuple[Optional[Dict[str, Any]], List[str], List[str], str]:
        """Generate text and capture attention weights.

        Args:
            prompt: Raw text to condition generation on.
            max_tokens: Maximum number of new tokens to generate.
            temperature: Sampling temperature; sampling is disabled when 0.
            top_p: Nucleus-sampling probability mass.

        Returns:
            ``(attention_info, output_tokens, input_tokens, generated_text)``.
            ``attention_info`` is a dict with keys ``'attentions'`` (the raw
            per-step attentions from ``model.generate``), ``'input_len_for_attention'``
            and ``'output_len'`` — or ``None`` when the model is not loaded,
            generation fails, or no attentions were returned. In the failure
            cases the last tuple element carries the error message instead of
            generated text.
        """
        if not self.model or not self.tokenizer:
            return None, [], [], "Model not loaded"

        # Encode input
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device)
        input_len_raw = input_ids.shape[1]

        print(f"Generating with input length: {input_len_raw}, max_new_tokens: {max_tokens}")

        # Generate with attention
        with torch.no_grad():
            attention_mask = torch.ones_like(input_ids)
            gen_kwargs = {
                "attention_mask": attention_mask,
                "max_new_tokens": max_tokens,
                "output_attentions": True,
                "return_dict_in_generate": True,
                "temperature": temperature,
                "top_p": top_p,
                # Greedy decoding when temperature == 0, sampling otherwise.
                "do_sample": temperature > 0
            }

            if self.tokenizer.pad_token_id is not None:
                gen_kwargs["pad_token_id"] = self.tokenizer.pad_token_id

            try:
                output = self.model.generate(input_ids, **gen_kwargs)
            except Exception as e:
                print(f"Error during generation: {e}")
                return None, [], [], f"Error during generation: {str(e)}"

        # Extract generated tokens (everything after the prompt).
        full_sequence = output.sequences[0]
        if full_sequence.shape[0] > input_len_raw:
            generated_ids = full_sequence[input_len_raw:]
        else:
            # Nothing was generated (e.g. immediate EOS).
            generated_ids = torch.tensor([], dtype=torch.long, device=self.device)

        # Convert to tokens
        output_tokens = self.tokenizer.convert_ids_to_tokens(generated_ids, skip_special_tokens=False)
        input_tokens_raw = self.tokenizer.convert_ids_to_tokens(input_ids[0], skip_special_tokens=False)

        # Handle BOS token removal from visualization; the fallback literal
        # is the Llama-3-style marker — presumably for tokenizers that have
        # no bos_token attribute set (TODO confirm against target models).
        input_tokens = input_tokens_raw
        input_len_for_attention = input_len_raw
        bos_token = self.tokenizer.bos_token or '<|begin_of_text|>'

        if input_tokens_raw and input_tokens_raw[0] == bos_token:
            input_tokens = input_tokens_raw[1:]
            input_len_for_attention = input_len_raw - 1

        # Handle EOS token removal (only a single trailing EOS is stripped).
        eos_token = self.tokenizer.eos_token or '<|end_of_text|>'
        if output_tokens and output_tokens[-1] == eos_token:
            output_tokens = output_tokens[:-1]
            generated_ids = generated_ids[:-1]

        # Decode generated text
        generated_text = self.tokenizer.decode(generated_ids, skip_special_tokens=True)

        # Extract attention weights; absent when the attention implementation
        # does not expose them (see the load_model fallback chain).
        attentions = getattr(output, 'attentions', None)
        if attentions is None:
            print("Warning: 'attentions' not found in model output. Cannot visualize attention.")
            return None, output_tokens, input_tokens, generated_text

        # Return raw attention, tokens, and metadata
        return {
            'attentions': attentions,
            'input_len_for_attention': input_len_for_attention,
            'output_len': len(output_tokens)
        }, output_tokens, input_tokens, generated_text

    def get_model_info(self) -> Dict[str, Any]:
        """Get information about the loaded model.

        Returns:
            ``{"loaded": False}`` when no model is loaded, otherwise a dict
            with name, device, parameter count, dtype and vocab size.
        """
        if not self.model:
            return {"loaded": False}

        return {
            "loaded": True,
            "model_name": self.model_name,
            "device": str(self.device),
            "num_parameters": sum(p.numel() for p in self.model.parameters()),
            # dtype of the first parameter; assumes a uniform-dtype model.
            "dtype": str(next(self.model.parameters()).dtype),
            "vocab_size": self.tokenizer.vocab_size if self.tokenizer else 0
        }
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch>=2.0.0
|
2 |
+
transformers>=4.30.0
|
3 |
+
gradio>=4.0.0
|
4 |
+
plotly>=5.14.0
|
5 |
+
numpy>=1.24.0
|
6 |
+
accelerate>=0.20.0
|
7 |
+
sentencepiece>=0.1.99
|
8 |
+
protobuf>=3.20.0
|
visualization/__init__.py
ADDED
File without changes
|
visualization/d3_viz.py
ADDED
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
D3.js visualization module for interactive token attention visualization.
|
3 |
+
"""
|
4 |
+
|
5 |
+
def create_d3_visualization(data):
|
6 |
+
"""
|
7 |
+
Generate a complete, self-contained HTML string with embedded D3.js visualization.
|
8 |
+
|
9 |
+
Args:
|
10 |
+
data (dict): JSON structure with nodes and links from prepare_d3_data()
|
11 |
+
|
12 |
+
Returns:
|
13 |
+
str: Complete HTML string with embedded D3.js, CSS, and JavaScript
|
14 |
+
"""
|
15 |
+
|
16 |
+
# Get nodes by type
|
17 |
+
input_nodes = [node for node in data.get('nodes', []) if node.get('type') == 'input']
|
18 |
+
output_nodes = [node for node in data.get('nodes', []) if node.get('type') == 'output']
|
19 |
+
links = data.get('links', [])
|
20 |
+
|
21 |
+
# SVG dimensions
|
22 |
+
width = 800
|
23 |
+
height = max(400, max(len(input_nodes), len(output_nodes)) * 50 + 100)
|
24 |
+
|
25 |
+
# Calculate positions
|
26 |
+
input_x = 100
|
27 |
+
output_x = width - 100
|
28 |
+
|
29 |
+
# Position nodes vertically
|
30 |
+
def get_y_pos(index, total):
|
31 |
+
if total <= 1:
|
32 |
+
return height // 2
|
33 |
+
return 80 + (index * (height - 160)) / (total - 1)
|
34 |
+
|
35 |
+
# Start building SVG
|
36 |
+
svg_html = f"""
|
37 |
+
<div style='display: flex; flex-direction: column; align-items: center; border: 1px solid #ddd; padding: 20px; margin: 10px; background: white; border-radius: 8px;'>
|
38 |
+
<div style='text-align: center; margin-bottom: 15px;'>
|
39 |
+
<h3 style='margin: 0; color: #333;'>Token Attention Visualization</h3>
|
40 |
+
<p style='margin: 5px 0; color: #666;'>Step {data.get('step', 0) + 1} | {len(input_nodes)} input → {len(output_nodes)} output | {len(links)} connections</p>
|
41 |
+
</div>
|
42 |
+
|
43 |
+
<svg width="{width}" height="{height}" style='border: 1px solid #eee; background: #fafafa; display: block;'>
|
44 |
+
<!-- Background grid -->
|
45 |
+
<defs>
|
46 |
+
<pattern id="grid" width="20" height="20" patternUnits="userSpaceOnUse">
|
47 |
+
<path d="M 20 0 L 0 0 0 20" fill="none" stroke="#f0f0f0" stroke-width="1"/>
|
48 |
+
</pattern>
|
49 |
+
</defs>
|
50 |
+
<rect width="100%" height="100%" fill="url(#grid)" />
|
51 |
+
|
52 |
+
<!-- Column headers -->
|
53 |
+
<text x="{input_x}" y="30" text-anchor="middle" font-size="16" font-weight="bold" fill="#4285f4">Input Tokens</text>
|
54 |
+
<text x="{output_x}" y="30" text-anchor="middle" font-size="16" font-weight="bold" fill="#ea4335">Output Tokens</text>
|
55 |
+
"""
|
56 |
+
|
57 |
+
# Draw connections first (so they appear behind nodes)
|
58 |
+
for link in links:
|
59 |
+
# Find source and target nodes
|
60 |
+
source_node = next((n for n in input_nodes + output_nodes if n['id'] == link['source']), None)
|
61 |
+
target_node = next((n for n in input_nodes + output_nodes if n['id'] == link['target']), None)
|
62 |
+
|
63 |
+
if source_node and target_node:
|
64 |
+
# Get positions
|
65 |
+
if source_node['type'] == 'input':
|
66 |
+
source_idx = next((i for i, n in enumerate(input_nodes) if n['id'] == source_node['id']), 0)
|
67 |
+
source_y = get_y_pos(source_idx, len(input_nodes))
|
68 |
+
source_x_pos = input_x + 20 # Offset from center of node
|
69 |
+
else:
|
70 |
+
source_idx = next((i for i, n in enumerate(output_nodes) if n['id'] == source_node['id']), 0)
|
71 |
+
source_y = get_y_pos(source_idx, len(output_nodes))
|
72 |
+
source_x_pos = output_x - 20
|
73 |
+
|
74 |
+
if target_node['type'] == 'input':
|
75 |
+
target_idx = next((i for i, n in enumerate(input_nodes) if n['id'] == target_node['id']), 0)
|
76 |
+
target_y = get_y_pos(target_idx, len(input_nodes))
|
77 |
+
target_x_pos = input_x - 20
|
78 |
+
else:
|
79 |
+
target_idx = next((i for i, n in enumerate(output_nodes) if n['id'] == target_node['id']), 0)
|
80 |
+
target_y = get_y_pos(target_idx, len(output_nodes))
|
81 |
+
target_x_pos = output_x - 20
|
82 |
+
|
83 |
+
# Line properties based on weight
|
84 |
+
stroke_width = max(1, min(8, link['weight'] * 20))
|
85 |
+
opacity = max(0.3, min(1.0, link['weight'] * 2))
|
86 |
+
color = "#4285f4" if link['type'] == 'input_to_output' else "#ea4335"
|
87 |
+
|
88 |
+
# Create straight line
|
89 |
+
svg_html += f'''
|
90 |
+
<line x1="{source_x_pos}" y1="{source_y}" x2="{target_x_pos}" y2="{target_y}"
|
91 |
+
stroke="{color}" stroke-width="{stroke_width}" opacity="{opacity}"/>
|
92 |
+
'''
|
93 |
+
|
94 |
+
# Draw input nodes
|
95 |
+
for i, node in enumerate(input_nodes):
|
96 |
+
y = get_y_pos(i, len(input_nodes))
|
97 |
+
token_text = node['token']
|
98 |
+
|
99 |
+
# Clean token text - remove special prefix characters
|
100 |
+
if token_text.startswith('Ġ'):
|
101 |
+
token_text = token_text[1:] # Remove Ġ prefix
|
102 |
+
if token_text.startswith('▁'):
|
103 |
+
token_text = token_text[1:] # Remove ▁ prefix (SentencePiece)
|
104 |
+
if token_text.startswith('##'):
|
105 |
+
token_text = token_text[2:] # Remove ## prefix (BERT subwords)
|
106 |
+
|
107 |
+
if len(token_text) > 15:
|
108 |
+
token_text = token_text[:13] + "..."
|
109 |
+
|
110 |
+
svg_html += f'''
|
111 |
+
<g>
|
112 |
+
<circle cx="{input_x}" cy="{y}" r="12" fill="#4285f4" stroke="#1a73e8" stroke-width="2" opacity="0.9"/>
|
113 |
+
<text x="{input_x - 20}" y="{y + 4}" text-anchor="end" font-size="12" fill="#333" font-weight="bold">{token_text}</text>
|
114 |
+
</g>
|
115 |
+
'''
|
116 |
+
|
117 |
+
# Draw output nodes
|
118 |
+
for i, node in enumerate(output_nodes):
|
119 |
+
y = get_y_pos(i, len(output_nodes))
|
120 |
+
token_text = node['token']
|
121 |
+
|
122 |
+
# Clean token text - remove special prefix characters
|
123 |
+
if token_text.startswith('Ġ'):
|
124 |
+
token_text = token_text[1:] # Remove Ġ prefix
|
125 |
+
if token_text.startswith('▁'):
|
126 |
+
token_text = token_text[1:] # Remove ▁ prefix (SentencePiece)
|
127 |
+
if token_text.startswith('##'):
|
128 |
+
token_text = token_text[2:] # Remove ## prefix (BERT subwords)
|
129 |
+
|
130 |
+
if len(token_text) > 15:
|
131 |
+
token_text = token_text[:13] + "..."
|
132 |
+
|
133 |
+
svg_html += f'''
|
134 |
+
<g>
|
135 |
+
<circle cx="{output_x}" cy="{y}" r="12" fill="#ea4335" stroke="#d33b2c" stroke-width="2" opacity="0.9"/>
|
136 |
+
<text x="{output_x + 20}" y="{y + 4}" text-anchor="start" font-size="12" fill="#333" font-weight="bold">{token_text}</text>
|
137 |
+
</g>
|
138 |
+
'''
|
139 |
+
|
140 |
+
# Close SVG and add legend
|
141 |
+
svg_html += '''
|
142 |
+
</svg>
|
143 |
+
|
144 |
+
<div style='margin-top: 20px; padding: 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px;'>
|
145 |
+
<div style='display: flex; justify-content: center; align-items: center; gap: 32px; font-size: 12px; color: #64748b; font-family: Inter, sans-serif;'>
|
146 |
+
<div style='display: flex; align-items: center; gap: 8px;'>
|
147 |
+
<div style='width: 16px; height: 2px; background: #4285f4; border-radius: 1px;'></div>
|
148 |
+
<span style='color: #1e293b; font-weight: 500;'>Input → Output</span>
|
149 |
+
</div>
|
150 |
+
<div style='display: flex; align-items: center; gap: 8px;'>
|
151 |
+
<div style='display: flex; gap: 2px;'>
|
152 |
+
<div style='width: 8px; height: 1px; background: #64748b;'></div>
|
153 |
+
<div style='width: 8px; height: 2px; background: #64748b;'></div>
|
154 |
+
<div style='width: 8px; height: 3px; background: #64748b;'></div>
|
155 |
+
</div>
|
156 |
+
<span style='color: #1e293b; font-weight: 500;'>Line thickness = weight</span>
|
157 |
+
</div>
|
158 |
+
</div>
|
159 |
+
</div>
|
160 |
+
</div>
|
161 |
+
'''
|
162 |
+
|
163 |
+
return svg_html
|
164 |
+
|
165 |
+
def create_d3_visualization_old(data):
    """
    OLD VERSION - Generate a complete, self-contained HTML string with embedded D3.js visualization.

    Kept for reference only; despite the name it does not actually use D3 —
    the embedded script builds a plain SVG via string concatenation
    ("Simple visualization without D3 first - just to test").

    Args:
        data (dict): JSON structure with nodes and links from prepare_d3_data()

    Returns:
        str: Complete HTML string with embedded D3.js, CSS, and JavaScript
    """

    # The whole document is a single f-string; literal CSS/JS braces are
    # doubled ({{ }}) so Python only interpolates the intended expressions.
    # NOTE(review): `repr(data)` is embedded as a JS object literal — this
    # relies on the dict containing only str/int/float/list/dict values
    # whose Python repr is valid JavaScript; confirm with callers.
    html_template = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <style>
            .visualization-container {{
                width: 100%;
                height: 600px;
                border: 1px solid #ddd;
                border-radius: 8px;
                background: #fafafa;
                position: relative;
                overflow: hidden;
            }}

            .node {{
                cursor: pointer;
                stroke-width: 2px;
            }}

            .node.input {{
                fill: #4285f4;
                stroke: #1a73e8;
            }}

            .node.output {{
                fill: #ea4335;
                stroke: #d33b2c;
            }}

            .node.highlighted {{
                stroke-width: 4px;
                stroke: #ff6d00;
            }}

            .node.dimmed {{
                opacity: 0.3;
            }}

            .link {{
                stroke: #666;
                stroke-opacity: 0.6;
                fill: none;
            }}

            .link.input-to-output {{
                stroke: #4285f4;
            }}

            .link.output-to-output {{
                stroke: #ea4335;
            }}

            .link.highlighted {{
                stroke-opacity: 1;
                stroke-width: 3px;
            }}

            .link.dimmed {{
                stroke-opacity: 0.1;
            }}

            .token-label {{
                font-family: 'Courier New', monospace;
                font-size: 12px;
                text-anchor: middle;
                dominant-baseline: central;
                fill: white;
                font-weight: bold;
                pointer-events: none;
            }}

            .reset-btn {{
                position: absolute;
                top: 10px;
                right: 10px;
                padding: 8px 16px;
                background: #4285f4;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 12px;
                z-index: 100;
            }}

            .reset-btn:hover {{
                background: #1a73e8;
            }}

            .info-panel {{
                position: absolute;
                bottom: 10px;
                left: 10px;
                background: rgba(255, 255, 255, 0.9);
                padding: 8px 12px;
                border-radius: 4px;
                font-size: 11px;
                font-family: Arial, sans-serif;
                border: 1px solid #ddd;
            }}
        </style>
    </head>
    <body>
        <div class="visualization-container" id="viz-container">
            <button class="reset-btn" onclick="resetView()">Reset View</button>
            <div class="info-panel">
                <div>Step: {data.get('step', 0) + 1} / {data.get('total_steps', 1)}</div>
                <div>Nodes: {len(data.get('nodes', []))} | Links: {len(data.get('links', []))}</div>
                <div>Click nodes to filter connections</div>
            </div>
            <svg id="visualization"></svg>
        </div>

        <script>
            // Simple visualization without D3 first - just to test
            const data = {repr(data)};

            // Create simple HTML visualization
            const container = document.getElementById("viz-container");
            let html = "<div style='padding: 20px;'>";
            html += "<h3>Debug Info</h3>";
            html += "<p>Nodes: " + data.nodes.length + "</p>";
            html += "<p>Links: " + data.links.length + "</p>";

            // Simple SVG without D3
            html += "<svg width='800' height='400' style='border: 1px solid #ccc; background: white;'>";

            // Draw input nodes (left side)
            const inputNodes = data.nodes.filter(n => n.type === "input");
            const outputNodes = data.nodes.filter(n => n.type === "output");

            inputNodes.forEach((node, i) => {{
                const y = 50 + i * 40;
                html += `<circle cx="50" cy="${{y}}" r="15" fill="#4285f4" stroke="#1a73e8" stroke-width="2"/>`;
                html += `<text x="80" y="${{y + 5}}" font-size="12" fill="black">${{node.token}}</text>`;
            }});

            // Draw output nodes (right side)
            outputNodes.forEach((node, i) => {{
                const y = 50 + i * 40;
                html += `<circle cx="700" cy="${{y}}" r="15" fill="#ea4335" stroke="#d33b2c" stroke-width="2"/>`;
                html += `<text x="620" y="${{y + 5}}" font-size="12" fill="black" text-anchor="end">${{node.token}}</text>`;
            }});

            // Draw links
            data.links.forEach(link => {{
                const sourceNode = data.nodes.find(n => n.id === link.source);
                const targetNode = data.nodes.find(n => n.id === link.target);
                if (sourceNode && targetNode) {{
                    const sourceIdx = sourceNode.type === "input" ?
                        inputNodes.findIndex(n => n.id === sourceNode.id) :
                        outputNodes.findIndex(n => n.id === sourceNode.id);
                    const targetIdx = targetNode.type === "input" ?
                        inputNodes.findIndex(n => n.id === targetNode.id) :
                        outputNodes.findIndex(n => n.id === targetNode.id);

                    const sourceX = sourceNode.type === "input" ? 65 : 685;
                    const targetX = targetNode.type === "input" ? 65 : 685;
                    const sourceY = 50 + sourceIdx * 40;
                    const targetY = 50 + targetIdx * 40;

                    const strokeWidth = Math.max(1, link.weight * 10);
                    const color = link.type === "input_to_output" ? "#4285f4" : "#ea4335";

                    html += `<line x1="${{sourceX}}" y1="${{sourceY}}" x2="${{targetX}}" y2="${{targetY}}" stroke="${{color}}" stroke-width="${{strokeWidth}}" opacity="0.6"/>`;
                }}
            }});

            html += "</svg>";
            html += "</div>";

            container.innerHTML = html;

        </script>
    </body>
    </html>
    """

    return html_template
|
visualization/plotly_viz.py
ADDED
@@ -0,0 +1,443 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import plotly.graph_objects as go
|
2 |
+
import numpy as np
|
3 |
+
from typing import List, Dict, Any, Optional, Tuple, Callable
|
4 |
+
from .utils import (
|
5 |
+
clean_label, scale_weight_to_width, scale_weight_to_opacity,
|
6 |
+
get_node_positions, create_spline_path, format_attention_text,
|
7 |
+
get_color_for_weight, truncate_token_label
|
8 |
+
)
|
9 |
+
|
10 |
+
class AttentionVisualizer:
|
11 |
+
def __init__(self, config):
|
12 |
+
self.config = config
|
13 |
+
self.current_state = {
|
14 |
+
'selected_token': None,
|
15 |
+
'selected_type': None,
|
16 |
+
'current_step': 0,
|
17 |
+
'show_all': True
|
18 |
+
}
|
19 |
+
self.traces_info = {
|
20 |
+
'input_to_output': [],
|
21 |
+
'output_to_output': [],
|
22 |
+
'input_nodes_idx': None,
|
23 |
+
'output_nodes_idx': None
|
24 |
+
}
|
25 |
+
|
26 |
+
def create_interactive_plot(
|
27 |
+
self,
|
28 |
+
input_tokens: List[str],
|
29 |
+
output_tokens: List[str],
|
30 |
+
attention_matrices: List[Dict],
|
31 |
+
threshold: float = 0.05,
|
32 |
+
initial_step: int = 0,
|
33 |
+
normalization: str = "separate"
|
34 |
+
) -> go.Figure:
|
35 |
+
"""
|
36 |
+
Create the main interactive visualization.
|
37 |
+
"""
|
38 |
+
# Clean labels
|
39 |
+
input_labels = [clean_label(token) for token in input_tokens]
|
40 |
+
output_labels = [clean_label(token) for token in output_tokens]
|
41 |
+
|
42 |
+
num_input = len(input_labels)
|
43 |
+
num_output = len(output_labels)
|
44 |
+
num_steps = len(attention_matrices)
|
45 |
+
|
46 |
+
if num_input == 0 or num_output == 0 or num_steps == 0:
|
47 |
+
return self._create_empty_figure("No data to visualize")
|
48 |
+
|
49 |
+
# Get node positions
|
50 |
+
input_x, input_y, output_x, output_y = get_node_positions(num_input, num_output)
|
51 |
+
|
52 |
+
# Create connection traces
|
53 |
+
traces = []
|
54 |
+
self.traces_info = {
|
55 |
+
'input_to_output': [],
|
56 |
+
'output_to_output': [],
|
57 |
+
'input_nodes_idx': None,
|
58 |
+
'output_nodes_idx': None
|
59 |
+
}
|
60 |
+
|
61 |
+
# Input to output connections
|
62 |
+
for j in range(num_output):
|
63 |
+
for i in range(num_input):
|
64 |
+
weight = 0
|
65 |
+
if j < len(attention_matrices):
|
66 |
+
weight = attention_matrices[j]['input_attention'][i].item()
|
67 |
+
|
68 |
+
opacity = scale_weight_to_opacity(weight, threshold=threshold)
|
69 |
+
width = scale_weight_to_width(weight) if opacity > 0 else 0.5
|
70 |
+
|
71 |
+
trace = go.Scatter(
|
72 |
+
x=[input_x[i], output_x[j]],
|
73 |
+
y=[input_y[i], output_y[j]],
|
74 |
+
mode="lines",
|
75 |
+
line=dict(
|
76 |
+
color=get_color_for_weight(weight, "blue"),
|
77 |
+
width=width
|
78 |
+
),
|
79 |
+
opacity=opacity,
|
80 |
+
showlegend=False,
|
81 |
+
hoverinfo='text',
|
82 |
+
text=format_attention_text(input_labels[i], output_labels[j], weight),
|
83 |
+
hoverlabel=dict(bgcolor="lightskyblue", bordercolor="darkblue"),
|
84 |
+
name=f"in_to_out_{i}_{j}",
|
85 |
+
customdata=[(i, j)],
|
86 |
+
hovertemplate="Input→Output %{customdata[0]}→%{customdata[1]}<extra></extra>"
|
87 |
+
)
|
88 |
+
traces.append(trace)
|
89 |
+
self.traces_info['input_to_output'].append({
|
90 |
+
'input_idx': i,
|
91 |
+
'output_idx': j,
|
92 |
+
'trace_idx': len(traces) - 1
|
93 |
+
})
|
94 |
+
|
95 |
+
# Output to output connections
|
96 |
+
for j in range(1, num_output):
|
97 |
+
for i in range(j):
|
98 |
+
weight = 0
|
99 |
+
if j < len(attention_matrices) and attention_matrices[j]['output_attention'] is not None:
|
100 |
+
if i < len(attention_matrices[j]['output_attention']):
|
101 |
+
weight = attention_matrices[j]['output_attention'][i].item()
|
102 |
+
|
103 |
+
opacity = scale_weight_to_opacity(weight, threshold=threshold)
|
104 |
+
width = scale_weight_to_width(weight) if opacity > 0 else 0.5
|
105 |
+
|
106 |
+
# Create spline path for curved connection
|
107 |
+
path_x, path_y = create_spline_path(
|
108 |
+
output_x[i], output_y[i],
|
109 |
+
output_x[j], output_y[j],
|
110 |
+
control_offset=0.15
|
111 |
+
)
|
112 |
+
|
113 |
+
trace = go.Scatter(
|
114 |
+
x=path_x,
|
115 |
+
y=path_y,
|
116 |
+
mode="lines",
|
117 |
+
line=dict(
|
118 |
+
color=get_color_for_weight(weight, "orange"),
|
119 |
+
width=width,
|
120 |
+
shape='spline'
|
121 |
+
),
|
122 |
+
opacity=opacity,
|
123 |
+
showlegend=False,
|
124 |
+
hoverinfo='text',
|
125 |
+
text=format_attention_text(output_labels[i], output_labels[j], weight),
|
126 |
+
hoverlabel=dict(bgcolor="moccasin", bordercolor="darkorange"),
|
127 |
+
name=f"out_to_out_{i}_{j}"
|
128 |
+
)
|
129 |
+
traces.append(trace)
|
130 |
+
self.traces_info['output_to_output'].append({
|
131 |
+
'from_idx': i,
|
132 |
+
'to_idx': j,
|
133 |
+
'trace_idx': len(traces) - 1
|
134 |
+
})
|
135 |
+
|
136 |
+
# Input nodes
|
137 |
+
input_trace = go.Scatter(
|
138 |
+
x=input_x,
|
139 |
+
y=input_y,
|
140 |
+
mode="markers+text",
|
141 |
+
marker=dict(
|
142 |
+
size=self.config.NODE_SIZE,
|
143 |
+
color=self.config.INPUT_COLOR,
|
144 |
+
line=dict(width=self.config.NODE_LINE_WIDTH, color="darkblue")
|
145 |
+
),
|
146 |
+
selected=dict(
|
147 |
+
marker=dict(
|
148 |
+
size=self.config.NODE_SIZE + 6,
|
149 |
+
color="rgba(0, 0, 200, 0.9)"
|
150 |
+
)
|
151 |
+
),
|
152 |
+
unselected=dict(
|
153 |
+
marker=dict(
|
154 |
+
opacity=0.65
|
155 |
+
)
|
156 |
+
),
|
157 |
+
text=[truncate_token_label(label) for label in input_labels],
|
158 |
+
textfont=dict(size=self.config.FONT_SIZE, family=self.config.FONT_FAMILY),
|
159 |
+
textposition="middle left",
|
160 |
+
name="Input Tokens",
|
161 |
+
hovertemplate="Input: %{text}<br>Click to filter connections<extra></extra>",
|
162 |
+
customdata=[(i, 'input') for i in range(num_input)]
|
163 |
+
)
|
164 |
+
traces.append(input_trace)
|
165 |
+
self.traces_info['input_nodes_idx'] = len(traces) - 1
|
166 |
+
|
167 |
+
# Output nodes
|
168 |
+
output_colors = []
|
169 |
+
for j in range(num_output):
|
170 |
+
if j <= initial_step:
|
171 |
+
output_colors.append(self.config.OUTPUT_COLOR)
|
172 |
+
else:
|
173 |
+
output_colors.append("rgba(230, 230, 230, 0.8)")
|
174 |
+
|
175 |
+
output_trace = go.Scatter(
|
176 |
+
x=output_x,
|
177 |
+
y=output_y,
|
178 |
+
mode="markers+text",
|
179 |
+
marker=dict(
|
180 |
+
size=self.config.NODE_SIZE,
|
181 |
+
color=output_colors,
|
182 |
+
line=dict(width=self.config.NODE_LINE_WIDTH, color="darkred")
|
183 |
+
),
|
184 |
+
selected=dict(
|
185 |
+
marker=dict(
|
186 |
+
size=self.config.NODE_SIZE + 6,
|
187 |
+
color="rgba(200, 80, 0, 0.9)"
|
188 |
+
)
|
189 |
+
),
|
190 |
+
unselected=dict(
|
191 |
+
marker=dict(
|
192 |
+
opacity=0.65
|
193 |
+
)
|
194 |
+
),
|
195 |
+
text=[truncate_token_label(label) for label in output_labels],
|
196 |
+
textfont=dict(size=self.config.FONT_SIZE, family=self.config.FONT_FAMILY),
|
197 |
+
textposition="middle right",
|
198 |
+
name="Output Tokens",
|
199 |
+
hovertemplate="Output: %{text}<br>Click to filter connections<extra></extra>",
|
200 |
+
customdata=[(i, 'output') for i in range(num_output)]
|
201 |
+
)
|
202 |
+
traces.append(output_trace)
|
203 |
+
self.traces_info['output_nodes_idx'] = len(traces) - 1
|
204 |
+
|
205 |
+
# Create figure
|
206 |
+
fig = go.Figure(data=traces)
|
207 |
+
|
208 |
+
# Update layout
|
209 |
+
title = f"Token Attention Flow ({normalization.capitalize()} Normalization)"
|
210 |
+
fig.update_layout(
|
211 |
+
title=title,
|
212 |
+
xaxis=dict(
|
213 |
+
range=[-0.1, 1.1],
|
214 |
+
showgrid=False,
|
215 |
+
zeroline=False,
|
216 |
+
showticklabels=False,
|
217 |
+
fixedrange=True
|
218 |
+
),
|
219 |
+
yaxis=dict(
|
220 |
+
range=[0, 1],
|
221 |
+
showgrid=False,
|
222 |
+
zeroline=False,
|
223 |
+
showticklabels=False,
|
224 |
+
fixedrange=True
|
225 |
+
),
|
226 |
+
hovermode="closest",
|
227 |
+
clickmode="event+select",
|
228 |
+
dragmode="select",
|
229 |
+
width=self.config.PLOT_WIDTH,
|
230 |
+
height=max(self.config.PLOT_HEIGHT, num_input * 30, num_output * 30),
|
231 |
+
plot_bgcolor="white",
|
232 |
+
margin=dict(l=150, r=200, t=80, b=80),
|
233 |
+
hoverdistance=20,
|
234 |
+
hoverlabel=dict(font_size=12, font_family=self.config.FONT_FAMILY),
|
235 |
+
showlegend=True,
|
236 |
+
legend=dict(
|
237 |
+
yanchor="top",
|
238 |
+
y=0.99,
|
239 |
+
xanchor="left",
|
240 |
+
x=1.02
|
241 |
+
),
|
242 |
+
# Preserve UI state on updates
|
243 |
+
uirevision="constant"
|
244 |
+
)
|
245 |
+
|
246 |
+
# Add legend traces
|
247 |
+
fig.add_trace(go.Scatter(
|
248 |
+
x=[None], y=[None],
|
249 |
+
mode='lines',
|
250 |
+
line=dict(color='rgba(0, 0, 255, 0.6)', width=2),
|
251 |
+
name='Input→Output'
|
252 |
+
))
|
253 |
+
fig.add_trace(go.Scatter(
|
254 |
+
x=[None], y=[None],
|
255 |
+
mode='lines',
|
256 |
+
line=dict(color='rgba(255, 165, 0, 0.6)', width=2),
|
257 |
+
name='Output→Output'
|
258 |
+
))
|
259 |
+
|
260 |
+
# Add annotations
|
261 |
+
fig.add_annotation(
|
262 |
+
x=0.5, y=0.02,
|
263 |
+
text=f"Step {initial_step} / {num_steps-1}: Generating '{output_labels[initial_step] if initial_step < len(output_labels) else ''}'",
|
264 |
+
showarrow=False,
|
265 |
+
font=dict(size=12, color="darkred"),
|
266 |
+
xref="paper", yref="paper"
|
267 |
+
)
|
268 |
+
|
269 |
+
fig.add_annotation(
|
270 |
+
x=0.01, y=0.98,
|
271 |
+
text="💡 Click tokens to filter connections | Use step slider to navigate generation",
|
272 |
+
showarrow=False,
|
273 |
+
font=dict(size=10, color="gray"),
|
274 |
+
align="left",
|
275 |
+
xref="paper", yref="paper"
|
276 |
+
)
|
277 |
+
|
278 |
+
self.current_state['current_step'] = initial_step
|
279 |
+
|
280 |
+
return fig
|
281 |
+
|
282 |
+
def update_for_step(
    self,
    fig: go.Figure,
    step: int,
    attention_matrices: List[Dict],
    output_tokens: List[str],
    threshold: float = 0.05
) -> go.Figure:
    """
    Update visualization for a specific generation step.

    Mutates `fig` in place: restyles the connection traces so that only
    attention for generation steps <= `step` is visible, recolors the
    output-token nodes, and rewrites the step caption annotation.

    Args:
        fig: Figure previously built by this visualizer; its trace order
            must match the indices recorded in `self.traces_info`.
        step: 0-based generation step to display.
        attention_matrices: One dict per generated token, with keys
            'input_attention' (indexed per input token) and
            'output_attention' (indexed per earlier output token, may be
            None for step 0). Entries are read with `.item()`, so they
            are assumed to be torch/numpy scalar elements —
            NOTE(review): confirm with the producer of these matrices.
        output_tokens: Raw output token strings (cleaned for display).
        threshold: Weights below this are rendered fully transparent.

    Returns:
        The same `fig` object, updated.
    """
    # Out-of-range step: nothing to show, leave the figure untouched.
    if step >= len(attention_matrices):
        return fig

    output_labels = [clean_label(token) for token in output_tokens]

    # batch_update coalesces all property assignments into a single redraw.
    with fig.batch_update():
        # Update input-to-output connections for current step
        for conn_info in self.traces_info['input_to_output']:
            if conn_info['output_idx'] == step:
                weight = attention_matrices[step]['input_attention'][conn_info['input_idx']].item()
                opacity = scale_weight_to_opacity(weight, threshold=threshold)
                # Keep a hairline width even when invisible so the trace
                # stays cheap to re-show later.
                width = scale_weight_to_width(weight) if opacity > 0 else 0.5

                trace_idx = conn_info['trace_idx']
                fig.data[trace_idx].opacity = opacity
                fig.data[trace_idx].line.width = width
                fig.data[trace_idx].line.color = get_color_for_weight(weight, "blue")
            elif conn_info['output_idx'] > step:
                # Hide future connections
                fig.data[conn_info['trace_idx']].opacity = 0

        # Update output-to-output connections
        for conn_info in self.traces_info['output_to_output']:
            # 'output_attention' can be None (e.g. first generated token).
            if conn_info['to_idx'] == step and attention_matrices[step]['output_attention'] is not None:
                if conn_info['from_idx'] < len(attention_matrices[step]['output_attention']):
                    weight = attention_matrices[step]['output_attention'][conn_info['from_idx']].item()
                    opacity = scale_weight_to_opacity(weight, threshold=threshold)
                    width = scale_weight_to_width(weight) if opacity > 0 else 0.5

                    trace_idx = conn_info['trace_idx']
                    fig.data[trace_idx].opacity = opacity
                    fig.data[trace_idx].line.width = width
                    fig.data[trace_idx].line.color = get_color_for_weight(weight, "orange")
            elif conn_info['to_idx'] > step:
                # Hide future connections
                fig.data[conn_info['trace_idx']].opacity = 0

        # Update output node colors: generated tokens keep OUTPUT_COLOR,
        # not-yet-generated tokens are grayed out.
        output_colors = []
        for j in range(len(output_tokens)):
            if j <= step:
                output_colors.append(self.config.OUTPUT_COLOR)
            else:
                output_colors.append("rgba(230, 230, 230, 0.8)")

        if self.traces_info['output_nodes_idx'] is not None:
            fig.data[self.traces_info['output_nodes_idx']].marker.color = output_colors

        # Update step annotation (index 0 is the step caption added at
        # figure-creation time).
        fig.layout.annotations[0].text = f"Step {step} / {len(attention_matrices)-1}: Generating '{output_labels[step] if step < len(output_labels) else ''}'"

    self.current_state['current_step'] = step
    return fig
|
346 |
+
|
347 |
+
def filter_by_token(
    self,
    fig: go.Figure,
    token_idx: int,
    token_type: str,
    attention_matrices: List[Dict],
    threshold: float = 0.05
) -> go.Figure:
    """
    Filter connections to show only those related to selected token.

    Mutates `fig` in place so that only connections touching the clicked
    token remain visible, then records the selection in
    `self.current_state`.

    Args:
        fig: Figure previously built by this visualizer; trace order must
            match `self.traces_info`.
        token_idx: Index of the clicked token within its column.
        token_type: 'input' or 'output' — which column was clicked. Any
            other value leaves the figure unchanged except for the state
            bookkeeping at the end.
        attention_matrices: Per-step attention data (see update_for_step);
            entries are read with `.item()`.
        threshold: Weights below this are rendered fully transparent.

    Returns:
        The same `fig` object, updated.
    """
    with fig.batch_update():
        # Only connections up to the currently displayed step are eligible.
        current_step = self.current_state['current_step']

        if token_type == 'input':
            # Show only connections from this input token
            for conn_info in self.traces_info['input_to_output']:
                if conn_info['input_idx'] == token_idx and conn_info['output_idx'] <= current_step:
                    weight = attention_matrices[conn_info['output_idx']]['input_attention'][token_idx].item()
                    opacity = scale_weight_to_opacity(weight, threshold=threshold)
                    fig.data[conn_info['trace_idx']].opacity = opacity if opacity > 0 else 0
                else:
                    fig.data[conn_info['trace_idx']].opacity = 0

            # Hide all output-to-output connections
            for conn_info in self.traces_info['output_to_output']:
                fig.data[conn_info['trace_idx']].opacity = 0

        elif token_type == 'output':
            # Show connections to this output token
            for conn_info in self.traces_info['input_to_output']:
                if conn_info['output_idx'] == token_idx and token_idx <= current_step:
                    weight = attention_matrices[token_idx]['input_attention'][conn_info['input_idx']].item()
                    opacity = scale_weight_to_opacity(weight, threshold=threshold)
                    fig.data[conn_info['trace_idx']].opacity = opacity if opacity > 0 else 0
                else:
                    fig.data[conn_info['trace_idx']].opacity = 0

            # Show connections from/to this output token.
            # A connection is kept if the selected token is either its
            # destination ('to_idx') or its source ('from_idx'); both
            # branches guard against a None/short 'output_attention'.
            for conn_info in self.traces_info['output_to_output']:
                show = False
                if conn_info['to_idx'] == token_idx and token_idx <= current_step:
                    if attention_matrices[token_idx]['output_attention'] is not None:
                        if conn_info['from_idx'] < len(attention_matrices[token_idx]['output_attention']):
                            weight = attention_matrices[token_idx]['output_attention'][conn_info['from_idx']].item()
                            opacity = scale_weight_to_opacity(weight, threshold=threshold)
                            fig.data[conn_info['trace_idx']].opacity = opacity if opacity > 0 else 0
                            show = True
                elif conn_info['from_idx'] == token_idx and conn_info['to_idx'] <= current_step:
                    if attention_matrices[conn_info['to_idx']]['output_attention'] is not None:
                        if token_idx < len(attention_matrices[conn_info['to_idx']]['output_attention']):
                            weight = attention_matrices[conn_info['to_idx']]['output_attention'][token_idx].item()
                            opacity = scale_weight_to_opacity(weight, threshold=threshold)
                            fig.data[conn_info['trace_idx']].opacity = opacity if opacity > 0 else 0
                            show = True

                if not show:
                    fig.data[conn_info['trace_idx']].opacity = 0

    # Record the active selection so later updates can respect it.
    self.current_state['selected_token'] = token_idx
    self.current_state['selected_type'] = token_type
    self.current_state['show_all'] = False

    return fig
|
411 |
+
|
412 |
+
def show_all_connections(
    self,
    fig: go.Figure,
    attention_matrices: List[Dict],
    threshold: float = 0.05,
    output_tokens=None
) -> go.Figure:
    """
    Reset to show all connections for current step.

    Clears any token selection and re-renders the currently displayed
    step via update_for_step.

    Args:
        fig: Figure previously built by this visualizer.
        attention_matrices: Per-step attention data (one dict per
            generated token).
        threshold: Weights below this are rendered fully transparent.
        output_tokens: Optional list of raw output token strings used
            for the step caption. New, backward-compatible parameter:
            pass the real token list to get a correct caption.

    Returns:
        The same `fig` object, updated.
    """
    self.current_state['selected_token'] = None
    self.current_state['selected_type'] = None
    self.current_state['show_all'] = True

    # BUG FIX: the original passed `[clean_label(t) for t in
    # attention_matrices]` — i.e. the attention *dicts* — where
    # update_for_step expects output token strings, so the step caption
    # showed stringified dicts. Fall back to empty labels when the
    # caller does not supply the tokens.
    if output_tokens is None:
        output_tokens = [''] * len(attention_matrices)

    return self.update_for_step(
        fig,
        self.current_state['current_step'],
        attention_matrices,
        output_tokens,
        threshold
    )
|
432 |
+
|
433 |
+
def _create_empty_figure(self, message: str) -> go.Figure:
    """Build a placeholder figure whose only content is *message* as the title."""
    placeholder = go.Figure()
    placeholder.update_layout(
        title=message,
        xaxis={'visible': False},
        yaxis={'visible': False},
        width=self.config.PLOT_WIDTH,
        height=self.config.PLOT_HEIGHT
    )
    return placeholder
|
visualization/simple_svg_viz.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from typing import List, Dict, Any, Optional, Tuple
|
3 |
+
from .utils import clean_label, scale_weight_to_width, scale_weight_to_opacity
|
4 |
+
|
5 |
+
class SimpleSVGVisualizer:
    """Render the token attention flow as a plain inline SVG (no D3).

    Two fixes relative to the original implementation:

    * ``scale_weight_to_opacity`` was called positionally as
      ``(weight, threshold)``, which bound ``threshold`` to that
      function's ``min_opacity`` parameter and silently ignored the real
      threshold; it is now passed by keyword.
    * Token labels are HTML-escaped before being embedded in the SVG, so
      tokens containing ``<``, ``>``, ``&`` or quotes can no longer break
      the markup.
    """

    def __init__(self, config):
        # config supplies PLOT_WIDTH, PLOT_HEIGHT, NODE_SIZE, FONT_SIZE,
        # INPUT_COLOR and OUTPUT_COLOR.
        self.config = config

    def create_visualization_html(
        self,
        input_tokens: List[str],
        output_tokens: List[str],
        attention_matrices: List[Dict],
        threshold: float = 0.05,
        initial_step: int = 0,
        selected_token: Optional[int] = None,
        selected_type: Optional[str] = None
    ) -> str:
        """Create a simple SVG visualization without D3.

        Args:
            input_tokens: Raw prompt tokens (left column).
            output_tokens: Raw generated tokens (right column).
            attention_matrices: One dict per generation step; the
                'input_attention' entry is indexed per input token and
                read with ``.item()`` (assumed torch/numpy scalar
                elements — NOTE(review): confirm with caller).
            threshold: Connections with weight <= threshold are not drawn.
            initial_step: Generation step to display.
            selected_token: Optional token index to filter connections by.
            selected_type: 'input' or 'output' for the selected token.

        Returns:
            HTML string containing the SVG plus a small click-handler
            script that forwards clicks to a hidden Gradio textbox.
        """
        from html import escape  # stdlib; local import keeps module-level deps unchanged

        # Clean labels
        input_labels = [clean_label(token) for token in input_tokens]
        output_labels = [clean_label(token) for token in output_tokens]

        # Canvas geometry
        width = self.config.PLOT_WIDTH
        height = self.config.PLOT_HEIGHT
        margin = 100

        input_x = margin
        output_x = width - margin

        # Create SVG elements
        svg_elements = []

        # Background
        svg_elements.append(f'<rect width="{width}" height="{height}" fill="white" stroke="#ddd"/>')

        # Title
        svg_elements.append(f'<text x="{width/2}" y="30" text-anchor="middle" font-size="16" font-weight="bold">Token Attention Flow</text>')

        # Calculate vertical positions
        input_y_positions = []
        output_y_positions = []

        if len(input_labels) > 0:
            input_spacing = (height - 2 * margin) / max(1, len(input_labels) - 1)
            input_y_positions = [margin + i * input_spacing for i in range(len(input_labels))]

        if len(output_labels) > 0:
            output_spacing = (height - 2 * margin) / max(1, len(output_labels) - 1)
            output_y_positions = [margin + i * output_spacing for i in range(len(output_labels))]

        # Draw connections for all steps up to and including initial_step
        for j in range(min(initial_step + 1, len(output_labels))):
            if j < len(attention_matrices):
                for i in range(len(input_labels)):
                    weight = attention_matrices[j]['input_attention'][i].item()

                    # Apply filtering
                    if selected_token is not None:
                        if selected_type == 'input' and i != selected_token:
                            continue
                        elif selected_type == 'output' and j != selected_token:
                            continue

                    if weight > threshold:
                        # FIX: pass threshold by keyword — the original
                        # positional call bound it to min_opacity.
                        opacity = scale_weight_to_opacity(weight, threshold=threshold)
                        width_val = scale_weight_to_width(weight)

                        svg_elements.append(
                            f'<line x1="{input_x}" y1="{input_y_positions[i]}" '
                            f'x2="{output_x}" y2="{output_y_positions[j]}" '
                            f'stroke="blue" stroke-width="{width_val}" opacity="{opacity}"/>'
                        )

        # Draw input nodes (labels escaped so markup cannot be broken)
        for i, label in enumerate(input_labels):
            y = input_y_positions[i]
            color = "yellow" if selected_token == i and selected_type == 'input' else self.config.INPUT_COLOR

            svg_elements.append(
                f'<circle cx="{input_x}" cy="{y}" r="{self.config.NODE_SIZE/2}" '
                f'fill="{color}" stroke="darkblue" stroke-width="2" '
                f'style="cursor: pointer" '
                f'onclick="handleTokenClick({i}, \'input\')"/>'
            )
            svg_elements.append(
                f'<text x="{input_x - self.config.NODE_SIZE/2 - 10}" y="{y + 5}" '
                f'text-anchor="end" font-size="{self.config.FONT_SIZE}">{escape(label)}</text>'
            )

        # Draw output nodes (not-yet-generated tokens are grayed out)
        for j, label in enumerate(output_labels):
            y = output_y_positions[j]
            color = "yellow" if selected_token == j and selected_type == 'output' else (
                self.config.OUTPUT_COLOR if j <= initial_step else "#e6e6e6"
            )

            svg_elements.append(
                f'<circle cx="{output_x}" cy="{y}" r="{self.config.NODE_SIZE/2}" '
                f'fill="{color}" stroke="darkred" stroke-width="2" '
                f'style="cursor: pointer" '
                f'onclick="handleTokenClick({j}, \'output\')"/>'
            )
            svg_elements.append(
                f'<text x="{output_x + self.config.NODE_SIZE/2 + 10}" y="{y + 5}" '
                f'text-anchor="start" font-size="{self.config.FONT_SIZE}">{escape(label)}</text>'
            )

        # Step info caption
        current_label = output_labels[initial_step] if initial_step < len(output_labels) else ""
        svg_elements.append(
            f'<text x="{width/2}" y="{height - 20}" text-anchor="middle" font-size="12" fill="darkred">'
            f'Step {initial_step} / {len(output_labels) - 1}: Generating "{escape(current_label)}"'
            f'</text>'
        )

        # Create HTML; the click handler writes the clicked token into the
        # hidden '#clicked-token-d3' textbox and fires an input event so
        # Gradio picks up the change.
        html = f"""
        <div style="width: 100%; overflow-x: auto;">
            <svg width="{width}" height="{height}" style="border: 1px solid #ddd;">
                {''.join(svg_elements)}
            </svg>
        </div>

        <script>
        function handleTokenClick(index, type) {{
            console.log('Token clicked:', index, type);
            const hiddenInput = document.querySelector('#clicked-token-d3 textarea');
            if (hiddenInput) {{
                const clickData = JSON.stringify({{index: index, type: type}});
                hiddenInput.value = clickData;
                hiddenInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
            }}
        }}
        </script>
        """

        return html
|
visualization/utils.py
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from typing import List, Tuple, Optional
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
def clean_label(token: str) -> str:
    """
    Cleans token labels for visualization.
    Handles various tokenizer-specific formatting.
    """
    # Ordered (marker, replacement) pairs; '</s>' must precede '<s>'.
    substitutions = (
        ('Ġ', ' '),   # GPT-2 style space
        ('▁', ' '),   # SentencePiece style space
        ('Ċ', '\\n'),  # newline marker
        ('</s>', '[EOS]'),
        ('<s>', '[BOS]'),
        ('<unk>', '[UNK]'),
        ('<pad>', '[PAD]'),
        ('<|begin_of_text|>', '[BOS]'),
        ('<|end_of_text|>', '[EOS]'),
        ('<|endoftext|>', '[EOS]'),
    )

    result = str(token)
    for marker, replacement in substitutions:
        result = result.replace(marker, replacement)

    # Drop byte-level encoding markers such as <0xE2>.
    result = re.sub(r'<0x[0-9A-Fa-f]{2}>', '', result)

    # Strip surrounding whitespace; empty results get a placeholder.
    result = result.strip()
    return result or "[EMPTY]"
|
34 |
+
|
35 |
+
def scale_weight_to_width(
    weight: float,
    min_width: float = 0.5,
    max_width: float = 3.0,
    scale_factor: float = 5.0
) -> float:
    """
    Scale attention weight to line width for visualization.

    Args:
        weight: Attention weight (0-1)
        min_width: Minimum line width
        max_width: Maximum line width
        scale_factor: Scaling factor for weight

    Returns:
        Scaled line width
    """
    # Amplify small weights, capping the normalized value at 1.0.
    normalized = min(1.0, weight * scale_factor)
    span = max_width - min_width
    return min_width + span * normalized
|
55 |
+
|
56 |
+
def scale_weight_to_opacity(
    weight: float,
    min_opacity: float = 0.1,
    max_opacity: float = 1.0,
    threshold: float = 0.0
) -> float:
    """
    Scale attention weight to opacity for visualization.

    Args:
        weight: Attention weight (0-1)
        min_opacity: Minimum opacity
        max_opacity: Maximum opacity
        threshold: Threshold below which opacity is 0

    Returns:
        Scaled opacity
    """
    # Below the cutoff the connection is fully transparent.
    if weight < threshold:
        return 0.0

    # Re-normalize the remaining [threshold, 1] range to [0, 1];
    # degenerate threshold >= 1 falls back to the raw weight.
    if threshold < 1.0:
        normalized = (weight - threshold) / (1.0 - threshold)
    else:
        normalized = weight

    return min_opacity + (max_opacity - min_opacity) * normalized
|
80 |
+
|
81 |
+
def get_node_positions(
    num_input: int,
    num_output: int,
    spacing: str = 'linear'
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Calculate node positions for visualization.

    Args:
        num_input: Number of input tokens
        num_output: Number of output tokens
        spacing: Spacing strategy ('linear', 'equal')

    Returns:
        Tuple of (input_x, input_y, output_x, output_y)
    """
    def _column_y(count: int) -> np.ndarray:
        # Vertical layout for one column of nodes.
        if spacing == 'linear':
            # Evenly spread over [0.1, 0.9]; a lone node is centered.
            return np.linspace(0.1, 0.9, count) if count > 1 else np.array([0.5])
        # Equal spacing: slots of size 0.8/(count+1), starting at 0.1.
        slot = 0.8 / (count + 1)
        return np.array([0.1 + (k + 1) * slot for k in range(count)])

    input_y = _column_y(num_input)
    output_y = _column_y(num_output)

    # Fixed columns: inputs on the left, outputs on the right.
    input_x = np.full(num_input, 0.1)
    output_x = np.full(num_output, 0.9)

    return input_x, input_y, output_x, output_y
|
113 |
+
|
114 |
+
def create_spline_path(
    start_x: float,
    start_y: float,
    end_x: float,
    end_y: float,
    control_offset: float = 0.15
) -> Tuple[List[float], List[float]]:
    """
    Create a spline path for output-to-output connections.

    Args:
        start_x, start_y: Starting position
        end_x, end_y: Ending position
        control_offset: Offset for control points

    Returns:
        Tuple of (x_path, y_path) for spline
    """
    # Control points bulge rightward by `control_offset` so the curve
    # arcs away from the node column.
    bulge_from = start_x + control_offset
    bulge_to = end_x + control_offset
    xs = [start_x, bulge_from, bulge_to, end_x]
    ys = [start_y, start_y, end_y, end_y]
    return xs, ys
|
147 |
+
|
148 |
+
def format_attention_text(
    from_token: str,
    to_token: str,
    weight: float,
    connection_type: str = "attention"
) -> str:
    """
    Format hover text for attention connections.

    Args:
        from_token: Source token
        to_token: Target token
        weight: Attention weight
        connection_type: Type of connection

    Returns:
        Formatted hover text
    """
    # Two-line hover label: "from → to" then the weight to 4 decimals.
    header = f"{from_token} → {to_token}"
    detail = f"{connection_type.capitalize()} Weight: {weight:.4f}"
    return f"{header}<br>{detail}"
|
170 |
+
|
171 |
+
def get_color_for_weight(
    weight: float,
    base_color: str = "blue",
    use_gradient: bool = True
) -> str:
    """
    Get color for attention weight visualization.

    Args:
        weight: Attention weight (0-1)
        base_color: Base color name
        use_gradient: Whether to use gradient based on weight

    Returns:
        Color string for plotly
    """
    if not use_gradient:
        # Flat colors, ignoring the weight entirely.
        flat = {
            "blue": "rgba(0, 0, 255, 0.6)",
            "orange": "rgba(255, 165, 0, 0.6)",
        }
        return flat.get(base_color, "rgba(128, 128, 128, 0.6)")

    # Gradient: heavier weights get a darker channel and higher alpha.
    alpha = 0.3 + weight * 0.4
    if base_color == "blue":
        channel = int(255 - weight * 155)  # 255 down to 100
        return f"rgba(0, {channel}, 255, {alpha})"
    if base_color == "orange":
        channel = int(255 - weight * 100)  # 255 down to 155
        return f"rgba(255, {channel}, 0, {alpha})"
    channel = int(200 - weight * 100)  # gray: 200 down to 100
    return f"rgba({channel}, {channel}, {channel}, {alpha})"
|
208 |
+
|
209 |
+
def truncate_token_label(token: str, max_length: int = 15) -> str:
    """
    Truncate long token labels for display.

    Args:
        token: Token string
        max_length: Maximum length

    Returns:
        Truncated token with ellipsis if needed
    """
    display = clean_label(token)
    # Short labels pass through unchanged; long ones are cut so that the
    # result (including the "...") is exactly max_length characters.
    if len(display) <= max_length:
        return display
    return display[:max_length - 3] + "..."
|