Spaces: Running on Zero
Fix short generations
app.py
CHANGED
@@ -6,9 +6,16 @@ import gradio as gr
 import numpy as np
 import spaces
 from scipy.signal import convolve2d
+from huggingface_hub import login
+import os
+from dotenv import load_dotenv
 
-
-
+load_dotenv()
+
+login(os.getenv("HF_TOKEN"))
+
+model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.bfloat16, device_map="cuda")
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 attnlrp.register(model)
 
 def really_clean_tokens(tokens):
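The new header block wires up gated-model access: python-dotenv loads a local .env file into the environment, and huggingface_hub's login authenticates with the token stored there, which the gated meta-llama checkpoints require. A minimal standalone sketch of that flow (the None guard is an illustrative addition, not part of the commit):

import os
from dotenv import load_dotenv    # pip install python-dotenv
from huggingface_hub import login

load_dotenv()  # copies HF_TOKEN from a local .env file into os.environ

token = os.getenv("HF_TOKEN")
if token is None:
    # fail fast with a clear message instead of a 401 at download time
    raise RuntimeError("HF_TOKEN is not set; add it to .env or the Space secrets")
login(token)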
@@ -54,6 +61,7 @@ def generate_and_visualize(prompt, num_tokens=10):
     return input_tokens, all_relevances, generated_tokens
 
 def process_relevances(input_tokens, all_relevances, generated_tokens):
+
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
 
     ### FIND ZONES OF INTEREST
@@ -61,6 +69,9 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     kernel_width = 6
     context_width = 20 # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
+
+    if len(generated_tokens) < kernel_width:
+        return [(token, None, None) for token in generated_tokens]
 
     # Compute the rolling sum using 2D convolution
     rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')