m-ric (HF staff) committed
Commit e76d6fa • 1 Parent(s): c3e1717

Fix short generations

Files changed (1):
  app.py  +13 -2
app.py CHANGED
@@ -6,9 +6,16 @@ import gradio as gr
 import numpy as np
 import spaces
 from scipy.signal import convolve2d
+from huggingface_hub import login
+import os
+from dotenv import load_dotenv
 
-model = LlamaForCausalLM.from_pretrained("HuggingFaceTB/SmolLM-1.7B-Instruct", torch_dtype=torch.bfloat16, device_map="cuda")
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-1.7B-Instruct")
+load_dotenv()
+
+login(os.getenv("HF_TOKEN"))
+
+model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.bfloat16, device_map="cuda")
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 attnlrp.register(model)
 
 def really_clean_tokens(tokens):
@@ -54,6 +61,7 @@ def generate_and_visualize(prompt, num_tokens=10):
     return input_tokens, all_relevances, generated_tokens
 
 def process_relevances(input_tokens, all_relevances, generated_tokens):
+
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
 
     ### FIND ZONES OF INTEREST
@@ -61,6 +69,9 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     kernel_width = 6
     context_width = 20 # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
+
+    if len(generated_tokens) < kernel_width:
+        return [(token, None, None) for token in generated_tokens]
 
     # Compute the rolling sum using 2D convolution
     rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
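
For context on why the guard fixes short generations: process_relevances builds attention_matrix with what appears to be one row per generated token, then slides a kernel_width x kernel_width window over it with convolve2d(..., mode='valid'), which has nothing to slide over when there are fewer rows than kernel_width. A minimal sketch of the guarded path, using a hypothetical toy matrix and token list in place of the real relevances:

    import numpy as np
    from scipy.signal import convolve2d

    kernel_width = 6
    kernel = np.ones((kernel_width, kernel_width))

    # Hypothetical relevances: one row per generated token, one column per input token.
    # With only 3 generated tokens, the matrix has fewer rows than the kernel.
    attention_matrix = np.random.rand(3, 50)
    generated_tokens = ["The", " answer", "."]  # hypothetical short generation

    if len(generated_tokens) < kernel_width:
        # Too few rows for a (6, 6) rolling-sum window in 'valid' mode, so skip the
        # zone detection and return the tokens with no highlighted spans, matching
        # the (token, None, None) tuples the commit returns in this case.
        result = [(token, None, None) for token in generated_tokens]
    else:
        rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')

Without the guard, the 'valid' convolution cannot produce any window for a generation shorter than kernel_width tokens, which is presumably what broke short generations before this commit.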