m-ric HF staff committed on
Commit
4cbfdf7
1 Parent(s): e76d6fa

Add quantization options

Browse files
Files changed (2) hide show
  1. app.py +24 -9
  2. requirements.txt +2 -1
app.py CHANGED
@@ -10,13 +10,25 @@ from huggingface_hub import login
10
  import os
11
  from dotenv import load_dotenv
12
 
 
 
 
 
 
 
 
13
  load_dotenv()
14
 
15
- login(os.get("HF_TOKEN"))
 
 
16
 
17
- model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype=torch.bfloat16, device_map="cuda")
18
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 
 
19
  attnlrp.register(model)
 
20
 
21
  def really_clean_tokens(tokens):
22
  tokens = clean_tokens(tokens)
@@ -65,7 +77,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
65
  attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
66
 
67
  ### FIND ZONES OF INTEREST
68
- threshold_per_token = 0.2
69
  kernel_width = 6
70
  context_width = 20 # Number of tokens to include as context on each side
71
  kernel = np.ones((kernel_width, kernel_width))
@@ -74,10 +86,11 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
74
  return [(token, None, None) for token in generated_tokens]
75
 
76
  # Compute the rolling sum using 2D convolution
77
- rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
78
 
79
  # Find where the rolled sum is greater than the threshold
80
- significant_areas = rolled_sum > kernel_width**2 * threshold_per_token
 
81
 
82
  def find_largest_contiguous_patch(array):
83
  current_patch_start = None
@@ -191,7 +204,7 @@ css = """
191
  examples = [
192
  """Context: Mount Everest attracts many climbers, including highly experienced mountaineers. There are two main climbing routes, one approaching the summit from the southeast in Nepal (known as the standard route) and the other from the north in Tibet. While not posing substantial technical climbing challenges on the standard route, Everest presents dangers such as altitude sickness, weather, and wind, as well as hazards from avalanches and the Khumbu Icefall. As of November 2022, 310 people have died on Everest. Over 200 bodies remain on the mountain and have not been removed due to the dangerous conditions. The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.
193
 
194
- Question: How high did they climb in 1922? According to the text, the 1922 expedition reached 8,""",
195
  """Hurricane Katrina killed hundreds of people as it made landfall on New Orleans in 2005 - many of these deaths could have been avoided if alerts had been given one day earlier. Accurate weather forecasts are really life-saving.
196
 
197
  🔥 Now, NASA and IBM just dropped a game-changing new model: the first ever foundation model for weather! This means, it's the first time we have a generalist model not restricted to one task, but able to predict 160 weather variables!
@@ -200,7 +213,7 @@ Prithvi WxC (Prithvi, "पृथ्वी", is the Sanskrit name for Earth) - is
200
 
201
  💡 But it comes with some important tweaks: under the hood, Prithvi WxC uses a clever transformer-based architecture with 25 encoder and 5 decoder blocks. It alternates between "local" and "global" attention to capture both regional and global weather patterns.
202
 
203
- How many weather variables can Prithvi predict? Prithvi can""",
204
  """Transformers v4.45.0 released: includes a lightning-fast method to build tools! ⚡️
205
 
206
  During user research with colleagues @MoritzLaurer and @Jofthomas , we discovered that the class definition currently in used to define a Tool in transformers.agents is a bit tedious to use, because it goes in great detail.
@@ -209,7 +222,9 @@ During user research with colleagues @MoritzLaurer and @Jofthomas , we discovere
209
 
210
  ✅ Voilà, you're good to go!
211
 
212
- How can you build tools simply in transformers? Just use the decorator""",
 
 
213
  ]
214
 
215
  with gr.Blocks(css=css) as demo:
 
10
  import os
11
  from dotenv import load_dotenv
12
 
13
+ from transformers import BitsAndBytesConfig
14
+
15
+ quantization_config = BitsAndBytesConfig(
16
+ load_in_8bit=True,
17
+ bnb_8bit_compute_dtype=torch.bfloat16,
18
+ )
19
+
20
  load_dotenv()
21
 
22
+ login(os.getenv("HF_TOKEN"))
23
+
24
+ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
25
 
26
+ print(f"Loading model {model_id}...")
27
+
28
+ model = LlamaForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map="cuda", use_safetensors=True)
29
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
30
  attnlrp.register(model)
31
+ print(f"Loaded model.")
32
 
33
  def really_clean_tokens(tokens):
34
  tokens = clean_tokens(tokens)
 
77
  attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
78
 
79
  ### FIND ZONES OF INTEREST
80
+ threshold_per_token = 0.05
81
  kernel_width = 6
82
  context_width = 20 # Number of tokens to include as context on each side
83
  kernel = np.ones((kernel_width, kernel_width))
 
86
  return [(token, None, None) for token in generated_tokens]
87
 
88
  # Compute the rolling sum using 2D convolution
89
+ rolled_sum = convolve2d(attention_matrix, kernel, mode='valid') / kernel_width**2
90
 
91
  # Find where the rolled sum is greater than the threshold
92
+ significant_areas = rolled_sum > threshold_per_token
93
+ print(f"Found {significant_areas.sum()} relevant tokens. Lower threshold to find more. Max was {rolled_sum.max()}")
94
 
95
  def find_largest_contiguous_patch(array):
96
  current_patch_start = None
 
204
  examples = [
205
  """Context: Mount Everest attracts many climbers, including highly experienced mountaineers. There are two main climbing routes, one approaching the summit from the southeast in Nepal (known as the standard route) and the other from the north in Tibet. While not posing substantial technical climbing challenges on the standard route, Everest presents dangers such as altitude sickness, weather, and wind, as well as hazards from avalanches and the Khumbu Icefall. As of November 2022, 310 people have died on Everest. Over 200 bodies remain on the mountain and have not been removed due to the dangerous conditions. The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.
206
 
207
+ Question: How high did they climb in 1922? According to the text,""",
208
  """Hurricane Katrina killed hundreds of people as it made landfall on New Orleans in 2005 - many of these deaths could have been avoided if alerts had been given one day earlier. Accurate weather forecasts are really life-saving.
209
 
210
  🔥 Now, NASA and IBM just dropped a game-changing new model: the first ever foundation model for weather! This means, it's the first time we have a generalist model not restricted to one task, but able to predict 160 weather variables!
 
213
 
214
  💡 But it comes with some important tweaks: under the hood, Prithvi WxC uses a clever transformer-based architecture with 25 encoder and 5 decoder blocks. It alternates between "local" and "global" attention to capture both regional and global weather patterns.
215
 
216
+ How many weather variables can Prithvi predict? Answer:""",
217
  """Transformers v4.45.0 released: includes a lightning-fast method to build tools! ⚡️
218
 
219
  During user research with colleagues @MoritzLaurer and @Jofthomas , we discovered that the class definition currently in used to define a Tool in transformers.agents is a bit tedious to use, because it goes in great detail.
 
222
 
223
  ✅ Voilà, you're good to go!
224
 
225
+ How can you build tools simply in transformers?
226
+
227
+ Answer:""",
228
  ]
229
 
230
  with gr.Blocks(css=css) as demo:
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  lxt
2
  numpy
3
- scipy
 
 
1
  lxt
2
  numpy
3
+ scipy
4
+ python-dotenv