SixOpen committed on
Commit
a2466a4
•
1 Parent(s): 6a3ae96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -131,11 +131,11 @@ def patch_representation(model, input_ids, layer, position, representation):
131
  output[:, position, :] = representation
132
  return output
133
 
134
- handle = model.model.layers[layer].mlp.register_forward_hook(hook)
135
  patched_outputs = model(input_ids)
136
  handle.remove()
137
 
138
- return patched_outputs.logits[:, -1, :]
139
 
140
  @spaces.GPU(duration=120)
141
  def compare_models(text, layers, neuron_indices, top_k, max_length, att_heads, temperature, top_k_sampling, top_p_sampling):
@@ -221,7 +221,7 @@ inputs = [
221
  gr.Textbox(label="Layers", value="9,10,11", placeholder="e.g. 9,10,11"),
222
  gr.Textbox(label="Neuron Indices", value="100,200,300,400", placeholder="e.g. 100,200,300,400"),
223
  gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Top Tokens"),
224
- gr.Slider(minimum=50, maximum=500, step=1, value=92, label="Max Response Length"),
225
  gr.Textbox(label="Attention Heads", value="108,120,132", placeholder="e.g. 108,120,132 (Layer 9 Heads 0,1,2)"),
226
  gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.7, label="Temperature"),
227
  gr.Slider(minimum=0, maximum=100, step=1, value=50, label="Top-k Sampling"),
@@ -237,7 +237,7 @@ outputs = [
237
  gr.Plot(label="Abliterated Model Activation Heatmap")
238
  ]
239
 
240
- title = "Phi-3 Abliteration Analysis"
241
  description = """
242
  Compare the original phi-3 model with its ablated counterpart to scrutinize its inner workings and identify differences- suggestion: try prompts where refusal would be expected (i.e. How do I torrent a movie online?), patterns of letters/characters such as repetitions, or number sequences.
243
  The plots and results will update based on your selection, hover over them for details.
 
131
  output[:, position, :] = representation
132
  return output
133
 
134
+ handle = model.model.layers[layer].mlp.register_forward_hook(hook) #during the forward pass, hook is called with i/o of the MLP at the given layer
135
  patched_outputs = model(input_ids)
136
  handle.remove()
137
 
138
+ return patched_outputs.logits[:, -1, :] #returns logits of the patched output at the last position
139
 
140
  @spaces.GPU(duration=120)
141
  def compare_models(text, layers, neuron_indices, top_k, max_length, att_heads, temperature, top_k_sampling, top_p_sampling):
 
221
  gr.Textbox(label="Layers", value="9,10,11", placeholder="e.g. 9,10,11"),
222
  gr.Textbox(label="Neuron Indices", value="100,200,300,400", placeholder="e.g. 100,200,300,400"),
223
  gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Top Tokens"),
224
+ gr.Slider(minimum=50, maximum=500, step=1, value=70, label="Max Response Length"),
225
  gr.Textbox(label="Attention Heads", value="108,120,132", placeholder="e.g. 108,120,132 (Layer 9 Heads 0,1,2)"),
226
  gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.7, label="Temperature"),
227
  gr.Slider(minimum=0, maximum=100, step=1, value=50, label="Top-k Sampling"),
 
237
  gr.Plot(label="Abliterated Model Activation Heatmap")
238
  ]
239
 
240
+ title = "Phi-3 Analysis"
241
  description = """
242
  Compare the original phi-3 model with its ablated counterpart to scrutinize its inner workings and identify differences- suggestion: try prompts where refusal would be expected (i.e. How do I torrent a movie online?), patterns of letters/characters such as repetitions, or number sequences.
243
  The plots and results will update based on your selection, hover over them for details.