taufeeque committed on
Commit
835c841
1 Parent(s): 811a8b4
Files changed (3) hide show
  1. .streamlit/config.toml +8 -0
  2. Code_Browser.py +13 -7
  3. utils.py +31 -30
.streamlit/config.toml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [global]
2
+ disableWidgetStateDuplicationWarning = true
3
+
4
+ [theme]
5
+ base="dark"
6
+ primaryColor="DeepSkyBlue"
7
+ backgroundColor="#121821"
8
+ secondaryBackgroundColor="#303540"
Code_Browser.py CHANGED
@@ -25,10 +25,12 @@ dirs = glob.glob(base_cache_dir + "models/*/")
25
  model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
26
  model_name_options = ["_".join(m) for m in model_name_options]
27
  model_name_options = sorted(set(model_name_options))
 
28
 
29
  model_name = st.selectbox(
30
  "Model",
31
  model_name_options,
 
32
  key=webapp_utils.persist("model_name"),
33
  )
34
 
@@ -87,10 +89,11 @@ st.session_state["ccb"] = ccb
87
  st.session_state["cb_at"] = cb_at
88
  st.session_state["is_attn"] = is_attn
89
 
90
- st.markdown("## Metrics")
91
- # hide metrics by default
92
- if st.checkbox("Show Model Metrics"):
93
- st.write(metrics)
 
94
 
95
  st.markdown("## Demo Codes")
96
  demo_file_path = codes_cache_path + "demo_codes.txt"
@@ -122,7 +125,7 @@ if st.checkbox("Show Demo Codes"):
122
  if len(demo_codes) == 0:
123
  st.markdown(
124
  f"""
125
- <div style="font-size: 1.3rem; color: red;">
126
  No demo codes found in file {demo_file_path}
127
  </div>
128
  """,
@@ -214,7 +217,10 @@ if regex_pattern:
214
  ccb,
215
  model_name,
216
  )
217
- st.markdown(f"Found :green[{re_token_matches}] matches")
 
 
 
218
  num_search_cols = 7 if is_attn else 6
219
  non_deploy_offset = 0
220
  if not DEPLOY_MODE:
@@ -296,7 +302,7 @@ if regex_pattern:
296
 
297
  st.markdown("## Code Token Activations")
298
 
299
- filter_codes = st.checkbox("Filter Codes", key="filter_codes")
300
  act_range, layer_code_acts = None, None
301
  if filter_codes:
302
  act_range = st.slider(
 
25
  model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
26
  model_name_options = ["_".join(m) for m in model_name_options]
27
  model_name_options = sorted(set(model_name_options))
28
+ def_model_idx = ["attn" in m for m in model_name_options].index(True)
29
 
30
  model_name = st.selectbox(
31
  "Model",
32
  model_name_options,
33
+ index=def_model_idx,
34
  key=webapp_utils.persist("model_name"),
35
  )
36
 
 
89
  st.session_state["cb_at"] = cb_at
90
  st.session_state["is_attn"] = is_attn
91
 
92
+ if not DEPLOY_MODE:
93
+ st.markdown("## Metrics")
94
+ # hide metrics by default
95
+ if st.checkbox("Show Model Metrics"):
96
+ st.write(metrics)
97
 
98
  st.markdown("## Demo Codes")
99
  demo_file_path = codes_cache_path + "demo_codes.txt"
 
125
  if len(demo_codes) == 0:
126
  st.markdown(
127
  f"""
128
+ <div style="font-size: 1.0rem; color: red;">
129
  No demo codes found in file {demo_file_path}
130
  </div>
131
  """,
 
217
  ccb,
218
  model_name,
219
  )
220
+ st.markdown(
221
+ f"Found <span style='color:green;'>{re_token_matches}</span> matches",
222
+ unsafe_allow_html=True,
223
+ )
224
  num_search_cols = 7 if is_attn else 6
225
  non_deploy_offset = 0
226
  if not DEPLOY_MODE:
 
302
 
303
  st.markdown("## Code Token Activations")
304
 
305
+ filter_codes = st.checkbox("Show filters", key="filter_codes")
306
  act_range, layer_code_acts = None, None
307
  if filter_codes:
308
  act_range = st.slider(
utils.py CHANGED
@@ -152,39 +152,42 @@ def features_to_tokens(cb_key, cb_acts, num_codes, code=None):
152
  return features_tokens
153
 
154
 
155
- def color_str(s: str, color: str, html: bool):
156
  """Color the string for html or terminal."""
 
157
  if html:
 
158
  return f"<span style='color:{color}'>{s}</span>"
159
  else:
 
160
  return colored(s, color)
161
 
162
 
163
- def color_tokens_red_automata(tokens, red_idx, html=False):
164
- """Separate states with a dash and color red the tokens in red_idx."""
165
  ret_string = ""
166
- itr_over_red_idx = 0
167
  tokens_enumerate = enumerate(tokens)
168
  if tokens[0] == "<|endoftext|>":
169
  next(tokens_enumerate)
170
- if red_idx[0] == 0:
171
- itr_over_red_idx += 1
172
  for i, c in tokens_enumerate:
173
  if i % 2 == 1:
174
  ret_string += "-"
175
- if itr_over_red_idx < len(red_idx) and i == red_idx[itr_over_red_idx]:
176
- ret_string += color_str(c, "red", html)
177
- itr_over_red_idx += 1
178
  else:
179
  ret_string += c
180
  return ret_string
181
 
182
 
183
- def color_tokens_red(tokens, red_idx, n=3, html=False):
184
- """Color red the tokens in red_idx."""
185
  ret_string = ""
186
  last_colored_token_idx = -1
187
- for i in red_idx:
188
  c_str = tokens[i]
189
  if i <= last_colored_token_idx + 2 * n + 1:
190
  ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
@@ -194,7 +197,7 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
194
  )
195
  ret_string += " ... "
196
  ret_string += "".join(tokens[i - n : i])
197
- ret_string += color_str(c_str, "red", html)
198
  last_colored_token_idx = i
199
  ret_string += "".join(
200
  tokens[
@@ -207,15 +210,15 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
207
  def prepare_example_print(
208
  example_id,
209
  example_tokens,
210
- tokens_to_color_red,
211
  html,
212
- color_red_fn=color_tokens_red,
213
  ):
214
  """Format example to print."""
215
- example_output = color_str(example_id, "green", html)
216
  example_output += (
217
  ": "
218
- + color_red_fn(example_tokens, tokens_to_color_red, html=html)
219
  + ("<br>" if html else "\n")
220
  )
221
  return example_output
@@ -238,10 +241,8 @@ def tkn_print(
238
  print_output = [] if return_example_list else ""
239
  curr_ex = ll[0][0]
240
  total_examples = 0
241
- tokens_to_color_red = []
242
- color_red_fn = (
243
- color_tokens_red_automata if separate_states else partial(color_tokens_red, n=n)
244
- )
245
  for idx in indices:
246
  if total_examples > max_examples:
247
  break
@@ -251,31 +252,31 @@ def tkn_print(
251
  curr_ex_output = prepare_example_print(
252
  curr_ex,
253
  tokens[curr_ex],
254
- tokens_to_color_red,
255
  html,
256
- color_red_fn,
257
  )
258
  total_examples += 1
259
  if return_example_list:
260
- print_output.append((curr_ex_output, len(tokens_to_color_red)))
261
  else:
262
  print_output += curr_ex_output
263
  curr_ex = i
264
- tokens_to_color_red = []
265
- tokens_to_color_red.append(j)
266
  curr_ex_output = prepare_example_print(
267
  curr_ex,
268
  tokens[curr_ex],
269
- tokens_to_color_red,
270
  html,
271
- color_red_fn,
272
  )
273
  if return_example_list:
274
- print_output.append((curr_ex_output, len(tokens_to_color_red)))
275
  else:
276
  print_output += curr_ex_output
277
  asterisk_str = "********************************************"
278
- print_output += color_str(asterisk_str, "green", html)
279
  total_examples += 1
280
 
281
  return print_output
 
152
  return features_tokens
153
 
154
 
155
+ def color_str(s: str, html: bool, color: Optional[str] = None):
156
  """Color the string for html or terminal."""
157
+
158
  if html:
159
+ color = "DeepSkyBlue" if color is None else color
160
  return f"<span style='color:{color}'>{s}</span>"
161
  else:
162
+ color = "light_cyan" if color is None else color
163
  return colored(s, color)
164
 
165
 
166
+ def color_tokens_automata(tokens, color_idx, html=False):
167
+ """Separate states with a dash and color the tokens in color_idx."""
168
  ret_string = ""
169
+ itr_over_color_idx = 0
170
  tokens_enumerate = enumerate(tokens)
171
  if tokens[0] == "<|endoftext|>":
172
  next(tokens_enumerate)
173
+ if color_idx[0] == 0:
174
+ itr_over_color_idx += 1
175
  for i, c in tokens_enumerate:
176
  if i % 2 == 1:
177
  ret_string += "-"
178
+ if itr_over_color_idx < len(color_idx) and i == color_idx[itr_over_color_idx]:
179
+ ret_string += color_str(c, html)
180
+ itr_over_color_idx += 1
181
  else:
182
  ret_string += c
183
  return ret_string
184
 
185
 
186
+ def color_tokens(tokens, color_idx, n=3, html=False):
187
+ """Color the tokens in color_idx."""
188
  ret_string = ""
189
  last_colored_token_idx = -1
190
+ for i in color_idx:
191
  c_str = tokens[i]
192
  if i <= last_colored_token_idx + 2 * n + 1:
193
  ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
 
197
  )
198
  ret_string += " ... "
199
  ret_string += "".join(tokens[i - n : i])
200
+ ret_string += color_str(c_str, html)
201
  last_colored_token_idx = i
202
  ret_string += "".join(
203
  tokens[
 
210
  def prepare_example_print(
211
  example_id,
212
  example_tokens,
213
+ tokens_to_color,
214
  html,
215
+ color_fn=color_tokens,
216
  ):
217
  """Format example to print."""
218
+ example_output = color_str(example_id, html, "green")
219
  example_output += (
220
  ": "
221
+ + color_fn(example_tokens, tokens_to_color, html=html)
222
  + ("<br>" if html else "\n")
223
  )
224
  return example_output
 
241
  print_output = [] if return_example_list else ""
242
  curr_ex = ll[0][0]
243
  total_examples = 0
244
+ tokens_to_color = []
245
+ color_fn = color_tokens_automata if separate_states else partial(color_tokens, n=n)
 
 
246
  for idx in indices:
247
  if total_examples > max_examples:
248
  break
 
252
  curr_ex_output = prepare_example_print(
253
  curr_ex,
254
  tokens[curr_ex],
255
+ tokens_to_color,
256
  html,
257
+ color_fn,
258
  )
259
  total_examples += 1
260
  if return_example_list:
261
+ print_output.append((curr_ex_output, len(tokens_to_color)))
262
  else:
263
  print_output += curr_ex_output
264
  curr_ex = i
265
+ tokens_to_color = []
266
+ tokens_to_color.append(j)
267
  curr_ex_output = prepare_example_print(
268
  curr_ex,
269
  tokens[curr_ex],
270
+ tokens_to_color,
271
  html,
272
+ color_fn,
273
  )
274
  if return_example_list:
275
+ print_output.append((curr_ex_output, len(tokens_to_color)))
276
  else:
277
  print_output += curr_ex_output
278
  asterisk_str = "********************************************"
279
+ print_output += color_str(asterisk_str, html, "green")
280
  total_examples += 1
281
 
282
  return print_output