cmpatino HF Staff committed on
Commit
8ef5720
·
1 Parent(s): 042c934

Change tokenization visualizer

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -147,12 +147,21 @@ def make_html_block(student_tokenizer, teacher_tokenizer, text, idx):
147
 
148
  highlighted = highlight_groups(student_tokenizer, teacher_tokenizer, s_ids, t_ids, s_groups, t_groups)
149
 
150
- # Build tokenized views
151
  s_tokens = [student_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in s_ids]
152
  t_tokens = [teacher_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in t_ids]
153
 
154
- s_tokens_html = " | ".join(html.escape(f'"{t}"') for t in s_tokens)
155
- t_tokens_html = " | ".join(html.escape(f'"{t}"') for t in t_tokens)
 
 
 
 
 
 
 
 
 
156
 
157
  tokenized_section = f'''
158
  <div style="margin-bottom:15px;">
 
147
 
148
  highlighted = highlight_groups(student_tokenizer, teacher_tokenizer, s_ids, t_ids, s_groups, t_groups)
149
 
150
+ # Build tokenized views with alternating colors
151
  s_tokens = [student_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in s_ids]
152
  t_tokens = [teacher_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in t_ids]
153
 
154
+ color1 = "#fff9c4"
155
+ color2 = "#b2ebf2"
156
+
157
+ s_tokens_html = "".join(
158
+ f'<span style="background-color:{color1 if i % 2 == 0 else color2};">{html.escape(t)}</span>'
159
+ for i, t in enumerate(s_tokens)
160
+ )
161
+ t_tokens_html = "".join(
162
+ f'<span style="background-color:{color1 if i % 2 == 0 else color2};">{html.escape(t)}</span>'
163
+ for i, t in enumerate(t_tokens)
164
+ )
165
 
166
  tokenized_section = f'''
167
  <div style="margin-bottom:15px;">