Spaces:
Running
Running
Change tokenization visualizer
Browse files
app.py
CHANGED
|
@@ -147,12 +147,21 @@ def make_html_block(student_tokenizer, teacher_tokenizer, text, idx):
|
|
| 147 |
|
| 148 |
highlighted = highlight_groups(student_tokenizer, teacher_tokenizer, s_ids, t_ids, s_groups, t_groups)
|
| 149 |
|
| 150 |
-
# Build tokenized views
|
| 151 |
s_tokens = [student_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in s_ids]
|
| 152 |
t_tokens = [teacher_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in t_ids]
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
tokenized_section = f'''
|
| 158 |
<div style="margin-bottom:15px;">
|
|
|
|
| 147 |
|
| 148 |
highlighted = highlight_groups(student_tokenizer, teacher_tokenizer, s_ids, t_ids, s_groups, t_groups)
|
| 149 |
|
| 150 |
+
# Build tokenized views with alternating colors
|
| 151 |
s_tokens = [student_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in s_ids]
|
| 152 |
t_tokens = [teacher_tokenizer.decode([tid], skip_special_tokens=False, clean_up_tokenization_spaces=False) for tid in t_ids]
|
| 153 |
|
| 154 |
+
color1 = "#fff9c4"
|
| 155 |
+
color2 = "#b2ebf2"
|
| 156 |
+
|
| 157 |
+
s_tokens_html = "".join(
|
| 158 |
+
f'<span style="background-color:{color1 if i % 2 == 0 else color2};">{html.escape(t)}</span>'
|
| 159 |
+
for i, t in enumerate(s_tokens)
|
| 160 |
+
)
|
| 161 |
+
t_tokens_html = "".join(
|
| 162 |
+
f'<span style="background-color:{color1 if i % 2 == 0 else color2};">{html.escape(t)}</span>'
|
| 163 |
+
for i, t in enumerate(t_tokens)
|
| 164 |
+
)
|
| 165 |
|
| 166 |
tokenized_section = f'''
|
| 167 |
<div style="margin-bottom:15px;">
|