Konstantin
committed on
Commit
•
c80f1e3
1
Parent(s):
89b186a
Remove padding in words in the token attribution
Browse files
app.py
CHANGED
@@ -64,16 +64,35 @@ def format_explainer_html(html_string):
|
|
64 |
"""Extract tokens with attribution-based background color."""
|
65 |
inside_token_prefix = '##'
|
66 |
soup = BeautifulSoup(html_string, 'html.parser')
|
67 |
-
p = soup.new_tag('p'
|
68 |
-
|
69 |
# Select token elements and remove model specific tokens
|
|
|
70 |
for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
|
71 |
text = token.font.text.strip()
|
72 |
if text.startswith(inside_token_prefix):
|
73 |
text = text[len(inside_token_prefix):]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
token.string = text
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
|
79 |
def classify_comment(comment):
|
|
|
64 |
"""Extract tokens with attribution-based background color."""
|
65 |
inside_token_prefix = '##'
|
66 |
soup = BeautifulSoup(html_string, 'html.parser')
|
67 |
+
p = soup.new_tag('p',
|
68 |
+
attrs={'style': 'color: black; background-color: white;'})
|
69 |
# Select token elements and remove model specific tokens
|
70 |
+
current_word = None
|
71 |
for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
|
72 |
text = token.font.text.strip()
|
73 |
if text.startswith(inside_token_prefix):
|
74 |
text = text[len(inside_token_prefix):]
|
75 |
+
else:
|
76 |
+
# Create a new span for each word (sequence of sub-tokens)
|
77 |
+
if current_word is not None:
|
78 |
+
p.append(current_word)
|
79 |
+
p.append(' ')
|
80 |
+
current_word = soup.new_tag('span')
|
81 |
+
token.attrs['style'] = f"{token.attrs['style']}; padding: 0.2em 0em;"
|
82 |
token.string = text
|
83 |
+
current_word.append(token)
|
84 |
+
|
85 |
+
# Add last word
|
86 |
+
p.append(current_word)
|
87 |
+
|
88 |
+
# Add left and right-padding to each word
|
89 |
+
for span in p.find_all('span'):
|
90 |
+
span.find_all('mark')[0].attrs['style'] = (
|
91 |
+
f"{span.find_all('mark')[0].attrs['style']} padding-left: 0.2em;")
|
92 |
+
span.find_all('mark')[-1].attrs['style'] = (
|
93 |
+
f"{span.find_all('mark')[-1].attrs['style']} padding-right: 0.2em;")
|
94 |
+
|
95 |
+
return p
|
96 |
|
97 |
|
98 |
def classify_comment(comment):
|