Konstantin committed on
Commit
c80f1e3
1 Parent(s): 89b186a

Remove padding in words in the token attribution

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -64,16 +64,35 @@ def format_explainer_html(html_string):
64
  """Extract tokens with attribution-based background color."""
65
  inside_token_prefix = '##'
66
  soup = BeautifulSoup(html_string, 'html.parser')
67
- p = soup.new_tag('p')
68
- p.append(soup.new_tag('font', attrs={'color': 'black'}))
69
  # Select token elements and remove model specific tokens
 
70
  for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
71
  text = token.font.text.strip()
72
  if text.startswith(inside_token_prefix):
73
  text = text[len(inside_token_prefix):]
 
 
 
 
 
 
 
74
  token.string = text
75
- p.font.append(token)
76
- return p.prettify()
 
 
 
 
 
 
 
 
 
 
 
77
 
78
 
79
  def classify_comment(comment):
64
  """Extract tokens with attribution-based background color."""
65
  inside_token_prefix = '##'
66
  soup = BeautifulSoup(html_string, 'html.parser')
67
+ p = soup.new_tag('p',
68
+ attrs={'style': 'color: black; background-color: white;'})
69
  # Select token elements and remove model specific tokens
70
+ current_word = None
71
  for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
72
  text = token.font.text.strip()
73
  if text.startswith(inside_token_prefix):
74
  text = text[len(inside_token_prefix):]
75
+ else:
76
+ # Create a new span for each word (sequence of sub-tokens)
77
+ if current_word is not None:
78
+ p.append(current_word)
79
+ p.append(' ')
80
+ current_word = soup.new_tag('span')
81
+ token.attrs['style'] = f"{token.attrs['style']}; padding: 0.2em 0em;"
82
  token.string = text
83
+ current_word.append(token)
84
+
85
+ # Add last word
86
+ p.append(current_word)
87
+
88
+ # Add left and right-padding to each word
89
+ for span in p.find_all('span'):
90
+ span.find_all('mark')[0].attrs['style'] = (
91
+ f"{span.find_all('mark')[0].attrs['style']} padding-left: 0.2em;")
92
+ span.find_all('mark')[-1].attrs['style'] = (
93
+ f"{span.find_all('mark')[-1].attrs['style']} padding-right: 0.2em;")
94
+
95
+ return p
96
 
97
 
98
  def classify_comment(comment):