joaogante HF staff committed on
Commit
fd9a520
1 Parent(s): b7c3808

get spaces back

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -41,13 +41,15 @@ if __name__ == "__main__":
41
  transition_proba = np.exp(transition_scores)
42
  # We only have scores for the generated tokens, so pop out the prompt tokens
43
  input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
44
- generated_tokens = outputs.sequences[:, input_length:]
 
45
 
46
  # On decoder-only models, you might want to initialize the highlighted output with the prompt (wo labels)
47
  if model.config.is_encoder_decoder:
48
  highlighted_out = []
49
  else:
50
- highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
 
51
  # Get the (decoded_token, label) pairs for the generated tokens
52
  for token, proba in zip(generated_tokens[0], transition_proba[0]):
53
  this_label = None
@@ -56,7 +58,7 @@ if __name__ == "__main__":
56
  if proba >= min_proba:
57
  this_label = label
58
  break
59
- highlighted_out.append((tokenizer.decode(token), this_label))
60
 
61
  return highlighted_out
62
 
 
41
  transition_proba = np.exp(transition_scores)
42
  # We only have scores for the generated tokens, so pop out the prompt tokens
43
  input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
44
+ generated_ids = outputs.sequences[:, input_length:]
45
+ generated_tokens = tokenizer.convert_ids_to_tokens(generated_ids[0])
46
 
47
  # On decoder-only models, you might want to initialize the highlighted output with the prompt (wo labels)
48
  if model.config.is_encoder_decoder:
49
  highlighted_out = []
50
  else:
51
+ input_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids)
52
+ highlighted_out = [(token.replace("_", " "), None) for token in input_tokens]
53
  # Get the (decoded_token, label) pairs for the generated tokens
54
  for token, proba in zip(generated_tokens[0], transition_proba[0]):
55
  this_label = None
 
58
  if proba >= min_proba:
59
  this_label = label
60
  break
61
+ highlighted_out.append((token.replace("_", " "), this_label))
62
 
63
  return highlighted_out
64