Spaces:

joaogante
/

color-coded-text-generation

Running

joaogante HF staff committed on Feb 7, 2023

Commit

fd9a520

•

1 Parent(s): b7c3808

get spaces back

Files changed (1) hide show

app.py CHANGED Viewed

@@ -41,13 +41,15 @@ if __name__ == "__main__":
         transition_proba = np.exp(transition_scores)
         # We only have scores for the generated tokens, so pop out the prompt tokens
         input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
-        generated_tokens = outputs.sequences[:, input_length:]
         # On decoder-only models, you might want to initialize the highlighted output with the prompt (wo labels)
         if model.config.is_encoder_decoder:
             highlighted_out = []
         else:
-            highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
         # Get the (decoded_token, label) pairs for the generated tokens
         for token, proba in zip(generated_tokens[0], transition_proba[0]):
             this_label = None
@@ -56,7 +58,7 @@ if __name__ == "__main__":
                 if proba >= min_proba:
                     this_label = label
                     break
-            highlighted_out.append((tokenizer.decode(token), this_label))
         return highlighted_out

         transition_proba = np.exp(transition_scores)
         # We only have scores for the generated tokens, so pop out the prompt tokens
         input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
+        generated_ids = outputs.sequences[:, input_length:]
+        generated_tokens = tokenizer.convert_ids_to_tokens(generated_ids[0])
         # On decoder-only models, you might want to initialize the highlighted output with the prompt (wo labels)
         if model.config.is_encoder_decoder:
             highlighted_out = []
         else:
+            input_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids)
+            highlighted_out = [(token.replace("_", " "), None) for token in input_tokens]
         # Get the (decoded_token, label) pairs for the generated tokens
         for token, proba in zip(generated_tokens[0], transition_proba[0]):
             this_label = None
                 if proba >= min_proba:
                     this_label = label
                     break
+            highlighted_out.append((token.replace("_", " "), this_label))
         return highlighted_out