Spaces:

joaogante
/

color-coded-text-generation

Running

App Files Files Community

joaogante HF staff committed on Feb 7, 2023

Commit

ac2cf21

•

1 Parent(s): 242bb12

add markdown

Browse files

Files changed (1) hide show

app.py +73 -55

app.py CHANGED Viewed

@@ -3,60 +3,78 @@ import gradio as gr
 from transformers import GPT2Tokenizer, AutoModelForCausalLM
 import numpy as np
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
-model = AutoModelForCausalLM.from_pretrained("gpt2")
-tokenizer.pad_token_id = tokenizer.eos_token_id
-# if prob > x, then label = y; sorted in descending probability order
-probs_to_label = [
-    (0.1, "p >= 10%"),
-    (0.01, "p >= 1%"),
-    (1e-20, "p < 1%"),
-]
-label_to_color = {
-    "p >= 10%": "green",
-    "p >= 1%": "yellow",
-    "p < 1%": "red"
-}
-def get_tokens_and_scores(prompt):
-    inputs = tokenizer([prompt], return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True)
-    transition_scores = model.compute_transition_scores(
-        outputs.sequences, outputs.scores, normalize_logits=True
-    )
-    transition_proba = np.exp(transition_scores)
-    input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
-    generated_tokens = outputs.sequences[:, input_length:]
-    highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
-    for token, proba in zip(generated_tokens[0], transition_proba[0]):
-        this_label = None
-        assert 0. <= proba <= 1.0
-        for min_proba, label in probs_to_label:
-            if proba >= min_proba:
-                this_label = label
-                break
-        highlighted_out.append((tokenizer.decode(token), this_label))
-    return highlighted_out
-demo = gr.Interface(
-    get_tokens_and_scores,
-    [
-        gr.Textbox(
-          label="Prompt",
-          lines=3,
-          value="Today is",
-        ),
-    ],
-    gr.HighlightedText(
-        label="Highlighted generation",
-        combine_adjacent=True,
-        show_legend=True,
-    ).style(color_map=label_to_color),
-)
 if __name__ == "__main__":
     demo.launch()

 from transformers import GPT2Tokenizer, AutoModelForCausalLM
 import numpy as np
+MODEL_NAME = "gpt2"  # checkpoint name passed to both `from_pretrained` calls below
+if __name__ == "__main__":
+    # Load the tokenizer and the causal language model identified by MODEL_NAME
+    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+    if tokenizer.pad_token_id is None:  # no pad token defined: reuse the EOS token id for padding
+        tokenizer.pad_token_id = tokenizer.eos_token_id
+        model.config.pad_token_id = model.config.eos_token_id
+    # Color-coding labels as (minimum probability, label) pairs; if prob >= x, then label = y; the first match wins, so keep them sorted in descending probability order!
+    probs_to_label = [
+        (0.1, "p >= 10%"),
+        (0.01, "p >= 1%"),
+        (1e-20, "p < 1%"),  # catch-all for the remaining probabilities; anything below 1e-20 gets no label
+    ]
+    label_to_color = {  # display color for each label above; passed as `color_map` to the HighlightedText component
+        "p >= 10%": "green",
+        "p >= 1%": "yellow",
+        "p < 1%": "red"
+    }
+    def get_tokens_and_labels(prompt):
+        """
+        Given the prompt (text), return a list of tuples (decoded_token, label)
+        """
+        inputs = tokenizer([prompt], return_tensors="pt")
+        outputs = model.generate(
+            **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True
+        )
+        # Important, don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
+        transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
+        transition_proba = np.exp(transition_scores)
+        # We only have scores for the generated tokens, so pop out the prompt tokens
+        input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
+        generated_tokens = outputs.sequences[:, input_length:]
+        # initialize the highlighted output with the prompt, which will have no color label
+        highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
+        # get the (decoded_token, label) pairs for the generated tokens
+        for token, proba in zip(generated_tokens[0], transition_proba[0]):
+            this_label = None
+            assert 0. <= proba <= 1.0
+            for min_proba, label in probs_to_label:
+                if proba >= min_proba:
+                    this_label = label
+                    break
+            highlighted_out.append((tokenizer.decode(token), this_label))
+        return highlighted_out
+    demo = gr.Blocks()
+    with demo:
+        gr.Markdown(
+            """
+            # Foo Bar
+            """
+        )
+        prompt = gr.Textbox(label="Prompt", lines=3, value="Today is")
+        highlighted_text = gr.HighlightedText(
+            label="Highlighted generation",
+            combine_adjacent=True,
+            show_legend=True,
+        ).style(color_map=label_to_color),
+        button = gr.Button(f"Generate with {MODEL_NAME}")
+        button.click(get_tokens_and_labels, inputs=prompt, outputs=highlighted_text)
 if __name__ == "__main__":
     demo.launch()