joaogante HF staff committed on
Commit
ac2cf21
1 Parent(s): 242bb12

add markdown

Browse files
Files changed (1) hide show
  1. app.py +73 -55
app.py CHANGED
@@ -3,60 +3,78 @@ import gradio as gr
3
  from transformers import GPT2Tokenizer, AutoModelForCausalLM
4
  import numpy as np
5
 
6
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
7
- model = AutoModelForCausalLM.from_pretrained("gpt2")
8
- tokenizer.pad_token_id = tokenizer.eos_token_id
9
-
10
- # if prob > x, then label = y; sorted in descending probability order
11
- probs_to_label = [
12
- (0.1, "p >= 10%"),
13
- (0.01, "p >= 1%"),
14
- (1e-20, "p < 1%"),
15
- ]
16
-
17
- label_to_color = {
18
- "p >= 10%": "green",
19
- "p >= 1%": "yellow",
20
- "p < 1%": "red"
21
- }
22
-
23
- def get_tokens_and_scores(prompt):
24
- inputs = tokenizer([prompt], return_tensors="pt")
25
- outputs = model.generate(**inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True)
26
- transition_scores = model.compute_transition_scores(
27
- outputs.sequences, outputs.scores, normalize_logits=True
28
- )
29
- transition_proba = np.exp(transition_scores)
30
- input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
31
- generated_tokens = outputs.sequences[:, input_length:]
32
- highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
33
-
34
- for token, proba in zip(generated_tokens[0], transition_proba[0]):
35
- this_label = None
36
- assert 0. <= proba <= 1.0
37
- for min_proba, label in probs_to_label:
38
- if proba >= min_proba:
39
- this_label = label
40
- break
41
- highlighted_out.append((tokenizer.decode(token), this_label))
42
-
43
- return highlighted_out
44
-
45
-
46
- demo = gr.Interface(
47
- get_tokens_and_scores,
48
- [
49
- gr.Textbox(
50
- label="Prompt",
51
- lines=3,
52
- value="Today is",
53
- ),
54
- ],
55
- gr.HighlightedText(
56
- label="Highlighted generation",
57
- combine_adjacent=True,
58
- show_legend=True,
59
- ).style(color_map=label_to_color),
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  if __name__ == "__main__":
62
  demo.launch()
 
# New version of app.py: same GPT-2 token-probability highlighter, rebuilt
# with gr.Blocks (adds a Markdown header and an explicit Generate button).
from transformers import GPT2Tokenizer, AutoModelForCausalLM
import numpy as np


MODEL_NAME = "gpt2"


if __name__ == "__main__":
    # Define your model and your tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    if tokenizer.pad_token_id is None:
        # Models like GPT-2 have no pad token; reuse EOS for padding and keep
        # the model config in sync so generate() does not warn.
        tokenizer.pad_token_id = tokenizer.eos_token_id
        model.config.pad_token_id = model.config.eos_token_id

    # Define your color-coding labels; if prob > x, then label = y; Sorted in descending probability order!
    probs_to_label = [
        (0.1, "p >= 10%"),
        (0.01, "p >= 1%"),
        (1e-20, "p < 1%"),
    ]

    label_to_color = {
        "p >= 10%": "green",
        "p >= 1%": "yellow",
        "p < 1%": "red"
    }

    def get_tokens_and_labels(prompt):
        """
        Given the prompt (text), return a list of tuples (decoded_token, label)
        """
        inputs = tokenizer([prompt], return_tensors="pt")
        outputs = model.generate(
            **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True
        )
        # Important, don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
        transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
        transition_proba = np.exp(transition_scores)
        # We only have scores for the generated tokens, so pop out the prompt tokens
        input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
        generated_tokens = outputs.sequences[:, input_length:]

        # initialize the highlighted output with the prompt, which will have no color label
        # NOTE(review): input_ids is 2-D, so this iterates the batch dimension
        # and decodes the whole prompt as a single unlabeled segment; the
        # visual result is the same because combine_adjacent=True — confirm.
        highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
        # get the (decoded_token, label) pairs for the generated tokens
        for token, proba in zip(generated_tokens[0], transition_proba[0]):
            this_label = None
            # Sanity check: exp of a normalized log-prob must be a probability.
            assert 0. <= proba <= 1.0
            # probs_to_label is sorted descending, so the first threshold that
            # matches is the correct bracket.
            for min_proba, label in probs_to_label:
                if proba >= min_proba:
                    this_label = label
                    break
            highlighted_out.append((tokenizer.decode(token), this_label))

        return highlighted_out

    demo = gr.Blocks()
    with demo:
        gr.Markdown(
            """
            # Foo Bar
            """
        )

        prompt = gr.Textbox(label="Prompt", lines=3, value="Today is")
        # FIX: removed the trailing comma that was left over from the old
        # gr.Interface argument list — it turned `highlighted_text` into a
        # 1-tuple instead of the HighlightedText component.
        highlighted_text = gr.HighlightedText(
            label="Highlighted generation",
            combine_adjacent=True,
            show_legend=True,
        ).style(color_map=label_to_color)
        button = gr.Button(f"Generate with {MODEL_NAME}")

        button.click(get_tokens_and_labels, inputs=prompt, outputs=highlighted_text)


if __name__ == "__main__":
    demo.launch()