Saibo Geng committed
Commit 5204c67 • 1 parent: f026dba

use Gante GPT2 code

Files changed (1):
  1. app.py +18 -36
app.py CHANGED
@@ -1,16 +1,14 @@
 import gradio as gr
 
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import GPT2Tokenizer, AutoModelForCausalLM
 import numpy as np
 
-
 MODEL_NAME = "gpt2"
 
-
 if __name__ == "__main__":
     # Define your model and your tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)  # or AutoModelForCausalLM
+    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
         model.config.pad_token_id = model.config.eos_token_id
@@ -28,70 +26,54 @@ if __name__ == "__main__":
         "p < 1%": "red"
     }
 
+
     def get_tokens_and_labels(prompt):
         """
         Given the prompt (text), return a list of tuples (decoded_token, label)
         """
         inputs = tokenizer([prompt], return_tensors="pt")
         outputs = model.generate(
-            **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True
+            **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True
         )
         # Important: don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
         transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
         transition_proba = np.exp(transition_scores)
         # We only have scores for the generated tokens, so pop out the prompt tokens
         input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
-        generated_ids = outputs.sequences[:, input_length:]
-        generated_tokens = tokenizer.convert_ids_to_tokens(generated_ids[0])
+        generated_tokens = outputs.sequences[:, input_length:]
 
-        # Important: you might need to find a tokenization character to replace (e.g. "Ġ" for BPE) and get the correct
-        # spacing into the final output 👼
-        if model.config.is_encoder_decoder:
-            highlighted_out = []
-        else:
-            input_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids)
-            highlighted_out = [(token.replace("▁", " "), None) for token in input_tokens]
+        # Initialize the highlighted output with the prompt, which will have no color label
+        highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
         # Get the (decoded_token, label) pairs for the generated tokens
-        for token, proba in zip(generated_tokens, transition_proba[0]):
+        for token, proba in zip(generated_tokens[0], transition_proba[0]):
             this_label = None
             assert 0. <= proba <= 1.0
             for min_proba, label in probs_to_label:
                 if proba >= min_proba:
                     this_label = label
                     break
-            highlighted_out.append((token.replace("▁", " "), this_label))
+            highlighted_out.append((tokenizer.decode(token), this_label))
 
         return highlighted_out
 
+
     demo = gr.Blocks()
     with demo:
         gr.Markdown(
             """
-            # 🌈 Color-Coded Text Generation 🌈
-
+            # 🌈 Color Coded Text Generation 🌈
             This is a demo of how you can obtain the probabilities of each generated token, and use them to
-            color code the model output. Internally, it relies on
-            [`compute_transition_scores`](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.compute_transition_scores),
+            color code the model output.
+            Feel free to clone this demo and modify it to your needs 🤗
+            Internally, it relies on [`compute_transition_scores`](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.compute_transition_scores),
             which was added in `transformers` v4.26.0.
-
-            ⚠️ For instance, with the pre-populated input and its color-coded output, you can see that
-            `google/flan-t5-base` struggles with arithmetics.
-
-            🤗 Feel free to clone this demo and modify it to your needs 🤗
             """
         )
 
         with gr.Row():
             with gr.Column():
-                prompt = gr.Textbox(
-                    label="Prompt",
-                    lines=3,
-                    value=(
-                        "Answer the following question by reasoning step-by-step. The cafeteria had 23 apples. "
-                        "If they used 20 for lunch and bought 6 more, how many apples do they have?"
-                    ),
-                )
-                button = gr.Button(f"Generate with {MODEL_NAME}")
+                prompt = gr.Textbox(label="Prompt", lines=3, value="Today is")
+                button = gr.Button(f"Generate with {MODEL_NAME}, using sampling!")
             with gr.Column():
                 highlighted_text = gr.HighlightedText(
                     label="Highlighted generation",
@@ -101,6 +83,6 @@ if __name__ == "__main__":
 
         button.click(get_tokens_and_labels, inputs=prompt, outputs=highlighted_text)
 
-
     if __name__ == "__main__":
         demo.launch()
+
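For reference, the per-token probability flow that the reworked `get_tokens_and_labels` relies on can be reproduced outside Gradio. The following is a minimal sketch, not part of the commit, assuming `transformers` >= 4.26.0 (which introduced `compute_transition_scores`) and PyTorch; the model name and prompt mirror the diff:

import numpy as np
from transformers import AutoModelForCausalLM, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer(["Today is"], return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=5,
    do_sample=True,  # the commit switches the demo to sampled generation
    return_dict_in_generate=True,
    output_scores=True,
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; reuse EOS
)

# normalize_logits=True turns the raw generation scores into log-probabilities
transition_scores = model.compute_transition_scores(
    outputs.sequences, outputs.scores, normalize_logits=True
)
transition_proba = np.exp(transition_scores.numpy())

# Scores cover only the generated tokens, so slice the prompt off the sequence
generated_tokens = outputs.sequences[:, inputs.input_ids.shape[1]:]
for token, proba in zip(generated_tokens[0], transition_proba[0]):
    print(f"{tokenizer.decode(token)!r}: p = {proba:.2%}")

Each printed value is the probability the model assigned to the token it actually sampled; these are the numbers the demo buckets into color labels.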
 
 
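Only the `"p < 1%": "red"` tail of the demo's color map appears as hunk context above; the rest sits outside the diff. The sketch below illustrates how the labeling loop in `get_tokens_and_labels` buckets a probability into the first threshold it clears. The two higher thresholds and their labels are illustrative assumptions, not values confirmed by this commit:

# Ordered high -> low so the first matching threshold wins.
probs_to_label = [
    (0.1, "p >= 10%"),   # assumed threshold and label
    (0.01, "p >= 1%"),   # assumed threshold and label
    (0.0, "p < 1%"),     # label visible in the diff
]

def label_for(proba: float) -> str:
    """Return the label of the first probability bucket that proba clears."""
    assert 0.0 <= proba <= 1.0
    for min_proba, label in probs_to_label:
        if proba >= min_proba:
            return label
    return probs_to_label[-1][1]

# gr.HighlightedText consumes the resulting (decoded_token, label) tuples,
# with None (used for the prompt tokens) rendered without a color.
print(label_for(0.42), label_for(0.02), label_for(0.002))
# -> p >= 10% p >= 1% p < 1%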