joaogante HF staff committed on
Commit
282bfce
β€’
1 Parent(s): e2ed84a

change demo from gpt2 to flan-t5

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -1,16 +1,16 @@
1
  import gradio as gr
2
 
3
- from transformers import GPT2Tokenizer, AutoModelForCausalLM
4
  import numpy as np
5
 
6
 
7
- MODEL_NAME = "gpt2"
8
 
9
 
10
  if __name__ == "__main__":
11
  # Define your model and your tokenizer
12
- tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
13
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
14
  if tokenizer.pad_token_id is None:
15
  tokenizer.pad_token_id = tokenizer.eos_token_id
16
  model.config.pad_token_id = model.config.eos_token_id
@@ -34,7 +34,7 @@ if __name__ == "__main__":
34
  """
35
  inputs = tokenizer([prompt], return_tensors="pt")
36
  outputs = model.generate(
37
- **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True
38
  )
39
  # Important: don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
40
  transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
@@ -43,8 +43,11 @@ if __name__ == "__main__":
43
  input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
44
  generated_tokens = outputs.sequences[:, input_length:]
45
 
46
- # Initialize the highlighted output with the prompt, which will have no color label
47
- highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
 
 
 
48
  # Get the (decoded_token, label) pairs for the generated tokens
49
  for token, proba in zip(generated_tokens[0], transition_proba[0]):
50
  this_label = None
@@ -64,18 +67,18 @@ if __name__ == "__main__":
64
  # 🌈 Color Coded Text Generation 🌈
65
 
66
  This is a demo of how you can obtain the probabilities of each generated token, and use them to
67
- color code the model output.
68
- Feel free to clone this demo and modify it to your needs πŸ€—
69
-
70
- Internally, it relies on [`compute_transition_scores`](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.compute_transition_scores),
71
  which was added in `transformers` v4.26.0.
 
 
72
  """
73
  )
74
 
75
  with gr.Row():
76
  with gr.Column():
77
- prompt = gr.Textbox(label="Prompt", lines=3, value="Today is")
78
- button = gr.Button(f"Generate with {MODEL_NAME}, using sampling!")
79
  with gr.Column():
80
  highlighted_text = gr.HighlightedText(
81
  label="Highlighted generation",
 
1
  import gradio as gr
2
 
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import numpy as np
5
 
6
 
7
+ MODEL_NAME = "google/flan-t5-base"
8
 
9
 
10
  if __name__ == "__main__":
11
  # Define your model and your tokenizer
12
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
+ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME) # or AutoModelForCausalLM
14
  if tokenizer.pad_token_id is None:
15
  tokenizer.pad_token_id = tokenizer.eos_token_id
16
  model.config.pad_token_id = model.config.eos_token_id
 
34
  """
35
  inputs = tokenizer([prompt], return_tensors="pt")
36
  outputs = model.generate(
37
+ **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True
38
  )
39
  # Important: don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
40
  transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
 
43
  input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
44
  generated_tokens = outputs.sequences[:, input_length:]
45
 
46
+ # On decoder-only models, you might want to initialize the highlighted output with the prompt (wo labels)
47
+ if model.config.is_encoder_decoder:
48
+ highlighted_out = []
49
+ else:
50
+ highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
51
  # Get the (decoded_token, label) pairs for the generated tokens
52
  for token, proba in zip(generated_tokens[0], transition_proba[0]):
53
  this_label = None
 
67
  # 🌈 Color Coded Text Generation 🌈
68
 
69
  This is a demo of how you can obtain the probabilities of each generated token, and use them to
70
+ color code the model output. Internally, it relies on
71
+ [`compute_transition_scores`](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.compute_transition_scores),
 
 
72
  which was added in `transformers` v4.26.0.
73
+
74
+ πŸ€— Feel free to clone this demo and modify it to your needs πŸ€—
75
  """
76
  )
77
 
78
  with gr.Row():
79
  with gr.Column():
80
+ prompt = gr.Textbox(label="Prompt", lines=3, value="Translate to English: omelette du fromage")
81
+ button = gr.Button(f"Generate with {MODEL_NAME}")
82
  with gr.Column():
83
  highlighted_text = gr.HighlightedText(
84
  label="Highlighted generation",