VictorSanh committed on
Commit
7df19dd
·
1 Parent(s): f10b974

Update visualization

Browse files
Files changed (2) hide show
  1. app_bis.py +7 -9
  2. app_dialogue.py +32 -71
app_bis.py CHANGED
@@ -1,7 +1,6 @@
1
  import logging
2
  import os
3
  import re
4
-
5
  import time
6
  from io import BytesIO
7
 
@@ -10,7 +9,6 @@ import requests
10
  import torch
11
  import transformers
12
  from accelerate.utils import get_max_memory
13
-
14
  from joblib import Parallel, delayed
15
  from PIL import Image
16
  from transformers import AutoTokenizer
@@ -699,17 +697,17 @@ with gr.Blocks() as demo:
699
  converted into real newline characters.
700
  See examples and additional details below.""")
701
 
702
- #gr.HTML("<h3 align='center'>Help to write prompts:🙌</h3><br>Put the urls to the images inside the image tokens, it will be converted into the real image tokens. Put <fake_token_around_image> before and after each image token WITHOUT space. The texts \\n will be converted into real newline characters. See examples and additional details below.")
703
- #gr.Markdown(MSG_MAIN)
704
- #with gr.Row():
705
- #with gr.Column():
706
  gr.Markdown("## Input")
707
  with gr.Row():
708
  if not IS_MAIN_SPACE:
709
  images = gr.File(label="Images", file_count="multiple")
710
  prompt = gr.Textbox(label="Prompt", placeholder="Enter the prompt here", lines=5)
711
 
712
- #gr.Markdown("## Common parameters to all decoding strategy")
713
  with gr.Row():
714
  with gr.Accordion("Common parameters to all decoding strategy", open=False, elem_id="common_params"):
715
  temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Softmax temperature")
@@ -751,7 +749,7 @@ with gr.Blocks() as demo:
751
  label="Stop generation when an image token, a bos or a eos token is generated", value=False
752
  )
753
 
754
- #gr.Markdown("## Decoding strategy and its specific parameters")
755
  with gr.Accordion("Decoding strategy and its specific parameters", open=False, elem_id="decoding_params"):
756
  decoding_strategy = gr.Dropdown(
757
  ["greedy", "beam_search", "beam_sampling", "sampling_top_k", "sampling_top_p", "contrastive_sampling"],
@@ -793,7 +791,7 @@ with gr.Blocks() as demo:
793
 
794
  submit = gr.Button(label="Generate")
795
 
796
- #with gr.Column():
797
  with gr.Row():
798
  if IS_MAIN_SPACE:
799
  outputs = [
 
1
  import logging
2
  import os
3
  import re
 
4
  import time
5
  from io import BytesIO
6
 
 
9
  import torch
10
  import transformers
11
  from accelerate.utils import get_max_memory
 
12
  from joblib import Parallel, delayed
13
  from PIL import Image
14
  from transformers import AutoTokenizer
 
697
  converted into real newline characters.
698
  See examples and additional details below.""")
699
 
700
+ # gr.HTML("<h3 align='center'>Help to write prompts:🙌</h3><br>Put the urls to the images inside the image tokens, it will be converted into the real image tokens. Put <fake_token_around_image> before and after each image token WITHOUT space. The texts \\n will be converted into real newline characters. See examples and additional details below.")
701
+ # gr.Markdown(MSG_MAIN)
702
+ # with gr.Row():
703
+ # with gr.Column():
704
  gr.Markdown("## Input")
705
  with gr.Row():
706
  if not IS_MAIN_SPACE:
707
  images = gr.File(label="Images", file_count="multiple")
708
  prompt = gr.Textbox(label="Prompt", placeholder="Enter the prompt here", lines=5)
709
 
710
+ # gr.Markdown("## Common parameters to all decoding strategy")
711
  with gr.Row():
712
  with gr.Accordion("Common parameters to all decoding strategy", open=False, elem_id="common_params"):
713
  temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Softmax temperature")
 
749
  label="Stop generation when an image token, a bos or a eos token is generated", value=False
750
  )
751
 
752
+ # gr.Markdown("## Decoding strategy and its specific parameters")
753
  with gr.Accordion("Decoding strategy and its specific parameters", open=False, elem_id="decoding_params"):
754
  decoding_strategy = gr.Dropdown(
755
  ["greedy", "beam_search", "beam_sampling", "sampling_top_k", "sampling_top_p", "contrastive_sampling"],
 
791
 
792
  submit = gr.Button(label="Generate")
793
 
794
+ # with gr.Column():
795
  with gr.Row():
796
  if IS_MAIN_SPACE:
797
  outputs = [
app_dialogue.py CHANGED
@@ -1,12 +1,11 @@
1
  import os
2
 
3
  import gradio as gr
4
- import requests
5
 
6
 
7
  models = [
8
- "HuggingFaceM4/tr_209_ift_mixture_opt_step-14000"
9
- # "HuggingFaceM4/tr_210_ift_mixture_opt_step-2500",
10
  ]
11
 
12
  SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
@@ -31,18 +30,9 @@ BAN_TOKENS = "<image>;<fake_token_around_image>"
31
  EOS_TOKENS = "</s>;User"
32
 
33
  import logging
34
- import re
35
- from io import BytesIO
36
 
37
- import torch
38
  from accelerate.utils import get_max_memory
39
- from PIL import Image
40
- from transformers import AutoTokenizer
41
-
42
- from m4.models.vllama.configuration_vllama import VLlamaConfig
43
- from m4.models.vllama.modeling_vllama import VLlamaForCausalLM
44
- from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
45
- from m4.training.utils import build_image_transform
46
 
47
 
48
  TOKENIZER_FAST = True
@@ -52,7 +42,12 @@ logging.basicConfig(level=logging.INFO)
52
  logger = logging.getLogger()
53
 
54
 
55
- def load_tokenizer_model(model_name):
 
 
 
 
 
56
  tokenizer = AutoTokenizer.from_pretrained(
57
  model_name,
58
  use_fast=TOKENIZER_FAST,
@@ -61,7 +56,7 @@ def load_tokenizer_model(model_name):
61
  )
62
  # tokenizer.padding_side = "left" -> we don't need that, do we?
63
 
64
- config = VLlamaConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
65
  max_memory_map = get_max_memory()
66
 
67
  for key in max_memory_map.keys():
@@ -71,7 +66,7 @@ def load_tokenizer_model(model_name):
71
  # Decrease 2 for Pytorch overhead and 2 for the forward to be safe
72
  max_memory_map[key] = f"{max_memory_map[key] - 4} GiB"
73
 
74
- model = VLlamaForCausalLM.from_pretrained(
75
  model_name,
76
  use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
77
  device_map="auto",
@@ -83,28 +78,23 @@ def load_tokenizer_model(model_name):
83
  print("Current device map:", model.hf_device_map)
84
  print("Model default generation config:", model.generation_config)
85
  # TODO: the device_map looks very inefficient right now. That could be improved
86
- return tokenizer, model
87
 
88
 
89
- def fetch_images(url_images):
90
- headers = {
91
- "User-Agent": (
92
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0"
93
- " Safari/537.36"
94
- )
95
- }
96
- images = []
97
- for url in url_images:
98
- if isinstance(url, str):
99
- images.append(Image.open(BytesIO(requests.get(url, stream=True, headers=headers).content)))
100
  else:
101
- images.append(url)
102
- return images
103
-
104
 
105
  def model_generation(
106
  prompt,
107
- images,
108
  tokenizer,
109
  model,
110
  temperature,
@@ -123,31 +113,15 @@ def model_generation(
123
  top_p,
124
  penalty_alpha,
125
  ):
126
- # Preparing inputs
127
- tokens = tokenizer(
128
- [prompt],
129
  truncation=True,
130
  max_length=MAX_SEQ_LEN - 512, # TODO: replace the 512 value with `max_new_tokens`
131
  padding=True,
132
- add_special_tokens=False,
133
  )
134
-
135
- input_ids = torch.tensor([[tokenizer.bos_token_id] + tokens.input_ids[0]])
136
- attention_mask = torch.tensor([[1] + tokens.attention_mask[0]])
137
-
138
- image_attention_mask = [
139
- incremental_to_binary_attention_mask(
140
- image_attention_mask_for_packed_input_ids(input_ids[0].unsqueeze(0), tokenizer)[0], num_classes=len(images)
141
- )
142
- ]
143
-
144
- image_transform = build_image_transform(eval=True)
145
- pixel_values = [torch.stack([image_transform(img) for img in images])]
146
-
147
- input_ids = input_ids.to(0)
148
- attention_mask = attention_mask.to(0)
149
- pixel_values = torch.stack(pixel_values).to(0)
150
- image_attention_mask = torch.cat(image_attention_mask, 0).to(0)
151
 
152
  # Excluding some words from the generation
153
  bad_words_ids = None
@@ -179,13 +153,6 @@ def model_generation(
179
  )
180
  eos_token_ids += tokenized_eos_token
181
 
182
- # Inputs
183
- input_args = {
184
- "input_ids": input_ids,
185
- "attention_mask": attention_mask,
186
- "pixel_values": pixel_values,
187
- "image_attention_mask": image_attention_mask,
188
- }
189
  # Common parameters to all decoding strategies
190
  # This documentation is useful to read: https://huggingface.co/docs/transformers/main/en/generation_strategies
191
  generation_args = {
@@ -239,7 +206,7 @@ def model_generation(
239
  tokenizer.batch_decode(generated_tokens, skip_special_tokens=hide_special_tokens)[0]
240
  )
241
 
242
- actual_generated_tokens = generated_tokens[:, input_ids.shape[-1] :]
243
  first_end_token = len(actual_generated_tokens[0])
244
  actual_generated_tokens = actual_generated_tokens[:, :first_end_token]
245
  generated_text = tokenizer.batch_decode(actual_generated_tokens, skip_special_tokens=hide_special_tokens)[0]
@@ -285,7 +252,7 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
285
  show_label=False,
286
  container=False,
287
  )
288
- tokenizer, model = load_tokenizer_model(model_selector.value)
289
 
290
  imagebox = gr.Image(
291
  type="pil",
@@ -329,7 +296,7 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
329
  elem_id="chatbot",
330
  label="Idefics Chatbot",
331
  visible=True,
332
- height=550,
333
  value=[
334
  [
335
  (
@@ -391,7 +358,7 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
391
  user_prompt,
392
  chat_history,
393
  ):
394
- global model, tokenizer
395
 
396
  temperature = 1.0
397
  no_repeat_ngram_size = 0
@@ -412,15 +379,9 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
412
  history=chat_history,
413
  )
414
 
415
- url_images = re.findall(r"<image(.*?)>", formated_prompt)
416
- for idx, url_image in enumerate(url_images):
417
- formated_prompt = formated_prompt.replace(url_image, "")
418
- url_images[idx] = url_images[idx][1:]
419
- images = fetch_images(url_images)
420
-
421
  generated_text = model_generation(
422
  prompt=formated_prompt,
423
- images=images,
424
  tokenizer=tokenizer,
425
  model=model,
426
  temperature=temperature,
 
1
  import os
2
 
3
  import gradio as gr
 
4
 
5
 
6
  models = [
7
+ "HuggingFaceM4/idefics-9b-instruct",
8
+ # "HuggingFaceM4/idefics-80b-instruct",
9
  ]
10
 
11
  SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
 
30
  EOS_TOKENS = "</s>;User"
31
 
32
  import logging
 
 
33
 
 
34
  from accelerate.utils import get_max_memory
35
+ from transformers import AutoTokenizer, AutoProcessor, AutoConfig, AutoModelForCausalLM
 
 
 
 
 
 
36
 
37
 
38
  TOKENIZER_FAST = True
 
42
  logger = logging.getLogger()
43
 
44
 
45
+ def load_processor_tokenizer_model(model_name):
46
+ processor = AutoProcessor.from_pretrained(
47
+ model_name,
48
+ use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
49
+ truncation_side="left",
50
+ )
51
  tokenizer = AutoTokenizer.from_pretrained(
52
  model_name,
53
  use_fast=TOKENIZER_FAST,
 
56
  )
57
  # tokenizer.padding_side = "left" -> we don't need that, do we?
58
 
59
+ config = AutoConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
60
  max_memory_map = get_max_memory()
61
 
62
  for key in max_memory_map.keys():
 
66
  # Decrease 2 for Pytorch overhead and 2 for the forward to be safe
67
  max_memory_map[key] = f"{max_memory_map[key] - 4} GiB"
68
 
69
+ model = AutoModelForCausalLM.from_pretrained(
70
  model_name,
71
  use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
72
  device_map="auto",
 
78
  print("Current device map:", model.hf_device_map)
79
  print("Model default generation config:", model.generation_config)
80
  # TODO: the device_map looks very inefficient right now. That could be improved
81
+ return processor, tokenizer, model
82
 
83
 
84
def split_prompt_into_list(prompt_str):
    """Convert a full string prompt to the list format expected by the processor.

    The prompt is split on the ``<fake_token_around_image>`` delimiter. A
    segment that starts with ``<image:`` is treated as an image reference and
    reduced to the text between ``<image:`` and the closing ``>``. Every other
    segment, including empty ones, is kept unchanged as text.

    Args:
        prompt_str: The full prompt, with image references written as
            ``<fake_token_around_image><image:REFERENCE><fake_token_around_image>``.

    Returns:
        A list of strings in prompt order: text segments verbatim and image
        references with the ``<image:`` / ``>`` wrapper removed.
    """
    image_prefix = "<image:"
    image_suffix = ">"
    prompt_list = []
    for segment in prompt_str.split("<fake_token_around_image>"):
        if segment.startswith(image_prefix):
            segment = segment[len(image_prefix):]
            # Only drop the closing bracket when it is really there: a blind
            # `[:-1]` would eat the last character of a malformed segment.
            if segment.endswith(image_suffix):
                segment = segment[: -len(image_suffix)]
        prompt_list.append(segment)
    return prompt_list
 
94
 
95
  def model_generation(
96
  prompt,
97
+ processor,
98
  tokenizer,
99
  model,
100
  temperature,
 
113
  top_p,
114
  penalty_alpha,
115
  ):
116
+ input_args = processor(
117
+ [split_prompt_into_list(prompt)],
118
+ eval_mode=True,
119
  truncation=True,
120
  max_length=MAX_SEQ_LEN - 512, # TODO: replace the 512 value with `max_new_tokens`
121
  padding=True,
 
122
  )
123
+ for k, v in input_args.items():
124
+ input_args[k] = v.to(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  # Excluding some words from the generation
127
  bad_words_ids = None
 
153
  )
154
  eos_token_ids += tokenized_eos_token
155
 
 
 
 
 
 
 
 
156
  # Common parameters to all decoding strategies
157
  # This documentation is useful to read: https://huggingface.co/docs/transformers/main/en/generation_strategies
158
  generation_args = {
 
206
  tokenizer.batch_decode(generated_tokens, skip_special_tokens=hide_special_tokens)[0]
207
  )
208
 
209
+ actual_generated_tokens = generated_tokens[:, input_args["input_ids"].shape[-1] :]
210
  first_end_token = len(actual_generated_tokens[0])
211
  actual_generated_tokens = actual_generated_tokens[:, :first_end_token]
212
  generated_text = tokenizer.batch_decode(actual_generated_tokens, skip_special_tokens=hide_special_tokens)[0]
 
252
  show_label=False,
253
  container=False,
254
  )
255
+ processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
256
 
257
  imagebox = gr.Image(
258
  type="pil",
 
296
  elem_id="chatbot",
297
  label="Idefics Chatbot",
298
  visible=True,
299
+ height=750,
300
  value=[
301
  [
302
  (
 
358
  user_prompt,
359
  chat_history,
360
  ):
361
+ global processor, model, tokenizer
362
 
363
  temperature = 1.0
364
  no_repeat_ngram_size = 0
 
379
  history=chat_history,
380
  )
381
 
 
 
 
 
 
 
382
  generated_text = model_generation(
383
  prompt=formated_prompt,
384
+ processor=processor,
385
  tokenizer=tokenizer,
386
  model=model,
387
  temperature=temperature,