ysharma HF staff committed on
Commit
b027e38
1 Parent(s): 7bdb82c

enable chat sharing

Browse files
Files changed (1) hide show
  1. app_dialogue.py +113 -18
app_dialogue.py CHANGED
@@ -9,6 +9,8 @@ from urllib.parse import urlparse
9
 
10
  import gradio as gr
11
  import PIL
 
 
12
  from accelerate.utils import get_max_memory, set_seed
13
  from PIL import Image
14
  from transformers import AutoConfig, AutoProcessor, IdeficsForVisionText2Text
@@ -59,7 +61,88 @@ logger = logging.getLogger()
59
  SEED = 38
60
  set_seed(38)
61
 
62
- def convert_to_rgb(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
64
  # for transparent images. The call to `alpha_composite` handles this case
65
  if image.mode == "RGB":
@@ -73,7 +156,7 @@ def convert_to_rgb(image):
73
 
74
 
75
  # Conversion between PIL Image <-> base64 <-> Markdown utils
76
- def pil_to_base64(pil_image):
77
  """
78
  Convert a PIL image into a base64 string representation
79
  """
@@ -83,7 +166,7 @@ def pil_to_base64(pil_image):
83
  return encoded_image
84
 
85
 
86
- def pil_to_markdown_im(image):
87
  """
88
  Convert a PIL image into markdown filled with the base64 string representation.
89
  """
@@ -92,13 +175,13 @@ def pil_to_markdown_im(image):
92
  return img_str
93
 
94
 
95
- def base64_to_pil(encoded_image):
96
  decoded_image = base64.b64decode(encoded_image)
97
  pil_image = Image.open(BytesIO(decoded_image))
98
  return pil_image
99
 
100
 
101
- def im_markdown_to_pil(im_markdown_str):
102
  pattern = r'<img src="data:image/png;base64,([^"]+)" />'
103
  match = re.search(pattern, im_markdown_str)
104
  img_b64_str = match.group(1)
@@ -159,12 +242,15 @@ def isolate_images_urls(prompt_list):
159
  ]
160
  ```
161
  """
 
162
  linearized_list = []
163
  for prompt in prompt_list:
164
  # Prompt can be either a string, or a PIL image
165
  if isinstance(prompt, PIL.Image.Image):
166
  linearized_list.append(prompt)
167
- elif isinstance(prompt, str):
 
 
168
  if "<fake_token_around_image>" not in prompt:
169
  linearized_list.append(prompt)
170
  else:
@@ -212,9 +298,12 @@ def user_prompt_list_to_markdown(user_prompt_list: List[Union[str, PIL.Image.Ima
212
  resulting_string = ""
213
  for elem in user_prompt_list:
214
  if isinstance(elem, str):
215
- resulting_string += elem
216
- elif isinstance(elem, PIL.Image.Image):
217
- resulting_string += pil_to_markdown_im(convert_to_rgb(elem))
 
 
 
218
  else:
219
  raise ValueError(
220
  "Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
@@ -271,26 +360,25 @@ def format_user_prompt_with_im_history_and_system_conditioning(
271
  Produces the resulting list that needs to go inside the processor.
272
  It handles the potential image box input, the history and the system conditioning.
273
  """
 
274
  resulting_list = copy.deepcopy(SYSTEM_PROMPT)
275
 
276
  # Format history
277
  for turn in history:
278
  user_utterance, assistant_utterance = turn
279
  splitted_user_utterance = split_str_on_im_markdown(user_utterance)
280
- splitted_user_utterance = [
281
- im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
282
- for s in splitted_user_utterance
283
- if s != ""
284
- ]
285
  if isinstance(splitted_user_utterance[0], str):
286
  resulting_list.append("\nUser: ")
287
  else:
288
  resulting_list.append("\nUser:")
289
  resulting_list.extend(splitted_user_utterance)
290
  resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
 
291
 
292
  # Format current input
293
  current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
 
294
  if current_image is None:
295
  if "<img src=data:image/png;base64" in current_user_prompt_str:
296
  raise ValueError("The UI does not support inputing via the text box an image in base64.")
@@ -300,8 +388,8 @@ def format_user_prompt_with_im_history_and_system_conditioning(
300
  resulting_list.append("<end_of_utterance>\nAssistant:")
301
  return resulting_list, current_user_prompt_list
302
  else:
303
- # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
304
- resulting_list.extend(["\nUser:", current_image, f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"])
305
  return resulting_list, [current_user_prompt_str]
306
 
307
 
@@ -457,7 +545,14 @@ textbox = gr.Textbox(
457
  container=False,
458
  label="Text input",
459
  )
460
- with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
 
 
 
 
 
 
 
461
  gr.Markdown(
462
  """
463
  # IDEFICS
@@ -484,7 +579,7 @@ with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
484
  )
485
  processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
486
 
487
- imagebox = gr.Image(type="pil", label="Image input")
488
 
489
  with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
490
  max_new_tokens = gr.Slider(
 
9
 
10
  import gradio as gr
11
  import PIL
12
+ import uuid
13
+ import requests
14
  from accelerate.utils import get_max_memory, set_seed
15
  from PIL import Image
16
  from transformers import AutoConfig, AutoProcessor, IdeficsForVisionText2Text
 
61
  SEED = 38
62
  set_seed(38)
63
 
64
+
65
+ def convert_to_rgb_pil(image):
66
+ """
67
+ Convert a PIL Image object to RGB mode and save it locally.
68
+
69
+ The function ensures that images with transparency (alpha channel)
70
+ are overlaid on a white background before saving.
71
+
72
+ Parameters:
73
+ - image (PIL.Image.Image): The input image to be processed.
74
+
75
+ Returns:
76
+ - str: The path to the saved RGB image.
77
+
78
+ """
79
+ # Build a unique .jpg filename; the path is relative, so the image is saved in the current working directory
80
+ filename = f"{uuid.uuid4()}.jpg"
81
+ local_path = f"{filename}"
82
+
83
+ if image.mode != "RGB":
84
+ image_rgba = image.convert("RGBA")
85
+ background = Image.new("RGBA", image_rgba.size, (255, 255, 255))
86
+ alpha_composite = Image.alpha_composite(background, image_rgba)
87
+ alpha_composite = alpha_composite.convert("RGB")
88
+ alpha_composite.save(local_path)
89
+ else:
90
+ image.save(local_path)
91
+
92
+ return local_path # Return the path to the saved image
93
+
94
+
95
+ def convert_to_rgb(filepath_or_pilimg):
96
+ """
97
+ Convert an image to RGB mode, handling transparency for non-RGB images.
98
+
99
+ This function can accept either a file path to an image or a PIL Image object.
100
+ For transparent images, the function overlays the image onto a white background
101
+ to handle the transparency before converting it to RGB mode.
102
+
103
+ Parameters:
104
+ - filepath_or_pilimg (str or PIL.Image.Image): The file path to an image or a PIL
105
+ Image object to be processed.
106
+
107
+ Returns:
108
+ - str: If the input was a file path, the return will be the path to the original
109
+ image (if it's already in RGB) or the path to the saved RGB image.
110
+ If the input was a PIL Image object, the return will be the path to the saved
111
+ RGB image.
112
+
113
+ """
114
+ # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
115
+ # for transparent images. The call to `alpha_composite` handles this case
116
+
117
+ if isinstance(filepath_or_pilimg, PIL.Image.Image):
118
+ return convert_to_rgb_pil(filepath_or_pilimg)
119
+
120
+ with Image.open(filepath_or_pilimg) as image:
121
+ # Check if the image is already in the RGB format
122
+ if image.mode == "RGB":
123
+ return filepath_or_pilimg # If already in RGB, return the original path
124
+
125
+ # Convert image to RGBA
126
+ image_rgba = image.convert("RGBA")
127
+
128
+ # Create a white background image of the same size
129
+ background = Image.new("RGBA", image_rgba.size, (255, 255, 255))
130
+
131
+ # Composite the original image over the white background
132
+ alpha_composite = Image.alpha_composite(background, image_rgba)
133
+
134
+ # Convert the composited image to RGB format
135
+ alpha_composite = alpha_composite.convert("RGB")
136
+
137
+ # Save the converted image under a unique .jpg filename in the current working directory
138
+ filename = f"{uuid.uuid4()}.jpg"
139
+ local_path = f"{filename}"
140
+ alpha_composite.save(local_path)
141
+
142
+ return local_path # Return the path to the saved image
143
+
144
+
145
+ def tmp_convert_to_rgb(image):
146
  # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
147
  # for transparent images. The call to `alpha_composite` handles this case
148
  if image.mode == "RGB":
 
156
 
157
 
158
  # Conversion between PIL Image <-> base64 <-> Markdown utils
159
+ def tmp_pil_to_base64(pil_image):
160
  """
161
  Convert a PIL image into a base64 string representation
162
  """
 
166
  return encoded_image
167
 
168
 
169
+ def tmp_pil_to_markdown_im(image):
170
  """
171
  Convert a PIL image into markdown filled with the base64 string representation.
172
  """
 
175
  return img_str
176
 
177
 
178
+ def tmp_base64_to_pil(encoded_image):
179
  decoded_image = base64.b64decode(encoded_image)
180
  pil_image = Image.open(BytesIO(decoded_image))
181
  return pil_image
182
 
183
 
184
+ def tmp_im_markdown_to_pil(im_markdown_str):
185
  pattern = r'<img src="data:image/png;base64,([^"]+)" />'
186
  match = re.search(pattern, im_markdown_str)
187
  img_b64_str = match.group(1)
 
242
  ]
243
  ```
244
  """
245
+
246
  linearized_list = []
247
  for prompt in prompt_list:
248
  # Prompt can be either a string, or a PIL image
249
  if isinstance(prompt, PIL.Image.Image):
250
  linearized_list.append(prompt)
251
+ elif isinstance(prompt, str) and "/tmp/gradio/" in prompt: #isinstance(prompt, PIL.Image.Image):
252
+ linearized_list.append(prompt)
253
+ elif isinstance(prompt, str) and "/tmp/gradio/" not in prompt:
254
  if "<fake_token_around_image>" not in prompt:
255
  linearized_list.append(prompt)
256
  else:
 
298
  resulting_string = ""
299
  for elem in user_prompt_list:
300
  if isinstance(elem, str):
301
+ if "/tmp/gradio/" not in elem:
302
+ resulting_string += elem
303
+ elif "/tmp/gradio/" in elem:
304
+ resulting_string += f"![](/file={convert_to_rgb(elem)})"
305
+ elif isinstance(elem, PIL.Image.Image):
306
+ resulting_string += f"![](/file={convert_to_rgb(elem)})"
307
  else:
308
  raise ValueError(
309
  "Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
 
360
  Produces the resulting list that needs to go inside the processor.
361
  It handles the potential image box input, the history and the system conditioning.
362
  """
363
+
364
  resulting_list = copy.deepcopy(SYSTEM_PROMPT)
365
 
366
  # Format history
367
  for turn in history:
368
  user_utterance, assistant_utterance = turn
369
  splitted_user_utterance = split_str_on_im_markdown(user_utterance)
370
+
 
 
 
 
371
  if isinstance(splitted_user_utterance[0], str):
372
  resulting_list.append("\nUser: ")
373
  else:
374
  resulting_list.append("\nUser:")
375
  resulting_list.extend(splitted_user_utterance)
376
  resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
377
+
378
 
379
  # Format current input
380
  current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
381
+
382
  if current_image is None:
383
  if "<img src=data:image/png;base64" in current_user_prompt_str:
384
  raise ValueError("The UI does not support inputing via the text box an image in base64.")
 
388
  resulting_list.append("<end_of_utterance>\nAssistant:")
389
  return resulting_list, current_user_prompt_list
390
  else:
391
+ # Choosing to put the image first when the image is inputted through the UI, but this is an arbitrary choice.
392
+ resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
393
  return resulting_list, [current_user_prompt_str]
394
 
395
 
 
545
  container=False,
546
  label="Text input",
547
  )
548
+
549
+ css = """
550
+ #chatbot {
551
+ background-image: url('https://huggingface.co/spaces/ysharma/m4-dialogue_copy4/resolve/main/idefics_720_2.png');
552
+ background-repeat: repeat;}
553
+ """
554
+
555
+ with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base(), css=css) as demo:
556
  gr.Markdown(
557
  """
558
  # IDEFICS
 
579
  )
580
  processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
581
 
582
+ imagebox = gr.Image(type="filepath", label="Image input")
583
 
584
  with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
585
  max_new_tokens = gr.Slider(