Spaces:

HuggingFaceM4
/

idefics_playground

Runtime error

App Files Files Community

ysharma HF staff committed on Aug 14, 2023

Commit

b027e38

1 Parent(s): 7bdb82c

enable chat sharing

Browse files

Files changed (1) hide show

app_dialogue.py +113 -18

app_dialogue.py CHANGED Viewed

@@ -9,6 +9,8 @@ from urllib.parse import urlparse
 import gradio as gr
 import PIL
 from accelerate.utils import get_max_memory, set_seed
 from PIL import Image
 from transformers import AutoConfig, AutoProcessor, IdeficsForVisionText2Text
@@ -59,7 +61,88 @@ logger = logging.getLogger()
 SEED = 38
 set_seed(38)
-def convert_to_rgb(image):
     # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
     # for transparent images. The call to `alpha_composite` handles this case
     if image.mode == "RGB":
@@ -73,7 +156,7 @@ def convert_to_rgb(image):
 # Conversion between PIL Image <-> base64 <-> Markdown utils
-def pil_to_base64(pil_image):
     """
     Convert a PIL image into base64 string representation
     """
@@ -83,7 +166,7 @@ def pil_to_base64(pil_image):
     return encoded_image
-def pil_to_markdown_im(image):
     """
     Convert a PIL image into markdown filled with the base64 string representation.
     """
@@ -92,13 +175,13 @@ def pil_to_markdown_im(image):
     return img_str
-def base64_to_pil(encoded_image):
     decoded_image = base64.b64decode(encoded_image)
     pil_image = Image.open(BytesIO(decoded_image))
     return pil_image
-def im_markdown_to_pil(im_markdown_str):
     pattern = r'<img src="data:image/png;base64,([^"]+)" />'
     match = re.search(pattern, im_markdown_str)
     img_b64_str = match.group(1)
@@ -159,12 +242,15 @@ def isolate_images_urls(prompt_list):
     ]
     ```
     """
     linearized_list = []
     for prompt in prompt_list:
         # Prompt can be either a string, or a PIL image
         if isinstance(prompt, PIL.Image.Image):
             linearized_list.append(prompt)
-        elif isinstance(prompt, str):
             if "<fake_token_around_image>" not in prompt:
                 linearized_list.append(prompt)
             else:
@@ -212,9 +298,12 @@ def user_prompt_list_to_markdown(user_prompt_list: List[Union[str, PIL.Image.Ima
     resulting_string = ""
     for elem in user_prompt_list:
         if isinstance(elem, str):
-            resulting_string += elem
-        elif isinstance(elem, PIL.Image.Image):
-            resulting_string += pil_to_markdown_im(convert_to_rgb(elem))
         else:
             raise ValueError(
                 "Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
@@ -271,26 +360,25 @@ def format_user_prompt_with_im_history_and_system_conditioning(
     Produces the resulting list that needs to go inside the processor.
     It handles the potential image box input, the history and the system conditioning.
     """
     resulting_list = copy.deepcopy(SYSTEM_PROMPT)
     # Format history
     for turn in history:
         user_utterance, assistant_utterance = turn
         splitted_user_utterance = split_str_on_im_markdown(user_utterance)
-        splitted_user_utterance = [
-            im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
-            for s in splitted_user_utterance
-            if s != ""
-        ]
         if isinstance(splitted_user_utterance[0], str):
             resulting_list.append("\nUser: ")
         else:
             resulting_list.append("\nUser:")
         resulting_list.extend(splitted_user_utterance)
         resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
     # Format current input
     current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
     if current_image is None:
         if "<img src=data:image/png;base64" in current_user_prompt_str:
             raise ValueError("The UI does not support inputing via the text box an image in base64.")
@@ -300,8 +388,8 @@ def format_user_prompt_with_im_history_and_system_conditioning(
         resulting_list.append("<end_of_utterance>\nAssistant:")
         return resulting_list, current_user_prompt_list
     else:
-        # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
-        resulting_list.extend(["\nUser:", current_image, f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"])
         return resulting_list, [current_user_prompt_str]
@@ -457,7 +545,14 @@ textbox = gr.Textbox(
     container=False,
     label="Text input",
 )
-with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
     gr.Markdown(
         """
         # IDEFICS
@@ -484,7 +579,7 @@ with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
                 )
             processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
-            imagebox = gr.Image(type="pil", label="Image input")
             with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
                 max_new_tokens = gr.Slider(

 import gradio as gr
 import PIL
+import uuid
+import requests
 from accelerate.utils import get_max_memory, set_seed
 from PIL import Image
 from transformers import AutoConfig, AutoProcessor, IdeficsForVisionText2Text
 SEED = 38
 set_seed(38)
+def convert_to_rgb_pil(image):
+    """
+    Write a PIL image to disk as an RGB `.jpg` file and return that file's path.
+
+    An image that is not already in RGB mode is converted to RGBA, overlaid on a
+    white background and converted to RGB before being written; an image that is
+    already RGB is written as it is.
+
+    Parameters:
+    - image (PIL.Image.Image): The input image to be processed.
+    Returns:
+    - str: The path to the saved image, a freshly generated `<uuid4>.jpg` name
+           relative to the current working directory.
+    """
+    # A random UUID gives every call its own output file name.
+    local_path = f"{uuid.uuid4()}.jpg"
+    rgb_image = image
+    if image.mode != "RGB":
+        # Go through RGBA and composite over white rather than calling
+        # `convert("RGB")` directly, which creates a wrong background for
+        # transparent images.
+        rgba_image = image.convert("RGBA")
+        white_canvas = Image.new("RGBA", rgba_image.size, (255, 255, 255))
+        rgb_image = Image.alpha_composite(white_canvas, rgba_image).convert("RGB")
+    rgb_image.save(local_path)
+    return local_path
+def convert_to_rgb(filepath_or_pilimg):
+    """
+    Return the path to an RGB version of an image given as a file path or PIL Image.
+
+    A PIL Image object is always handed to `convert_to_rgb_pil`, which saves it to a
+    new `.jpg` file. A file path is opened first: if the file is already in RGB mode
+    its path is returned unchanged, otherwise the opened image is handed to
+    `convert_to_rgb_pil`, which overlays it on a white background (to handle
+    transparency), converts it to RGB and saves it to a new `.jpg` file.
+
+    Parameters:
+    - filepath_or_pilimg (str or PIL.Image.Image): The file path to an image or a PIL
+                                                  Image object to be processed.
+    Returns:
+    - str: If the input was a file path, the return will be the path to the original
+           image (if it's already in RGB) or the path to the saved RGB image.
+           If the input was a PIL Image object, the return will be the path to the saved
+           RGB image.
+    """
+    if isinstance(filepath_or_pilimg, PIL.Image.Image):
+        return convert_to_rgb_pil(filepath_or_pilimg)
+    with Image.open(filepath_or_pilimg) as image:
+        # Already RGB: nothing to convert, so reuse the file that is on disk.
+        if image.mode == "RGB":
+            return filepath_or_pilimg
+        # `image.convert("RGB")` would create a wrong background for transparent
+        # images, so reuse the white-background compositing in `convert_to_rgb_pil`
+        # instead of keeping a second copy of that logic here.
+        return convert_to_rgb_pil(image)
+def tmp_convert_to_rgb(image):
     # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
     # for transparent images. The call to `alpha_composite` handles this case
     if image.mode == "RGB":
 # Conversion between PIL Image <-> base64 <-> Markdown utils
+def tmp_pil_to_base64(pil_image):
     """
     Convert a PIL image into base64 string representation
     """
     return encoded_image
+def tmp_pil_to_markdown_im(image):
     """
     Convert a PIL image into markdown filled with the base64 string representation.
     """
     return img_str
+def tmp_base64_to_pil(encoded_image):
     decoded_image = base64.b64decode(encoded_image)
     pil_image = Image.open(BytesIO(decoded_image))
     return pil_image
+def tmp_im_markdown_to_pil(im_markdown_str):
     pattern = r'<img src="data:image/png;base64,([^"]+)" />'
     match = re.search(pattern, im_markdown_str)
     img_b64_str = match.group(1)
     ]
     ```
     """
     linearized_list = []
     for prompt in prompt_list:
         # Prompt can be either a string, or a PIL image
         if isinstance(prompt, PIL.Image.Image):
             linearized_list.append(prompt)
+        elif isinstance(prompt, str) and "/tmp/gradio/" in prompt: #isinstance(prompt, PIL.Image.Image):
+            linearized_list.append(prompt)
+        elif isinstance(prompt, str) and "/tmp/gradio/" not in prompt:
             if "<fake_token_around_image>" not in prompt:
                 linearized_list.append(prompt)
             else:
     resulting_string = ""
     for elem in user_prompt_list:
         if isinstance(elem, str):
+            if "/tmp/gradio/" not in elem:
+                resulting_string += elem
+            elif "/tmp/gradio/" in elem:
+                resulting_string += f"![](/file={convert_to_rgb(elem)})"
+        elif isinstance(elem, PIL.Image.Image):
+            resulting_string += f"![](/file={convert_to_rgb(elem)})"
         else:
             raise ValueError(
                 "Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
     Produces the resulting list that needs to go inside the processor.
     It handles the potential image box input, the history and the system conditioning.
     """
     resulting_list = copy.deepcopy(SYSTEM_PROMPT)
     # Format history
     for turn in history:
         user_utterance, assistant_utterance = turn
         splitted_user_utterance = split_str_on_im_markdown(user_utterance)
         if isinstance(splitted_user_utterance[0], str):
             resulting_list.append("\nUser: ")
         else:
             resulting_list.append("\nUser:")
         resulting_list.extend(splitted_user_utterance)
         resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
     # Format current input
     current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
     if current_image is None:
         if "<img src=data:image/png;base64" in current_user_prompt_str:
             raise ValueError("The UI does not support inputing via the text box an image in base64.")
         resulting_list.append("<end_of_utterance>\nAssistant:")
         return resulting_list, current_user_prompt_list
     else:
+        # Choosing to put the image first when the image is inputted through the UI, but this is an arbitrary choice.
+        resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
         return resulting_list, [current_user_prompt_str]
     container=False,
     label="Text input",
 )
+# Custom stylesheet passed to `gr.Blocks(css=...)`: sets a repeating background
+# image on the element whose id is `chatbot`.
+css = """
+#chatbot {
+background-image: url('https://huggingface.co/spaces/ysharma/m4-dialogue_copy4/resolve/main/idefics_720_2.png');
+background-repeat: repeat;}
+"""
+with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base(), css=css) as demo:
     gr.Markdown(
         """
         # IDEFICS
                 )
             processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
+            imagebox = gr.Image(type="filepath", label="Image input")
             with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
                 max_new_tokens = gr.Slider(