IDEFICS3_ROCO_ZeroGPU

Running on Zero

App Files Files Community

æLtorio committed on 9 days ago

Commit

1d6cff4

•

1 Parent(s): ca9f0b9

add descriptions

Browse files

Files changed (1) hide show

app.py +69 -20

app.py CHANGED Viewed

@@ -1,41 +1,90 @@
 import gradio as gr
 from transformers import AutoProcessor, Idefics3ForConditionalGeneration, image_utils
 import torch
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-print(f"Using device: {device}")
-model_id="eltorio/IDEFICS3_ROCO"
-# model = AutoModelForImageTextToText.from_pretrained(model_id).to(device)
-base_model_path="HuggingFaceM4/Idefics3-8B-Llama3" #or change to local path
 processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
 model = Idefics3ForConditionalGeneration.from_pretrained(
-        base_model_path, torch_dtype=torch.bfloat16
-    ).to(device)
-model.load_adapter(model_id,device_map="auto")
 def infere(image):
     messages = [
         {
-        "role": "system",
-        "content": [
-            {"type": "text", "text": "You are a valuable medical doctor and you are looking at an image of your patient."},
-        ]
         },
-    {
-        "role": "user",
-        "content": [
-            {"type": "image"},
-            {"type": "text", "text": "What do we see in this image?"},
-        ]
-    },
     ]
     prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[image], return_tensors="pt")
-    # print(f"inputs: {inputs}")
     inputs = {k: v.to(device) for k, v in inputs.items()}
     generated_ids = model.generate(**inputs, max_new_tokens=100)
     generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
     return generated_texts
-radiotest = gr.Interface(fn=infere, inputs="image", outputs="text")
 radiotest.launch(share=True)

+# Copyright 2024 Ronan Le Meillat
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Import necessary libraries
 import gradio as gr
 from transformers import AutoProcessor, Idefics3ForConditionalGeneration, image_utils
 import torch
+# Determine the device (GPU or CPU) to run the model on
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+print(f"Using device: {device}")  # Log the device being used
+# Define the model ID and base model path
+model_id = "eltorio/IDEFICS3_ROCO"
+base_model_path = "HuggingFaceM4/Idefics3-8B-Llama3"  # or change to local path
+# Initialize the processor from the base model path
 processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
+# Initialize the model from the base model path and set the torch dtype to bfloat16
 model = Idefics3ForConditionalGeneration.from_pretrained(
+    base_model_path, torch_dtype=torch.bfloat16
+).to(device)  # Move the model to the specified device
+# Load the adapter from the model ID and automatically map it to the device
+model.load_adapter(model_id, device_map="auto")
+# Define a function to infer a description from an image
 def infere(image):
+    """
+    Generate a description of a medical image.
+    Args:
+    - image (PIL Image): The medical image to describe.
+    Returns:
+    - generated_texts (List[str]): A list containing the generated description.
+    """
+    # Build the chat messages: a system prompt and a user turn with an image placeholder and a question
     messages = [
         {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a valuable medical doctor and you are looking at an image of your patient."},
+            ]
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": "What do we see in this image?"},
+            ]
         },
     ]
+    # Apply the chat template and add a generation prompt
     prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
+    # Preprocess the input image and text
     inputs = processor(text=prompt, images=[image], return_tensors="pt")
+    # Move the inputs to the specified device
     inputs = {k: v.to(device) for k, v in inputs.items()}
+    # Generate a description with the model
     generated_ids = model.generate(**inputs, max_new_tokens=100)
+    # Decode the generated IDs into text
     generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
     return generated_texts
+# Define the title, description, and device description for the Gradio interface
+title = f"<a href='https://huggingface.co/eltorio/IDEFICS3_ROCO'>IDEFICS3_ROCO</a>: Medical Image to Text <b>running on {device}</b>"
+desc = "This model generates a description of a medical image."
+device_desc = f"This model is running on {device} 🚀." if device == torch.device('cuda') else f"🐢 This model is running on {device} it will be very (very) slow. If you can donate some GPU time it will be usable 🐢. <a href='https://huggingface.co/eltorio/IDEFICS3_ROCO/discussions'>Please contact us.</a>"
+# Define the long description for the Gradio interface
+long_desc = f"This model is based on the <a href='https://huggingface.co/eltorio/IDEFICS3_ROCO'>IDEFICS3_ROCO model</a>, which is a multimodal model that can generate text from images. It has been fine-tuned on <a href='https://huggingface.co/datasets/eltorio/ROCO-radiology'>eltorio/ROCO-radiology</a>&nbsp;a dataset of medical images and can generate descriptions of medical images. Try uploading an image of a medical image and see what the model generates!<br><b>{device_desc}</b><br> 2024 - Ronan Le Meillat"
+# Create a Gradio interface with the infere function and specified title and descriptions
+radiotest = gr.Interface(fn=infere, inputs="image", outputs="text", title=title,
+                description=desc, article=long_desc)
+# Launch the Gradio interface and share it
 radiotest.launch(share=True)