Kalbe-x-Bangkit committed on
Commit 9bc7520
1 Parent(s): c2c9e04

Update app.py

change model to IDEFICS2 MedVQA

Files changed (1):
  1. app.py +111 -13
app.py CHANGED
@@ -1,5 +1,13 @@
+import os
+import subprocess
+from PIL import Image
+import io
 import gradio as gr
-from transformers import pipeline
+from transformers import AutoProcessor, TextIteratorStreamer
+from transformers import Idefics2ForConditionalGeneration
+import torch
+from peft import LoraConfig
+from transformers import BitsAndBytesConfig

 # Project description
 description = """
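Note: `os`, `subprocess`, `io`, and `TextIteratorStreamer` are imported here but not referenced by any hunk in this diff, and the PIL import is likewise unused in the code shown (Gradio hands `format_answer` a PIL image directly). Assuming nothing else in app.py needs them, the block could shrink to:

```python
# Minimal import set for the code shown in this diff (assumption: no other
# part of app.py uses os, subprocess, io, PIL, or TextIteratorStreamer).
import gradio as gr
import torch
from peft import LoraConfig
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    Idefics2ForConditionalGeneration,
)
```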
@@ -14,9 +22,8 @@ The model is trained using the [Hugging face](https://huggingface.co/datasets/fl
 Reference: [ScienceDirect](https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252)

 ## Model Architecture
-The model uses a Parameterized Hypercomplex Shared Encoder network (PHYSEnet).

-![Model Architecture](path/to/your/image.png)
+![Model Architecture](img/Model-Architecture.png)

 Reference: [ScienceDirect](https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252)

@@ -24,19 +31,87 @@ Reference: [ScienceDirect](https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252)
 Please select the example below or upload 4 pairs of mammography exam results.
 """

-# Load the Visual QA model
-generator = pipeline("visual-question-answering", model="jihadzakki/blip1-medvqa")
+DEVICE = torch.device("cuda")
+
+USE_LORA = False
+USE_QLORA = True
+
+if USE_QLORA or USE_LORA:
+    lora_config = LoraConfig(
+        r=8,
+        lora_alpha=8,
+        lora_dropout=0.1,
+        target_modules='.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$',
+        use_dora=not USE_QLORA,
+        init_lora_weights="gaussian"
+    )
+    if USE_QLORA:
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16
+        )
+
+model = Idefics2ForConditionalGeneration.from_pretrained(
+    "jihadzakki/idefics2-8b-vqarad-delta",
+    torch_dtype=torch.float16,
+    quantization_config=bnb_config
+)
+
+
+processor = AutoProcessor.from_pretrained(
+    "HuggingFaceM4/idefics2-8b",
+)

 def format_answer(image, question, history):
     try:
-        result = generator(image, question, max_new_tokens=50)
-        predicted_answer = result[0].get('answer', 'No answer found')
-        history.append((image, f"Question: {question} | Answer: {predicted_answer}"))
-
-        return f"Predicted Answer: {predicted_answer}", history
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image"},
+                    {"type": "text", "text": question}
+                ]
+            }
+        ]
+
+        text = processor.apply_chat_template(messages, add_generation_prompt=True)
+        inputs = processor(text=[text.strip()], images=[image], return_tensors="pt", padding=True)
+        inputs = {key: value.to(DEVICE) for key, value in inputs.items()}
+        generated_ids = model.generate(**inputs, max_new_tokens=64)
+        generated_texts = processor.batch_decode(generated_ids[:, inputs["input_ids"].size(1):], skip_special_tokens=True)[0]
+
+        history.append((image, f"Question: {question} | Answer: {generated_texts}"))
+
+        # Keep the final answer string before freeing intermediate tensors
+        predicted_answer = f"Predicted Answer: {generated_texts}"
+
+        # Release GPU memory held by this request
+        del inputs
+        del generated_ids
+        del generated_texts
+        torch.cuda.empty_cache()
+
+        return predicted_answer, history
     except Exception as e:
+        # Clear the cache in case of an error
+        torch.cuda.empty_cache()
        return f"Error: {str(e)}", history

+def clear_history():
+    return "", []
+
+def undo_last(history):
+    if history:
+        history.pop()
+    return "", history
+
+def retry_last(image, question, history):
+    if history:
+        last_image, last_entry = history[-1]
+        return format_answer(last_image, question, history[:-1])
+    return "No previous analysis to retry.", history
+
 def switch_theme(mode):
     if mode == "Light Mode":
         return gr.themes.Default()
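Two things worth flagging in this hunk: `lora_config` is built but never attached to the model, and `bnb_config` is only bound when `USE_QLORA` is true, so setting both flags to `False` would make the `from_pretrained` call fail with a `NameError`. If the adapter were meant to be applied in-process (an assumption; the `jihadzakki/idefics2-8b-vqarad-delta` checkpoint may already carry the fine-tuned weights), the standard PEFT wiring would be:

```python
from peft import get_peft_model

# Hypothetical: attach the LoRA adapter defined above. Only needed if the
# loaded checkpoint does not already include the fine-tuned weights.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # sanity check: only adapter params are trainable
```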
@@ -60,9 +135,9 @@ with gr.Blocks(
         secondary_hue=gr.themes.colors.red,
     )
 ) as VisualQAApp:
-    gr.Markdown(description, elem_classes="description")
+    gr.Markdown(description, elem_classes="title")  # Display the project description

-    gr.Markdown("# Visual Question Answering using BLIP Model", elem_classes="title")
+    gr.Markdown("## Demo")

     with gr.Row():
         with gr.Column():
@@ -82,6 +157,29 @@ with gr.Blocks(
         show_progress=True
     )

+    with gr.Row():
+        retry_button = gr.Button("Retry")
+        undo_button = gr.Button("Undo")
+        clear_button = gr.Button("Clear")
+
+    retry_button.click(
+        retry_last,
+        inputs=[image_input, question_input, history_state],
+        outputs=[answer_output, history_state]
+    )
+
+    undo_button.click(
+        undo_last,
+        inputs=[history_state],
+        outputs=[answer_output, history_state]
+    )
+
+    clear_button.click(
+        clear_history,
+        inputs=[],
+        outputs=[answer_output, history_state]
+    )
+
     with gr.Row():
         history_gallery = gr.Gallery(label="History Log", elem_id="history_log")
     submit_button.click(
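The three new handlers are plain functions over the history list, so they can be checked without launching the UI. A minimal sketch (history entries are `(image, caption)` tuples, as built in `format_answer`):

```python
# clear_history and undo_last are pure with respect to the UI, so a quick
# REPL check is enough to confirm the wiring semantics.
history = [("img1.png", "Question: q1 | Answer: a1"),
           ("img2.png", "Question: q2 | Answer: a2")]

_, history = undo_last(history)
assert len(history) == 1                 # last entry dropped

answer, history = clear_history()
assert answer == "" and history == []    # everything reset
```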
@@ -117,4 +215,4 @@ with gr.Blocks(
         outputs=[feedback_input]
     )

-VisualQAApp.launch(share=True)
+VisualQAApp.launch(share=True, debug=True)
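A quick smoke test for the new IDEFICS2 inference path, without launching the app (a sketch; it assumes a CUDA GPU is present, since `DEVICE` is hard-coded to `"cuda"` above, and enough VRAM for the 4-bit model):

```python
from PIL import Image

# Any RGB image exercises the pipeline; real inputs are medical images.
test_image = Image.new("RGB", (224, 224), color="white")
answer, history = format_answer(test_image, "Is there any abnormality?", history=[])
print(answer)        # "Predicted Answer: ..." on success, "Error: ..." otherwise
print(len(history))  # 1 on success, 0 if the error path ran
```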