Spaces:

KalbeDigitalLab
/

IDEFICS2-8B-MedicalVQA

Runtime error

App Files Files Community

dafajudin committed on Jun 14

Commit

0ad6e28

•

1 Parent(s): 9e8ecff

update code

Browse files

Files changed (2) hide show

app.py +12 -11
index.html +116 -117

app.py CHANGED Viewed

@@ -35,11 +35,11 @@ if USE_QLORA or USE_LORA:
         )
     # Model yang akan digunakan
-    model = Idefics2ForConditionalGeneration.from_pretrained(
-        "jihadzakki/idefics2-8b-vqarad-delta",
-        torch_dtype=torch.float16,
-        quantization_config=bnb_config
-    )
 processor = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
@@ -81,15 +81,16 @@ def format_answer(image, question, history):
         return f"Error: {str(e)}", history
 def clear_history():
-    return None, "", []
 def undo_last(history):
     if history:
         history.pop()
     if history:
         last_image, last_entry = history[-1]
-        return last_image, last_entry, history
-    return None, "", history
 def retry_last(history):
     if history:
@@ -148,19 +149,19 @@ with gr.Blocks(
         retry_button.click(
             retry_last,
             inputs=[history_state],
-            outputs=[answer_output, image_input, history_state]
         )
         undo_button.click(
             undo_last,
             inputs=[history_state],
-            outputs=[image_input, answer_output, history_state]
         )
         clear_button.click(
             clear_history,
             inputs=[],
-            outputs=[image_input, answer_output, history_state]
         )
     with gr.Row():

         )
     # Model yang akan digunakan
+    # model = Idefics2ForConditionalGeneration.from_pretrained(
+    #     "jihadzakki/idefics2-8b-vqarad-delta",
+    #     torch_dtype=torch.float16,
+    #     quantization_config=bnb_config
+    # )
 processor = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
         return f"Error: {str(e)}", history
 def clear_history():
+    return None, "", [], ""
 def undo_last(history):
     if history:
         history.pop()
     if history:
         last_image, last_entry = history[-1]
+        question = last_entry.split(" | ")[0].replace("Question: ", "")
+        return last_image, question, last_entry, history
+    return None, "", "", history
 def retry_last(history):
     if history:
         retry_button.click(
             retry_last,
             inputs=[history_state],
+            outputs=[answer_output, image_input, question_input, history_state]
         )
         undo_button.click(
             undo_last,
             inputs=[history_state],
+            outputs=[image_input, question_input, answer_output, history_state]
         )
         clear_button.click(
             clear_history,
             inputs=[],
+            outputs=[image_input, question_input, answer_output, history_state]
         )
     with gr.Row():

index.html CHANGED Viewed

@@ -1,133 +1,132 @@
 <!DOCTYPE html>
 <html>
-	<head>
-		<link rel="stylesheet" href="file/style.css" />
-		<link rel="preconnect" href="https://fonts.googleapis.com" />
-		<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
-		<link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap" rel="stylesheet" />
-		<title>Visual Question Answering (VQA) for Medical Imaging</title>
-        <style>
-            * {
-                box-sizing: border-box;
-            }
-            body {
-                font-family: 'Source Sans Pro', sans-serif;
-                font-size: 16px;
-            }
-            .container {
-                width: 100%;
-                margin: 0 auto;
-            }
-            .title {
-                font-size: 24px !important;
-                font-weight: 600 !important;
-                letter-spacing: 0em;
-                text-align: center;
-                color: #374159 !important;
-            }
-            .subtitle {
-                font-size: 24px !important;
-                font-style: italic;
-                font-weight: 400 !important;
-                letter-spacing: 0em;
-                text-align: center;
-                color: #1d652a !important;
-                padding-bottom: 0.5em;
-            }
-            .overview-heading {
-                font-size: 24px !important;
-                font-weight: 600 !important;
-                letter-spacing: 0em;
-                text-align: left;
-            }
-            .overview-content {
-                font-size: 14px !important;
-                font-weight: 400 !important;
-                line-height: 33px !important;
-                letter-spacing: 0em;
-                text-align: left;
-            }
-            .content-image {
-                width: 100% !important;
-                height: auto !important;
-            }
-            .vl {
-                border-left: 5px solid #1d652a;
-                padding-left: 20px;
-                color: #1d652a !important;
-            }
-            .grid-container {
-                display: grid;
-                grid-template-columns: 1fr 2fr;
-                gap: 20px;
-                align-items: flex-start;
             margin-bottom: 1em;
-            }
-            @media screen and (max-width: 768px) {
-                .container {
-                    width: 90%;
-                }
-                .grid-container {
-                    display: block;
-                }
-                .overview-heading {
-                    font-size: 18px !important;
-                }
             }
-        </style>
-	</head>
-	<body>
-		<div class="container">
-			<h1 class="title">Visual Question Answering (VQA) for Medical Imaging</h1>
-			<h2 class="subtitle">Kalbe Digital Lab</h2>
-			<section class="overview">
-				<div class="grid-container">
-					<h3 class="overview-heading"><span class="vl">Overview</span></h3>
-					<div>
-						<p class="overview-content">
-                            The project addresses the challenge of accurate and efficient medical imaging analysis in healthcare, aiming to reduce human error and workload for radiologists.
-                            The proposed solution involves developing advanced AI models for Visual Question Answering (VQA) to assist healthcare professionals in analyzing medical images quickly and accurately.
-                            These models will be integrated into a user-friendly web application, providing a practical tool for real-world healthcare settings.
-                        </p>
-						<p class="overview-content">References: <a href="https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252" target="_blank">https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252</a></p>
-					</div>
-				</div>
-				<div class="grid-container">
-					<h3 class="overview-heading"><span class="vl">Dataset</span></h3>
-					<div>
-						<p class="overview-content">
-							The model is trained with Colorectal Nuclear Segmentation and Phenotypes (CoNSeP) dataset
-							<a href="https://huggingface.co/datasets/mdwiratathya/SLAKE-vqa-english" target="_blank">https://huggingface.co/datasets/mdwiratathya/SLAKE-vqa-english</a>. Images were extracted from 16 colorectal adenocarcinoma (CRA) WSIs.
-						</p>
-						<ul>
-							<li>Target: Nuclei</li>
-							<li>Task: Classification</li>
-							<li>Modality: Images (Histology and Label) </li>
-						</ul>
-					</div>
-				</div>
-				<div class="grid-container">
-					<h3 class="overview-heading"><span class="vl">Model Architecture</span></h3>
-					<div>
-						<p class="overview-content">The model is trained using DenseNet121 over CoNSep dataset.</p>
-						<img class="content-image" src="img/Model-Architecture.png" alt="model-architecture" />
-					</div>
-				</div>
-			</section>
-			<h3 class="overview-heading"><span class="vl">Demo</span></h3>
-			<p class="overview-content">Please select or upload a nuclei histology image and label image to see Nuclei Cells Classification capabilities of this model</p>
-		</div>
-	</body>
-</html>

 <!DOCTYPE html>
 <html>
+<head>
+    <link rel="stylesheet" href="file/style.css" />
+    <link rel="preconnect" href="https://fonts.googleapis.com" />
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+    <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap" rel="stylesheet" />
+    <title>Visual Question Answering (VQA) for Medical Imaging</title>
+    <style>
+        * {
+            box-sizing: border-box;
+        }
+        body {
+            font-family: 'Source Sans Pro', sans-serif;
+            font-size: 16px;
+        }
+        .container {
+            width: 100%;
+            margin: 0 auto;
+        }
+        .title {
+            font-size: 24px !important;
+            font-weight: 600 !important;
+            letter-spacing: 0em;
+            text-align: center;
+            color: #374159 !important;
+        }
+        .subtitle {
+            font-size: 24px !important;
+            font-style: italic;
+            font-weight: 400 !important;
+            letter-spacing: 0em;
+            text-align: center;
+            color: #1d652a !important;
+            padding-bottom: 0.5em;
+        }
+        .overview-heading {
+            font-size: 24px !important;
+            font-weight: 600 !important;
+            letter-spacing: 0em;
+            text-align: left;
+        }
+        .overview-content {
+            font-size: 14px !important;
+            font-weight: 400 !important;
+            line-height: 33px !important;
+            letter-spacing: 0em;
+            text-align: left;
+        }
+        .content-image {
+            width: 100% !important;
+            height: auto !important;
+        }
+        .vl {
+            border-left: 5px solid #1d652a;
+            padding-left: 20px;
+            color: #1d652a !important;
+        }
+        .grid-container {
+            display: grid;
+            grid-template-columns: 1fr 2fr;
+            gap: 20px;
+            align-items: flex-start;
             margin-bottom: 1em;
+        }
+        @media screen and (max-width: 768px) {
+            .container {
+                width: 90%;
+            }
+            .grid-container {
+                display: block;
+            }
+            .overview-heading {
+                font-size: 18px !important;
             }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1 class="title">Visual Question Answering (VQA) for Medical Imaging</h1>
+        <h2 class="subtitle">Kalbe Digital Lab</h2>
+        <section class="overview">
+            <div class="grid-container">
+                <h3 class="overview-heading"><span class="vl">Overview</span></h3>
+                <div>
+                    <p class="overview-content">
+                        This project addresses the challenge of accurate and efficient medical imaging analysis in healthcare,
+                        aiming to reduce human error and workload for radiologists. The proposed solution involves developing advanced AI
+                        models for Visual Question Answering (VQA) to assist healthcare professionals in analyzing
+                        medical images (radiology images) quickly and accurately. We fine-tune the Hugging Face multimodal model Idefics2-8b using radiology VQA datasets.
+                    </p>
+                </div>
+            </div>
+            <div class="grid-container">
+                <h3 class="overview-heading"><span class="vl">Dataset</span></h3>
+                <div>
+                    <p class="overview-content">
+                        We fine-tune the pre-trained model using these datasets:
+                    </p>
+                    <ul>
+                        <li><a href="https://huggingface.co/datasets/flaviagiammarino/vqa-rad" target="_blank">VQA-RAD dataset</a></li>
+                        <li><a href="https://huggingface.co/datasets/mdwiratathya/SLAKE-vqa-english" target="_blank">SLAKE dataset</a></li>
+                        <li><a href="https://huggingface.co/datasets/mdwiratathya/ROCO-radiology" target="_blank">ROCO dataset</a></li>
+                    </ul>
+                </div>
+            </div>
+            <div class="grid-container">
+                <h3 class="overview-heading"><span class="vl">Model Architecture</span></h3>
+                <div>
+                    <p class="overview-content">The model is fine-tuned from Idefics2-8b.</p>
+                    <img class="content-image" src="img/idefics2_architecture.png" alt="model-architecture" />
+                </div>
+            </div>
+        </section>
+        <h3 class="overview-heading"><span class="vl">Demo</span></h3>
+        <p class="overview-content">Please select or upload an image and enter a question to see the model's prediction.</p>
+    </div>
+</body>
+</html>