dafajudin committed on
Commit
0ad6e28
1 Parent(s): 9e8ecff

update code

Browse files
Files changed (2) hide show
  1. app.py +12 -11
  2. index.html +116 -117
app.py CHANGED
@@ -35,11 +35,11 @@ if USE_QLORA or USE_LORA:
35
  )
36
 
37
  # Model yang akan digunakan
38
- model = Idefics2ForConditionalGeneration.from_pretrained(
39
- "jihadzakki/idefics2-8b-vqarad-delta",
40
- torch_dtype=torch.float16,
41
- quantization_config=bnb_config
42
- )
43
 
44
  processor = AutoProcessor.from_pretrained(
45
  "HuggingFaceM4/idefics2-8b",
@@ -81,15 +81,16 @@ def format_answer(image, question, history):
81
  return f"Error: {str(e)}", history
82
 
83
  def clear_history():
84
- return None, "", []
85
 
86
  def undo_last(history):
87
  if history:
88
  history.pop()
89
  if history:
90
  last_image, last_entry = history[-1]
91
- return last_image, last_entry, history
92
- return None, "", history
 
93
 
94
  def retry_last(history):
95
  if history:
@@ -148,19 +149,19 @@ with gr.Blocks(
148
  retry_button.click(
149
  retry_last,
150
  inputs=[history_state],
151
- outputs=[answer_output, image_input, history_state]
152
  )
153
 
154
  undo_button.click(
155
  undo_last,
156
  inputs=[history_state],
157
- outputs=[image_input, answer_output, history_state]
158
  )
159
 
160
  clear_button.click(
161
  clear_history,
162
  inputs=[],
163
- outputs=[image_input, answer_output, history_state]
164
  )
165
 
166
  with gr.Row():
 
35
  )
36
 
37
  # Model yang akan digunakan
38
+ # model = Idefics2ForConditionalGeneration.from_pretrained(
39
+ # "jihadzakki/idefics2-8b-vqarad-delta",
40
+ # torch_dtype=torch.float16,
41
+ # quantization_config=bnb_config
42
+ # )
43
 
44
  processor = AutoProcessor.from_pretrained(
45
  "HuggingFaceM4/idefics2-8b",
 
81
  return f"Error: {str(e)}", history
82
 
83
  def clear_history():
84
+ return None, "", [], ""
85
 
86
  def undo_last(history):
87
  if history:
88
  history.pop()
89
  if history:
90
  last_image, last_entry = history[-1]
91
+ question = last_entry.split(" | ")[0].replace("Question: ", "")
92
+ return last_image, question, last_entry, history
93
+ return None, "", "", history
94
 
95
  def retry_last(history):
96
  if history:
 
149
  retry_button.click(
150
  retry_last,
151
  inputs=[history_state],
152
+ outputs=[answer_output, image_input, question_input, history_state]
153
  )
154
 
155
  undo_button.click(
156
  undo_last,
157
  inputs=[history_state],
158
+ outputs=[image_input, question_input, answer_output, history_state]
159
  )
160
 
161
  clear_button.click(
162
  clear_history,
163
  inputs=[],
164
+ outputs=[image_input, question_input, answer_output, history_state]
165
  )
166
 
167
  with gr.Row():
index.html CHANGED
@@ -1,133 +1,132 @@
1
  <!DOCTYPE html>
2
  <html>
3
- <head>
4
- <link rel="stylesheet" href="file/style.css" />
5
- <link rel="preconnect" href="https://fonts.googleapis.com" />
6
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
7
- <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap" rel="stylesheet" />
8
- <title>Visual Question Answering (VQA) for Medical Imaging</title>
9
- <style>
10
- * {
11
- box-sizing: border-box;
12
- }
13
 
14
- body {
15
- font-family: 'Source Sans Pro', sans-serif;
16
- font-size: 16px;
17
- }
18
 
19
- .container {
20
- width: 100%;
21
- margin: 0 auto;
22
- }
23
 
24
- .title {
25
- font-size: 24px !important;
26
- font-weight: 600 !important;
27
- letter-spacing: 0em;
28
- text-align: center;
29
- color: #374159 !important;
30
- }
31
 
32
- .subtitle {
33
- font-size: 24px !important;
34
- font-style: italic;
35
- font-weight: 400 !important;
36
- letter-spacing: 0em;
37
- text-align: center;
38
- color: #1d652a !important;
39
- padding-bottom: 0.5em;
40
- }
41
 
42
- .overview-heading {
43
- font-size: 24px !important;
44
- font-weight: 600 !important;
45
- letter-spacing: 0em;
46
- text-align: left;
47
- }
48
 
49
- .overview-content {
50
- font-size: 14px !important;
51
- font-weight: 400 !important;
52
- line-height: 33px !important;
53
- letter-spacing: 0em;
54
- text-align: left;
55
- }
56
 
57
- .content-image {
58
- width: 100% !important;
59
- height: auto !important;
60
- }
61
 
62
- .vl {
63
- border-left: 5px solid #1d652a;
64
- padding-left: 20px;
65
- color: #1d652a !important;
66
- }
67
 
68
- .grid-container {
69
- display: grid;
70
- grid-template-columns: 1fr 2fr;
71
- gap: 20px;
72
- align-items: flex-start;
73
  margin-bottom: 1em;
74
- }
75
 
76
- @media screen and (max-width: 768px) {
77
- .container {
78
- width: 90%;
79
- }
80
 
81
- .grid-container {
82
- display: block;
83
- }
84
 
85
- .overview-heading {
86
- font-size: 18px !important;
87
- }
88
  }
89
- </style>
90
- </head>
91
- <body>
92
- <div class="container">
93
- <h1 class="title">Visual Question Answering (VQA) for Medical Imaging</h1>
94
- <h2 class="subtitle">Kalbe Digital Lab</h2>
95
- <section class="overview">
96
- <div class="grid-container">
97
- <h3 class="overview-heading"><span class="vl">Overview</span></h3>
98
- <div>
99
- <p class="overview-content">
100
- The project addresses the challenge of accurate and efficient medical imaging analysis in healthcare, aiming to reduce human error and workload for radiologists.
101
- The proposed solution involves developing advanced AI models for Visual Question Answering (VQA) to assist healthcare professionals in analyzing medical images quickly and accurately.
102
- These models will be integrated into a user-friendly web application, providing a practical tool for real-world healthcare settings.
103
- </p>
104
- <p class="overview-content">References: <a href="https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252" target="_blank">https://www.sciencedirect.com/science/article/abs/pii/S0933365723001252</a></p>
105
- </div>
106
- </div>
107
- <div class="grid-container">
108
- <h3 class="overview-heading"><span class="vl">Dataset</span></h3>
109
- <div>
110
- <p class="overview-content">
111
- The model is trained with Colorectal Nuclear Segmentation and Phenotypes (CoNSeP) dataset
112
- <a href="https://huggingface.co/datasets/mdwiratathya/SLAKE-vqa-english" target="_blank">https://huggingface.co/datasets/mdwiratathya/SLAKE-vqa-english</a>. Images were extracted from 16 colorectal adenocarcinoma (CRA) WSIs.
113
- </p>
114
- <ul>
115
- <li>Target: Nuclei</li>
116
- <li>Task: Classification</li>
117
- <li>Modality: Images (Histology and Label) </li>
118
- </ul>
119
- </div>
120
- </div>
121
- <div class="grid-container">
122
- <h3 class="overview-heading"><span class="vl">Model Architecture</span></h3>
123
- <div>
124
- <p class="overview-content">The model is trained using DenseNet121 over CoNSep dataset.</p>
125
- <img class="content-image" src="img/Model-Architecture.png" alt="model-architecture" />
126
- </div>
127
- </div>
128
- </section>
129
- <h3 class="overview-heading"><span class="vl">Demo</span></h3>
130
- <p class="overview-content">Please select or upload a nuclei histology image and label image to see Nuclei Cells Classification capabilities of this model</p>
131
- </div>
132
- </body>
133
- </html>
 
1
  <!DOCTYPE html>
2
  <html>
3
+ <head>
4
+ <link rel="stylesheet" href="file/style.css" />
5
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
6
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
7
+ <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap" rel="stylesheet" />
8
+ <title>Visual Question Answering (VQA) for Medical Imaging</title>
9
+ <style>
10
+ * {
11
+ box-sizing: border-box;
12
+ }
13
 
14
+ body {
15
+ font-family: 'Source Sans Pro', sans-serif;
16
+ font-size: 16px;
17
+ }
18
 
19
+ .container {
20
+ width: 100%;
21
+ margin: 0 auto;
22
+ }
23
 
24
+ .title {
25
+ font-size: 24px !important;
26
+ font-weight: 600 !important;
27
+ letter-spacing: 0em;
28
+ text-align: center;
29
+ color: #374159 !important;
30
+ }
31
 
32
+ .subtitle {
33
+ font-size: 24px !important;
34
+ font-style: italic;
35
+ font-weight: 400 !important;
36
+ letter-spacing: 0em;
37
+ text-align: center;
38
+ color: #1d652a !important;
39
+ padding-bottom: 0.5em;
40
+ }
41
 
42
+ .overview-heading {
43
+ font-size: 24px !important;
44
+ font-weight: 600 !important;
45
+ letter-spacing: 0em;
46
+ text-align: left;
47
+ }
48
 
49
+ .overview-content {
50
+ font-size: 14px !important;
51
+ font-weight: 400 !important;
52
+ line-height: 33px !important;
53
+ letter-spacing: 0em;
54
+ text-align: left;
55
+ }
56
 
57
+ .content-image {
58
+ width: 100% !important;
59
+ height: auto !important;
60
+ }
61
 
62
+ .vl {
63
+ border-left: 5px solid #1d652a;
64
+ padding-left: 20px;
65
+ color: #1d652a !important;
66
+ }
67
 
68
+ .grid-container {
69
+ display: grid;
70
+ grid-template-columns: 1fr 2fr;
71
+ gap: 20px;
72
+ align-items: flex-start;
73
  margin-bottom: 1em;
74
+ }
75
 
76
+ @media screen and (max-width: 768px) {
77
+ .container {
78
+ width: 90%;
79
+ }
80
 
81
+ .grid-container {
82
+ display: block;
83
+ }
84
 
85
+ .overview-heading {
86
+ font-size: 18px !important;
 
87
  }
88
+ }
89
+ </style>
90
+ </head>
91
+ <body>
92
+ <div class="container">
93
+ <h1 class="title">Visual Question Answering (VQA) for Medical Imaging</h1>
94
+ <h2 class="subtitle">Kalbe Digital Lab</h2>
95
+ <section class="overview">
96
+ <div class="grid-container">
97
+ <h3 class="overview-heading"><span class="vl">Overview</span></h3>
98
+ <div>
99
+ <p class="overview-content">
100
+ This project addresses the challenge of accurate and efficient medical imaging analysis in healthcare,
101
+ aiming to reduce human error and workload for radiologists. The proposed solution involves developing advanced AI
102
+ models for Visual Question Answering (VQA) to assist healthcare professionals in analyzing
103
+ medical images (radiology images) quickly and accurately. We fine-tune HuggingFace multimodal model Idefics2-8b using radiology VQA datasets.
104
+ </p>
105
+ </div>
106
+ </div>
107
+ <div class="grid-container">
108
+ <h3 class="overview-heading"><span class="vl">Dataset</span></h3>
109
+ <div>
110
+ <p class="overview-content">
111
+ We fine-tune the pre-trained model using these datasets:
112
+ </p>
113
+ <ul>
114
+ <li><a href="https://huggingface.co/datasets/flaviagiammarino/vqa-rad" target="_blank">VQA-RAD dataset</a></li>
115
+ <li><a href="https://huggingface.co/datasets/mdwiratathya/SLAKE-vqa-english" target="_blank">SLAKE dataset</a></li>
116
+ <li><a href="https://huggingface.co/datasets/mdwiratathya/ROCO-radiology" target="_blank">ROCO dataset</a></li>
117
+ </ul>
118
+ </div>
119
+ </div>
120
+ <div class="grid-container">
121
+ <h3 class="overview-heading"><span class="vl">Model Architecture</span></h3>
122
+ <div>
123
+ <p class="overview-content">The model is trained using Idefics2-8b.</p>
124
+ <img class="content-image" src="img/idefics2_architecture.png" alt="model-architecture" />
125
+ </div>
126
+ </div>
127
+ </section>
128
+ <h3 class="overview-heading"><span class="vl">Demo</span></h3>
129
+ <p class="overview-content">Please select or upload an image and enter text to see the prediction of this model</p>
130
+ </div>
131
+ </body>
132
+ </html>