Doron Adler committed
Commit d1a3ab8
1 Parent(s): 9bb320a

Added gokaygokay's "Cascaded task"
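For reference, the cascaded tasks added here chain two Florence-2 prompts: a captioning pass whose text output is fed into a <CAPTION_TO_PHRASE_GROUNDING> pass, so the generated caption comes back grounded to bounding boxes. The commit also moves import os and the PYTORCH_ENABLE_MPS_FALLBACK flag to the top of app.py, presumably so the flag is set before torch is first imported. A minimal sketch of the cascade, reusing the run_example, plot_bbox, and fig_to_pil helpers already defined in app.py (cascaded_caption_grounding is a hypothetical name; the commit inlines this logic in process_image):

    def cascaded_caption_grounding(image, caption_task='<CAPTION>'):
        # Hypothetical helper; the commit inlines this in process_image.
        # Stage 1: caption the image with the chosen caption prompt.
        results = run_example(caption_task, image)
        caption = results[caption_task]
        # Stage 2: ground each phrase of that caption to image regions.
        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption)
        results[caption_task] = caption  # keep the caption in the returned dict
        # Draw the grounded boxes and return (raw results, annotated image).
        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
        return results, fig_to_pil(fig)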

Files changed (1):
  1. app.py +66 -18
app.py CHANGED
@@ -1,3 +1,6 @@
+import os
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
@@ -13,7 +16,7 @@ import matplotlib.patches as patches
 import random
 import numpy as np
 
-import os
+
 from unittest.mock import patch
 from transformers import AutoModelForCausalLM, AutoProcessor
 from transformers.dynamic_module_utils import get_imports
@@ -145,16 +148,43 @@ def process_image(image, task_prompt, text_input=None):
     image = Image.fromarray(image)  # Convert NumPy array to PIL Image
     if task_prompt == 'Caption':
         task_prompt = '<CAPTION>'
-        result = run_example(task_prompt, image)
-        return result, None
+        results = run_example(task_prompt, image)
+        return results, None
     elif task_prompt == 'Detailed Caption':
         task_prompt = '<DETAILED_CAPTION>'
-        result = run_example(task_prompt, image)
-        return result, None
+        results = run_example(task_prompt, image)
+        return results, None
     elif task_prompt == 'More Detailed Caption':
         task_prompt = '<MORE_DETAILED_CAPTION>'
-        result = run_example(task_prompt, image)
-        return result, None
+        results = run_example(task_prompt, image)
+        return results, None
+    elif task_prompt == 'Caption + Grounding':
+        task_prompt = '<CAPTION>'
+        results = run_example(task_prompt, image)
+        text_input = results[task_prompt]
+        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
+        results = run_example(task_prompt, image, text_input)
+        results['<CAPTION>'] = text_input
+        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        return results, fig_to_pil(fig)
+    elif task_prompt == 'Detailed Caption + Grounding':
+        task_prompt = '<DETAILED_CAPTION>'
+        results = run_example(task_prompt, image)
+        text_input = results[task_prompt]
+        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
+        results = run_example(task_prompt, image, text_input)
+        results['<DETAILED_CAPTION>'] = text_input
+        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        return results, fig_to_pil(fig)
+    elif task_prompt == 'More Detailed Caption + Grounding':
+        task_prompt = '<MORE_DETAILED_CAPTION>'
+        results = run_example(task_prompt, image)
+        text_input = results[task_prompt]
+        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
+        results = run_example(task_prompt, image, text_input)
+        results['<MORE_DETAILED_CAPTION>'] = text_input
+        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        return results, fig_to_pil(fig)
     elif task_prompt == 'Object Detection':
         task_prompt = '<OD>'
         results = run_example(task_prompt, image)
@@ -203,8 +233,8 @@ def process_image(image, task_prompt, text_input=None):
         return results, None
     elif task_prompt == 'OCR':
         task_prompt = '<OCR>'
-        result = run_example(task_prompt, image)
-        return result, None
+        results = run_example(task_prompt, image)
+        return results, None
    elif task_prompt == 'OCR with Region':
        task_prompt = '<OCR_WITH_REGION>'
        results = run_example(task_prompt, image)
@@ -222,19 +252,37 @@ css = """
 }
 """
 
+
+single_task_list = [
+    'Caption', 'Detailed Caption', 'More Detailed Caption', 'Object Detection',
+    'Dense Region Caption', 'Region Proposal', 'Caption to Phrase Grounding',
+    'Referring Expression Segmentation', 'Region to Segmentation',
+    'Open Vocabulary Detection', 'Region to Category', 'Region to Description',
+    'OCR', 'OCR with Region'
+]
+
+cascaded_task_list = [
+    'Caption + Grounding', 'Detailed Caption + Grounding', 'More Detailed Caption + Grounding'
+]
+
+
+def update_task_dropdown(choice):
+    if choice == 'Cascaded task':
+        return gr.Dropdown(choices=cascaded_task_list, value='Caption + Grounding')
+    else:
+        return gr.Dropdown(choices=single_task_list, value='Caption')
+
+
+
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Florence-2 Image Captioning"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture")
-                task_prompt = gr.Dropdown(choices=[
-                    'Caption', 'Detailed Caption', 'More Detailed Caption', 'Object Detection',
-                    'Dense Region Caption', 'Region Proposal', 'Caption to Phrase Grounding',
-                    'Referring Expression Segmentation', 'Region to Segmentation',
-                    'Open Vocabulary Detection', 'Region to Category', 'Region to Description',
-                    'OCR', 'OCR with Region'
-                ], label="Task Prompt", value='Caption')
+                input_img = gr.Image(label="Input Picture")
+                task_type = gr.Radio(choices=['Single task', 'Cascaded task'], label='Task type selector', value='Single task')
+                task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Caption")
+                task_type.change(fn=update_task_dropdown, inputs=task_type, outputs=task_prompt)
                 text_input = gr.Textbox(label="Text Input (optional)")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
@@ -255,4 +303,4 @@ with gr.Blocks(css=css) as demo:
 
     submit_btn.click(process_image, [input_img, task_prompt, text_input], [output_text, output_img])
 
-demo.launch(debug=True)
+demo.launch(debug=True)
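The UI change leans on a Gradio pattern worth noting: an event handler can return a new gr.Dropdown whose choices and value replace those of the existing dropdown bound as the output, which is how the Radio selector swaps between the single and cascaded task lists. A standalone sketch of just that pattern (a hypothetical mini-demo with shortened lists, not part of app.py):

    import gradio as gr

    single_task_list = ['Caption', 'Detailed Caption', 'More Detailed Caption']
    cascaded_task_list = ['Caption + Grounding', 'Detailed Caption + Grounding']

    def update_task_dropdown(choice):
        # Returning a component instance updates the bound output in place.
        if choice == 'Cascaded task':
            return gr.Dropdown(choices=cascaded_task_list, value='Caption + Grounding')
        return gr.Dropdown(choices=single_task_list, value='Caption')

    with gr.Blocks() as demo:
        task_type = gr.Radio(choices=['Single task', 'Cascaded task'], value='Single task')
        task_prompt = gr.Dropdown(choices=single_task_list, value='Caption')
        # Swapping the radio button rewrites the dropdown's choices.
        task_type.change(fn=update_task_dropdown, inputs=task_type, outputs=task_prompt)

    demo.launch()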