SkalskiP committed on
Commit
5d15f06
1 Parent(s): 9c79daa

more captioning tasks

Browse files
Files changed (2) hide show
  1. app.py +12 -11
  2. utils/tasks.py +18 -4
app.py CHANGED
@@ -5,7 +5,8 @@ import spaces
5
 
6
  from utils.annotate import annotate_with_boxes
7
  from utils.models import load_models, run_inference, CHECKPOINTS
8
- from utils.tasks import TASK_NAMES, TASKS
 
9
 
10
  MARKDOWN = """
11
  # Better Florence-2 Playground 🔥
@@ -25,12 +26,12 @@ MARKDOWN = """
25
  </div>
26
  """
27
 
28
- OBJECT_DETECTION_EXAMPLES = [
29
- ["microsoft/Florence-2-large-ft", "Object Detection", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
30
- ]
31
- CAPTION_EXAMPLES = [
32
- ["microsoft/Florence-2-large-ft", "Caption", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
33
- ]
34
 
35
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
  MODELS, PROCESSORS = load_models(DEVICE)
@@ -41,13 +42,13 @@ def process(checkpoint_dropdown, task_dropdown, image_input):
41
  model = MODELS[checkpoint_dropdown]
42
  processor = PROCESSORS[checkpoint_dropdown]
43
  task = TASKS[task_dropdown]
44
- if task_dropdown == "Object Detection":
45
  _, response = run_inference(
46
  model, processor, DEVICE, image_input, task)
47
  detections = sv.Detections.from_lmm(
48
  lmm=sv.LMM.FLORENCE_2, result=response, resolution_wh=image_input.size)
49
  return annotate_with_boxes(image_input, detections)
50
- elif task_dropdown == "Caption":
51
  _, response = run_inference(
52
  model, processor, DEVICE, image_input, task)
53
  return response[task]
@@ -73,7 +74,7 @@ with gr.Blocks() as demo:
73
  with gr.Column():
74
  @gr.render(inputs=task_dropdown_component)
75
  def show_output(text):
76
- if text == "Object Detection":
77
  image_output_component = gr.Image(type='pil', label='Image Output')
78
  submit_button_component.click(
79
  fn=process,
@@ -84,7 +85,7 @@ with gr.Blocks() as demo:
84
  ],
85
  outputs=image_output_component
86
  )
87
- elif text == "Caption":
88
  text_output_component = gr.Textbox(label='Caption Output')
89
  submit_button_component.click(
90
  fn=process,
 
5
 
6
  from utils.annotate import annotate_with_boxes
7
  from utils.models import load_models, run_inference, CHECKPOINTS
8
+ from utils.tasks import TASK_NAMES, TASKS, OBJECT_DETECTION_TASK_NAME, \
9
+ CAPTION_TASK_NAMES
10
 
11
  MARKDOWN = """
12
  # Better Florence-2 Playground 🔥
 
26
  </div>
27
  """
28
 
29
+ # OBJECT_DETECTION_EXAMPLES = [
30
+ # ["microsoft/Florence-2-large-ft", "Object Detection", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
31
+ # ]
32
+ # CAPTION_EXAMPLES = [
33
+ # ["microsoft/Florence-2-large-ft", "Caption", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
34
+ # ]
35
 
36
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
  MODELS, PROCESSORS = load_models(DEVICE)
 
42
  model = MODELS[checkpoint_dropdown]
43
  processor = PROCESSORS[checkpoint_dropdown]
44
  task = TASKS[task_dropdown]
45
+ if task_dropdown == OBJECT_DETECTION_TASK_NAME:
46
  _, response = run_inference(
47
  model, processor, DEVICE, image_input, task)
48
  detections = sv.Detections.from_lmm(
49
  lmm=sv.LMM.FLORENCE_2, result=response, resolution_wh=image_input.size)
50
  return annotate_with_boxes(image_input, detections)
51
+ elif task_dropdown in CAPTION_TASK_NAMES:
52
  _, response = run_inference(
53
  model, processor, DEVICE, image_input, task)
54
  return response[task]
 
74
  with gr.Column():
75
  @gr.render(inputs=task_dropdown_component)
76
  def show_output(text):
77
+ if text == OBJECT_DETECTION_TASK_NAME:
78
  image_output_component = gr.Image(type='pil', label='Image Output')
79
  submit_button_component.click(
80
  fn=process,
 
85
  ],
86
  outputs=image_output_component
87
  )
88
+ elif text in CAPTION_TASK_NAMES:
89
  text_output_component = gr.Textbox(label='Caption Output')
90
  submit_button_component.click(
91
  fn=process,
utils/tasks.py CHANGED
@@ -1,8 +1,22 @@
 
 
 
 
 
1
  TASK_NAMES = [
2
- "Object Detection",
3
- "Caption"
 
 
4
  ]
5
  TASKS = {
6
- "Object Detection": "<OD>",
7
- "Caption": "<CAPTION>"
 
 
8
  }
 
 
 
 
 
 
1
+ OBJECT_DETECTION_TASK_NAME = "Object Detection"
2
+ CAPTION_TASK_NAME = "Caption"
3
+ DETAILED_CAPTION_TASK_NAME = "Detailed Caption"
4
+ MORE_DETAILED_CAPTION_TASK_NAME = "More Detailed Caption"
5
+
6
  TASK_NAMES = [
7
+ OBJECT_DETECTION_TASK_NAME,
8
+ CAPTION_TASK_NAME,
9
+ DETAILED_CAPTION_TASK_NAME,
10
+ MORE_DETAILED_CAPTION_TASK_NAME
11
  ]
12
  TASKS = {
13
+ OBJECT_DETECTION_TASK_NAME: "<OD>",
14
+ CAPTION_TASK_NAME: "<CAPTION>",
15
+ DETAILED_CAPTION_TASK_NAME: "<DETAILED_CAPTION>",
16
+ MORE_DETAILED_CAPTION_TASK_NAME: "<MORE_DETAILED_CAPTION>"
17
  }
18
+ CAPTION_TASK_NAMES = [
19
+ CAPTION_TASK_NAME,
20
+ DETAILED_CAPTION_TASK_NAME,
21
+ MORE_DETAILED_CAPTION_TASK_NAME
22
+ ]