jw2yang committed on
Commit
d1def87
1 Parent(s): d575688

change the task order

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -78,7 +78,7 @@ def inference(image, task, *args, **kwargs):
78
  return referring_captioning([model_last, model_cap], image, *args, **kwargs)
79
  elif task == 'Text Retrieval':
80
  return text_retrieval(model_cap, image, *args, **kwargs)
81
- elif task == 'Image/Region Retrieval (Only Support Exampled 80 images)':
82
  return region_retrieval([model_cap, model_last], image, *args, **kwargs)
83
 
84
  '''
@@ -88,7 +88,7 @@ title = "X-Decoder All-in-One Demo"
88
  description = "<p style='text-align: center'> <a href='' target='_blank'>Project Page</a> | <a href='' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='' target='_blank'>Video</a> </p>"
89
  article = "The Demo is Run on X-Decoder (Focal-T)."
90
 
91
- inputs = [gr.inputs.Image(type='pil'), gr.inputs.Radio(choices=["Referring Segmentation", 'Open Vocabulary Semantic Segmentation','Open Vocabulary Instance Segmentation', "Open Vocabulary Panoptic Segmentation", "Image Captioning", "Text Retrieval", "Referring Editing", "Referring Captioning (Beta)", "Image/Region Retrieval (Only Support Exampled 80 images)"], type="value", default="OpenVocab Semantic Segmentation", label="Task"), gr.Textbox(label="xdecoder_text"), gr.Textbox(label="inpainting_text"), gr.Textbox(label="task_description")]
92
  gr.Interface(
93
  fn=inference,
94
  inputs=inputs,
@@ -99,7 +99,7 @@ gr.Interface(
99
  gr.Textbox(label="text restuls"),
100
  gr.outputs.Image(
101
  type="pil",
102
- label="inpainting results"),
103
  ],
104
  examples=[
105
  ["./images/fruit.jpg", "Referring Segmentation", "The larger watermelon.,The front white flower.,White tea pot.,Flower bunch.,white vase.,The peach on the left.,The brown knife.", '', 'Format: s,s,s'],
@@ -108,7 +108,7 @@ gr.Interface(
108
  ["./images/owls.jpeg", "Open Vocabulary Instance Segmentation", "owl", '', 'Format: y,y,y'],
109
  ["./images/mountain.jpeg", "Image Captioning", "", '', ''],
110
  ["./images/rose.webp", "Text Retrieval", "lily,rose,peoney,tulip", '', 'Format: s,s,s'],
111
- ["./images/region_retrieval.png", "Image/Region Retrieval (Only Support Exampled 80 images)", "The tangerine on the plate.", '', 'Please describe the object in a detailed way.'],
112
  ["./images/landscape.jpg", "Referring Captioning (Beta)", "cloud", '', 'Please fill in a noun/noun phrase. (may start with a/the)'],
113
  ["./images/apples.jpg", "Referring Editing", "a green apple", 'a pear', 'x-decoder + ldm (inference takes ~20s), use inpainting_text "clean and empty scene" for image inpainting'],
114
  ],
 
78
  return referring_captioning([model_last, model_cap], image, *args, **kwargs)
79
  elif task == 'Text Retrieval':
80
  return text_retrieval(model_cap, image, *args, **kwargs)
81
+ elif task == 'Image/Region Retrieval':
82
  return region_retrieval([model_cap, model_last], image, *args, **kwargs)
83
 
84
  '''
 
88
  description = "<p style='text-align: center'> <a href='' target='_blank'>Project Page</a> | <a href='' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='' target='_blank'>Video</a> </p>"
89
  article = "The Demo is Run on X-Decoder (Focal-T)."
90
 
91
+ inputs = [gr.inputs.Image(type='pil'), gr.inputs.Radio(choices=["Referring Segmentation", 'Open Vocabulary Semantic Segmentation','Open Vocabulary Instance Segmentation', "Open Vocabulary Panoptic Segmentation", "Image Captioning", "Text Retrieval", "Image/Region Retrieval", "Referring Captioning (Beta)", "Referring Editing"], type="value", default="OpenVocab Semantic Segmentation", label="Task"), gr.Textbox(label="xdecoder_text"), gr.Textbox(label="inpainting_text"), gr.Textbox(label="task_description")]
92
  gr.Interface(
93
  fn=inference,
94
  inputs=inputs,
 
99
  gr.Textbox(label="text restuls"),
100
  gr.outputs.Image(
101
  type="pil",
102
+ label="editing results"),
103
  ],
104
  examples=[
105
  ["./images/fruit.jpg", "Referring Segmentation", "The larger watermelon.,The front white flower.,White tea pot.,Flower bunch.,white vase.,The peach on the left.,The brown knife.", '', 'Format: s,s,s'],
 
108
  ["./images/owls.jpeg", "Open Vocabulary Instance Segmentation", "owl", '', 'Format: y,y,y'],
109
  ["./images/mountain.jpeg", "Image Captioning", "", '', ''],
110
  ["./images/rose.webp", "Text Retrieval", "lily,rose,peoney,tulip", '', 'Format: s,s,s'],
111
+ ["./images/region_retrieval.png", "Image/Region Retrieval", "The tangerine on the plate.", '', 'Please describe the object in a detailed way (80 images in the pool).'],
112
  ["./images/landscape.jpg", "Referring Captioning (Beta)", "cloud", '', 'Please fill in a noun/noun phrase. (may start with a/the)'],
113
  ["./images/apples.jpg", "Referring Editing", "a green apple", 'a pear', 'x-decoder + ldm (inference takes ~20s), use inpainting_text "clean and empty scene" for image inpainting'],
114
  ],