shikunl committed
Commit 64fb58a • 1 parent: c83f375

Update structure

Files changed (4)
  1. app.py +31 -4
  2. examples/1.jpeg +0 -0
  3. gradio_caption.py +32 -0
  4. gradio_vqa.py +33 -0
app.py CHANGED
@@ -1,7 +1,34 @@
 import gradio as gr
+import torch
 
-def greet(name):
-    return "Hello " + name + "!!"
+from gradio_caption import create_demo as create_caption
+from gradio_vqa import create_demo as create_vqa
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+
+css = """
+#img-display-input {
+    height: auto;
+    max-height: 40vh;
+}
+#img-display-output {
+    max-height: 40vh;
+}
+"""
+
+
+description = """
+# Prismer
+The official demo for **Prismer: A Vision-Language Model with An Ensemble of Experts**.
+Please refer to our [project page](https://shikun.io/projects/prismer) or [github](https://github.com/NVlabs/prismer) for more details.
+"""
+
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(description)
+    with gr.Tab("Zero-shot Image Captioning"):
+        create_caption()
+    with gr.Tab("Visual Question Answering"):
+        create_vqa()
+
+
+if __name__ == '__main__':
+    demo.queue().launch()
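The rewritten app.py composes two tabbed sub-demos. Each `create_demo()` is expected to build its components while the enclosing `gr.Blocks`/`gr.Tab` context is active, so no parent object is passed; note also that the added `import torch` is not used in app.py itself at this commit. A minimal sketch of that composition contract (the body of `create_demo` here is illustrative, not the Prismer code):

```python
import gradio as gr


def create_demo():
    # Components created inside an active `with gr.Blocks()` (or
    # `with gr.Tab()`) context are attached to that context automatically,
    # which is why app.py can just call this function inside each tab.
    with gr.Row():
        inp = gr.Textbox(label="Input")
        out = gr.Textbox(label="Output")
    inp.submit(lambda s: s, inputs=[inp], outputs=[out])  # placeholder handler


with gr.Blocks() as demo:
    with gr.Tab("Example Tab"):
        create_demo()  # builds its UI inside this tab

if __name__ == '__main__':
    demo.queue().launch()
```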
examples/1.jpeg ADDED
gradio_caption.py ADDED
@@ -0,0 +1,32 @@
+import gradio as gr
+from PIL import Image
+import tempfile
+
+
+def predict_depth(model, image):
+    depth = model.infer_pil(image)
+    return depth
+
+
+def create_demo():
+    with gr.Row():
+        with gr.Column(scale=1):
+            model_type = gr.Dropdown(["Prismer-Base", "Prismer-Large"], label="Model Size", value="Prismer-Base")
+            rgb = gr.Image(label="Input Image", type='pil', elem_id='img-display-input')
+            submit = gr.Button("Submit")
+        with gr.Column(scale=2):
+            pred = gr.Textbox(label="Model Prediction")
+            with gr.Row():
+                depth = gr.Image(label="Depth", elem_id='img-display-output')
+                edge = gr.Image(label="Edge", elem_id='img-display-output')
+                normals = gr.Image(label="Normals", elem_id='img-display-output')
+            with gr.Row():
+                seg = gr.Image(label="Segmentation", elem_id='img-display-output')
+                obj_det = gr.Image(label="Object Detection", elem_id='img-display-output')
+                ocr_det = gr.Image(label="OCR Detection", elem_id='img-display-output')
+
+    def on_submit(im, model_type):
+        return pred, depth, edge, normals, seg, obj_det, ocr_det
+
+    submit.click(on_submit, inputs=[rgb, model_type], outputs=[pred, depth, edge, normals, seg, obj_det, ocr_det])
+    examples = gr.Examples(examples=["examples/1.jpeg"], inputs=[rgb])
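At this commit the captioning tab is UI scaffolding only: `on_submit` returns the output *components* themselves rather than computed values, `predict_depth` is never called (its `model.infer_pil` call looks carried over from a depth-estimation demo template), and the `PIL`/`tempfile` imports are unused. A hedged sketch of how the handler might later be wired to real inference; `run_experts` and `generate_caption` are hypothetical names, not part of this commit:

```python
def on_submit(im, model_type):
    # Hypothetical wiring: run the expert models, then the captioner.
    # `run_experts` and `generate_caption` do not exist in this commit;
    # they stand in for whatever inference code a later commit adds.
    expert_maps = run_experts(im)  # dict of depth/edge/normal/seg/obj/ocr images
    caption = generate_caption(im, expert_maps, model_type)
    return (caption, expert_maps['depth'], expert_maps['edge'],
            expert_maps['normal'], expert_maps['seg'],
            expert_maps['obj_det'], expert_maps['ocr_det'])
```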
gradio_vqa.py ADDED
@@ -0,0 +1,33 @@
+import gradio as gr
+from PIL import Image
+import tempfile
+
+
+def predict_depth(model, image):
+    depth = model.infer_pil(image)
+    return depth
+
+
+def create_demo():
+    with gr.Row():
+        with gr.Column(scale=1):
+            model_type = gr.Dropdown(["Prismer-Base", "Prismer-Large"], label="Model Size", value="Prismer-Base")
+            ques = gr.Textbox(label="Question", placeholder="What's the number of this player?")
+            rgb = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto")
+            submit = gr.Button("Submit")
+        with gr.Column(scale=2):
+            pred = gr.Textbox(label="Model Prediction")
+            with gr.Row():
+                depth = gr.Image(label="Depth", elem_id='img-display-output')
+                edge = gr.Image(label="Edge", elem_id='img-display-output')
+                normals = gr.Image(label="Normals", elem_id='img-display-output')
+            with gr.Row():
+                seg = gr.Image(label="Segmentation", elem_id='img-display-output')
+                obj_det = gr.Image(label="Object Detection", elem_id='img-display-output')
+                ocr_det = gr.Image(label="OCR Detection", elem_id='img-display-output')
+
+    def on_submit(im, q, model_type):
+        return pred, depth, edge, normals, seg, obj_det, ocr_det
+
+    submit.click(on_submit, inputs=[rgb, ques, model_type], outputs=[pred, depth, edge, normals, seg, obj_det, ocr_det])
+    examples = gr.Examples(examples=["examples/1.jpeg"], inputs=[rgb])
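The VQA tab mirrors the captioning layout, adding a question box and threading it through `on_submit(im, q, model_type)`; the `.style(height="auto")` call belongs to the pre-4.0 Gradio styling API. As in the captioning file, the handler is a stub. A sketch of the eventual shape under the same assumptions as above (`run_experts` and `answer_question` are hypothetical):

```python
def on_submit(im, q, model_type):
    # Hypothetical: answer the question about the image, reusing the
    # same expert maps as the captioning tab. Names are illustrative.
    expert_maps = run_experts(im)
    answer = answer_question(im, q, expert_maps, model_type)
    return (answer, expert_maps['depth'], expert_maps['edge'],
            expert_maps['normal'], expert_maps['seg'],
            expert_maps['obj_det'], expert_maps['ocr_det'])
```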