taesiri committed
Commit 02a6b14
1 Parent(s): 07c39b0
5kstbz-0001.png ADDED
Blue_Jay_0044_62759.jpg ADDED
ILSVRC2012_val_00000008.JPEG ADDED
app.py CHANGED
@@ -58,9 +58,8 @@ def clear_chat(history):
 
 
 with gr.Blocks() as demo:
-    gr.Markdown("# BLIP-2")
     gr.Markdown(
-        "## Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models"
+        "## BLIP-2 - Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models"
     )
     gr.Markdown(
         "This demo uses `OPT2.7B` weights. For more information please see [Github](https://github.com/salesforce/LAVIS/tree/main/projects/blip2) or [Paper](https://arxiv.org/abs/2301.12597)."
@@ -71,7 +70,7 @@ with gr.Blocks() as demo:
     input_image = gr.Image(label="Image", type="pil")
     caption_type = gr.Radio(
         ["Beam Search", "Nucleus Sampling"],
-        label="Caption Type",
+        label="Caption Decoding Strategy",
         value="Beam Search",
     )
     btn_caption = gr.Button("Generate Caption")
@@ -98,8 +97,23 @@ with gr.Blocks() as demo:
     btn_clear.click(clear_chat, inputs=[chat_state], outputs=[chatbot, chat_state])
 
     gr.Examples(
-        [["./merlion.png", "Beam Search", "which city is this?", None, None]],
-        inputs=[input_image, caption_type, question_txt, chat_state, chatbot],
+        [
+            ["./merlion.png", "Beam Search", "which city is this?"],
+            [
+                "./Blue_Jay_0044_62759.jpg",
+                "Beam Search",
+                "what is the name of this bird?",
+            ],
+            ["./5kstbz-0001.png", "Beam Search", "where is the man standing?"],
+            [
+                "ILSVRC2012_val_00000008.JPEG",
+                "Beam Search",
+                "Name the colors of macarons you see in the image.",
+            ],
+        ],
+        inputs=[input_image, caption_type, question_txt],
     )
 
+    gr.Markdown("Sample images are taken from ImageNet, CUB and GamePhysics datasets.")
+
 demo.launch()
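
The enlarged gr.Examples block maps each example row positionally onto the components in its inputs list, which is why the new rows carry exactly three values (image, decoding strategy, question). Below is a minimal, self-contained sketch of that wiring; the answer handler and the output textbox are placeholders standing in for the Space's actual BLIP-2 inference code, and the example image path is assumed to exist on disk.

# Minimal sketch of the gr.Examples wiring used in this commit.
# The `answer` handler and the "Answer" textbox are assumptions, not part of app.py.
import gradio as gr

def answer(image, strategy, question):
    # Placeholder: the real app runs BLIP-2 with the chosen decoding strategy.
    return f"[{strategy}] {question}"

with gr.Blocks() as demo:
    input_image = gr.Image(label="Image", type="pil")
    caption_type = gr.Radio(
        ["Beam Search", "Nucleus Sampling"],
        label="Caption Decoding Strategy",
        value="Beam Search",
    )
    question_txt = gr.Textbox(label="Question")
    answer_txt = gr.Textbox(label="Answer")
    gr.Button("Ask").click(
        answer,
        inputs=[input_image, caption_type, question_txt],
        outputs=[answer_txt],
    )
    # Each row fills the three inputs positionally: image, strategy, question.
    gr.Examples(
        [["./merlion.png", "Beam Search", "which city is this?"]],
        inputs=[input_image, caption_type, question_txt],
    )

demo.launch()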