recognize-anything

Running

xinyu1205 Ma Jinyu committed on Jun 12, 2023

Commit

02634b8

1 Parent(s): 28ec2a0

Re-arrange GUI (#3)

- Re-arrange GUI (3504b1c7011cf69d871547fefb55b9b3b69e66a4)

Co-authored-by: Ma Jinyu <Miles-Ma@users.noreply.huggingface.co>

Files changed (1) hide show

app.py +140 -28

app.py CHANGED Viewed

@@ -65,31 +65,143 @@ def inference(raw_image, model_n , input_tag):
         return tag_1[0],'none',caption[0]
-inputs = [
-    gr.inputs.Image(type='pil'),
-    gr.inputs.Radio(choices=['Recognize Anything Model',"Tag2Text Model"],
-    type="value",
-    default="Recognize Anything Model",
-    label="Select Model"          ),
-    gr.inputs.Textbox(lines=2, label="User Specified Tags (Optional and Currently only Tag2Text is Supported, Enter with commas)")
-                ]
-outputs = [gr.outputs.Textbox(label="Tags"),gr.outputs.Textbox(label="标签"), gr.outputs.Textbox(label="Caption (currently only Tag2Text is supported)")]
-# title = "Recognize Anything Model"
-title = "<font size='10'> Recognize Anything Model</font>"
-description = "Welcome to the Recognize Anything Model (RAM) and Tag2Text Model demo! <li><b>Recognize Anything Model:</b>  Upload your image to get the <b>English and Chinese outputs of the image tags</b>!</li><li><b>Tag2Text Model:</b> Upload your image to get the <b>tags</b> and <b>caption</b> of the image. Optional: You can also input specified tags to get the corresponding caption.</li> "
-article = "<p style='text-align: center'>RAM and Tag2Text is training on open-source datasets, and we are persisting in refining and iterating upon it.<br/><a href='https://recognize-anything.github.io/' target='_blank'>Recognize Anything: A Strong Image Tagging Model</a> | <a href='https://https://tag2text.github.io/' target='_blank'>Tag2Text: Guiding Language-Image Model via Image Tagging</a> | <a href='https://github.com/xinyu1205/Tag2Text' target='_blank'>Github Repo</a></p>"
-demo = gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=[
-                                                                                                                ['images/demo1.jpg',"Recognize Anything Model","none"],
-                                                                                                                ['images/demo2.jpg',"Recognize Anything Model","none"],
-                                                                                                                 ['images/demo4.jpg',"Recognize Anything Model","none"],
-                                                                                                                 ['images/demo4.jpg',"Tag2Text Model","power line"],
-                                                                                                                 ['images/demo4.jpg',"Tag2Text Model","track, train"] ,
-                                                                                                                ])
-demo.launch(enable_queue=True)

         return tag_1[0],'none',caption[0]
+def build_gui():
+    description = """
+        <center><strong><font size='10'>Recognize Anything Model</font></strong></center>
+        <br>
+        Welcome to the Recognize Anything Model (RAM) and Tag2Text Model demo! <br><br>
+        <li>
+            <b>Recognize Anything Model:</b> Upload your image to get the <b>English and Chinese outputs of the image tags</b>!
+        </li>
+        <li>
+            <b>Tag2Text Model:</b> Upload your image to get the <b>tags</b> and <b>caption</b> of the image.
+            Optional: You can also input specified tags to get the corresponding caption.
+        </li>
+    """  # noqa
+    article = """
+        <p style='text-align: center'>
+            RAM and Tag2Text is training on open-source datasets, and we are persisting in refining and iterating upon it.<br/>
+            <a href='https://recognize-anything.github.io/' target='_blank'>Recognize Anything: A Strong Image Tagging Model</a>
+            |
+            <a href='https://https://tag2text.github.io/' target='_blank'>Tag2Text: Guiding Language-Image Model via Image Tagging</a>
+            |
+            <a href='https://github.com/xinyu1205/Tag2Text' target='_blank'>Github Repo</a>
+        </p>
+    """  # noqa
+    def inference_with_ram(img):
+        res = inference(img, "Recognize Anything Model", None)
+        return res[0], res[1]
+    def inference_with_t2t(img, input_tags):
+        res = inference(img, "Tag2Text Model", input_tags)
+        return res[0], res[2]
+    with gr.Blocks(title="Recognize Anything Model") as demo:
+        ###############
+        # components
+        ###############
+        gr.HTML(description)
+        with gr.Tab(label="Recognize Anything Model"):
+            with gr.Row():
+                with gr.Column():
+                    ram_in_img = gr.Image(type="pil")
+                    with gr.Row():
+                        ram_btn_run = gr.Button(value="Run")
+                        ram_btn_clear = gr.Button(value="Clear")
+                with gr.Column():
+                    ram_out_tag = gr.Textbox(label="Tags")
+                    ram_out_biaoqian = gr.Textbox(label="标签")
+            gr.Examples(
+                examples=[
+                    ["images/demo1.jpg"],
+                    ["images/demo2.jpg"],
+                    ["images/demo4.jpg"],
+                ],
+                fn=inference_with_ram,
+                inputs=[ram_in_img],
+                outputs=[ram_out_tag, ram_out_biaoqian],
+                cache_examples=True
+            )
+        with gr.Tab(label="Tag2Text Model"):
+            with gr.Row():
+                with gr.Column():
+                    t2t_in_img = gr.Image(type="pil")
+                    t2t_in_tag = gr.Textbox(label="User Specified Tags (Optional, separated by comma)")
+                    with gr.Row():
+                        t2t_btn_run = gr.Button(value="Run")
+                        t2t_btn_clear = gr.Button(value="Clear")
+                with gr.Column():
+                    t2t_out_tag = gr.Textbox(label="Tags")
+                    t2t_out_cap = gr.Textbox(label="Caption")
+            gr.Examples(
+                examples=[
+                    ["images/demo4.jpg", ""],
+                    ["images/demo4.jpg", "power line"],
+                    ["images/demo4.jpg", "track, train"],
+                ],
+                fn=inference_with_t2t,
+                inputs=[t2t_in_img, t2t_in_tag],
+                outputs=[t2t_out_tag, t2t_out_cap],
+                cache_examples=True
+            )
+        gr.HTML(article)
+        ###############
+        # events
+        ###############
+        # run inference
+        ram_btn_run.click(
+            fn=inference_with_ram,
+            inputs=[ram_in_img],
+            outputs=[ram_out_tag, ram_out_biaoqian]
+        )
+        t2t_btn_run.click(
+            fn=inference_with_t2t,
+            inputs=[t2t_in_img, t2t_in_tag],
+            outputs=[t2t_out_tag, t2t_out_cap]
+        )
+        # # images of two image panels should keep the same
+        # # and clear old outputs when image changes
+        # # slow due to internet latency when deployed on huggingface, comment out
+        # def sync_img(v):
+        #     return [gr.update(value=v)] + [gr.update(value="")] * 4
+        # ram_in_img.upload(fn=sync_img, inputs=[ram_in_img], outputs=[
+        #     t2t_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # ram_in_img.clear(fn=sync_img, inputs=[ram_in_img], outputs=[
+        #     t2t_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # t2t_in_img.clear(fn=sync_img, inputs=[t2t_in_img], outputs=[
+        #     ram_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # t2t_in_img.upload(fn=sync_img, inputs=[t2t_in_img], outputs=[
+        #     ram_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # clear all
+        def clear_all():
+            return [gr.update(value=None)] * 2 + [gr.update(value="")] * 5
+        ram_btn_clear.click(fn=clear_all, inputs=[], outputs=[
+            ram_in_img, t2t_in_img,
+            ram_out_tag, ram_out_biaoqian, t2t_in_tag, t2t_out_tag, t2t_out_cap
+        ])
+        t2t_btn_clear.click(fn=clear_all, inputs=[], outputs=[
+            ram_in_img, t2t_in_img,
+            ram_out_tag, ram_out_biaoqian, t2t_in_tag, t2t_out_tag, t2t_out_cap
+        ])
+    return demo
+if __name__ == "__main__":
+    demo = build_gui()
+    demo.launch(enable_queue=True)