Upload 10 files

	@@ -0,0 +1,131 @@

+import re
+import gradio as gr
+from model import ToyModel
+"""
+Model specification
+"""
+model = ToyModel()
+def chat(image_input, text_input):
+    image_output, text_output = model.chat(image_input, text_input)
+    return image_output, text_output
+"""
+Gradio
+"""
+def gradio_taskselect(idx):
+    prompt_list = [
+        '',
+        '[grounding] describe this image in detail',
+        '[refer] ',
+        '[detection] ',
+        '[identify] what is this ',
+        '[vqa] '
+    ]
+    instruct_list = [
+        '**Hint:** Type in whatever you want',
+        '**Hint:** Send the command to generate a grounded image description',
+        '**Hint:** Type in a phrase about an object in the image and send the command',
+        '**Hint:** Type in a caption or phrase, and see object locations in the image',
+        '**Hint:** Draw a bounding box on the uploaded image then send the command. Click the "clear" botton on the '
+        'top right of the image before redraw',
+        '**Hint:** Send a question to get a short answer',
+    ]
+    return prompt_list[idx], instruct_list[idx]
+title = """<h1 align="center">RS-Visual Perception Demo</h1>"""
+description = 'Welcome to Our RS-Visual Perception Demo!'
+introduction = '''
+For Abilities Involving Visual Grounding:
+1. Grounding: CLICK **Send** to generate a grounded image description.
+2. Refer: Input a referring object and CLICK **Send**.
+3. Detection: Write a caption or phrase, and CLICK **Send**.
+4. Identify: Draw the bounding box on the uploaded image window and CLICK **Send** to generate the bounding box. (CLICK "clear" button before re-drawing next time).
+5. VQA: Input a visual question and CLICK **Send**.
+6. No Tag: Input whatever you want and CLICK **Send** without any tagging
+You can also simply chat in free form!
+'''
+with gr.Blocks() as demo:
+    gr.Markdown(title)
+    gr.Markdown(description)
+    with gr.Row():
+        with gr.Column(scale=0.5):
+            image_input = gr.Image(type="pil", label="Input Image")
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=1.5,
+                value=0.6,
+                step=0.1,
+                interactive=True,
+                label="Temperature",
+            )
+            dataset = gr.Dataset(
+                components=[gr.Textbox(visible=False)],
+                samples=[['No Tag'], ['Grounding'], ['Refer'], ['Detection'], ['Identify'], ['VQA']],
+                type="index",
+                label='Task Shortcuts',
+            )
+            task_inst = gr.Markdown('**Hint:** Upload your image and chat')
+            text_input = gr.Textbox(label='Input text', placeholder='Upload your image and chat', interactive=True, )
+            submit_button = gr.Button("Submit", variant='primary', size='sm', scale=1)
+            gr.Markdown(introduction)
+        with gr.Column():
+            image_output = gr.Image(type="pil", label='Output image')
+            text_output = gr.Textbox(label='Output text', interactive=True)
+    with gr.Row():
+        with gr.Column():
+            gr.Examples(examples=[
+                ["examples_v2/office.jpg", "[grounding] describe this image in detail"],
+                ["examples_v2/sofa.jpg", "[detection] sofas"],
+                ["examples_v2/2000x1372_wmkn_0012149409555.jpg", "[refer] the world cup"],
+                ["examples_v2/KFC-20-for-20-Nuggets.jpg", "[identify] what is this {<4><50><30><65>}"],
+            ], inputs=[image_input, text_input], fn=chat,
+                outputs=[image_output, text_output])
+        with gr.Column():
+            gr.Examples(examples=[
+                ["examples_v2/glip_test.jpg", "[vqa] where should I hide in this room when playing hide and seek"],
+                ["examples_v2/float.png", "Please write a poem about the image"],
+                ["examples_v2/thief.png", "Is the weapon fateful"],
+                ["examples_v2/cockdial.png", "What might happen in this image in the next second"],
+            ], inputs=[image_input, text_input], fn=chat,
+                outputs=[image_output, text_output])
+    dataset.click(
+        gradio_taskselect,
+        inputs=[dataset],
+        outputs=[text_input, task_inst],
+        show_progress="hidden",
+        postprocess=False,
+        queue=False,
+    )
+    text_input.submit(
+        chat,
+        inputs=[image_input, text_input],
+        outputs=[image_output, text_output],
+    )
+    submit_button.click(
+        chat,
+        inputs=[image_input, text_input],
+        outputs=[image_output, text_output],
+    )
+demo.launch()

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples_v2/cockdial.png filter=lfs diff=lfs merge=lfs -text
+examples_v2/float.png filter=lfs diff=lfs merge=lfs -text

+class ToyModel():
+    def __init__(self):
+        ...
+    def chat(self, image_input, text_input):
+        return image_input, text_input

Spaces:

shuangzhiaishang
/

RS-VL-Perception

Runtime error

Git LFS Details

Git LFS Details

	@@ -0,0 +1,8 @@