shuangzhiaishang commited on
Commit
16b2893
·
verified ·
1 Parent(s): 38f3cc6

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples_v2/cockdial.png filter=lfs diff=lfs merge=lfs -text
37
+ examples_v2/float.png filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import gradio as gr
4
+
5
+ from model import ToyModel
6
+
7
+ """
8
+ Model specification
9
+ """
10
+
11
+ model = ToyModel()
12
+
13
+
14
+ def chat(image_input, text_input):
15
+ image_output, text_output = model.chat(image_input, text_input)
16
+ return image_output, text_output
17
+
18
+
19
+ """
20
+ Gradio
21
+ """
22
+
23
+
24
+ def gradio_taskselect(idx):
25
+ prompt_list = [
26
+ '',
27
+ '[grounding] describe this image in detail',
28
+ '[refer] ',
29
+ '[detection] ',
30
+ '[identify] what is this ',
31
+ '[vqa] '
32
+ ]
33
+ instruct_list = [
34
+ '**Hint:** Type in whatever you want',
35
+ '**Hint:** Send the command to generate a grounded image description',
36
+ '**Hint:** Type in a phrase about an object in the image and send the command',
37
+ '**Hint:** Type in a caption or phrase, and see object locations in the image',
38
+ '**Hint:** Draw a bounding box on the uploaded image then send the command. Click the "clear" botton on the '
39
+ 'top right of the image before redraw',
40
+ '**Hint:** Send a question to get a short answer',
41
+ ]
42
+ return prompt_list[idx], instruct_list[idx]
43
+
44
+
45
# --- Static page copy ----------------------------------------------------
title = """<h1 align="center">RS-Visual Perception Demo</h1>"""
description = 'Welcome to Our RS-Visual Perception Demo!'

introduction = '''
For Abilities Involving Visual Grounding:
1. Grounding: CLICK **Send** to generate a grounded image description.
2. Refer: Input a referring object and CLICK **Send**.
3. Detection: Write a caption or phrase, and CLICK **Send**.
4. Identify: Draw the bounding box on the uploaded image window and CLICK **Send** to generate the bounding box. (CLICK "clear" button before re-drawing next time).
5. VQA: Input a visual question and CLICK **Send**.
6. No Tag: Input whatever you want and CLICK **Send** without any tagging
You can also simply chat in free form!
'''

# Page layout: left column = image input + temperature + task shortcuts +
# text input; right column = outputs; then two example galleries.
# Component creation order inside the context managers determines on-screen
# order, so these statements must not be reordered.
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Row():
        # NOTE(review): fractional scale is a Gradio 3.x convention; Gradio 4
        # requires an integer here — confirm the pinned gradio version.
        with gr.Column(scale=0.5):
            image_input = gr.Image(type="pil", label="Input Image")

            # NOTE(review): this slider is displayed but its value is never
            # passed to chat(), so it currently has no effect — confirm intent.
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.6,
                step=0.1,
                interactive=True,
                label="Temperature",
            )

            # Clickable task shortcuts; type="index" makes the click handler
            # receive the clicked row's index, which gradio_taskselect maps
            # to a prompt prefix and a hint string.
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                samples=[['No Tag'], ['Grounding'], ['Refer'], ['Detection'], ['Identify'], ['VQA']],
                type="index",
                label='Task Shortcuts',
            )
            task_inst = gr.Markdown('**Hint:** Upload your image and chat')
            text_input = gr.Textbox(label='Input text', placeholder='Upload your image and chat', interactive=True, )
            submit_button = gr.Button("Submit", variant='primary', size='sm', scale=1)

            gr.Markdown(introduction)

        with gr.Column():
            image_output = gr.Image(type="pil", label='Output image')
            text_output = gr.Textbox(label='Output text', interactive=True)

    # Two side-by-side example galleries; selecting an example fills the
    # inputs and runs chat() to populate the outputs.
    with gr.Row():
        with gr.Column():
            gr.Examples(examples=[
                ["examples_v2/office.jpg", "[grounding] describe this image in detail"],
                ["examples_v2/sofa.jpg", "[detection] sofas"],
                ["examples_v2/2000x1372_wmkn_0012149409555.jpg", "[refer] the world cup"],
                ["examples_v2/KFC-20-for-20-Nuggets.jpg", "[identify] what is this {<4><50><30><65>}"],
            ], inputs=[image_input, text_input], fn=chat,
                outputs=[image_output, text_output])
        with gr.Column():
            gr.Examples(examples=[
                ["examples_v2/glip_test.jpg", "[vqa] where should I hide in this room when playing hide and seek"],
                ["examples_v2/float.png", "Please write a poem about the image"],
                ["examples_v2/thief.png", "Is the weapon fateful"],
                ["examples_v2/cockdial.png", "What might happen in this image in the next second"],
            ], inputs=[image_input, text_input], fn=chat,
                outputs=[image_output, text_output])

    # Shortcut click: fill the textbox with the task's prompt prefix and
    # swap the hint text. postprocess=False passes the raw strings through
    # without component post-processing; queue=False runs it immediately.
    dataset.click(
        gradio_taskselect,
        inputs=[dataset],
        outputs=[text_input, task_inst],
        show_progress="hidden",
        postprocess=False,
        queue=False,
    )

    # Pressing Enter in the textbox and clicking Submit both trigger the
    # same chat() call with identical inputs/outputs.
    text_input.submit(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )

    submit_button.click(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )

demo.launch()
examples_v2/2000x1372_wmkn_0012149409555.jpg ADDED
examples_v2/KFC-20-for-20-Nuggets.jpg ADDED
examples_v2/cockdial.png ADDED

Git LFS Details

  • SHA256: 48e6fcd1994b733174bb2484038a6eba18c36922686e9bffaaa6216ac704ea6e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.53 MB
examples_v2/float.png ADDED

Git LFS Details

  • SHA256: ee6365239cec6f1cceb156273ba30b43295bf92eef9b3e44f854eec335fa0646
  • Pointer size: 132 Bytes
  • Size of remote file: 1.25 MB
examples_v2/glip_test.jpg ADDED
examples_v2/office.jpg ADDED
examples_v2/sofa.jpg ADDED
examples_v2/thief.png ADDED
model.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
class ToyModel:
    """Stand-in model used by the demo: echoes its inputs unchanged."""

    def __init__(self):
        # Nothing to initialize — the toy model holds no state.
        pass

    def chat(self, image_input, text_input):
        """Return the (image, text) pair exactly as received."""
        return image_input, text_input