Spaces:
Runtime error
Runtime error
Dongxu Li
committed on
Commit
·
4ecd25d
1
Parent(s):
0314a2f
change ui
Browse files
app.py
CHANGED
@@ -125,12 +125,20 @@ def inference_caption(
|
|
125 |
return output[0]
|
126 |
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
title = """<h1 align="center">BLIP-2</h1>"""
|
129 |
-
description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them
|
130 |
-
<
|
131 |
article = """<strong>Paper</strong>: <a href='https://arxiv.org/abs/2301.12597' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
|
132 |
<br> <strong>Code</strong>: BLIP2 is now integrated into GitHub repo: <a href='https://github.com/salesforce/LAVIS' target='_blank'>LAVIS: a One-stop Library for Language and Vision</a>
|
133 |
<br> <strong>Project Page</strong>: <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'> BLIP2 on LAVIS</a>
|
|
|
134 |
"""
|
135 |
|
136 |
endpoint = Endpoint()
|
@@ -147,6 +155,7 @@ with gr.Blocks() as iface:
|
|
147 |
gr.Markdown(title)
|
148 |
gr.Markdown(description)
|
149 |
gr.Markdown(article)
|
|
|
150 |
with gr.Row():
|
151 |
with gr.Column():
|
152 |
image_input = gr.Image(type="pil")
|
@@ -189,54 +198,61 @@ with gr.Blocks() as iface:
|
|
189 |
with gr.Column():
|
190 |
|
191 |
with gr.Column():
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
with gr.
|
210 |
-
|
211 |
-
with gr.Row():
|
212 |
chatbot = gr.Chatbot(label="Chat Output (from FlanT5)")
|
213 |
-
|
214 |
-
|
215 |
-
with gr.
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
|
241 |
examples = gr.Examples(
|
242 |
examples=examples,
|
|
|
125 |
return output[0]
|
126 |
|
127 |
|
128 |
+
def clear_fn(image_input, chatbot, chat_input, caption_output, state):
    """Reset the chat/caption widgets when the image has been removed.

    Args:
        image_input: Current value of the image component; ``None`` means
            no image is loaded.
        chatbot: Current chat history value.
        chat_input: Current text of the chat input box.
        caption_output: Current text of the caption output box.
        state: Current conversation state.

    Returns:
        A 4-tuple ordered as ``(chatbot, chat_input, caption_output, state)``.
        While an image is present the incoming values are returned
        unchanged; once the image is ``None`` the tuple is
        ``(None, "", "", [])``.
    """
    # Guard clause: an image is still loaded, so pass everything through.
    if image_input is not None:
        return chatbot, chat_input, caption_output, state

    # Image removed: blank out chat history, both text boxes and the state.
    return (None, "", "", [])
|
133 |
+
|
134 |
+
|
135 |
title = """<h1 align="center">BLIP-2</h1>"""
|
136 |
+
description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them.
|
137 |
+
<br> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected."""
|
138 |
article = """<strong>Paper</strong>: <a href='https://arxiv.org/abs/2301.12597' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
|
139 |
<br> <strong>Code</strong>: BLIP2 is now integrated into GitHub repo: <a href='https://github.com/salesforce/LAVIS' target='_blank'>LAVIS: a One-stop Library for Language and Vision</a>
|
140 |
<br> <strong>Project Page</strong>: <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'> BLIP2 on LAVIS</a>
|
141 |
+
<br> <strong>Description</strong>: Captioning results from <strong>BLIP2_OPT_6.7B</strong>. Chat results from <strong>BLIP2_FlanT5xxl</strong>.
|
142 |
"""
|
143 |
|
144 |
endpoint = Endpoint()
|
|
|
155 |
gr.Markdown(title)
|
156 |
gr.Markdown(description)
|
157 |
gr.Markdown(article)
|
158 |
+
|
159 |
with gr.Row():
|
160 |
with gr.Column():
|
161 |
image_input = gr.Image(type="pil")
|
|
|
198 |
with gr.Column():
|
199 |
|
200 |
with gr.Column():
|
201 |
+
caption_output = gr.Textbox(lines=1, label="Caption Output")
|
202 |
+
caption_button = gr.Button(
|
203 |
+
value="Caption it!", interactive=True, variant="primary"
|
204 |
+
)
|
205 |
+
caption_button.click(
|
206 |
+
inference_caption,
|
207 |
+
[
|
208 |
+
image_input,
|
209 |
+
sampling,
|
210 |
+
temperature,
|
211 |
+
len_penalty,
|
212 |
+
rep_penalty,
|
213 |
+
],
|
214 |
+
[caption_output],
|
215 |
+
)
|
216 |
+
|
217 |
+
gr.Markdown("""Trying prompting your input for chat; e.g. recommended prompt for QA, \"Question: {} Answer:\"""")
|
218 |
+
with gr.Row():
|
219 |
+
with gr.Column():
|
|
|
220 |
chatbot = gr.Chatbot(label="Chat Output (from FlanT5)")
|
221 |
+
|
222 |
+
# with gr.Row():
|
223 |
+
with gr.Column():
|
224 |
+
chat_input = gr.Textbox(lines=1, label="Chat Input")
|
225 |
+
|
226 |
+
with gr.Row():
|
227 |
+
clear_button = gr.Button(value="Clear", interactive=True)
|
228 |
+
clear_button.click(
|
229 |
+
lambda: ("", [], []),
|
230 |
+
[],
|
231 |
+
[chat_input, chatbot, state],
|
232 |
+
)
|
233 |
+
|
234 |
+
submit_button = gr.Button(
|
235 |
+
value="Submit", interactive=True, variant="primary"
|
236 |
+
)
|
237 |
+
submit_button.click(
|
238 |
+
inference_chat,
|
239 |
+
[
|
240 |
+
image_input,
|
241 |
+
chat_input,
|
242 |
+
sampling,
|
243 |
+
temperature,
|
244 |
+
len_penalty,
|
245 |
+
rep_penalty,
|
246 |
+
state,
|
247 |
+
],
|
248 |
+
[chatbot, state],
|
249 |
+
)
|
250 |
+
|
251 |
+
image_input.change(
|
252 |
+
clear_fn,
|
253 |
+
[image_input, chatbot, chat_input, caption_output, state],
|
254 |
+
[chatbot, chat_input, caption_output, state]
|
255 |
+
)
|
256 |
|
257 |
examples = gr.Examples(
|
258 |
examples=examples,
|