prismer / app_vqa.py
shikunl's picture
Final test
1aa8228
#!/usr/bin/env python
from __future__ import annotations
import os
import pathlib
import gradio as gr
from prismer_model import Model
def create_demo() -> gr.Blocks:
    """Build the Gradio UI for the visual question answering demo.

    The left column collects an image (passed on as a file path), a model
    choice and a free-text question. The right column shows the predicted
    answer and six image panels labelled Depth, Edge, Normals, Segmentation,
    Object Detection and OCR Detection. The example gallery and the "Run"
    button both call ``model.run_vqa`` with the same inputs and outputs.

    Returns:
        The assembled ``gr.Blocks`` instance. It is not launched here.
    """
    model = Model()

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                image = gr.Image(label='Input', type='filepath')
                model_name = gr.Dropdown(label='Model', choices=['Prismer-Base', 'Prismer-Large'], value='Prismer-Base')
                question = gr.Text(label='Question')
                run_button = gr.Button('Run')
            with gr.Column(scale=1.5):
                answer = gr.Text(label='Model Prediction')
                with gr.Row():
                    depth = gr.Image(label='Depth')
                    edge = gr.Image(label='Edge')
                    normals = gr.Image(label='Normals')
                with gr.Row():
                    segmentation = gr.Image(label='Segmentation')
                    object_detection = gr.Image(label='Object Detection')
                    ocr = gr.Image(label='OCR Detection')

        inputs = [image, model_name, question]
        outputs = [answer, depth, edge, normals, segmentation, object_detection, ocr]

        # Example images are paired with the questions below in sorted
        # filename order.
        paths = sorted(pathlib.Path('prismer/images').glob('*'))
        ex_questions = ['What is the man on the left doing?',
                        'What is this person doing?',
                        'How many cows are in this image?',
                        'What is the type of animal in this image?',
                        'What toy is it?']
        # zip() stops at the shorter sequence, so extra files in the image
        # directory are skipped instead of raising IndexError when the
        # questions run out.
        examples = [[path.as_posix(), 'Prismer-Base', ex_question]
                    for path, ex_question in zip(paths, ex_questions)]
        gr.Examples(examples=examples,
                    inputs=inputs,
                    outputs=outputs,
                    fn=model.run_vqa,
                    # Caching is requested only when SYSTEM is set to 'spaces'.
                    cache_examples=os.getenv('SYSTEM') == 'spaces')

        run_button.click(fn=model.run_vqa, inputs=inputs, outputs=outputs)
    return demo
if __name__ == '__main__':
    # Script entry point: build the UI, enable the request queue, then
    # start serving.
    demo = create_demo()
    queued_demo = demo.queue()
    queued_demo.launch()