#!/usr/bin/env python
"""Gradio entry point for the Prismer demo.

Builds a two-tab UI (zero-shot image captioning and visual question
answering) from the demo factories in ``app_caption`` and ``app_vqa`` and
launches it with request queuing enabled.
"""

from __future__ import annotations

import os

import gradio as gr

from app_caption import create_demo as create_demo_caption
from app_vqa import create_demo as create_demo_vqa
from prismer_model import build_deformable_conv, download_models

# Prepare model checkpoints.
# NOTE(review): both helpers live in prismer_model; build_deformable_conv
# presumably builds the deformable-convolution op the experts need -- confirm.
download_models()
build_deformable_conv()

# Markdown shown at the top of the demo page. The heading has to sit on its
# own line, otherwise the whole paragraph is rendered as part of the title.
description = """
# Prismer
The official demo for **Prismer: A Vision-Language Model with Multi-Task Experts**.
Please refer to our [project page](https://shikun.io/projects/prismer) or [github](https://github.com/NVlabs/prismer) for more details.
Expert labels will be only computed once for the same image checked with md5sum.
"""

# Only advertise the "duplicate this space" hint when SPACE_ID is present in
# the environment (presumably set by the hosting platform -- confirm).
SPACE_ID = os.getenv('SPACE_ID')
if SPACE_ID is not None:
    # Plain string: the text has no placeholders, and the trailing blank line
    # is written as an explicit escape instead of a literal line break.
    description += (
        'For faster inference without waiting in queue, you may duplicate '
        'the space and upgrade to GPU in settings.\n\n'
    )

with gr.Blocks(theme='sudeepshouche/minimalist') as demo:
    gr.Markdown(description)
    with gr.Tabs():
        with gr.TabItem('Zero-shot Image Captioning'):
            create_demo_caption()
        with gr.TabItem('Visual Question Answering'):
            create_demo_vqa()

# Queue incoming requests with api_open=False, then start serving.
demo.queue(api_open=False).launch()