#!/usr/bin/env python from __future__ import annotations import os import shutil import subprocess import gradio as gr if os.getenv('SYSTEM') == 'spaces': with open('patch') as f: subprocess.run('patch -p1'.split(), cwd='prismer', stdin=f) shutil.copytree('prismer/helpers/images', 'prismer/images', dirs_exist_ok=True) from app_caption import create_demo as create_demo_caption from app_vqa import create_demo as create_demo_vqa from prismer_model import build_deformable_conv, download_models # Prepare model checkpoints download_models() build_deformable_conv() # Demo file here description = """ # Prismer The official demo for **Prismer: A Vision-Language Model with An Ensemble of Experts**. Please refer to our [project page](https://shikun.io/projects/prismer) or [github](https://github.com/NVlabs/prismer) for more details. """ if (SPACE_ID := os.getenv('SPACE_ID')) is not None: description += f'For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.

' with gr.Blocks() as demo: gr.Markdown(description) with gr.Tabs(): with gr.TabItem('Zero-shot Image Captioning'): create_demo_caption() with gr.TabItem('Visual Question Answering'): create_demo_vqa() demo.queue(api_open=False).launch()