File size: 1,563 Bytes
7617596
 
e82dff2
 
582115f
7617596
 
b734d92
c83f375
 
b734d92
 
 
 
 
 
 
7617596
19327c9
7617596
64fb58a
b734d92
818a4f8
3b61cce
 
 
818a4f8
 
 
 
 
 
 
64fb58a
582115f
818a4f8
 
582115f
61a0078
818a4f8
7617596
818a4f8
7617596
19327c9
 
64fb58a
3b61cce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python

from __future__ import annotations

import os
import shutil
import subprocess

import gradio as gr

# Hugging Face Spaces bootstrap: when SYSTEM == 'spaces', apply the local
# ``patch`` file to the ``prismer`` checkout and copy its helper images to
# ``prismer/images``. This runs before the ``app_*`` / ``prismer_model``
# imports below. NOTE(review): presumably those modules rely on the patched
# sources, which would be why this sits between the import groups - confirm.
if os.getenv('SYSTEM') == 'spaces':
    import warnings

    with open('patch') as f:
        # The patch is fed to ``patch -p1`` on stdin, executed from inside
        # the ``prismer`` directory.
        patch_result = subprocess.run(['patch', '-p1'],
                                      cwd='prismer',
                                      stdin=f)
    if patch_result.returncode != 0:
        # Report instead of raising: startup stays best-effort as before,
        # but a failed patch is no longer silent.
        warnings.warn(
            f"'patch -p1' exited with status {patch_result.returncode}; "
            'the prismer sources may not be patched as expected.',
            RuntimeWarning,
        )
    # dirs_exist_ok=True lets the copy succeed when ``prismer/images``
    # already exists.
    shutil.copytree('prismer/helpers/images',
                    'prismer/images',
                    dirs_exist_ok=True)

from app_caption import create_demo as create_demo_caption
from app_vqa import create_demo as create_demo_vqa
from prismer_model import build_deformable_conv, download_models


# Prepare model checkpoints. Both helpers are imported from ``prismer_model``
# and run at import time, before the demo UI is assembled.
# NOTE(review): their bodies are not visible here - presumably
# ``download_models`` fetches the checkpoints and ``build_deformable_conv``
# builds the deformable-convolution op; confirm against ``prismer_model`` and
# keep this call order unless the two are verified to be independent.
download_models()
build_deformable_conv()


# Markdown header rendered at the top of the demo page. Written as adjacent
# literals with explicit newlines so the exact text (including the space
# after the title) is visible in the source.
description = (
    '\n'
    '# Prismer \n'
    'The official demo for **Prismer: A Vision-Language Model with An Ensemble of Experts**.\n'
    'Please refer to our [project page](https://shikun.io/projects/prismer) or [github](https://github.com/NVlabs/prismer) for more details.\n'
)

# When a SPACE_ID is present in the environment, append a "Duplicate Space"
# badge that links to the duplicate URL of that space.
SPACE_ID = os.getenv('SPACE_ID')
if SPACE_ID is not None:
    description += (
        'For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. '
        f'<a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true">'
        '<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>'
    )


# Assemble the UI: the Markdown header first, then one tab per task. Each
# tab's contents come from the ``create_demo`` factory imported from the
# corresponding ``app_*`` module.
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Tabs():
        for tab_title, build_tab in (
            ('Zero-shot Image Captioning', create_demo_caption),
            ('Visual Question Answering', create_demo_vqa),
        ):
            with gr.TabItem(tab_title):
                build_tab()

# Configure the queue with ``api_open=False``, then launch whatever object
# ``queue`` returned (same as the chained ``demo.queue(...).launch()``).
queued_demo = demo.queue(api_open=False)
queued_demo.launch()