Merge pull request #4 from TRI-ML/master
Browse files
- .gitignore +1 -1
- serve/__init__.py +5 -5
- serve/gradio_web_server.py +2 -2
.gitignore
CHANGED
@@ -104,7 +104,7 @@ celerybeat.pid
|
|
104 |
# Logs
|
105 |
serve_images/
|
106 |
*conv.json
|
107 |
-
|
108 |
|
109 |
# Environments
|
110 |
.env
|
|
|
104 |
# Logs
|
105 |
serve_images/
|
106 |
*conv.json
|
107 |
+
*.log*
|
108 |
|
109 |
# Environments
|
110 |
.env
|
serve/__init__.py
CHANGED
@@ -6,19 +6,19 @@ MODEL_ID_TO_NAME = OrderedDict(
|
|
6 |
[
|
7 |
(
|
8 |
"prism-dinosiglip+13b",
|
9 |
-
"
|
10 |
),
|
11 |
(
|
12 |
"prism-dinosiglip+7b",
|
13 |
-
"
|
14 |
),
|
15 |
(
|
16 |
"prism-dinosiglip-controlled+13b",
|
17 |
-
"
|
18 |
),
|
19 |
(
|
20 |
"prism-dinosiglip-controlled+7b",
|
21 |
-
"
|
22 |
),
|
23 |
("llava-v1.5-13b", "LLaVA 1.5 13B"),
|
24 |
("llava-v1.5-7b", "LLaVA 1.5 7B"),
|
@@ -34,4 +34,4 @@ INTERACTION_MODES_MAP = OrderedDict(
|
|
34 |
("Visual Question Answering", "vqa"),
|
35 |
("True/False Visual Question Answering", "true_false"),
|
36 |
]
|
37 |
-
)
|
|
|
6 |
[
|
7 |
(
|
8 |
"prism-dinosiglip+13b",
|
9 |
+
"Prism 13B",
|
10 |
),
|
11 |
(
|
12 |
"prism-dinosiglip+7b",
|
13 |
+
"Prism 7B",
|
14 |
),
|
15 |
(
|
16 |
"prism-dinosiglip-controlled+13b",
|
17 |
+
"Prism 13B (Controlled)",
|
18 |
),
|
19 |
(
|
20 |
"prism-dinosiglip-controlled+7b",
|
21 |
+
"Prism 7B (Controlled)",
|
22 |
),
|
23 |
("llava-v1.5-13b", "LLaVA 1.5 13B"),
|
24 |
("llava-v1.5-7b", "LLaVA 1.5 7B"),
|
|
|
34 |
("Visual Question Answering", "vqa"),
|
35 |
("True/False Visual Question Answering", "true_false"),
|
36 |
]
|
37 |
+
)
|
serve/gradio_web_server.py
CHANGED
@@ -247,8 +247,8 @@ def http_bot(state, model_selector, interaction_mode, temperature, max_new_token
|
|
247 |
|
248 |
title_markdown = """
|
249 |
# Prismatic VLMs: Investigating the Design Space of Visually-Conditioned Language Models
|
250 |
-
[[
|
251 |
-
[[
|
252 |
| π [[Paper](https://arxiv.org/abs/2402.07865)]
|
253 |
"""
|
254 |
|
|
|
247 |
|
248 |
title_markdown = """
|
249 |
# Prismatic VLMs: Investigating the Design Space of Visually-Conditioned Language Models
|
250 |
+
[[Training Code](github.com/TRI-ML/prismatic-vlms)]
|
251 |
+
[[Evaluation Code](github.com/TRI-ML/vlm-evaluation)]
|
252 |
| π [[Paper](https://arxiv.org/abs/2402.07865)]
|
253 |
"""
|
254 |
|