tttoaster committed on
Commit
20e4287
1 Parent(s): e5bd87d

Upload 12 files

__pycache__/constants.cpython-38.pyc CHANGED
Binary files a/__pycache__/constants.cpython-38.pyc and b/__pycache__/constants.cpython-38.pyc differ
 
app.py CHANGED
@@ -154,11 +154,25 @@ def get_baseline_df():
    df = df[present_columns]
    return df

+ def get_baseline_v2_df():
+     # pdb.set_trace()
+     df = pd.read_csv(CSV_V2_DIR)
+     df = df.sort_values(by="Avg. P1", ascending=False)
+     present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
+     # pdb.set_trace()
+     df = df[present_columns]
+     return df
+
def get_all_df():
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by="Avg. All", ascending=False)
    return df

+ def get_all_v2_df():
+     df = pd.read_csv(CSV_V2_DIR)
+     df = df.sort_values(by="Avg. P1", ascending=False)
+     return df
+
block = gr.Blocks()


@@ -167,7 +181,82 @@ with block:
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 SEED Benchmark", elem_id="seed-benchmark-tab-table", id=0):
+         with gr.TabItem("🏅 SEED Benchmark v2", elem_id="seed-benchmark-tab-table", id=0):
+             with gr.Row():
+                 with gr.Accordion("Citation", open=False):
+                     citation_button = gr.Textbox(
+                         value=CITATION_BUTTON_TEXT,
+                         label=CITATION_BUTTON_LABEL,
+                         elem_id="citation-button",
+                     ).style(show_copy_button=True)
+
+             gr.Markdown(
+                 TABLE_INTRODUCTION
+             )
+
+             # selection for column part:
+             checkbox_group_v2 = gr.CheckboxGroup(
+                 choices=TASK_V2_INFO,
+                 value=AVG_V2_INFO,
+                 label="Evaluation Dimension",
+                 interactive=True,
+             )
+
+             '''
+             # selection for model size part:
+             filter_model_size = gr.CheckboxGroup(
+                 choices=MODEL_SIZE,
+                 value=MODEL_SIZE,
+                 label="Model Size",
+                 interactive=True,
+             )
+
+             filter_dimension_level = gr.CheckboxGroup(
+                 choices=DIMENSION_LEVEL,
+                 label="Model level",
+                 multiselect=False,
+                 value=DIMENSION_LEVEL[1],
+                 interactive=True,
+             )
+             '''
+
+             # create the dataframe component
+             data_component_v2 = gr.components.Dataframe(
+                 value=get_baseline_v2_df,
+                 headers=COLUMN_V2_NAMES,
+                 type="pandas",
+                 datatype=DATA_TITILE_V2_TYPE,
+                 interactive=False,
+                 visible=True,
+             )
+
+             def on_checkbox_group_v2_change(selected_columns):
+                 # pdb.set_trace()
+                 selected_columns = [item for item in TASK_V2_INFO if item in selected_columns]
+                 present_columns = MODEL_INFO_V2 + selected_columns
+                 updated_data = get_all_v2_df()[present_columns]
+                 updated_data = updated_data.sort_values(by=present_columns[2], ascending=False)
+                 updated_headers = present_columns
+                 # pdb.set_trace()
+                 update_datatype = [DATA_TITILE_V2_TYPE[COLUMN_V2_NAMES.index(x)] for x in updated_headers]
+
+                 filter_component = gr.components.Dataframe(
+                     value=updated_data,
+                     headers=updated_headers,
+                     type="pandas",
+                     datatype=update_datatype,
+                     interactive=False,
+                     visible=True,
+                 )
+                 # pdb.set_trace()
+
+                 return filter_component.value
+
+             # link the checkbox group to its handler function
+             checkbox_group_v2.change(fn=on_checkbox_group_v2_change, inputs=checkbox_group_v2, outputs=data_component_v2)
+
+         # table seed-bench-v1
+         with gr.TabItem("🏅 SEED Benchmark v1", elem_id="seed-benchmark-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
@@ -182,11 +271,21 @@ with block:

            # selection for column part:
            checkbox_group = gr.CheckboxGroup(
-                 choices=TASK_INFO_v2,
+                 choices=TASK_INFO,
                value=AVG_INFO,
-                 label="Select options",
+                 label="Evaluation Dimension",
+                 interactive=True,
+             )
+
+             '''
+             # selection for model size part:
+             filter_model_size = gr.CheckboxGroup(
+                 choices=MODEL_SIZE,
+                 value=MODEL_SIZE,
+                 label="Model Size",
                interactive=True,
            )
+             '''

            # create the dataframe component
            data_component = gr.components.Dataframe(
@@ -200,7 +299,7 @@ with block:

            def on_checkbox_group_change(selected_columns):
                # pdb.set_trace()
-                 selected_columns = [item for item in TASK_INFO_v2 if item in selected_columns]
+                 selected_columns = [item for item in TASK_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns
                updated_data = get_all_df()[present_columns]
                updated_data = updated_data.sort_values(by=present_columns[3], ascending=False)
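
Note: the core interaction added for the v2 tab is a CheckboxGroup whose change event re-selects the columns of a pandas DataFrame rendered in a gr.Dataframe. A minimal, self-contained sketch of that wiring (toy model names and scores standing in for result_v2.csv; not the leaderboard's actual code) could look like this:

import gradio as gr
import pandas as pd

MODEL_COLS = ["Model"]                # identity columns, always shown
SCORE_COLS = ["Avg. P1", "Avg. P2"]   # user-selectable score columns

# toy scores standing in for ./file/result_v2.csv
DF = pd.DataFrame({"Model": ["A", "B"], "Avg. P1": [47.3, 43.9], "Avg. P2": [30.8, 43.4]})

def filter_columns(selected):
    # keep the canonical column order, then sort by the first selected score
    selected = [c for c in SCORE_COLS if c in selected]
    view = DF[MODEL_COLS + selected]
    if selected:
        view = view.sort_values(by=selected[0], ascending=False)
    return view

with gr.Blocks() as demo:
    picker = gr.CheckboxGroup(choices=SCORE_COLS, value=SCORE_COLS,
                              label="Evaluation Dimension", interactive=True)
    table = gr.Dataframe(value=filter_columns(SCORE_COLS), interactive=False)
    # returning a DataFrame from the handler refreshes the table component
    picker.change(fn=filter_columns, inputs=picker, outputs=table)

if __name__ == "__main__":
    demo.launch()
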
constants.py CHANGED
@@ -1,36 +1,25 @@
# this is .py for store constants
MODEL_INFO = ["Model Type", "Model", "Language Model"]
- TASK_INFO = ["Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Avg. Img", "Action Recognition", "Action Prediction", "Procedure Understanding", "Avg. Video", "Avg. All"]
- TASK_INFO_v2 = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
+ MODEL_INFO_V2 = ["Model", "Language Model"]
+ MODEL_SIZE = ["<10B", ">=10B"]
+ DIMENSION_LEVEL = ["L1", "L2", "L3"]
+ LEADERBOARD_VERSION = ["Version1", "Version2"]
+ TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
+ TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]

AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
+ AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]
+
DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
+ DATA_TITILE_V2_TYPE = ["markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
CSV_DIR = "./file/result.csv"
+ CSV_V2_DIR = "./file/result_v2.csv"

- # COLUMN_NAMES = MODEL_INFO + TASK_INFO
- COLUMN_NAMES = MODEL_INFO + TASK_INFO_v2
+ COLUMN_NAMES = MODEL_INFO + TASK_INFO
+ COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO

DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
-
- UNTUNED_MODEL_RESULTS = '''LLM & Flan-T5 & Flan-T5-XL &23.0 &29.0 &32.8 &31.8 &20.5 &31.8 &33.0 &18.2 &19.4 &23.2 &34.9 &25.4 \\
- LLM & Vicuna & Vicuna-7B &23.4 &30.7 &29.7 &30.9 &30.8 &28.6 &29.8 &18.5 &13.4 &27.3 &34.5 &23.8 \\
- LLM & LLaMA & LLaMA-7B &26.3 &27.4 &26.2 &28.3 &25.1 &28.8 &19.2 &37.0 & 9.0 &33.0 &23.1 &26.2 \\
- ImageLLM & BLIP-2 & Flan-T5-XL &59.1 &53.9 &49.2 &42.3 &43.2 &36.7 &55.7 &45.6 &25.9 &32.6 &47.5 &24.0 \\
- ImageLLM & InstructBLIP & Flan-T5-XL &60.3 &58.5 &63.4 &40.6 &58.4 &38.7 &51.6 &45.9 &25.9 &33.1 &49.1 &27.1 \\
- ImageLLM & InstructBLIP-Vicuna & Vicuna-7B &60.2 &58.9 &65.6 &43.6 &57.2 &40.3 &52.6 &47.7 &43.5 &34.5 &49.6 &23.1 \\
- ImageLLM & LLaVA & LLaMA-7B &42.7 &34.9 &33.5 &28.4 &41.9 &30.8 &27.8 &46.8 &27.7 &29.7 &21.4 &19.1 \\
- ImageLLM & MiniGPT-4 & Flan-T5-XL &56.3 &49.2 &45.8 &37.9 &45.3 &32.6 &47.4 &57.1 &11.8 &38.2 &24.5 &27.1 \\
- ImageLLM & VPGTrans & LLaMA-7B &51.9 &44.1 &39.9 &36.1 &33.7 &36.4 &32.0 &53.2 &30.6 &39.5 &24.3 &31.9 \\
- ImageLLM & MultiModal-GPT & LLaMA-7B &43.6 &37.9 &31.5 &30.8 &27.3 &30.1 &29.9 &51.4 &18.8 &36.9 &25.8 &24.0 \\
- ImageLLM & Otter & LLaMA-7B &44.9 &38.6 &32.2 &30.9 &26.3 &31.8 &32.0 &51.4 &31.8 &37.9 &27.2 &24.8 \\
- ImageLLM & OpenFlamingo & LLaMA-7B &43.9 &38.1 &31.3 &30.1 &27.3 &30.6 &29.9 &50.2 &20.0 &37.2 &25.4 &24.2 \\
- ImageLLM & LLaMA-Adapter V2 & LLaMA-7B &45.2 &38.5 &29.3 &33.0 &29.7 &35.5 &39.2 &52.0 &24.7 &38.6 &18.5 &19.6 \\
- ImageLLM & GVT & Vicuna-7B &41.7 &35.5 &31.8 &29.5 &36.2 &32.0 &32.0 &51.1 &27.1 &33.9 &25.4 &23.0 \\
- ImageLLM & mPLUG-Owl & LLaMA-7B &49.7 &45.3 &32.5 &36.7 &27.3 &32.7 &44.3 &54.7 &28.8 &26.7 &17.9 &26.5 \\
- VideoLLM & VideoChat & Vicuna-7B &47.1 &43.8 &34.9 &40.0 &32.8 &34.6 &42.3 &50.5 &17.7 &34.9 &36.4 &27.3 \\
- VideoLLM & Video-ChatGPT & LLaMA-7B &37.2 &31.4 &33.2 &28.4 &35.5 &29.5 &23.7 &42.3 &25.9 &27.6 &21.3 &21.1 \\
- VideoLLM & Valley & LLaMA-13B &39.3 &32.9 &31.6 &27.9 &24.2 &30.1 &27.8 &43.8 &11.8 &31.3 &23.2 &20.7 \\'''
-
+ DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]

LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard

@@ -76,8 +65,6 @@ LEADERBORAD_INFO = """
By revealing the limitations of existing MLLMs through evaluation results, we aim for SEED-Bench to provide insights for motivating future research.
"""

-
-
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{li2023seed,
title={SEED-Bench: Benchmarking Multimodal LLMs with Generative Comprehension},
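
Note: on_checkbox_group_v2_change in app.py looks up each displayed column's datatype by its position in COLUMN_V2_NAMES, so the new v2 constants and the new CSV have to stay aligned. A quick, hypothetical sanity-check script (run from the repo root, importing the names defined above) might be:

import pandas as pd
from constants import COLUMN_V2_NAMES, DATA_TITILE_V2_TYPE, CSV_V2_DIR

# every displayed column needs a matching datatype entry for gr.Dataframe
assert len(COLUMN_V2_NAMES) == len(DATA_TITILE_V2_TYPE)

# the CSV must expose every configured column (same names and order);
# otherwise df[present_columns] in app.py would raise a KeyError
assert list(pd.read_csv(CSV_V2_DIR).columns) == COLUMN_V2_NAMES
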
file/result.csv CHANGED
@@ -22,9 +22,9 @@ ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,56.3,62.3,39.1,7
ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
- ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13b,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20.0
+ ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20.0
ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46.0,38.7,32.9
- ImageLLM,[LLaMA-VID-7B](),LLaMA-7B,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,0,67.0,0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50.0,0,0,0
ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,62.3,69.6,41.5,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,0,69.7,0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
file/result_v2.csv ADDED
@@ -0,0 +1,24 @@
+ Model,Language Model,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+ [BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,41,35.3,0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
+ [InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,42.2,35.7,0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
+ [InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,41.4,29.7,0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
+ [LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,38.7,30.2,0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
+ [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,39.4,34.1,0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
+ [VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,36.2,23.9,0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
+ [MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,37.4,34.9,0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
+ [Otter](https://github.com/Luodian/Otter),LLaMA-7B,36.4,36.6,0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
+ [OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,37.3,35.5,0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
+ [LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,37.5,0,0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
+ [GVT](https://github.com/TencentARC/GVT),Vicuna-7B,34.4,38.6,0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
+ [mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,39.4,28.9,0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
+ [Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,46.3,23.3,0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
+ [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,43.1,35.5,0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
+ [LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,47.3,30.8,0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
+ [IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,38,40.3,0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
+ [InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,59.2,32.1,0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
+ [Emu](https://github.com/baaivision/Emu),LLaMA-13B,42.5,41.1,41.4,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
+ [Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
+ [seed-llama](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,43.9,43.4,52.3,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
+ [VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,37,35.3,0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
+ [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,36.4,31,0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
+ [Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,34.5,32.2,0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
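
Note: the default view of the new v2 tab (models sorted by "Avg. P1", averages only) can be reproduced from this file with pandas alone. A small sketch mirroring get_baseline_v2_df, repeating the paths and column lists from constants.py for self-containment:

import pandas as pd

CSV_V2_DIR = "./file/result_v2.csv"
MODEL_INFO_V2 = ["Model", "Language Model"]
AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]

# load the new result file and print the default leaderboard ordering
df = pd.read_csv(CSV_V2_DIR)
df = df.sort_values(by="Avg. P1", ascending=False)
print(df[MODEL_INFO_V2 + AVG_V2_INFO].to_string(index=False))
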
src/__pycache__/utils_display.cpython-38.pyc CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ