tttoaster committed on
Commit
cde8217
1 Parent(s): 6451ae4

Upload 15 files

Browse files
__pycache__/constants.cpython-38.pyc CHANGED
Binary files a/__pycache__/constants.cpython-38.pyc and b/__pycache__/constants.cpython-38.pyc differ
 
app.py CHANGED
@@ -289,17 +289,22 @@ def add_new_eval(
289
  csv_data = csv_data.to_csv(CSV_V2_DIR, index=False)
290
  return 0
291
 
292
- def get_baseline_df():
293
- # pdb.set_trace()
294
- df = pd.read_csv(CSV_DIR)
 
 
295
  df = df.sort_values(by="Avg. All", ascending=False)
296
  present_columns = MODEL_INFO + checkbox_group.value
297
  df = df[present_columns]
298
  return df
299
 
300
- def get_baseline_v2_df():
301
  # pdb.set_trace()
302
- df = pd.read_csv(CSV_V2_DIR)
 
 
 
303
  df = df.sort_values(by="Avg. P1", ascending=False)
304
  present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
305
  # pdb.set_trace()
@@ -316,6 +321,10 @@ def get_all_v2_df():
316
  df = df.sort_values(by="Avg. P1", ascending=False)
317
  return df
318
 
 
 
 
 
319
  block = gr.Blocks()
320
 
321
 
@@ -345,25 +354,28 @@ with block:
345
  interactive=True,
346
  )
347
 
348
- # selection for model size part:
349
- model_size_v2 = gr.CheckboxGroup(
350
- choices=MODEL_SIZE,
351
- value=MODEL_SIZE,
352
- label="Model Size",
353
- interactive=True,
354
- )
 
355
 
356
- # selection for model size part:
357
- evaluation_method_v2 = gr.CheckboxGroup(
358
- choices=EVALUATION_METHOD,
359
- value=EVALUATION_METHOD,
360
- label="Evaluation Method",
361
- interactive=True,
362
- )
 
 
363
 
364
  # 创建数据帧组件
365
  data_component_v2 = gr.components.Dataframe(
366
- value=get_baseline_v2_df,
367
  headers=COLUMN_V2_NAMES,
368
  type="pandas",
369
  datatype=DATA_TITILE_V2_TYPE,
@@ -415,7 +427,11 @@ with block:
415
  # pdb.set_trace()
416
 
417
  return filter_component.value
 
 
 
418
 
 
419
  model_size_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
420
  evaluation_method_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
421
  checkbox_group_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
@@ -442,31 +458,36 @@ with block:
442
  interactive=True,
443
  )
444
 
445
- # selection for model size part:
446
- model_size = gr.CheckboxGroup(
447
- choices=MODEL_SIZE,
448
- value=MODEL_SIZE,
449
- label="Model Size",
450
- interactive=True,
451
- )
 
 
 
 
 
 
 
 
 
 
 
452
 
453
- # selection for model size part:
454
- evaluation_method = gr.CheckboxGroup(
455
- choices=EVALUATION_METHOD,
456
- value=EVALUATION_METHOD,
457
- label="Evaluation Method",
458
- interactive=True,
459
- )
460
 
461
  # 创建数据帧组件
462
  data_component = gr.components.Dataframe(
463
- value=get_baseline_df,
464
  headers=COLUMN_NAMES,
465
  type="pandas",
466
  datatype=DATA_TITILE_TYPE,
467
  interactive=False,
468
  visible=True,
469
  )
 
470
 
471
  def on_filter_model_size_method_change(selected_model_size, selected_evaluation_method, selected_columns):
472
 
@@ -512,7 +533,11 @@ with block:
512
  # pdb.set_trace()
513
 
514
  return filter_component.value
 
 
 
515
 
 
516
  model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
517
  evaluation_method.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
518
  checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
@@ -625,8 +650,8 @@ with block:
625
 
626
 
627
  def refresh_data():
628
- value1 = get_baseline_df()
629
- value2 = get_baseline_v2_df()
630
 
631
  return value1, value2
632
 
 
289
  csv_data = csv_data.to_csv(CSV_V2_DIR, index=False)
290
  return 0
291
 
292
+ def get_baseline_df(average_type):
293
+ if average_type == 'All Average':
294
+ df = pd.read_csv(CSV_DIR)
295
+ else:
296
+ df = pd.read_csv(CSV_TASK_DIR)
297
  df = df.sort_values(by="Avg. All", ascending=False)
298
  present_columns = MODEL_INFO + checkbox_group.value
299
  df = df[present_columns]
300
  return df
301
 
302
+ def get_baseline_v2_df(average_type):
303
  # pdb.set_trace()
304
+ if average_type == 'All Average':
305
+ df = pd.read_csv(CSV_V2_DIR)
306
+ else:
307
+ df = pd.read_csv(CSV_V2_TASK_DIR)
308
  df = df.sort_values(by="Avg. P1", ascending=False)
309
  present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
310
  # pdb.set_trace()
 
321
  df = df.sort_values(by="Avg. P1", ascending=False)
322
  return df
323
 
324
+
325
+ def switch_version(version):
326
+ return f"当前版本: {version}"
327
+
328
  block = gr.Blocks()
329
 
330
 
 
354
  interactive=True,
355
  )
356
 
357
+ with gr.Row():
358
+ # selection for model size part:
359
+ model_size_v2 = gr.CheckboxGroup(
360
+ choices=MODEL_SIZE,
361
+ value=MODEL_SIZE,
362
+ label="Model Size",
363
+ interactive=True,
364
+ )
365
 
366
+ # selection for model size part:
367
+ evaluation_method_v2 = gr.CheckboxGroup(
368
+ choices=EVALUATION_METHOD,
369
+ value=EVALUATION_METHOD,
370
+ label="Evaluation Method",
371
+ interactive=True,
372
+ )
373
+
374
+ average_type_v2 = gr.Radio(AVERAGE_TYPE, label="Performance Average Type", value="All Average")
375
 
376
  # 创建数据帧组件
377
  data_component_v2 = gr.components.Dataframe(
378
+ value=get_baseline_v2_df(average_type_v2.value),
379
  headers=COLUMN_V2_NAMES,
380
  type="pandas",
381
  datatype=DATA_TITILE_V2_TYPE,
 
427
  # pdb.set_trace()
428
 
429
  return filter_component.value
430
+
431
+ def on_average_type_v2_change(average_type_v2):
432
+ return get_baseline_v2_df(average_type_v2)
433
 
434
+ average_type_v2.change(fn=on_average_type_v2_change, inputs=[average_type_v2], outputs=data_component_v2)
435
  model_size_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
436
  evaluation_method_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
437
  checkbox_group_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
 
458
  interactive=True,
459
  )
460
 
461
+ with gr.Row():
462
+ # selection for model size part:
463
+ model_size = gr.CheckboxGroup(
464
+ choices=MODEL_SIZE,
465
+ value=MODEL_SIZE,
466
+ label="Model Size",
467
+ interactive=True,
468
+ )
469
+
470
+ # selection for model size part:
471
+ evaluation_method = gr.CheckboxGroup(
472
+ choices=EVALUATION_METHOD,
473
+ value=EVALUATION_METHOD,
474
+ label="Evaluation Method",
475
+ interactive=True,
476
+ )
477
+
478
+ average_type = gr.Radio(AVERAGE_TYPE, label="Performance Average Type", value="All Average")
479
 
 
 
 
 
 
 
 
480
 
481
  # 创建数据帧组件
482
  data_component = gr.components.Dataframe(
483
+ value=get_baseline_df(average_type.value),
484
  headers=COLUMN_NAMES,
485
  type="pandas",
486
  datatype=DATA_TITILE_TYPE,
487
  interactive=False,
488
  visible=True,
489
  )
490
+
491
 
492
  def on_filter_model_size_method_change(selected_model_size, selected_evaluation_method, selected_columns):
493
 
 
533
  # pdb.set_trace()
534
 
535
  return filter_component.value
536
+
537
+ def on_average_type_change(average_type):
538
+ return get_baseline_df(average_type)
539
 
540
+ average_type.change(fn=on_average_type_change, inputs=[average_type], outputs=data_component)
541
  model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
542
  evaluation_method.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
543
  checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
 
650
 
651
 
652
  def refresh_data():
653
+ value1 = get_baseline_df(average_type)
654
+ value2 = get_baseline_v2_df(average_type_v2)
655
 
656
  return value1, value2
657
 
constants.py CHANGED
@@ -6,15 +6,17 @@ EVALUATION_METHOD = ["PPL", "PPL for A/B/C/D", "Generate", "NG"]
6
  DIMENSION_LEVEL = ["L1", "L2", "L3"]
7
  LEADERBOARD_VERSION = ["Version1", "Version2"]
8
  TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
9
- TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
10
 
11
  AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
12
- AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]
13
 
14
  DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
15
- DATA_TITILE_V2_TYPE = ["markdown", "markdown","markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
16
  CSV_DIR = "./file/result.csv"
 
17
  CSV_V2_DIR = "./file/result_v2.csv"
 
18
 
19
  COLUMN_NAMES = MODEL_INFO + TASK_INFO
20
  COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
@@ -22,6 +24,9 @@ COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
22
  DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1509, 1225, 1023]
23
  DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]
24
 
 
 
 
25
  LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard
26
 
27
  Welcome to the leaderboard of the SEED-Bench! 🏆
@@ -81,9 +86,9 @@ SUBMIT_INTRODUCTION = """# Submit on SEED Benchmark Introduction
81
  TABLE_INTRODUCTION = """In the table below, we summarize each task performance of all the models.
82
  We use accuracy (%) as the primary evaluation metric for each task.
83
 
84
- SEED-Bench-1 calculates the overall accuracy by dividing the total number of correct QA answers by the total number of QA questions.
85
 
86
- SEED-Bench-2 represents the overall accuracy using the average accuracy of each dimension.
87
 
88
  For PPL evaluation method, we count the loss for each candidate and select the lowest loss candidate. For detail, please refer [InternLM_Xcomposer_VL_interface](https://github.com/AILab-CVC/SEED-Bench/blob/387a067b6ba99ae5e8231f39ae2d2e453765765c/SEED-Bench-2/model/InternLM_Xcomposer_VL_interface.py#L74).
89
 
 
6
  DIMENSION_LEVEL = ["L1", "L2", "L3"]
7
  LEADERBOARD_VERSION = ["Version1", "Version2"]
8
  TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
9
+ TASK_V2_INFO = ["Avg. Single", "Avg. Multi", "Avg. Video", "Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
10
 
11
  AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
12
+ AVG_V2_INFO = ["Avg. Single", "Avg. Multi", "Avg. Video", "Avg. P1", "Avg. P2", "Avg. P3"]
13
 
14
  DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
15
+ DATA_TITILE_V2_TYPE = ["markdown", "markdown","markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
16
  CSV_DIR = "./file/result.csv"
17
+ CSV_TASK_DIR = './file/result_task.csv'
18
  CSV_V2_DIR = "./file/result_v2.csv"
19
+ CSV_V2_TASK_DIR = './file/result_v2_task.csv'
20
 
21
  COLUMN_NAMES = MODEL_INFO + TASK_INFO
22
  COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
 
24
  DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1509, 1225, 1023]
25
  DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]
26
 
27
+ LEADERBORAD_VERSION = ["SEED-Bench-1", "SEED-Bench-2"]
28
+ AVERAGE_TYPE = ["All Average", "Task Average"]
29
+
30
  LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard
31
 
32
  Welcome to the leaderboard of the SEED-Bench! 🏆
 
86
  TABLE_INTRODUCTION = """In the table below, we summarize each task performance of all the models.
87
  We use accuracy (%) as the primary evaluation metric for each task.
88
 
89
+ When Performance Average Type is All Average, the overall accuracy is calculated by dividing the total number of correct QA answers by the total number of QA questions.
90
 
91
+ When Performance Average Type is Task Average, the overall accuracy is the mean of the per-dimension accuracies.
92
 
93
  For PPL evaluation method, we count the loss for each candidate and select the lowest loss candidate. For detail, please refer [InternLM_Xcomposer_VL_interface](https://github.com/AILab-CVC/SEED-Bench/blob/387a067b6ba99ae5e8231f39ae2d2e453765765c/SEED-Bench-2/model/InternLM_Xcomposer_VL_interface.py#L74).
94
 
file/result.csv CHANGED
@@ -1,46 +1,46 @@
1
  Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
2
- LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,27.7,27.3,28.6,23.0,29.0,32.8,31.8,20.5,31.8,33.0,18.2,19.4,23.2,34.9,25.4
3
  LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
4
- LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37.0,9.0,33.0,23.1,26.2
5
- ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24.0
6
  ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
7
  ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
8
- ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,61.6,68.2,42.7,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77.0,60.5,48.9,41.1,36.6
9
- ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,62.4,68.2,40.5,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36.0,35.7
10
- ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,62.8,68.9,39.5,75.2,71.4,72.0,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
11
  ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
12
- ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32.0,53.2,30.6,39.5,24.3,31.9
13
- ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24.0
14
- ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32.0,51.4,31.8,37.9,27.2,24.8
15
  ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,39.7,42.9,30.6,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
16
- ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20.0,37.2,25.4,24.2
17
- ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,40.9,42.7,35.7,53.2,45.3,40.0,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
18
- ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,32.7,35.2,25.8,45.2,38.5,29.3,33.0,29.7,35.5,39.2,52.0,24.7,38.6,18.5,19.6
19
- ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32.0,32.0,51.1,27.1,33.9,25.4,23.0
20
- ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,34.0,37.9,23.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
21
- ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,50.0,54.4,37.5,63.4,57.1,58.5,44.0,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27.0
22
  ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,58.2,65.4,37.8,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
23
- ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,56.3,62.3,39.1,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32.0
24
- ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,66.8,72.7,44.7,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41.0
25
- ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,0.0,44.5,0.0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0.0,0.0,0.0
26
- ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,0.0,53.2,0.0,64.0,52.6,50.8,48.3,46.1,45.5,62.9,68.0,51.8,0.0,0.0,0.0
27
- ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,0.0,66.9,0.0,75.0,71.7,67.6,60.8,56.2,55.3,74.4,77.0,48.5,0.0,0.0,0.0
28
- ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20.0
29
- ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46.0,38.7,32.9
30
  ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
31
- ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,0.0,67.0,0.0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50.0,0.0,0.0,0.0
32
- ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,62.3,69.6,41.5,75.5,73.0,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39.0,33.9
33
- ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,0.0,69.7,0.0,75.3,71.4,72.3,63.1,62.0,53.9,70.1,79.8,54.7,0.0,0.0,0.0
34
- ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,0.0,70.8,0.0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0.0,0.0,0.0
35
- ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,0.0,68.6,0.0,75.4,72.8,69.0,64.5,60.6,55.1,72.2,77.9,41.9,0.0,0.0,0.0
36
- ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63.9,71.6,34.7,75.4,72.2,75.1,64.2,68.2,49.3,66.0,78.6,62.4,41.2,33.9,26.1
37
  ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,67.5,74.8,39.8,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
38
  ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,67.3,69.1,60.5,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
39
- VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.6,39.0,33.7,47.1,43.8,34.9,40.0,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
40
  VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
41
- VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,30.3,32.0,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
42
- Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,60.5,65.6,46.0,70.7,69.0,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34.0
43
- Other,[Unified-IO-2 7B](),from scratch,7B,PPL,60.4,65.5,46.0,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58.0,42.7,34.0
44
- Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,60.2,64.1,45.6,69.0,66.6,66.5,54.3,62.0,42.3,50.5,65.3,44.2,57.5,36.2,39.4
45
- Other,[Unified-IO-2 3B](),from scratch,3B,PPL,58.7,63.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64.0,41.9,57.5,36.0,39.0
46
- Other,[Unified-IO-2 1B](),from scratch,1B,PPL,49.6,55.1,34.0,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
 
1
  Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
2
+ LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,27.7,27.3,28.6,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
3
  LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
4
+ LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
5
+ ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
6
  ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
7
  ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
8
+ ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,61.6,68.2,42.7,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
9
+ ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,62.4,68.2,40.5,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36,35.7
10
+ ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,62.8,68.9,39.5,75.2,71.4,72,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
11
  ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
12
+ ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
13
+ ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
14
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
15
  ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,39.7,42.9,30.6,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
16
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
17
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,40.9,42.7,35.7,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
18
+ ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,32.7,35.2,25.8,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
19
+ ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
20
+ ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,34,37.9,23,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
21
+ ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,50,54.4,37.5,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
22
  ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,58.2,65.4,37.8,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
23
+ ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,56.3,62.3,39.1,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
24
+ ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,66.8,72.7,44.7,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41
25
+ ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
26
+ ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
27
+ ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
28
+ ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20
29
+ ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46,38.7,32.9
30
  ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
31
+ ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,0,67,0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50,0,0,0
32
+ ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,62.3,69.6,41.5,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
33
+ ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,0,69.7,0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
34
+ ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,0,70.8,0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0,0,0
35
+ ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,0,68.6,0,75.4,72.8,69,64.5,60.6,55.1,72.2,77.9,41.9,0,0,0
36
+ ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63.9,71.6,34.7,75.4,72.2,75.1,64.2,68.2,49.3,66,78.6,62.4,41.2,33.9,26.1
37
  ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,67.5,74.8,39.8,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
38
  ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,67.3,69.1,60.5,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
39
+ VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.6,39,33.7,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
40
  VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
41
+ VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,30.3,32,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
42
+ Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,60.5,65.6,46,70.7,69,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34
43
+ Other,[Unified-IO-2 7B](),from scratch,7B,PPL,60.4,65.5,46,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58,42.7,34
44
+ Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,60.2,64.1,45.6,69,66.6,66.5,54.3,62,42.3,50.5,65.3,44.2,57.5,36.2,39.4
45
+ Other,[Unified-IO-2 3B](),from scratch,3B,PPL,58.7,63.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64,41.9,57.5,36,39
46
+ Other,[Unified-IO-2 1B](),from scratch,1B,PPL,49.6,55.1,34,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
file/result_task.csv ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
2
+ LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,26.9,26.6,27.8,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
3
+ LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,26.8,26.2,28.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
4
+ LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,25.8,25.3,27.4,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
5
+ ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,43.0,45.7,34.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
6
+ ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.1,49.3,36.4,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
7
+ ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,48.1,52.2,35.7,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
8
+ ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,60.7,66.9,42.2,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
9
+ ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,57.4,63.3,39.7,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36,35.7
10
+ ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,58.3,64.8,38.6,75.2,71.4,72,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
11
+ ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,39.4,42.6,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
12
+ ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,37.8,39.8,31.9,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
13
+ ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,32.3,33.5,28.9,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
14
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,34.2,35.5,30.0,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
15
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,37.6,40.1,29.9,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
16
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,32.4,33.5,28.9,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
17
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,38.3,39.4,34.8,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
18
+ ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,33.7,36.3,25.6,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
19
+ ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.3,35.2,27.4,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
20
+ ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,35.3,39.1,23.7,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
21
+ ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,46.1,49.4,36.2,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
22
+ ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,55.6,61.9,36.6,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
23
+ ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,54.3,59.6,38.4,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
24
+ ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,62.6,68.8,43.9,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41
25
+ ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,32.3,43.0,0.0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
26
+ ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,40.8,54.4,0.0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
27
+ ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,48.9,65.2,0.0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
28
+ ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,46.9,51.0,34.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20
29
+ ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,55.1,60.4,39.2,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46,38.7,32.9
30
+ ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,58.5,65.4,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
31
+ ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,48.0,64.0,0.0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50,0,0,0
32
+ ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,59.4,65.5,40.8,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
33
+ ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,50.2,67.0,0.0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
34
+ ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,50.6,67.4,0.0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0,0,0
35
+ ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,49.1,65.5,0.0,75.4,72.8,69,64.5,60.6,55.1,72.2,77.9,41.9,0,0,0
36
+ ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,59.4,67.9,33.7,75.4,72.2,75.1,64.2,68.2,49.3,66,78.6,62.4,41.2,33.9,26.1
37
+ ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,64.2,72.7,38.6,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
38
+ ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,65.7,67.5,60.3,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
39
+ VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,36.9,38.2,32.9,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
40
+ VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,29.8,31.9,23.3,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
41
+ VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,28.7,29.9,25.1,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
42
+ Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,57.6,61.8,44.9,70.7,69,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34
43
+ Other,[Unified-IO-2 7B](),from scratch,7B,PPL,57.8,62.0,44.9,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58,42.7,34
44
+ Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,54.5,57.9,44.4,69,66.6,66.5,54.3,62,42.3,50.5,65.3,44.2,57.5,36.2,39.4
45
+ Other,[Unified-IO-2 3B](),from scratch,3B,PPL,54.4,57.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64,41.9,57.5,36,39
46
+ Other,[Unified-IO-2 1B](),from scratch,1B,PPL,46.8,51.4,33.0,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
file/result_v2.csv CHANGED
@@ -1,28 +1,28 @@
1
- Model,Language Model,Model Size,Evaluation Method,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
2
- [BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,41.0,35.3,0.0,58.5,48.6,49.0,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22.0,17.8,38.6,42.5,37.7,36.2,22.9,40.0,30.6,0.0,0.0,0.0
3
- [InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,42.2,35.7,0.0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0.0,0.0,0.0
4
- [InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,41.4,29.7,0.0,53.6,43.9,49.0,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23.0,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0.0,0.0,0.0
5
- [LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,38.7,30.2,0.0,53.8,47.5,38.3,34.2,42.0,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27.0,50.0,44.1,36.2,25.1,18.6,40.0,20.4,0.0,0.0,0.0
6
- [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,39.4,34.1,0.0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25.0,19.0,46.7,39.0,38.7,27.4,28.6,45.8,22.5,0.0,0.0,0.0
7
- [VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,36.2,23.9,0.0,46.9,38.6,33.6,35.6,27.5,34.4,33.0,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44.0,37.8,38.2,20.9,33.5,19.2,28.6,0.0,0.0,0.0
8
- [MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,37.4,34.9,0.0,46.9,42.5,32.0,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0.0,0.0,0.0
9
- [Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,36.4,36.6,0.0,45.9,39.7,31.9,31.6,26.4,32.0,33.0,49.2,39.3,59.7,53.0,23.6,41.2,36.1,37.3,22.0,27.4,46.7,36.6,37.9,26.0,24.8,42.5,30.6,0.0,0.0,0.0
10
- [OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,37.3,35.5,0.0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39.0,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0.0,0.0,0.0
11
- [LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,37.5,0.0,0.0,45.2,38.5,29.3,33.0,29.7,35.5,39.2,52.0,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0.0,0.0,0.0,0.0,0.0
12
- [GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,34.4,38.6,0.0,41.7,35.5,31.8,29.5,36.2,32.0,32.0,51.1,35.2,39.4,36.4,25.0,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0.0,0.0,0.0
13
- [mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,39.4,28.9,0.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44.0,32.5,23.5,33.5,54.9,42.0,37.8,18.3,19.3,29.2,28.6,0.0,0.0,0.0
14
- [Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,46.3,23.3,0.0,63.4,57.1,58.5,44.0,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28.0,25.2,42.8,48.5,40.8,39.5,30.0,24.2,22.5,0.0,0.0,0.0
15
- [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,43.1,35.5,0.0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55.0,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0.0,0.0,0.0
16
- [Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,65.3,30.5,0.0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97.0,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48.0,28.3,32.7,0.0,0.0,0.0
17
- [LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,47.3,30.8,0.0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69.0,60.6,49.8,25.0,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0.0,0.0,0.0
18
- [IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,38.0,40.3,0.0,48.2,38.2,37.8,32.9,29.0,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0.0,0.0,0.0
19
- [InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,59.2,32.1,0.0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78.0,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0.0,0.0,0.0
20
- [SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,56.4,31.9,0.0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60.0,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0.0,0.0,0.0
21
- [SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,60.2,37.0,0.0,77.5,78.5,76.6,69.0,71.0,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0.0,0.0,0.0
22
- [Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,42.5,41.1,41.4,59.0,50.0,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19.0,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
23
- [Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31.0,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39.0,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
24
- [SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,43.9,43.4,52.3,64.0,55.0,51.3,45.4,43.3,37.9,56.7,59.2,57.0,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
25
- [GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,69.2,44.2,0.0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0.0,0.0,0.0
26
- [VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.0,35.3,0.0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34.0,30.6,27.4,40.0,30.6,0.0,0.0,0.0
27
- [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,36.4,31.0,0.0,44.1,37.0,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22.0,33.2,37.2,22.4,25.0,46.1,61.4,42.6,32.2,27.0,19.0,37.5,24.5,0.0,0.0,0.0
28
- [Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,34.5,32.2,0.0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52.0,35.2,44.9,43.4,23.8,33.2,37.2,26.0,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0.0,0.0,0.0
 
1
+ Model,Language Model,Model Size,Evaluation Method,Avg. Single,Avg. Multi,Avg. Video,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
2
+ [BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.8,22.8,36.0,44.2,37.3,0.0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
3
+ [InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.4,25.8,36.5,48.6,36.1,0.0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
4
+ [InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,47.5,41.1,33.0,44.2,28.4,0.0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
5
+ [LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,42.4,32.5,32.6,40.2,34.3,0.0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
6
+ [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,45.0,25.7,34.3,42.5,39.0,0.0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
7
+ [VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,36.6,29.3,33.2,35.8,21.9,0.0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
8
+ [MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,36.7,44.1,32.6,35.9,36.7,0.0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
9
+ [Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,36.0,32.0,32.3,35.2,39.0,0.0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
10
+ [OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,36.6,43.5,32.4,35.8,36.7,0.0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
11
+ [LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,36.0,34.7,31.4,35.1,0.0,0.0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
12
+ [GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,34.9,45.8,31.0,34.2,40.2,0.0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
13
+ [mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,38.6,38.7,31.1,36.9,29.0,0.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
14
+ [Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,52.4,29.4,40.7,49.6,23.7,0.0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
15
+ [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,50.3,37.4,34.3,46.6,38.5,0.0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
16
+ [Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,71.9,70.3,46.7,66.1,29.6,0.0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48,28.3,32.7,0,0,0
17
+ [LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,58.3,39.2,36.9,53.3,34.4,0.0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
18
+ [IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,38.8,54.5,32.9,37.5,42.6,0.0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
19
+ [InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,65.4,49.8,44.9,60.6,30.2,0.0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
20
+ [SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,68.5,37.7,40.0,61.7,32.5,0.0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0,0,0
21
+ [SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,72.1,44.8,44.2,65.4,35.5,0.0,77.5,78.5,76.6,69,71,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0,0,0
22
+ [Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,46.4,31.2,37.4,44.2,45.6,45.7,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
23
+ [Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,31.0,27.8,30.7,31.0,40.3,42.8,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
24
+ [SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,49.9,32.4,39.0,47.3,48.0,50.6,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
25
+ [GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,69.8,73.1,61.7,68.1,37.9,0.0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0,0,0
26
+ [VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,36.7,35.4,34.2,36.2,37.3,0.0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
27
+ [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,38.3,49.8,31.6,36.9,33.7,0.0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
28
+ [Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,35.3,40.7,28.5,33.9,33.7,0.0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
file/result_v2_task.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Language Model,Model Size,Evaluation Method,Avg. Single,Avg. Multi,Avg. Video,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
2
+ [BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,44.1,28.2,34.8,41,35.3,0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
3
+ [InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,45.5,29,35.7,42.2,35.7,0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
4
+ [InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,43.4,46,31.5,41.4,29.7,0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
5
+ [LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,40.6,38.5,31,38.7,30.2,0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
6
+ [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,41.7,32.9,33.4,39.4,34.1,0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
7
+ [VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,37.4,34.3,32.6,36.2,23.9,0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
8
+ [MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,37.5,48.3,31.5,37.4,34.9,0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
9
+ [Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,37.6,37.1,31.3,36.4,36.6,0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
10
+ [OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,37.5,47.4,31.3,37.3,35.5,0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
11
+ [LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,39,40.7,29.8,37.5,0,0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
12
+ [GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,50.4,30.1,34.4,38.6,0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
13
+ [mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,41.3,44.2,29.4,39.4,28.9,0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
14
+ [Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,49.5,34,39.7,46.3,23.3,0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
15
+ [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,46,40.8,32.8,43.1,35.5,0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
16
+ [Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,69,74.3,45.9,65.3,30.5,0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48,28.3,32.7,0,0,0
17
+ [LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,50.8,43,35.8,47.3,30.8,0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
18
+ [IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,37.9,52.5,31.4,38,40.3,0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
19
+ [InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,64.2,52.2,42.8,59.2,32.1,0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
20
+ [SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63,39.7,38.4,56.4,31.9,0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0,0,0
21
+ [SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,66.7,43.9,42.6,60.2,37,0,77.5,78.5,76.6,69,71,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0,0,0
22
+ [Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,45.3,33.2,36.1,42.5,41.1,41.4,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
23
+ [Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,30.8,31.6,29.9,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
24
+ [SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,46.5,35.5,37.6,43.9,43.4,52.3,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
25
+ [GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,70,78.6,61.3,69.2,44.2,0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0,0,0
26
+ [VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.4,41,33.4,37,35.3,0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
27
+ [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,35.8,53.8,30.2,36.4,31,0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
28
+ [Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,34.9,44.7,28,34.5,32.2,0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
src/__pycache__/utils_display.cpython-38.pyc CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ