lovodkin93
commited on
Commit
•
a2ed20e
1
Parent(s):
0db4205
Upload 4 files
Browse files
visitbench_leaderboard_Single~Image_Nov072023.tsv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Category Model Elo # Matches Win vs. Reference (w/ # ratings)
|
2 |
+
Single Image GPT4V 1349 677 65.44% (n=136)
|
3 |
+
Single Image Human Verified Reference 1338 6480 ---
|
4 |
+
Single Image LLaVA-Plus 1187 812 30.15% (n=136)
|
5 |
+
Single Image LLaVA 13B 1091 5574 18.53% (n=475)
|
6 |
+
Single Image LlamaAdapter-v2 1066 5573 14.14% (n=488)
|
7 |
+
Single Image mPLUG-Owl 1025 5561 15.83% (n=480)
|
8 |
+
Single Image idefics9b 997 940 9.72% (n=144)
|
9 |
+
Single Image Lynx(8B) 990 929 11.43% (n=140)
|
10 |
+
Single Image InstructBLIP 964 5612 14.12% (n=503)
|
11 |
+
Single Image Otter 947 5597 7.01% (n=499)
|
12 |
+
Single Image Octopus V2 920 913 8.90% (n=146)
|
13 |
+
Single Image VisualGPT 911 5585 1.57% (n=510)
|
14 |
+
Single Image MiniGPT-4 900 5560 3.36% (n=506)
|
15 |
+
Single Image OpenFlamingo 845 5591 2.95% (n=509)
|
16 |
+
Single Image PandaGPT 13b 786 5573 2.70% (n=519)
|
17 |
+
Single Image MMGPT 718 5604 0.19% (n=527)
|
visitbench_leaderboard_Single~Image_Oct282023.tsv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Category Model Elo # Matches Win vs. Reference (w/ # ratings)
|
2 |
+
Single Image human_verified_reference 1361 6030 ---
|
3 |
+
Single Image LLaVA-Plus 1206 724 30.15% (n=136)
|
4 |
+
Single Image LLaVA 13B 1091 5474 18.53% (n=475)
|
5 |
+
Single Image Lynx 7B V2 1078 708 15.15% (n=132)
|
6 |
+
Single Image mPLUG-Owl 1076 5465 16.04% (n=480)
|
7 |
+
Single Image LlamaAdapter-v2 1055 5485 14.14% (n=488)
|
8 |
+
Single Image idefics9b 1030 842 9.72% (n=144)
|
9 |
+
Single Image Lynx(8B) 1012 827 11.43% (n=140)
|
10 |
+
Single Image InstructBLIP 995 5505 14.12% (n=503)
|
11 |
+
Single Image otter 970 5495 7.01% (n=499)
|
12 |
+
Single Image visual_gpt_davinci003 937 5486 1.57% (n=510)
|
13 |
+
Single Image Octopus V2 936 820 8.90% (n=146)
|
14 |
+
Single Image MiniGPT-4 899 5473 3.36% (n=506)
|
15 |
+
Single Image openflamingo 831 5490 2.95% (n=509)
|
16 |
+
Single Image panda_gpt_13b 767 5480 2.70% (n=519)
|
17 |
+
Single Image MMGPT 757 5504 0.19% (n=527)
|
visitbench_leaderboard_Single~Image_Sep132023.tsv
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Category Model Elo # Matches Win vs. Reference (w/ # ratings)
|
2 |
+
Single Image human_verified_reference 1402 5597 ---
|
3 |
+
Single Image llava13b_output 1128 5399 18.35% (n=474)
|
4 |
+
Single Image mPLUG-Owl prediction 1117 5390 15.87% (n=479)
|
5 |
+
Single Image LlamaAdapter-v2 prediction 1084 5416 14.17% (n=487)
|
6 |
+
Single Image Lynx(8B) predictions 1046 758 11.76% (n=136)
|
7 |
+
Single Image instruct_blip_output 1021 5396 14.14% (n=502)
|
8 |
+
Single Image otter 962 5397 7.03% (n=498)
|
9 |
+
Single Image visual_gpt_davinci003_output 953 5414 1.57% (n=509)
|
10 |
+
Single Image Octopus V2 prediction 952 994 5.29% (n=170)
|
11 |
+
Single Image MiniGPT-4 prediction 938 5393 3.37% (n=505)
|
12 |
+
Single Image openflamingo 851 5397 2.95% (n=508)
|
13 |
+
Single Image panda_gpt_13b_output 801 5397 2.70% (n=518)
|
14 |
+
Single Image mmgpt_output 747 5402 0.19% (n=526)
|
visitbench_leaderboard_Single~Image_Sep252023.tsv
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Category Model Elo # Matches Win vs. Reference (w/ # ratings)
|
2 |
+
Single Image Human Verified Reference 1382 5880 ---
|
3 |
+
Single Image LLaVA-Plus (13B) 🥇 1203 678 35.07% (n=134)
|
4 |
+
Single Image LLaVA (13B) 🥈 1095 5420 18.53% (n=475)
|
5 |
+
Single Image mPLUG-Owl 🥉 1087 5440 15.83% (n=480)
|
6 |
+
Single Image LlamaAdapter-v2 1066 5469 14.14% (n=488)
|
7 |
+
Single Image Lynx(8B) 1037 787 11.43% (n=140)
|
8 |
+
Single Image idefics (9B) 1020 794 9.72% (n=144)
|
9 |
+
Single Image InstructBLIP 1000 5469 14.12% (n=503)
|
10 |
+
Single Image Otter 962 5443 7.01% (n=499)
|
11 |
+
Single Image Visual Gpt (Davinci003) 941 5437 1.57% (n=510)
|
12 |
+
Single Image MiniGPT-4 926 5448 3.36% (n=506)
|
13 |
+
Single Image Octopus V2 925 790 8.90% (n=146)
|
14 |
+
Single Image OpenFlamingo V1 851 5479 2.95% (n=509)
|
15 |
+
Single Image PandaGPT (13B) 775 5465 2.70% (n=519)
|
16 |
+
Single Image MultimodalGPT 731 5471 0.19% (n=527)
|