lovodkin93 committed on
Commit
bfeafb5
1 Parent(s): 731c118

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +23 -0
  2. visit_bench_leaderboard.tsv +16 -0
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio app that renders the VisIT-Bench leaderboard.

The script loads a tab-separated results file into a pandas DataFrame and
shows it, under a short Markdown header with submission instructions, in a
Gradio Blocks page.
"""

import gradio as gr
import pandas as pd

# Tab-separated file holding the leaderboard rows shown in the table.
# NOTE(review): an earlier revision of this script read
# "visit_bench_leaderboard.tsv" instead -- confirm that the file named here
# is actually present next to this script before deploying.
LEADERBOARD_TSV = 'visitbench_leaderboard_Single~Image_Nov072023.tsv'

df = pd.read_table(LEADERBOARD_TSV)

# Markdown shown above the table: page title plus submission instructions.
headline = """# VisIT-Bench Leaderboard

To submit your results to the leaderboard, you can run our auto-evaluation code, following the instructions [here](https://github.com/Hritikbansal/visit_bench_sandbox). Once you are happy with the results, you can send to [this mail](mailto:yonatanbitton1@gmail.com).
Please include in your email 1) a name for your model, 2) your team name (including your affiliation), and optionally, 3) a github repo or paper link. Please also attach your predictions: you can add a "predictions" column to [this csv](https://huggingface.co/datasets/mlfoundations/VisIT-Bench/raw/main/test/metadata.csv).
"""

# `demo` stays a module-level name so tooling that imports this module can
# still find the app object.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(headline)

    with gr.Column():
        leaderboard_df = gr.components.DataFrame(
            value=df,
            # One entry per column, in order: two markdown-rendered columns
            # followed by three numeric ones.
            # NOTE(review): assumes the TSV has exactly five columns and that
            # the last three are numeric -- confirm against the data file.
            datatype=["markdown", "markdown", "number", "number", "number"],
        )

# Start the web server only when executed as a script, so that importing
# this module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()
visit_bench_leaderboard.tsv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Category Model Elo matches Win vs. Reference (w/ # ratings)
2
+ Single Image Human Verified GPT-4 Reference 1370 5442 -
3
+ Single Image LLaVA (13B) 1106 5446 17.81% (n=494)
4
+ Single Image LlamaAdapter-v2 (7B) 1082 5445 13.75% (n=502)
5
+ Single Image mPLUG-Owl (7B) 1081 5452 15.29% (n=497)
6
+ Single Image InstructBLIP (13B) 1011 5444 13.73% (n=517)
7
+ Single Image Otter (9B) 991 5450 6.84% (n=512)
8
+ Single Image VisualGPT (Da Vinci 003) 972 5445 1.52% (n=527)
9
+ Single Image MiniGPT-4 (7B) 921 5442 3.26% (n=522)
10
+ Single Image OpenFlamingo (9B) 877 5449 2.86% (n=524)
11
+ Single Image PandaGPT (13B) 826 5441 2.63% (n=533)
12
+ Single Image Multimodal GPT 763 5450 0.18% (n=544)
13
+ Multiple Images Human Verified GPT-4 Reference 1192 180 -
14
+ Multiple Images mPLUG-Owl 995 180 6.67% (n=60)
15
+ Multiple Images Otter 911 180 1.69% (n=59)
16
+ Multiple Images OpenFlamingo 902 180 1.67% (n=60)