kennymckormick committed on
Commit 3307746 • 1 Parent(s): 5c63389

update app.py

Files changed (2)
  1. app.py +1 -1
  2. meta_data.py +3 -2
app.py CHANGED
@@ -22,7 +22,7 @@ with gr.Blocks() as demo:
     with gr.Tabs(elem_classes='tab-buttons') as tabs:
         with gr.TabItem('🏅 OpenVLM Main Leaderboard', elem_id='main', id=0):
            gr.Markdown(LEADERBOARD_MD['MAIN'])
-            table, check_box = BUILD_L1_DF(results, MAIN_FIELDS)
+            table, check_box = BUILD_L1_DF(results, DEFAULT_BENCH)
            type_map = check_box['type_map']
            checkbox_group = gr.CheckboxGroup(
                choices=check_box['all'],
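Note: the effect of this one-line change is that the main tab is now seeded from the default benchmark subset (`DEFAULT_BENCH`) rather than the full `MAIN_FIELDS` list. Below is a minimal sketch of what a `BUILD_L1_DF`-style helper plausibly returns, assuming `results` maps model names to per-benchmark scores; the real implementation lives elsewhere in this repo and may differ.

```python
import pandas as pd

# Hypothetical sketch of a BUILD_L1_DF-style helper: it returns a DataFrame
# with one row per model and one column per selected benchmark, plus a
# check_box dict describing the columns the CheckboxGroup should offer.
# `results` is assumed to map model name -> {benchmark: score}.
def build_l1_df(results: dict, fields: list):
    rows = []
    for model, scores in results.items():
        row = {'Method': model}
        for bench in fields:
            row[bench] = scores.get(bench)  # None if the benchmark is missing
        rows.append(row)
    table = pd.DataFrame(rows)
    check_box = {
        'all': fields,                           # choices shown in the CheckboxGroup
        'default': fields,                       # boxes ticked on page load
        'type_map': {f: 'number' for f in fields},
    }
    return table, check_box

# With this commit, the main tab is built from DEFAULT_BENCH instead of
# MAIN_FIELDS, so the page loads with only the default benchmarks selected.
```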
meta_data.py CHANGED
@@ -24,7 +24,8 @@ META_FIELDS = ['Method', 'Parameters (B)', 'Language Model', 'Vision Model', 'Op
 MAIN_FIELDS = [
     'MMBench_V11', 'MMStar', 'MME',
     'MMMU_VAL', 'MathVista', 'OCRBench', 'AI2D',
-    'HallusionBench', 'SEEDBench_IMG', 'MMVet', 'LLaVABench', 'CCBench', 'RealWorldQA'
+    'HallusionBench', 'SEEDBench_IMG', 'MMVet',
+    'LLaVABench', 'CCBench', 'RealWorldQA', 'POPE', 'ScienceQA_TEST'
 ]
 DEFAULT_BENCH = [
     'MMBench_V11', 'MMStar', 'MMMU_VAL', 'MathVista', 'OCRBench', 'AI2D',
@@ -43,7 +44,7 @@ LEADERBOARD_MD['MAIN'] = f"""
 - Metrics:
   - Avg Score: The average score on all VLM Benchmarks (normalized to 0 - 100, the higher the better).
   - Avg Rank: The average rank on all VLM Benchmarks (the lower the better).
-  - Avg Score & Rank are calculated based on selected benchmark.
+  - Avg Score & Rank are calculated based on selected benchmark. **When results for some selected benchmarks are missing, Avg Score / Rank will be None!!!**
 - By default, we present the overall evaluation results based on {len(DEFAULT_BENCH)} VLM benchmarks, sorted by the descending order of Avg Score.
 - The following datasets are included in the main results: {', '.join(DEFAULT_BENCH)}.
 - Detailed evaluation results for each dataset (included or not included in main) are provided in the consequent tabs.
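The bolded warning added in the second hunk describes how the averages degrade with missing data. A minimal sketch of that rule, assuming a score is stored as None when a model was not evaluated on a benchmark; the helper name here is illustrative, not the repo's actual code:

```python
# Sketch of the Avg Score rule described above: if any selected benchmark
# is missing for a model, the average is None rather than a partial mean.
# (Avg Rank would follow the same all-or-nothing rule.)
def avg_score(scores: dict, selected: list):
    vals = [scores.get(b) for b in selected]
    if any(v is None for v in vals):
        return None  # missing result on a selected benchmark -> no average
    return sum(vals) / len(vals)

scores = {'MMBench_V11': 79.9, 'MMStar': 49.9, 'OCRBench': 68.5}
print(avg_score(scores, ['MMBench_V11', 'MMStar']))     # 64.9
print(avg_score(scores, ['MMBench_V11', 'MathVista']))  # None (MathVista missing)
```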