update leaderboard
meta_data.py CHANGED (+10 -2)
@@ -14,8 +14,16 @@ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 # CONSTANTS-TEXT
 LEADERBORAD_INTRODUCTION = """# Open LMM Reasoning Leaderboard
 
-This leaderboard aims at providing a comprehensive evaluation of the reasoning capabilities of LMMs.
-Currently, it is a collection of evaluation results on multiple multi-modal mathematical reasoning benchmarks.
+This leaderboard aims at providing a comprehensive evaluation of the reasoning capabilities of LMMs.
+Currently, it is a collection of evaluation results on multiple multi-modal mathematical reasoning benchmarks.
+We obtain all evaluation results based on the [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), with the corresponding dataset names:
+
+1. MathVista_MINI: The Test Mini split of MathVista dataset, around 1000 samples.
+2. MathVision: The Full test set of MathVision, around 3000 samples.
+3. MathVerse_MINI_Vision_Only: The Test Mini split of MathVerse, using the "Vision Only" mode, around 700 samples.
+4. DynaMath: The Full test set of DynaMath, around 5000 samples (501 original questions x 10 variants).
+
+To suggest new models or benchmarks for this leaderboard, please contact dhd.efz@gmail.com.
 """
 
 # CONSTANTS-FIELDS
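For context, `LEADERBORAD_INTRODUCTION` (identifier spelled as in the repo) is a markdown string, so the leaderboard front end can render it directly. A minimal sketch, assuming the Space is a Gradio app; the `gradio` wiring below is an illustrative assumption and not part of this commit:

```python
# Hedged sketch: assumes a Gradio app with meta_data.py importable from the
# app module; neither detail is shown in this commit.
import gradio as gr

from meta_data import LEADERBORAD_INTRODUCTION  # identifier spelled as in the repo

with gr.Blocks() as demo:
    # The constant holds markdown, so it renders as the page introduction.
    gr.Markdown(LEADERBORAD_INTRODUCTION)

if __name__ == "__main__":
    demo.launch()
```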
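The new introduction also states that all numbers come from VLMEvalKit runs on the four listed dataset names. A hedged reproduction sketch; the `run.py` entry point, the `--data`/`--model` flags, and the `GPT4o` model name are assumptions based on the upstream VLMEvalKit README, not something this commit specifies:

```python
# Hypothetical driver that evaluates one model on each benchmark named in the
# updated introduction by shelling out to VLMEvalKit's CLI.
import subprocess

DATASETS = [
    "MathVista_MINI",
    "MathVision",
    "MathVerse_MINI_Vision_Only",
    "DynaMath",
]

for dataset in DATASETS:
    # One invocation per benchmark; results land in VLMEvalKit's output directory.
    subprocess.run(
        ["python", "run.py", "--data", dataset, "--model", "GPT4o"],
        check=True,
    )
```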