Spaces:
Running
Running
Přidány kategorie do `tasks_metadata.json`
Browse files- .gitignore +21 -0
- app.py +2 -2
- compare_significance.py +1 -1
- server.py +2 -3
- tasks_metadata.json +280 -202
.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.eric6project/
|
2 |
+
.eric7project/
|
3 |
+
.ropeproject/
|
4 |
+
.jedi/
|
5 |
+
.directory/
|
6 |
+
*.pyc
|
7 |
+
*.pyo
|
8 |
+
*.orig
|
9 |
+
*.bak
|
10 |
+
*.rej
|
11 |
+
*~
|
12 |
+
cur/
|
13 |
+
tmp/
|
14 |
+
__pycache__/
|
15 |
+
__pypackages__
|
16 |
+
*.DS_Store
|
17 |
+
.pytest_cache/
|
18 |
+
venv/
|
19 |
+
.venv/
|
20 |
+
env/
|
21 |
+
.env/
|
app.py
CHANGED
@@ -8,7 +8,7 @@ from gradio_modal import Modal
|
|
8 |
from content import (HEADER_MARKDOWN, LEADERBOARD_TAB_TITLE_MARKDOWN, SUBMISSION_TAB_TITLE_MARKDOWN,
|
9 |
MODAL_SUBMIT_MARKDOWN,
|
10 |
SUBMISSION_DETAILS_MARKDOWN, RANKING_AFTER_SUBMISSION_MARKDOWN, MORE_DETAILS_MARKDOWN)
|
11 |
-
from server import LeaderboardServer
|
12 |
|
13 |
leaderboard_server = LeaderboardServer()
|
14 |
|
@@ -157,7 +157,7 @@ with (gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css) as main
|
|
157 |
with gr.Tab("Overall"):
|
158 |
results_table = gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False, label=None,
|
159 |
visible=True, elem_classes="leaderboard-table")
|
160 |
-
for c in
|
161 |
with gr.Tab(c):
|
162 |
results_table = gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False, label=None,
|
163 |
visible=True, elem_classes="leaderboard-table")
|
|
|
8 |
from content import (HEADER_MARKDOWN, LEADERBOARD_TAB_TITLE_MARKDOWN, SUBMISSION_TAB_TITLE_MARKDOWN,
|
9 |
MODAL_SUBMIT_MARKDOWN,
|
10 |
SUBMISSION_DETAILS_MARKDOWN, RANKING_AFTER_SUBMISSION_MARKDOWN, MORE_DETAILS_MARKDOWN)
|
11 |
+
from server import LeaderboardServer
|
12 |
|
13 |
leaderboard_server = LeaderboardServer()
|
14 |
|
|
|
157 |
with gr.Tab("Overall"):
|
158 |
results_table = gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False, label=None,
|
159 |
visible=True, elem_classes="leaderboard-table")
|
160 |
+
for c in leaderboard_server.tasks_categories:
|
161 |
with gr.Tab(c):
|
162 |
results_table = gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False, label=None,
|
163 |
visible=True, elem_classes="leaderboard-table")
|
compare_significance.py
CHANGED
@@ -176,7 +176,7 @@ def read_json(file_path):
|
|
176 |
with open(METADATA_FILE, "r") as f:
|
177 |
metadata = json.load(f)
|
178 |
|
179 |
-
all_tasks = list(metadata
|
180 |
all_missing_tasks = []
|
181 |
for task in all_tasks:
|
182 |
if task not in data:
|
|
|
176 |
with open(METADATA_FILE, "r") as f:
|
177 |
metadata = json.load(f)
|
178 |
|
179 |
+
all_tasks = list(metadata.keys())
|
180 |
all_missing_tasks = []
|
181 |
for task in all_tasks:
|
182 |
if task not in data:
|
server.py
CHANGED
@@ -18,8 +18,6 @@ REPO = f"{ORG}/LLM_benchmark_data"
|
|
18 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
19 |
TASKS_METADATA_PATH = "./tasks_metadata.json"
|
20 |
|
21 |
-
categories = ['Czech Math Reasoning', 'General Reasoning', 'Historical', 'Knowledge', 'Language Modeling', 'NER', 'NLI', 'Sentiment', 'Summarization', 'Syntactical Reasoning', 'Topic Classification']
|
22 |
-
|
23 |
class LeaderboardServer:
|
24 |
def __init__(self):
|
25 |
self.server_address = REPO
|
@@ -27,7 +25,8 @@ class LeaderboardServer:
|
|
27 |
self.local_leaderboard = snapshot_download(self.server_address, repo_type=self.repo_type, token=HF_TOKEN,
|
28 |
local_dir="./")
|
29 |
self.submisssion_id_to_file = {} # Map submission ids to file paths
|
30 |
-
self.tasks_metadata = json.load(open(TASKS_METADATA_PATH))
|
|
|
31 |
self.submission_ids = set()
|
32 |
self.fetch_existing_models()
|
33 |
self.tournament_results = self.load_tournament_results()
|
|
|
18 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
19 |
TASKS_METADATA_PATH = "./tasks_metadata.json"
|
20 |
|
|
|
|
|
21 |
class LeaderboardServer:
|
22 |
def __init__(self):
|
23 |
self.server_address = REPO
|
|
|
25 |
self.local_leaderboard = snapshot_download(self.server_address, repo_type=self.repo_type, token=HF_TOKEN,
|
26 |
local_dir="./")
|
27 |
self.submisssion_id_to_file = {} # Map submission ids to file paths
|
28 |
+
self.tasks_metadata = json.load(open(TASKS_METADATA_PATH))
|
29 |
+
self.tasks_categories = {self.tasks_metadata[task]["category"] for task in self.tasks_metadata}
|
30 |
self.submission_ids = set()
|
31 |
self.fetch_existing_models()
|
32 |
self.tournament_results = self.load_tournament_results()
|
tasks_metadata.json
CHANGED
@@ -1,204 +1,282 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
"
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
"
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
"
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
"
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
"
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
"
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
"
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
"
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
"
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
"
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
"
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
"
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
"
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
"
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
"
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
"
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
"
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
"
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
"
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
"
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
"
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
"
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
"
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
"
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
"
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
"
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
"
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
"
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
}
|
204 |
-
}
|
|
|
1 |
{
|
2 |
+
"benczechmark_propaganda_argumentace": {
|
3 |
+
"name": "Propaganda - Argumentace",
|
4 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_argumentace",
|
5 |
+
"short_name": "P-Argumentace",
|
6 |
+
"category": "NLI",
|
7 |
+
"abbreviation": "P-ARG"
|
8 |
+
},
|
9 |
+
"benczechmark_propaganda_fabulace": {
|
10 |
+
"name": "Propaganda - Fabulace",
|
11 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_fabulace",
|
12 |
+
"short_name": "P-Fabulace",
|
13 |
+
"category": "NLI",
|
14 |
+
"abbreviation": "P-FAB"
|
15 |
+
},
|
16 |
+
"benczechmark_propaganda_nazor": {
|
17 |
+
"name": "Propaganda - Nazor",
|
18 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_nazor",
|
19 |
+
"short_name": "P-Názor",
|
20 |
+
"category": "NLI",
|
21 |
+
"abbreviation": "P-NAZOR"
|
22 |
+
},
|
23 |
+
"benczechmark_propaganda_strach": {
|
24 |
+
"name": "Propaganda - Strach",
|
25 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_strach",
|
26 |
+
"short_name": "P-Strach",
|
27 |
+
"category": "NLI",
|
28 |
+
"abbreviation": "P-STCH"
|
29 |
+
},
|
30 |
+
"benczechmark_propaganda_zamereni": {
|
31 |
+
"name": "Propaganda - Zamereni",
|
32 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_zamereni",
|
33 |
+
"short_name": "P-Zaměření",
|
34 |
+
"category": "NLI",
|
35 |
+
"abbreviation": "P-MER"
|
36 |
+
},
|
37 |
+
"benczechmark_propaganda_demonizace": {
|
38 |
+
"name": "Propaganda - Demonizace",
|
39 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_demonizace",
|
40 |
+
"short_name": "P-Demonizace",
|
41 |
+
"category": "NLI",
|
42 |
+
"abbreviation": "P-DEMON"
|
43 |
+
},
|
44 |
+
"benczechmark_propaganda_lokace": {
|
45 |
+
"name": "Propaganda - Lokace",
|
46 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_lokace",
|
47 |
+
"short_name": "P-Lokace",
|
48 |
+
"category": "NLI",
|
49 |
+
"abbreviation": "P-LOK"
|
50 |
+
},
|
51 |
+
"benczechmark_propaganda_relativizace": {
|
52 |
+
"name": "Propaganda - Relativizace",
|
53 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_relativizace",
|
54 |
+
"short_name": "P-Relativizace",
|
55 |
+
"category": "NLI",
|
56 |
+
"abbreviation": "P-REL"
|
57 |
+
},
|
58 |
+
"benczechmark_propaganda_vina": {
|
59 |
+
"name": "Propaganda - Vina",
|
60 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_vina",
|
61 |
+
"short_name": "P-Vina",
|
62 |
+
"category": "NLI",
|
63 |
+
"abbreviation": "P-VINA"
|
64 |
+
},
|
65 |
+
"benczechmark_propaganda_zanr": {
|
66 |
+
"name": "Propaganda - Zanr",
|
67 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_zanr",
|
68 |
+
"short_name": "P-Žánr",
|
69 |
+
"category": "NLI",
|
70 |
+
"abbreviation": "P-ZANR"
|
71 |
+
},
|
72 |
+
"benczechmark_propaganda_emoce": {
|
73 |
+
"name": "Propaganda - Emoce",
|
74 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_emoce",
|
75 |
+
"short_name": "P-Emoce",
|
76 |
+
"category": "NLI",
|
77 |
+
"abbreviation": "P-EMOCE"
|
78 |
+
},
|
79 |
+
"benczechmark_propaganda_nalepkovani": {
|
80 |
+
"name": "Propaganda - Nalepkovani",
|
81 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_nalepkovani",
|
82 |
+
"short_name": "P-Nalepkování",
|
83 |
+
"category": "NLI",
|
84 |
+
"abbreviation": "P-LEP"
|
85 |
+
},
|
86 |
+
"benczechmark_propaganda_rusko": {
|
87 |
+
"name": "Propaganda - Rusko",
|
88 |
+
"source_url": "https://huggingface.co/datasets/CZLC/propaganda_rusko",
|
89 |
+
"short_name": "P-Rusko",
|
90 |
+
"category": "NLI",
|
91 |
+
"abbreviation": "P-RUS"
|
92 |
+
},
|
93 |
+
"benczechmark_sentiment_mall": {
|
94 |
+
"name": "CzechSentiment MALL",
|
95 |
+
"source_url": "https://huggingface.co/datasets/CZLC/mall_sentiment_balanced",
|
96 |
+
"short_name": "S-Mall",
|
97 |
+
"category": "Sentiment",
|
98 |
+
"abbreviation": "S-MALL"
|
99 |
+
},
|
100 |
+
"benczechmark_sentiment_fb": {
|
101 |
+
"name": "CzechSentiment FB",
|
102 |
+
"source_url": "https://huggingface.co/datasets/CZLC/fb_sentiment_balanced",
|
103 |
+
"short_name": "S-FB",
|
104 |
+
"category": "Sentiment",
|
105 |
+
"abbreviation": "S-FB"
|
106 |
+
},
|
107 |
+
"benczechmark_sentiment_csfd": {
|
108 |
+
"name": "CzechSentiment CSFD",
|
109 |
+
"source_url": "https://huggingface.co/datasets/CZLC/csfd_sentiment_balanced",
|
110 |
+
"short_name": "S-CSFD",
|
111 |
+
"category": "Sentiment",
|
112 |
+
"abbreviation": "S-CSFD"
|
113 |
+
},
|
114 |
+
"benczechmark_summarization": {
|
115 |
+
"name": "SUMECZECH",
|
116 |
+
"source_url": "https://huggingface.co/datasets/CZLC/sumeczech_downsampled",
|
117 |
+
"short_name": "Summarization",
|
118 |
+
"category": "Summarization",
|
119 |
+
"abbreviation": "SUM"
|
120 |
+
},
|
121 |
+
"benczechmark_grammarerrorcorrection": {
|
122 |
+
"name": "GrammarErrorCorrection",
|
123 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cs_gec",
|
124 |
+
"short_name": "Grammar Error Correction",
|
125 |
+
"category": "Syntactical Reasoning",
|
126 |
+
"abbreviation": "GEC"
|
127 |
+
},
|
128 |
+
"benczechmark_cs_naturalquestions": {
|
129 |
+
"name": "NaturalQuestions-CZ",
|
130 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cs_naturalquestions",
|
131 |
+
"short_name": "CS Natural Questions",
|
132 |
+
"category": "Knowledge",
|
133 |
+
"abbreviation": "NQ"
|
134 |
+
},
|
135 |
+
"benczechmark_cs_sqad32": {
|
136 |
+
"name": "SQAD3.2",
|
137 |
+
"source_url": "https://huggingface.co/datasets/CZLC/SQAD_3.2",
|
138 |
+
"short_name": "CS SQAD 3.2",
|
139 |
+
"category": "Knowledge",
|
140 |
+
"abbreviation": "SQAD32"
|
141 |
+
},
|
142 |
+
"benczechmark_cs_triviaQA": {
|
143 |
+
"name": "TriviaQA-CZ",
|
144 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cs_triviaqa",
|
145 |
+
"short_name": "CS TriviaQA",
|
146 |
+
"category": "Knowledge",
|
147 |
+
"abbreviation": "TQA"
|
148 |
+
},
|
149 |
+
"benczechmark_csfever_nli": {
|
150 |
+
"name": "CSFEVER",
|
151 |
+
"source_url": "https://huggingface.co/datasets/CZLC/ctu-aic/csfever_nli",
|
152 |
+
"short_name": "CSFever NLI",
|
153 |
+
"category": "NLI",
|
154 |
+
"abbreviation": "CFR"
|
155 |
+
},
|
156 |
+
"benczechmark_ctkfacts_nli": {
|
157 |
+
"name": "CTKFACTS",
|
158 |
+
"source_url": "https://huggingface.co/datasets/CZLC/ctu-aic/ctkfacts_nli",
|
159 |
+
"short_name": "CTKFacts NLI",
|
160 |
+
"category": "NLI",
|
161 |
+
"abbreviation": "CTK"
|
162 |
+
},
|
163 |
+
"benczechmark_cs_ner": {
|
164 |
+
"name": "CZECH NER CORPUS 2.0",
|
165 |
+
"source_url": "https://huggingface.co/datasets/CZLC/fewshot-goes-multilingual/cs_czech-named-entity-corpus_2.0",
|
166 |
+
"short_name": "CS NER",
|
167 |
+
"category": "NER",
|
168 |
+
"abbreviation": "CZNERC"
|
169 |
+
},
|
170 |
+
"benczechmark_hellaswag": {
|
171 |
+
"name": "HellaSwag-CZ",
|
172 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cs_hellaswag",
|
173 |
+
"short_name": "HellaSwag",
|
174 |
+
"category": "Language Modeling",
|
175 |
+
"abbreviation": "HASG"
|
176 |
+
},
|
177 |
+
"benczechmark_histcorpus": {
|
178 |
+
"name": "Historical Corpus",
|
179 |
+
"source_url": "https://huggingface.co/datasets/CZLC/benczechmark_histcorpus",
|
180 |
+
"short_name": "HistCorpus",
|
181 |
+
"category": "Language Modeling",
|
182 |
+
"abbreviation": "HIST"
|
183 |
+
},
|
184 |
+
"benczechmark_klokan_qa": {
|
185 |
+
"name": "Klokan QA",
|
186 |
+
"source_url": "https://huggingface.co/datasets/hynky/klokan-qa",
|
187 |
+
"short_name": "Klokan QA",
|
188 |
+
"category": "Czech Math Reasoning",
|
189 |
+
"abbreviation": "KQA"
|
190 |
+
},
|
191 |
+
"benczechmark_cs_court_decisions_ner": {
|
192 |
+
"name": "Czech Court Decisions",
|
193 |
+
"source_url": "https://huggingface.co/datasets/CZLC/fewshot-goes-multilingual/cs_czech-court-decisions-ner",
|
194 |
+
"short_name": "CS Court Decisions NER",
|
195 |
+
"category": "NER",
|
196 |
+
"abbreviation": "CCDNER"
|
197 |
+
},
|
198 |
+
"benczechmark_umimeto_biology": {
|
199 |
+
"name": "Umimeto.cz - Biology",
|
200 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-biology",
|
201 |
+
"short_name": "Umimeto.cz - Biology",
|
202 |
+
"category": "General Reasoning",
|
203 |
+
"abbreviation": "UT-BIO"
|
204 |
+
},
|
205 |
+
"benczechmark_umimeto_chemistry": {
|
206 |
+
"name": "Umimeto.cz - Chemistry",
|
207 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-chemistry",
|
208 |
+
"short_name": "Umimeto.cz - Chemistry",
|
209 |
+
"category": "General Reasoning",
|
210 |
+
"abbreviation": "UT-CHEM"
|
211 |
+
},
|
212 |
+
"benczechmark_umimeto_czech": {
|
213 |
+
"name": "Umimeto.cz - Czech Language",
|
214 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-czech",
|
215 |
+
"short_name": "Umimeto.cz - Czech",
|
216 |
+
"category": "General Reasoning",
|
217 |
+
"abbreviation": "UT-CZEL"
|
218 |
+
},
|
219 |
+
"benczechmark_umimeto_history": {
|
220 |
+
"name": "Umimeto.cz - History",
|
221 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-history",
|
222 |
+
"short_name": "Umimeto.cz - History",
|
223 |
+
"category": "General Reasoning",
|
224 |
+
"abbreviation": "UT-HIST"
|
225 |
+
},
|
226 |
+
"benczechmark_umimeto_informatics": {
|
227 |
+
"name": "Umimeto.cz - Informatics",
|
228 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-informatics",
|
229 |
+
"short_name": "Umimeto.cz - Informatics",
|
230 |
+
"category": "General Reasoning",
|
231 |
+
"abbreviation": "UT-IT"
|
232 |
+
},
|
233 |
+
"benczechmark_umimeto_math": {
|
234 |
+
"name": "Umimeto.cz - Math",
|
235 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-math",
|
236 |
+
"short_name": "Umimeto.cz - Math",
|
237 |
+
"category": "Czech Math Reasoning",
|
238 |
+
"abbreviation": "UT-MATH"
|
239 |
+
},
|
240 |
+
"benczechmark_umimeto_physics": {
|
241 |
+
"name": "Umimeto.cz - Physics",
|
242 |
+
"source_url": "https://huggingface.co/datasets/CZLC/umimeto-physics",
|
243 |
+
"short_name": "Umimeto.cz - Physics",
|
244 |
+
"category": "General Reasoning",
|
245 |
+
"abbreviation": "UT-PHYS"
|
246 |
+
},
|
247 |
+
"benczechmark_cermat_czmath_mc": {
|
248 |
+
"name": "CERMAT - Czech Math - MC",
|
249 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cermat_math_mc",
|
250 |
+
"short_name": "Cermat Czech Math MC",
|
251 |
+
"category": "Czech Math Reasoning",
|
252 |
+
"abbreviation": "CCM-MC"
|
253 |
+
},
|
254 |
+
"benczechmark_cermat_czmath_open": {
|
255 |
+
"name": "CERMAT - Czech Math - OPEN",
|
256 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cermat_math_open",
|
257 |
+
"short_name": "Cermat Czech Math Open",
|
258 |
+
"category": "Czech Math Reasoning",
|
259 |
+
"abbreviation": "CCM-OPEN"
|
260 |
+
},
|
261 |
+
"benczechmark_cermat_czech_tf": {
|
262 |
+
"name": "CERMAT - Czech Language - TF",
|
263 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cermat_czech_tf",
|
264 |
+
"short_name": "Cermat Czech Language TF",
|
265 |
+
"category": "General Reasoning",
|
266 |
+
"abbreviation": "CCL-TF"
|
267 |
+
},
|
268 |
+
"benczechmark_cermat_czech_mc": {
|
269 |
+
"name": "CERMAT - Czech Language - MC",
|
270 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cermat_czech_mc",
|
271 |
+
"short_name": "Cermat Czech Language MC",
|
272 |
+
"category": "General Reasoning",
|
273 |
+
"abbreviation": "CCL-MC"
|
274 |
+
},
|
275 |
+
"benczechmark_cermat_czech_open": {
|
276 |
+
"name": "CERMAT - Czech Language - OPEN",
|
277 |
+
"source_url": "https://huggingface.co/datasets/CZLC/cermat_czech_open",
|
278 |
+
"short_name": "Cermat Czech Language Open",
|
279 |
+
"category": "General Reasoning",
|
280 |
+
"abbreviation": "CCL-OPEN"
|
281 |
}
|
282 |
+
}
|