TTimur committed
Commit 4b9403f · Parent: 6c91170

code update

Files changed (2)
  1. src/display/about.py +0 -104
  2. src/display/utils.py +1 -1
src/display/about.py CHANGED
@@ -87,110 +87,6 @@ If everything is done, check you can launch your evaluation locally (you can add
 """
 
 
-CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""
-@article{KyrgyzLLM-Bench,
-  title={Bridging the Gap in Less-Resourced Languages: Building a Benchmark for Kyrgyz Language Models},
-  author={Timur Turatali, Aida Turdubaeva, Islam Zhenishbekov, Zhoomart Suranbaev, Anton Alekseev, Rustem Izmailov},
-  year={2025},
-  url={https://huggingface.co/datasets/TTimur/kyrgyzMMLU,
-       https://huggingface.co/datasets/TTimur/kyrgyzRC,
-       https://huggingface.co/datasets/TTimur/winogrande_kg,
-       https://huggingface.co/datasets/TTimur/boolq_kg,
-       https://huggingface.co/datasets/TTimur/truthfulqa_kg,
-       https://huggingface.co/datasets/TTimur/gsm8k_kg,
-       https://huggingface.co/datasets/TTimur/hellaswag_kg}
-}
-"""
-
-from dataclasses import dataclass
-from enum import Enum
-
-@dataclass
-class Task:
-    benchmark: str
-    metric: str
-    col_name: str
-
-
-# Init: to update with your specific keys
-class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("KyrgyzMMLU", "metric_name", "KyrgyzMMLU")
-    task1 = Task("KyrgyzRC", "metric_name", "KyrgyzRC")
-    task2 = Task("WinoGrande", "metric_name", "WinoGrande")
-    task3 = Task("BoolQ", "metric_name", "BoolQ")
-    task4 = Task("HellaSwag", "metric_name", "HellaSwag")
-    task5 = Task("GSM8K", "metric_name", "GSM8K")
-    task6 = Task("TruthfulQA", "metric_name", "TruthfulQA")
-
-
-# Your leaderboard name
-TITLE = """<h1 align="center" id="space-title"> OpenLLM Kyrgyz Leaderboard v0.1</h1>"""
-
-# What does your leaderboard evaluate?
-INTRODUCTION_TEXT = """
-Welcome to the Kyrgyz LLM Leaderboard — a dedicated platform for evaluating Large Language Models on Kyrgyz benchmarks. This space highlights models that perform well in Kyrgyz and helps advance research and tooling for low-resource languages.
-
-Benchmarks include native Kyrgyz tasks (KyrgyzMMLU, KyrgyzRC) and carefully translated sets (WinoGrande, BoolQ, HellaSwag, GSM8K, TruthfulQA). Scores are comparable across models and settings.
-
-🚀 Submit Your Model 🚀
-
-Have a Kyrgyz-capable model? Submit it for evaluation (currently manual) and help build a stronger Kyrgyz NLP ecosystem. See the About tab for details.
-"""
-
-# Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
-## How it works
-
-## Reproducibility
-
-This leaderboard aggregates results from Kyrgyz benchmarks. Datasets are hosted on the Hugging Face Hub under `TTimur`:
-- KyrgyzMMLU: `TTimur/kyrgyzMMLU`
-- KyrgyzRC: `TTimur/kyrgyzRC`
-- WinoGrande (KY): `TTimur/winogrande_kg`
-- BoolQ (KY): `TTimur/boolq_kg`
-- HellaSwag (KY): `TTimur/hellaswag_kg`
-- GSM8K (KY): `TTimur/gsm8k_kg`
-- TruthfulQA (KY): `TTimur/truthfulqa_kg`
-
-You can evaluate using your preferred evaluation harness (e.g., Lighteval or EleutherAI's lm-evaluation-harness) with Kyrgyz tasks enabled and then upload the resulting JSON to the results dataset for this Space.
-
-Notes:
-- Metrics reported are accuracy (or QEM for GSM8K), aligned with the dataset conventions.
-- Ensure you use consistent few-shot settings when comparing models.
-"""
-
-EVALUATION_QUEUE_TEXT = """
-## Some good practices before submitting a model
-
-### 1) Make sure you can load your model and tokenizer using AutoClasses:
-```python
-from transformers import AutoConfig, AutoModel, AutoTokenizer
-config = AutoConfig.from_pretrained("your model name", revision=revision)
-model = AutoModel.from_pretrained("your model name", revision=revision)
-tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
-```
-If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
-
-Note: make sure your model is public!
-Note: if your model needs `use_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
-
-### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
-It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
-
-### 3) Make sure your model has an open license!
-This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
-
-### 4) Fill up your model card
-When we add extra information about models to the leaderboard, it will be automatically taken from the model card
-
-## In case of model failure
-If your model is displayed in the `FAILED` category, its execution stopped.
-Make sure you have followed the above steps first.
-If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
-"""
-
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
 @article{KyrgyzLLM-Bench,
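The removed `LLM_BENCHMARKS_TEXT` block tells readers to reproduce scores with an evaluation harness such as EleutherAI's lm-evaluation-harness and to upload the resulting JSON. A minimal sketch of that flow is below; it assumes lm-evaluation-harness ≥ 0.4 is installed and that a custom Kyrgyz task config has been registered locally. The task name `kyrgyzmmlu_ky` and the model id are placeholders, not names defined by this repository.

```python
# Reproducibility sketch (assumptions: lm-evaluation-harness >= 0.4, a locally
# registered Kyrgyz task config; "kyrgyzmmlu_ky" and the model id are placeholders).
import json
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=your-org/your-kyrgyz-model,revision=main",
    tasks=["kyrgyzmmlu_ky"],   # hypothetical custom task name
    num_fewshot=0,             # keep few-shot settings consistent across models
    limit=None,                # set e.g. limit=50 for a quick local smoke test
)

# This per-task results dict is what gets uploaded to the Space's results
# dataset (submission is currently manual).
with open("results.json", "w", encoding="utf-8") as f:
    json.dump(results["results"], f, ensure_ascii=False, indent=2)
```

The `--limit` flag mentioned in the submission notes corresponds to the `limit` argument here, and `num_fewshot` should stay identical across models so scores remain comparable.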
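The `Task` dataclass and `Tasks` enum deleted above duplicate definitions the leaderboard keeps elsewhere: each member records the benchmark key and metric key expected in a results JSON, plus the column label to display. A sketch of how those three fields are meant to be consumed follows, with an assumed results layout and made-up scores; the real metric keys (shown as the `metric_name` placeholder in the diff) and JSON schema may differ.

```python
# Sketch of turning one model's raw results into a leaderboard row using the
# Task/Tasks definitions from the diff above. The JSON layout and the "acc"/"qem"
# metric keys are assumptions; the diff itself uses the placeholder "metric_name".
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str   # task_key in the results JSON
    metric: str      # metric_key in the results JSON
    col_name: str    # column label shown on the leaderboard


class Tasks(Enum):
    task0 = Task("KyrgyzMMLU", "acc", "KyrgyzMMLU")
    task1 = Task("GSM8K", "qem", "GSM8K")


def leaderboard_row(raw: dict) -> dict:
    """Map a results dict keyed by benchmark name onto leaderboard columns."""
    row = {t.value.col_name: raw[t.value.benchmark][t.value.metric] for t in Tasks}
    row["Average ⬆️"] = sum(row.values()) / len(row)  # same column name as utils.py below
    return row


print(leaderboard_row({"KyrgyzMMLU": {"acc": 0.41}, "GSM8K": {"qem": 0.12}}))
```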
src/display/utils.py CHANGED
@@ -29,7 +29,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+    auto_eval_column_dict.append([task.value.col_name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
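The one-line fix changes the key under which each task column is stored in `auto_eval_column_dict`: `task.name` is the `Enum` member name (`task0`, `task1`, ...), while `task.value.col_name` is the display label, presumably so downstream code can look columns up by that label. A small sketch of the difference, reusing the `Task`/`Tasks` definitions from the about.py diff:

```python
# Why the key matters: Enum member names differ from the display labels.
# Definitions copied from the diff above; only two members shown for brevity.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


class Tasks(Enum):
    task0 = Task("KyrgyzMMLU", "metric_name", "KyrgyzMMLU")
    task1 = Task("KyrgyzRC", "metric_name", "KyrgyzRC")


for task in Tasks:
    print(task.name)            # old key:  "task0", "task1"
    print(task.value.col_name)  # new key:  "KyrgyzMMLU", "KyrgyzRC"
```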