Update README.md
Browse files
README.md
CHANGED
@@ -177,3 +177,14 @@ As of writing this, the number 1 ranking model in IFEval for any model under 2 b
|
|
177 |
|MuSR (0-shot) | 1.92|
|
178 |
|MMLU-PRO (5-shot) | 7.89|
|
179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|MuSR (0-shot) | 1.92|
|
178 |
|MMLU-PRO (5-shot) | 7.89|
|
179 |
|
180 |
+
# Citation
|
181 |
+
|
182 |
+
@misc{alrashed2024smoltuluhigherlearningrate,
|
183 |
+
title={SmolTulu: Higher Learning Rate to Batch Size Ratios Can Lead to Better Reasoning in SLMs},
|
184 |
+
author={Sultan Alrashed},
|
185 |
+
year={2024},
|
186 |
+
eprint={2412.08347},
|
187 |
+
archivePrefix={arXiv},
|
188 |
+
primaryClass={cs.CL},
|
189 |
+
url={https://arxiv.org/abs/2412.08347},
|
190 |
+
}
|