Update README.md
Browse files
README.md
CHANGED
|
@@ -168,31 +168,4 @@ The model is trained to follow a reasoning-first format:
|
|
| 168 |
|
| 169 |
---
|
| 170 |
|
| 171 |
-
Happy reasoning! 🔍✨
|
| 172 |
-
|
| 173 |
-
## Citations
|
| 174 |
-
|
| 175 |
-
Cite GRPO as:
|
| 176 |
-
|
| 177 |
-
```bibtex
|
| 178 |
-
@article{zhihong2024deepseekmath,
|
| 179 |
-
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
|
| 180 |
-
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
|
| 181 |
-
year = 2024,
|
| 182 |
-
eprint = {arXiv:2402.03300},
|
| 183 |
-
}
|
| 184 |
-
|
| 185 |
-
```
|
| 186 |
-
|
| 187 |
-
Cite TRL as:
|
| 188 |
-
|
| 189 |
-
```bibtex
|
| 190 |
-
@misc{vonwerra2022trl,
|
| 191 |
-
title = {{TRL: Transformer Reinforcement Learning}},
|
| 192 |
-
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
|
| 193 |
-
year = 2020,
|
| 194 |
-
journal = {GitHub repository},
|
| 195 |
-
publisher = {GitHub},
|
| 196 |
-
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 197 |
-
}
|
| 198 |
-
```
|
|
|
|
| 168 |
|
| 169 |
---
|
| 170 |
|
| 171 |
+
Happy reasoning! 🔍✨
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|