arvindanand committed on
Commit
8ced5df
1 Parent(s): 0bd224d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -94
README.md CHANGED
@@ -68,97 +68,3 @@ print(outputs[0]["generated_text"])
68
  ```
69
 
70
 
71
-
72
-
73
- @misc{open-llm-leaderboard,
74
- author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
75
- title = {Open LLM Leaderboard},
76
- year = {2023},
77
- publisher = {Hugging Face},
78
- howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
79
- }
80
- @software{eval-harness,
81
- author = {Gao, Leo and
82
- Tow, Jonathan and
83
- Biderman, Stella and
84
- Black, Sid and
85
- DiPofi, Anthony and
86
- Foster, Charles and
87
- Golding, Laurence and
88
- Hsu, Jeffrey and
89
- McDonell, Kyle and
90
- Muennighoff, Niklas and
91
- Phang, Jason and
92
- Reynolds, Laria and
93
- Tang, Eric and
94
- Thite, Anish and
95
- Wang, Ben and
96
- Wang, Kevin and
97
- Zou, Andy},
98
- title = {A framework for few-shot language model evaluation},
99
- month = sep,
100
- year = 2021,
101
- publisher = {Zenodo},
102
- version = {v0.0.1},
103
- doi = {10.5281/zenodo.5371628},
104
- url = {https://doi.org/10.5281/zenodo.5371628}
105
- }
106
- @misc{clark2018think,
107
- title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
108
- author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
109
- year={2018},
110
- eprint={1803.05457},
111
- archivePrefix={arXiv},
112
- primaryClass={cs.AI}
113
- }
114
- @misc{zellers2019hellaswag,
115
- title={HellaSwag: Can a Machine Really Finish Your Sentence?},
116
- author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
117
- year={2019},
118
- eprint={1905.07830},
119
- archivePrefix={arXiv},
120
- primaryClass={cs.CL}
121
- }
122
- @misc{hendrycks2021measuring,
123
- title={Measuring Massive Multitask Language Understanding},
124
- author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
125
- year={2021},
126
- eprint={2009.03300},
127
- archivePrefix={arXiv},
128
- primaryClass={cs.CY}
129
- }
130
- @misc{lin2022truthfulqa,
131
- title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
132
- author={Stephanie Lin and Jacob Hilton and Owain Evans},
133
- year={2022},
134
- eprint={2109.07958},
135
- archivePrefix={arXiv},
136
- primaryClass={cs.CL}
137
- }
138
- @misc{DBLP:journals/corr/abs-1907-10641,
139
- title={{WINOGRANDE:} An Adversarial Winograd Schema Challenge at Scale},
140
- author={Keisuke Sakaguchi and Ronan Le Bras and Chandra Bhagavatula and Yejin Choi},
141
- year={2019},
142
- eprint={1907.10641},
143
- archivePrefix={arXiv},
144
- primaryClass={cs.CL}
145
- }
146
- @misc{DBLP:journals/corr/abs-2110-14168,
147
- title={Training Verifiers to Solve Math Word Problems},
148
- author={Karl Cobbe and
149
- Vineet Kosaraju and
150
- Mohammad Bavarian and
151
- Mark Chen and
152
- Heewoo Jun and
153
- Lukasz Kaiser and
154
- Matthias Plappert and
155
- Jerry Tworek and
156
- Jacob Hilton and
157
- Reiichiro Nakano and
158
- Christopher Hesse and
159
- John Schulman},
160
- year={2021},
161
- eprint={2110.14168},
162
- archivePrefix={arXiv},
163
- primaryClass={cs.CL}
164
- }
 
68
  ```
69
 
70