arvindanand committed on
Commit
0bd224d
1 Parent(s): a6b218f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +97 -1
README.md CHANGED
@@ -65,4 +65,100 @@ pipeline = transformers.pipeline(
65
 
66
  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
67
  print(outputs[0]["generated_text"])
68
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
67
  print(outputs[0]["generated_text"])
68
+ ```
69
+
70
+
71
+
72
+
73
+ @misc{open-llm-leaderboard,
74
+ author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
75
+ title = {Open LLM Leaderboard},
76
+ year = {2023},
77
+ publisher = {Hugging Face},
78
+ howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
79
+ }
80
+ @software{eval-harness,
81
+ author = {Gao, Leo and
82
+ Tow, Jonathan and
83
+ Biderman, Stella and
84
+ Black, Sid and
85
+ DiPofi, Anthony and
86
+ Foster, Charles and
87
+ Golding, Laurence and
88
+ Hsu, Jeffrey and
89
+ McDonell, Kyle and
90
+ Muennighoff, Niklas and
91
+ Phang, Jason and
92
+ Reynolds, Laria and
93
+ Tang, Eric and
94
+ Thite, Anish and
95
+ Wang, Ben and
96
+ Wang, Kevin and
97
+ Zou, Andy},
98
+ title = {A framework for few-shot language model evaluation},
99
+ month = sep,
100
+ year = 2021,
101
+ publisher = {Zenodo},
102
+ version = {v0.0.1},
103
+ doi = {10.5281/zenodo.5371628},
104
+ url = {https://doi.org/10.5281/zenodo.5371628}
105
+ }
106
+ @misc{clark2018think,
107
+ title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
108
+ author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
109
+ year={2018},
110
+ eprint={1803.05457},
111
+ archivePrefix={arXiv},
112
+ primaryClass={cs.AI}
113
+ }
114
+ @misc{zellers2019hellaswag,
115
+ title={HellaSwag: Can a Machine Really Finish Your Sentence?},
116
+ author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
117
+ year={2019},
118
+ eprint={1905.07830},
119
+ archivePrefix={arXiv},
120
+ primaryClass={cs.CL}
121
+ }
122
+ @misc{hendrycks2021measuring,
123
+ title={Measuring Massive Multitask Language Understanding},
124
+ author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
125
+ year={2021},
126
+ eprint={2009.03300},
127
+ archivePrefix={arXiv},
128
+ primaryClass={cs.CY}
129
+ }
130
+ @misc{lin2022truthfulqa,
131
+ title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
132
+ author={Stephanie Lin and Jacob Hilton and Owain Evans},
133
+ year={2022},
134
+ eprint={2109.07958},
135
+ archivePrefix={arXiv},
136
+ primaryClass={cs.CL}
137
+ }
138
+ @misc{DBLP:journals/corr/abs-1907-10641,
139
+ title={{WINOGRANDE:} An Adversarial Winograd Schema Challenge at Scale},
140
+ author={Keisuke Sakaguchi and Ronan Le Bras and Chandra Bhagavatula and Yejin Choi},
141
+ year={2019},
142
+ eprint={1907.10641},
143
+ archivePrefix={arXiv},
144
+ primaryClass={cs.CL}
145
+ }
146
+ @misc{DBLP:journals/corr/abs-2110-14168,
147
+ title={Training Verifiers to Solve Math Word Problems},
148
+ author={Karl Cobbe and
149
+ Vineet Kosaraju and
150
+ Mohammad Bavarian and
151
+ Mark Chen and
152
+ Heewoo Jun and
153
+ Lukasz Kaiser and
154
+ Matthias Plappert and
155
+ Jerry Tworek and
156
+ Jacob Hilton and
157
+ Reiichiro Nakano and
158
+ Christopher Hesse and
159
+ John Schulman},
160
+ year={2021},
161
+ eprint={2110.14168},
162
+ archivePrefix={arXiv},
163
+ primaryClass={cs.LG}
164
+ }