thomwolf HF staff committed on
Commit
e61a555
1 Parent(s): b3f0642

adding citations

Browse files
Files changed (1) hide show
  1. content.py +58 -0
content.py CHANGED
@@ -68,5 +68,63 @@ CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard,
68
  publisher = {Hugging Face},
69
  howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  }"""
72
 
 
68
  publisher = {Hugging Face},
69
  howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
70
 
71
+ }
72
+ @software{eval-harness,
73
+ author = {Gao, Leo and
74
+ Tow, Jonathan and
75
+ Biderman, Stella and
76
+ Black, Sid and
77
+ DiPofi, Anthony and
78
+ Foster, Charles and
79
+ Golding, Laurence and
80
+ Hsu, Jeffrey and
81
+ McDonell, Kyle and
82
+ Muennighoff, Niklas and
83
+ Phang, Jason and
84
+ Reynolds, Laria and
85
+ Tang, Eric and
86
+ Thite, Anish and
87
+ Wang, Ben and
88
+ Wang, Kevin and
89
+ Zou, Andy},
90
+ title = {A framework for few-shot language model evaluation},
91
+ month = sep,
92
+ year = 2021,
93
+ publisher = {Zenodo},
94
+ version = {v0.0.1},
95
+ doi = {10.5281/zenodo.5371628},
96
+ url = {https://doi.org/10.5281/zenodo.5371628}
97
+ }
98
+ @misc{clark2018think,
99
+ title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
100
+ author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
101
+ year={2018},
102
+ eprint={1803.05457},
103
+ archivePrefix={arXiv},
104
+ primaryClass={cs.AI}
105
+ }
106
+ @misc{zellers2019hellaswag,
107
+ title={HellaSwag: Can a Machine Really Finish Your Sentence?},
108
+ author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
109
+ year={2019},
110
+ eprint={1905.07830},
111
+ archivePrefix={arXiv},
112
+ primaryClass={cs.CL}
113
+ }
114
+ @misc{hendrycks2021measuring,
115
+ title={Measuring Massive Multitask Language Understanding},
116
+ author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
117
+ year={2021},
118
+ eprint={2009.03300},
119
+ archivePrefix={arXiv},
120
+ primaryClass={cs.CY}
121
+ }
122
+ @misc{lin2022truthfulqa,
123
+ title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
124
+ author={Stephanie Lin and Jacob Hilton and Owain Evans},
125
+ year={2022},
126
+ eprint={2109.07958},
127
+ archivePrefix={arXiv},
128
+ primaryClass={cs.CL}
129
  }"""
130