Update app.py
Browse files
app.py
CHANGED
@@ -284,7 +284,7 @@ By [Princeton Language and Intelligence (PLI), Princeton University](https://pli
|
|
284 |
|
285 |
### This is a demonstration of the Skill-Mix evaluation.
|
286 |
|
287 |
-
Paper link: [
|
288 |
|
289 |
### Samples are generated using 10% of the full set of skills and topics. Click the second tab for comparison between two generations.
|
290 |
|
@@ -427,9 +427,32 @@ Coming soon: generation by more models; grading by LLaMA-2.
|
|
427 |
c.change(fn_list[0], input_list[0], output_list[0]).then(fn_list[1], input_list[1], output_list[1]).then(fn_list[2], input_list[2], output_list[2]).then(fn_list[3], input_list[3], output_list[3]).then(fn_list[4], input_list[4], output_list[4]).then(fn_list[5], input_list[5], output_list[5])
|
428 |
else:
|
429 |
raise NotImplementedError
|
430 |
-
gr.Markdown('''
|
431 |
```
|
432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
```
|
434 |
''')
|
435 |
return demo
|
|
|
284 |
|
285 |
### This is a demonstration of the Skill-Mix evaluation.
|
286 |
|
287 |
+
Paper link: [https://arxiv.org/abs/2310.17567](https://arxiv.org/abs/2310.17567)
|
288 |
|
289 |
### Samples are generated using 10% of the full set of skills and topics. Click the second tab for comparison between two generations.
|
290 |
|
|
|
427 |
c.change(fn_list[0], input_list[0], output_list[0]).then(fn_list[1], input_list[1], output_list[1]).then(fn_list[2], input_list[2], output_list[2]).then(fn_list[3], input_list[3], output_list[3]).then(fn_list[4], input_list[4], output_list[4]).then(fn_list[5], input_list[5], output_list[5])
|
428 |
else:
|
429 |
raise NotImplementedError
|
430 |
+
gr.Markdown('''Please consider citing
|
431 |
```
|
432 |
+
@article{yu2023skillmix,
|
433 |
+
title={Skill-Mix: a Flexible and Expandable Family of Evaluations for AI models},
|
434 |
+
author={Yu, Dingli and Kaur, Simran and Gupta, Arushi and Brown-Cohen, Jonah and Goyal, Anirudh and Arora, Sanjeev},
|
435 |
+
journal={arXiv preprint arXiv:2310.17567},
|
436 |
+
year={2023}
|
437 |
+
}
|
438 |
+
```
|
439 |
+
```
|
440 |
+
@misc{openai2023gpt4,
|
441 |
+
title={GPT-4 Technical Report},
|
442 |
+
author={OpenAI},
|
443 |
+
year={2023},
|
444 |
+
eprint={2303.08774},
|
445 |
+
archivePrefix={arXiv},
|
446 |
+
primaryClass={cs.CL}
|
447 |
+
}
|
448 |
+
```
|
449 |
+
```
|
450 |
+
@article{touvron2023llama,
|
451 |
+
title={Llama 2: Open foundation and fine-tuned chat models},
|
452 |
+
author={Touvron, Hugo and Martin, Louis and Stone, Kevin and Albert, Peter and Almahairi, Amjad and Babaei, Yasmine and Bashlykov, Nikolay and Batra, Soumya and Bhargava, Prajjwal and Bhosale, Shruti and others},
|
453 |
+
journal={arXiv preprint arXiv:2307.09288},
|
454 |
+
year={2023}
|
455 |
+
}
|
456 |
```
|
457 |
''')
|
458 |
return demo
|