Spaces:
Running
Running
update
Browse files
- leaderboard.py +6 -29
leaderboard.py
CHANGED
@@ -24,14 +24,7 @@ def make_default_md_2():
|
|
24 |
return leaderboard_md
|
25 |
|
26 |
leaderboard_md = """
|
27 |
-
Three benchmarks are displayed: **
|
28 |
-
- [Chatbot Arena](https://chat.lmsys.org/?arena) - a crowdsourced, randomized battle platform. We use 500K+ user votes to compute model strength.
|
29 |
-
- [MT-Bench](https://arxiv.org/abs/2306.05685): a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
|
30 |
-
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot): a test to measure a model's multitask accuracy on 57 tasks.
|
31 |
-
|
32 |
-
💻 Code: The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge).
|
33 |
-
The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval).
|
34 |
-
Higher values are better for all benchmarks. Empty cells mean not available.
|
35 |
"""
|
36 |
|
37 |
acknowledgment_md = """
|
@@ -43,33 +36,17 @@ The service is a research preview. It only provides limited safety measures and
|
|
43 |
It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
|
44 |
Please do not upload any private information.
|
45 |
The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.
|
46 |
-
|
47 |
-
### Acknowledgment
|
48 |
-
We thank [UC Berkeley SkyLab](https://sky.cs.berkeley.edu/), [Kaggle](https://www.kaggle.com/), [MBZUAI](https://mbzuai.ac.ae/), [a16z](https://www.a16z.com/), [Together AI](https://www.together.ai/), [Hyperbolic](https://hyperbolic.xyz/), [Anyscale](https://www.anyscale.com/), [HuggingFace](https://huggingface.co/) for their generous [sponsorship](https://lmsys.org/donations/).
|
49 |
-
|
50 |
-
<div class="sponsor-image-about">
|
51 |
-
<img src="https://storage.googleapis.com/public-arena-asset/skylab.png" alt="SkyLab">
|
52 |
-
<img src="https://storage.googleapis.com/public-arena-asset/kaggle.png" alt="Kaggle">
|
53 |
-
<img src="https://storage.googleapis.com/public-arena-asset/mbzuai.jpeg" alt="MBZUAI">
|
54 |
-
<img src="https://storage.googleapis.com/public-arena-asset/a16z.jpeg" alt="a16z">
|
55 |
-
<img src="https://storage.googleapis.com/public-arena-asset/together.png" alt="Together AI">
|
56 |
-
<img src="https://storage.googleapis.com/public-arena-asset/hyperbolic_logo.png" alt="Hyperbolic">
|
57 |
-
<img src="https://storage.googleapis.com/public-arena-asset/anyscale.png" alt="AnyScale">
|
58 |
-
<img src="https://storage.googleapis.com/public-arena-asset/huggingface.png" alt="HuggingFace">
|
59 |
-
</div>
|
60 |
"""
|
61 |
|
62 |
citation_md = """
|
63 |
### Citation
|
64 |
Please cite the following paper if you find our leaderboard or dataset helpful.
|
65 |
```
|
66 |
-
@
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
archivePrefix={arXiv},
|
72 |
-
primaryClass={cs.AI}
|
73 |
}
|
74 |
"""
|
75 |
|
|
|
24 |
return leaderboard_md
|
25 |
|
26 |
leaderboard_md = """
|
27 |
+
Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
"""
|
29 |
|
30 |
acknowledgment_md = """
|
|
|
36 |
It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
|
37 |
Please do not upload any private information.
|
38 |
The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
"""
|
40 |
|
41 |
citation_md = """
|
42 |
### Citation
|
43 |
Please cite the following paper if you find our leaderboard or dataset helpful.
|
44 |
```
|
45 |
+
@article{huang2024effibench,
|
46 |
+
title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},
|
47 |
+
author={Huang, Dong and Zhang, Jie M and Qing, Yuhao and Cui, Heming},
|
48 |
+
journal={arXiv preprint arXiv:2402.02037},
|
49 |
+
year={2024}
|
|
|
|
|
50 |
}
|
51 |
"""
|
52 |
|