delete dataset short name
Browse files
app.py
CHANGED
@@ -48,7 +48,6 @@ def get_task_summary(results):
|
|
48 |
"ARC":
|
49 |
{"dataset_type":"ai2_arc",
|
50 |
"dataset_name":"AI2 Reasoning Challenge (25-Shot)",
|
51 |
-
"dataset_short_name": "ARC (25-shot)",
|
52 |
"metric_type":"acc_norm",
|
53 |
"metric_value":results["ARC"],
|
54 |
"dataset_config":"ARC-Challenge",
|
@@ -60,7 +59,6 @@ def get_task_summary(results):
|
|
60 |
"HellaSwag":
|
61 |
{"dataset_type":"hellaswag",
|
62 |
"dataset_name":"HellaSwag (10-Shot)",
|
63 |
-
"dataset_short_name": "HellaSwag (10-shot)",
|
64 |
"metric_type":"acc_norm",
|
65 |
"metric_value":results["HellaSwag"],
|
66 |
"dataset_config":None,
|
@@ -73,7 +71,6 @@ def get_task_summary(results):
|
|
73 |
{
|
74 |
"dataset_type":"cais/mmlu",
|
75 |
"dataset_name":"MMLU (5-Shot)",
|
76 |
-
"dataset_short_name": "MMLU (5-Shot)",
|
77 |
"metric_type":"acc",
|
78 |
"metric_value":results["MMLU"],
|
79 |
"dataset_config":"all",
|
@@ -86,7 +83,6 @@ def get_task_summary(results):
|
|
86 |
{
|
87 |
"dataset_type":"truthful_qa",
|
88 |
"dataset_name":"TruthfulQA (0-shot)",
|
89 |
-
"dataset_short_name": "TruthfulQA (0-shot)",
|
90 |
"metric_type":"mc2",
|
91 |
"metric_value":results["TruthfulQA"],
|
92 |
"dataset_config":"multiple_choice",
|
@@ -99,7 +95,6 @@ def get_task_summary(results):
|
|
99 |
{
|
100 |
"dataset_type":"winogrande",
|
101 |
"dataset_name":"Winogrande (5-shot)",
|
102 |
-
"dataset_short_name": "Winogrande (5-shot)",
|
103 |
"metric_type":"acc",
|
104 |
"metric_value":results["Winogrande"],
|
105 |
"dataset_config":"winogrande_xl",
|
@@ -111,7 +106,6 @@ def get_task_summary(results):
|
|
111 |
{
|
112 |
"dataset_type":"gsm8k",
|
113 |
"dataset_name":"GSM8k (5-shot)",
|
114 |
-
"dataset_short_name": "GSM8k (5-shot)",
|
115 |
"metric_type":"acc",
|
116 |
"metric_value":results["GSM8K"],
|
117 |
"dataset_config":"main",
|
@@ -204,9 +198,9 @@ The leaderboard's backend mainly runs on the [Hugging Face Hub API](https://hugg
|
|
204 |
|
205 |
## 🤝 Acknowledgements
|
206 |
|
207 |
-
- Special thanks to Clémentine Fourrier (clefourrier) for her help and contributions to the code.
|
208 |
|
209 |
-
- Special thanks to [Lucain Pouget (Wauplin)](https://huggingface.co/
|
210 |
"""
|
211 |
|
212 |
demo = gr.Interface(title=gradio_title, description=gradio_desc, fn=commit, inputs=["text", "text"], outputs="text")
|
|
|
48 |
"ARC":
|
49 |
{"dataset_type":"ai2_arc",
|
50 |
"dataset_name":"AI2 Reasoning Challenge (25-Shot)",
|
|
|
51 |
"metric_type":"acc_norm",
|
52 |
"metric_value":results["ARC"],
|
53 |
"dataset_config":"ARC-Challenge",
|
|
|
59 |
"HellaSwag":
|
60 |
{"dataset_type":"hellaswag",
|
61 |
"dataset_name":"HellaSwag (10-Shot)",
|
|
|
62 |
"metric_type":"acc_norm",
|
63 |
"metric_value":results["HellaSwag"],
|
64 |
"dataset_config":None,
|
|
|
71 |
{
|
72 |
"dataset_type":"cais/mmlu",
|
73 |
"dataset_name":"MMLU (5-Shot)",
|
|
|
74 |
"metric_type":"acc",
|
75 |
"metric_value":results["MMLU"],
|
76 |
"dataset_config":"all",
|
|
|
83 |
{
|
84 |
"dataset_type":"truthful_qa",
|
85 |
"dataset_name":"TruthfulQA (0-shot)",
|
|
|
86 |
"metric_type":"mc2",
|
87 |
"metric_value":results["TruthfulQA"],
|
88 |
"dataset_config":"multiple_choice",
|
|
|
95 |
{
|
96 |
"dataset_type":"winogrande",
|
97 |
"dataset_name":"Winogrande (5-shot)",
|
|
|
98 |
"metric_type":"acc",
|
99 |
"metric_value":results["Winogrande"],
|
100 |
"dataset_config":"winogrande_xl",
|
|
|
106 |
{
|
107 |
"dataset_type":"gsm8k",
|
108 |
"dataset_name":"GSM8k (5-shot)",
|
|
|
109 |
"metric_type":"acc",
|
110 |
"metric_value":results["GSM8K"],
|
111 |
"dataset_config":"main",
|
|
|
198 |
|
199 |
## 🤝 Acknowledgements
|
200 |
|
201 |
+
- Special thanks to [Clémentine Fourrier (clefourrier)](https://huggingface.co/clefourrier) for her help and contributions to the code.
|
202 |
|
203 |
+
- Special thanks to [Lucain Pouget (Wauplin)](https://huggingface.co/Wauplin) for assisting with the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).
|
204 |
"""
|
205 |
|
206 |
demo = gr.Interface(title=gradio_title, description=gradio_desc, fn=commit, inputs=["text", "text"], outputs="text")
|