lucyknada leaderboard-pr-bot committed on
Commit
c4a7165
1 Parent(s): f8f8502

Adding Evaluation Results (#12)

Browse files

- Adding Evaluation Results (e25e9ea0a20f73d95ebc79af910be83924ace781)


Co-authored-by: Open LLM Leaderboard PR Bot <leaderboard-pr-bot@users.noreply.huggingface.co>

Files changed (1) hide show
  1. README.md +33 -1
README.md CHANGED
@@ -3,9 +3,9 @@ language:
3
  - en
4
  - zh
5
  license: other
6
- base_model: Qwen/Qwen2-72B-Instruct
7
  tags:
8
  - chat
 
9
  license_name: tongyi-qianwen
10
  license_link: https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE
11
  pipeline_tag: text-generation
@@ -21,6 +21,9 @@ model-index:
21
  args:
22
  num_few_shot: 0
23
  metrics:
 
 
 
24
  - type: inst_level_strict_acc and prompt_level_strict_acc
25
  value: 76.06
26
  name: strict accuracy
@@ -36,6 +39,9 @@ model-index:
36
  args:
37
  num_few_shot: 3
38
  metrics:
 
 
 
39
  - type: acc_norm
40
  value: 57.65
41
  name: normalized accuracy
@@ -51,6 +57,9 @@ model-index:
51
  args:
52
  num_few_shot: 4
53
  metrics:
 
 
 
54
  - type: exact_match
55
  value: 35.27
56
  name: exact match
@@ -66,6 +75,9 @@ model-index:
66
  args:
67
  num_few_shot: 0
68
  metrics:
 
 
 
69
  - type: acc_norm
70
  value: 18.79
71
  name: acc_norm
@@ -81,6 +93,9 @@ model-index:
81
  args:
82
  num_few_shot: 0
83
  metrics:
 
 
 
84
  - type: acc_norm
85
  value: 15.62
86
  name: acc_norm
@@ -101,6 +116,9 @@ model-index:
101
  - type: acc
102
  value: 49.64
103
  name: accuracy
 
 
 
104
  source:
105
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=alpindale/magnum-72b-v1
106
  name: Open LLM Leaderboard
@@ -152,3 +170,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
152
  |MuSR (0-shot) |15.62|
153
  |MMLU-PRO (5-shot) |49.64|
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  - en
4
  - zh
5
  license: other
 
6
  tags:
7
  - chat
8
+ base_model: Qwen/Qwen2-72B-Instruct
9
  license_name: tongyi-qianwen
10
  license_link: https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE
11
  pipeline_tag: text-generation
 
21
  args:
22
  num_few_shot: 0
23
  metrics:
24
+ - type: inst_level_strict_acc and prompt_level_strict_acc
25
+ value: 76.06
26
+ name: strict accuracy
27
  - type: inst_level_strict_acc and prompt_level_strict_acc
28
  value: 76.06
29
  name: strict accuracy
 
39
  args:
40
  num_few_shot: 3
41
  metrics:
42
+ - type: acc_norm
43
+ value: 57.65
44
+ name: normalized accuracy
45
  - type: acc_norm
46
  value: 57.65
47
  name: normalized accuracy
 
57
  args:
58
  num_few_shot: 4
59
  metrics:
60
+ - type: exact_match
61
+ value: 35.27
62
+ name: exact match
63
  - type: exact_match
64
  value: 35.27
65
  name: exact match
 
75
  args:
76
  num_few_shot: 0
77
  metrics:
78
+ - type: acc_norm
79
+ value: 18.79
80
+ name: acc_norm
81
  - type: acc_norm
82
  value: 18.79
83
  name: acc_norm
 
93
  args:
94
  num_few_shot: 0
95
  metrics:
96
+ - type: acc_norm
97
+ value: 15.62
98
+ name: acc_norm
99
  - type: acc_norm
100
  value: 15.62
101
  name: acc_norm
 
116
  - type: acc
117
  value: 49.64
118
  name: accuracy
119
+ - type: acc
120
+ value: 49.85
121
+ name: accuracy
122
  source:
123
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=alpindale/magnum-72b-v1
124
  name: Open LLM Leaderboard
 
170
  |MuSR (0-shot) |15.62|
171
  |MMLU-PRO (5-shot) |49.64|
172
 
173
+
174
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
175
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_anthracite-org__magnum-v1-72b)
176
+
177
+ | Metric |Value|
178
+ |-------------------|----:|
179
+ |Avg. |42.21|
180
+ |IFEval (0-Shot) |76.06|
181
+ |BBH (3-Shot) |57.65|
182
+ |MATH Lvl 5 (4-Shot)|35.27|
183
+ |GPQA (0-shot) |18.79|
184
+ |MuSR (0-shot) |15.62|
185
+ |MMLU-PRO (5-shot) |49.85|
186
+