Files changed (1) hide show
  1. README.md +110 -2
README.md CHANGED
@@ -1,11 +1,106 @@
1
  ---
2
- license: apache-2.0
3
  language:
4
  - en
5
  - ko
6
  - ja
7
  - zh
8
  - es
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  > [!TIP]
@@ -54,4 +149,17 @@ Unless required by applicable law or agreed to in writing, software
54
  distributed under the License is distributed on an "AS IS" BASIS,
55
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
56
  See the License for the specific language governing permissions and
57
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
 
2
  language:
3
  - en
4
  - ko
5
  - ja
6
  - zh
7
  - es
8
+ license: apache-2.0
9
+ model-index:
10
+ - name: Qwen2-7B-Multilingual-RP
11
+ results:
12
+ - task:
13
+ type: text-generation
14
+ name: Text Generation
15
+ dataset:
16
+ name: IFEval (0-Shot)
17
+ type: HuggingFaceH4/ifeval
18
+ args:
19
+ num_few_shot: 0
20
+ metrics:
21
+ - type: inst_level_strict_acc and prompt_level_strict_acc
22
+ value: 43.47
23
+ name: strict accuracy
24
+ source:
25
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=maywell/Qwen2-7B-Multilingual-RP
26
+ name: Open LLM Leaderboard
27
+ - task:
28
+ type: text-generation
29
+ name: Text Generation
30
+ dataset:
31
+ name: BBH (3-Shot)
32
+ type: BBH
33
+ args:
34
+ num_few_shot: 3
35
+ metrics:
36
+ - type: acc_norm
37
+ value: 30.54
38
+ name: normalized accuracy
39
+ source:
40
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=maywell/Qwen2-7B-Multilingual-RP
41
+ name: Open LLM Leaderboard
42
+ - task:
43
+ type: text-generation
44
+ name: Text Generation
45
+ dataset:
46
+ name: MATH Lvl 5 (4-Shot)
47
+ type: hendrycks/competition_math
48
+ args:
49
+ num_few_shot: 4
50
+ metrics:
51
+ - type: exact_match
52
+ value: 20.62
53
+ name: exact match
54
+ source:
55
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=maywell/Qwen2-7B-Multilingual-RP
56
+ name: Open LLM Leaderboard
57
+ - task:
58
+ type: text-generation
59
+ name: Text Generation
60
+ dataset:
61
+ name: GPQA (0-shot)
62
+ type: Idavidrein/gpqa
63
+ args:
64
+ num_few_shot: 0
65
+ metrics:
66
+ - type: acc_norm
67
+ value: 6.26
68
+ name: normalized accuracy
69
+ source:
70
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=maywell/Qwen2-7B-Multilingual-RP
71
+ name: Open LLM Leaderboard
72
+ - task:
73
+ type: text-generation
74
+ name: Text Generation
75
+ dataset:
76
+ name: MuSR (0-shot)
77
+ type: TAUR-Lab/MuSR
78
+ args:
79
+ num_few_shot: 0
80
+ metrics:
81
+ - type: acc_norm
82
+ value: 6.23
83
+ name: normalized accuracy
84
+ source:
85
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=maywell/Qwen2-7B-Multilingual-RP
86
+ name: Open LLM Leaderboard
87
+ - task:
88
+ type: text-generation
89
+ name: Text Generation
90
+ dataset:
91
+ name: MMLU-PRO (5-shot)
92
+ type: TIGER-Lab/MMLU-Pro
93
+ config: main
94
+ split: test
95
+ args:
96
+ num_few_shot: 5
97
+ metrics:
98
+ - type: acc
99
+ value: 31.77
100
+ name: accuracy
101
+ source:
102
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=maywell/Qwen2-7B-Multilingual-RP
103
+ name: Open LLM Leaderboard
104
  ---
105
 
106
  > [!TIP]
 
149
  distributed under the License is distributed on an "AS IS" BASIS,
150
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
151
  See the License for the specific language governing permissions and
152
+ limitations under the License.
153
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
154
+ Detailed per-task results are available in the [evaluation details dataset](https://huggingface.co/datasets/open-llm-leaderboard/details_maywell__Qwen2-7B-Multilingual-RP).
155
+
156
+ | Metric |Value|
157
+ |-------------------|----:|
158
+ |Avg. |23.15|
159
+ |IFEval (0-Shot) |43.47|
160
+ |BBH (3-Shot) |30.54|
161
+ |MATH Lvl 5 (4-Shot)|20.62|
162
+ |GPQA (0-shot) | 6.26|
163
+ |MuSR (0-shot) | 6.23|
164
+ |MMLU-PRO (5-shot) |31.77|
165
+