Files changed (1) hide show
  1. README.md +52 -27
README.md CHANGED
@@ -1,18 +1,24 @@
1
  ---
2
- datasets:
3
- - CultriX/uptodate-small
4
- - ajibawa-2023/Python-Code-23k-ShareGPT
5
- - isaiahbjork/chain-of-thought-sharegpt
6
- - shibing624/sharegpt_gpt4
7
- - theblackcat102/sharegpt-english
8
- base_model:
9
- - CultriX/Qwen2.5-14B-Wernicke
10
  tags:
11
  - medical
12
  - gpt4
13
  - sft
14
  - mergekit
15
  - merge
 
 
 
 
 
 
 
 
 
 
 
16
  model-index:
17
  - name: Qwen2.5-14B-Wernicke
18
  results:
@@ -28,9 +34,11 @@ model-index:
28
  - type: inst_level_strict_acc and prompt_level_strict_acc
29
  value: 52.35
30
  name: strict accuracy
 
 
 
31
  source:
32
- url: >-
33
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
34
  name: Open LLM Leaderboard
35
  - task:
36
  type: text-generation
@@ -44,9 +52,11 @@ model-index:
44
  - type: acc_norm
45
  value: 50.64
46
  name: normalized accuracy
 
 
 
47
  source:
48
- url: >-
49
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
50
  name: Open LLM Leaderboard
51
  - task:
52
  type: text-generation
@@ -60,9 +70,11 @@ model-index:
60
  - type: exact_match
61
  value: 30.06
62
  name: exact match
 
 
 
63
  source:
64
- url: >-
65
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
66
  name: Open LLM Leaderboard
67
  - task:
68
  type: text-generation
@@ -76,9 +88,11 @@ model-index:
76
  - type: acc_norm
77
  value: 19.13
78
  name: acc_norm
 
 
 
79
  source:
80
- url: >-
81
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
82
  name: Open LLM Leaderboard
83
  - task:
84
  type: text-generation
@@ -92,9 +106,11 @@ model-index:
92
  - type: acc_norm
93
  value: 18.25
94
  name: acc_norm
 
 
 
95
  source:
96
- url: >-
97
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
98
  name: Open LLM Leaderboard
99
  - task:
100
  type: text-generation
@@ -110,16 +126,12 @@ model-index:
110
  - type: acc
111
  value: 49.15
112
  name: accuracy
 
 
 
113
  source:
114
- url: >-
115
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
116
  name: Open LLM Leaderboard
117
- license: apache-2.0
118
- language:
119
- - en
120
- metrics:
121
- - accuracy
122
- pipeline_tag: text-generation
123
  ---
124
  # merge
125
 
@@ -166,4 +178,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
166
  |MATH Lvl 5 (4-Shot)|30.06|
167
  |GPQA (0-shot) |19.13|
168
  |MuSR (0-shot) |18.25|
169
- |MMLU-PRO (5-shot) |49.15|
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
 
 
 
 
 
5
  tags:
6
  - medical
7
  - gpt4
8
  - sft
9
  - mergekit
10
  - merge
11
+ base_model:
12
+ - CultriX/Qwen2.5-14B-Wernicke
13
+ datasets:
14
+ - CultriX/uptodate-small
15
+ - ajibawa-2023/Python-Code-23k-ShareGPT
16
+ - isaiahbjork/chain-of-thought-sharegpt
17
+ - shibing624/sharegpt_gpt4
18
+ - theblackcat102/sharegpt-english
19
+ metrics:
20
+ - accuracy
21
+ pipeline_tag: text-generation
22
  model-index:
23
  - name: Qwen2.5-14B-Wernicke
24
  results:
 
34
  - type: inst_level_strict_acc and prompt_level_strict_acc
35
  value: 52.35
36
  name: strict accuracy
37
+ - type: inst_level_strict_acc and prompt_level_strict_acc
38
+ value: 49.37
39
+ name: strict accuracy
40
  source:
41
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
 
42
  name: Open LLM Leaderboard
43
  - task:
44
  type: text-generation
 
52
  - type: acc_norm
53
  value: 50.64
54
  name: normalized accuracy
55
+ - type: acc_norm
56
+ value: 49.33
57
+ name: normalized accuracy
58
  source:
59
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
 
60
  name: Open LLM Leaderboard
61
  - task:
62
  type: text-generation
 
70
  - type: exact_match
71
  value: 30.06
72
  name: exact match
73
+ - type: exact_match
74
+ value: 35.8
75
+ name: exact match
76
  source:
77
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
 
78
  name: Open LLM Leaderboard
79
  - task:
80
  type: text-generation
 
88
  - type: acc_norm
89
  value: 19.13
90
  name: acc_norm
91
+ - type: acc_norm
92
+ value: 13.87
93
+ name: acc_norm
94
  source:
95
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
 
96
  name: Open LLM Leaderboard
97
  - task:
98
  type: text-generation
 
106
  - type: acc_norm
107
  value: 18.25
108
  name: acc_norm
109
+ - type: acc_norm
110
+ value: 7.55
111
+ name: acc_norm
112
  source:
113
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
 
114
  name: Open LLM Leaderboard
115
  - task:
116
  type: text-generation
 
126
  - type: acc
127
  value: 49.15
128
  name: accuracy
129
+ - type: acc
130
+ value: 45.22
131
+ name: accuracy
132
  source:
133
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
 
134
  name: Open LLM Leaderboard
 
 
 
 
 
 
135
  ---
136
  # merge
137
 
 
178
  |MATH Lvl 5 (4-Shot)|30.06|
179
  |GPQA (0-shot) |19.13|
180
  |MuSR (0-shot) |18.25|
181
+ |MMLU-PRO (5-shot) |49.15|
182
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
183
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_CultriX__Qwen2.5-14B-Wernicke-SFT)
184
+
185
+ | Metric |Value|
186
+ |-------------------|----:|
187
+ |Avg. |33.52|
188
+ |IFEval (0-Shot) |49.37|
189
+ |BBH (3-Shot) |49.33|
190
+ |MATH Lvl 5 (4-Shot)|35.80|
191
+ |GPQA (0-shot) |13.87|
192
+ |MuSR (0-shot) | 7.55|
193
+ |MMLU-PRO (5-shot) |45.22|
194
+