Files changed (1) hide show
  1. README.md +64 -73
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
4
  license: mit
5
  datasets:
6
  - HuggingFaceH4/ultrachat_200k
@@ -9,26 +10,25 @@ language:
9
  - en
10
  base_model: mistralai/Mistral-7B-v0.1
11
  widget:
12
- - example_title: Pirate!
13
- messages:
14
- - role: system
15
- content: You are a pirate chatbot who always responds with Arr!
16
- - role: user
17
- content: "There's a llama on my lawn, how can I get rid of him?"
18
- output:
19
- text: >-
20
- Arr! 'Tis a puzzlin' matter, me hearty! A llama on yer lawn be a rare
21
- sight, but I've got a plan that might help ye get rid of 'im. Ye'll need
22
- to gather some carrots and hay, and then lure the llama away with the
23
- promise of a tasty treat. Once he's gone, ye can clean up yer lawn and
24
- enjoy the peace and quiet once again. But beware, me hearty, for there
25
- may be more llamas where that one came from! Arr!
26
  pipeline_tag: text-generation
27
  model-index:
28
  - name: zephyr-7b-beta
29
  results:
30
- # AI2 Reasoning Challenge (25-Shot)
31
- - task:
32
  type: text-generation
33
  name: Text Generation
34
  dataset:
@@ -39,15 +39,14 @@ model-index:
39
  args:
40
  num_few_shot: 25
41
  metrics:
42
- - type: acc_norm
43
- name: normalized accuracy
44
- value: 62.03071672354948
45
  source:
46
  name: Open LLM Leaderboard
47
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
48
-
49
- # HellaSwag (10-shot)
50
- - task:
51
  type: text-generation
52
  name: Text Generation
53
  dataset:
@@ -57,15 +56,14 @@ model-index:
57
  args:
58
  num_few_shot: 10
59
  metrics:
60
- - type: acc_norm
61
- name: normalized accuracy
62
- value: 84.35570603465445
63
  source:
64
  name: Open LLM Leaderboard
65
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
66
-
67
- # DROP (3-shot)
68
- - task:
69
  type: text-generation
70
  name: Text Generation
71
  dataset:
@@ -75,15 +73,14 @@ model-index:
75
  args:
76
  num_few_shot: 3
77
  metrics:
78
- - type: f1
79
- name: f1 score
80
- value: 9.662437080536909
81
  source:
82
  name: Open LLM Leaderboard
83
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
84
-
85
- # TruthfulQA (0-shot)
86
- - task:
87
  type: text-generation
88
  name: Text Generation
89
  dataset:
@@ -94,14 +91,13 @@ model-index:
94
  args:
95
  num_few_shot: 0
96
  metrics:
97
- - type: mc2
98
- value: 57.44916942762855
99
  source:
100
  name: Open LLM Leaderboard
101
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
102
-
103
- # GSM8k (5-shot)
104
- - task:
105
  type: text-generation
106
  name: Text Generation
107
  dataset:
@@ -112,15 +108,14 @@ model-index:
112
  args:
113
  num_few_shot: 5
114
  metrics:
115
- - type: acc
116
- name: accuracy
117
- value: 12.736921910538287
118
  source:
119
  name: Open LLM Leaderboard
120
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
121
-
122
- # MMLU (5-Shot)
123
- - task:
124
  type: text-generation
125
  name: Text Generation
126
  dataset:
@@ -131,15 +126,14 @@ model-index:
131
  args:
132
  num_few_shot: 5
133
  metrics:
134
- - type: acc
135
- name: accuracy
136
- value: 61.07
137
  source:
138
  name: Open LLM Leaderboard
139
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
140
-
141
- # Winogrande (5-shot)
142
- - task:
143
  type: text-generation
144
  name: Text Generation
145
  dataset:
@@ -150,38 +144,35 @@ model-index:
150
  args:
151
  num_few_shot: 5
152
  metrics:
153
- - type: acc
154
- name: accuracy
155
- value: 77.74269928966061
156
  source:
157
  name: Open LLM Leaderboard
158
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
159
-
160
- # AlpacaEval (taken from model card)
161
- - task:
162
  type: text-generation
163
  name: Text Generation
164
  dataset:
165
  name: AlpacaEval
166
  type: tatsu-lab/alpaca_eval
167
  metrics:
168
- - type: unknown
169
- name: win rate
170
- value: 0.9060
171
  source:
172
  url: https://tatsu-lab.github.io/alpaca_eval/
173
-
174
- # MT-Bench (taken from model card)
175
- - task:
176
  type: text-generation
177
  name: Text Generation
178
  dataset:
179
  name: MT-Bench
180
  type: unknown
181
  metrics:
182
- - type: unknown
183
- name: score
184
- value: 7.34
185
  source:
186
  url: https://huggingface.co/spaces/lmsys/mt-bench
187
  ---
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ - not-for-all-audiences
5
  license: mit
6
  datasets:
7
  - HuggingFaceH4/ultrachat_200k
 
10
  - en
11
  base_model: mistralai/Mistral-7B-v0.1
12
  widget:
13
+ - example_title: Pirate!
14
+ messages:
15
+ - role: system
16
+ content: You are a pirate chatbot who always responds with Arr!
17
+ - role: user
18
+ content: There's a llama on my lawn, how can I get rid of him?
19
+ output:
20
+ text: >-
21
+ Arr! 'Tis a puzzlin' matter, me hearty! A llama on yer lawn be a rare
22
+ sight, but I've got a plan that might help ye get rid of 'im. Ye'll need
23
+ to gather some carrots and hay, and then lure the llama away with the
24
+ promise of a tasty treat. Once he's gone, ye can clean up yer lawn and
25
+ enjoy the peace and quiet once again. But beware, me hearty, for there may
26
+ be more llamas where that one came from! Arr!
27
  pipeline_tag: text-generation
28
  model-index:
29
  - name: zephyr-7b-beta
30
  results:
31
+ - task:
 
32
  type: text-generation
33
  name: Text Generation
34
  dataset:
 
39
  args:
40
  num_few_shot: 25
41
  metrics:
42
+ - type: acc_norm
43
+ name: normalized accuracy
44
+ value: 62.03071672354948
45
  source:
46
  name: Open LLM Leaderboard
47
+ url: >-
48
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
49
+ - task:
 
50
  type: text-generation
51
  name: Text Generation
52
  dataset:
 
56
  args:
57
  num_few_shot: 10
58
  metrics:
59
+ - type: acc_norm
60
+ name: normalized accuracy
61
+ value: 84.35570603465445
62
  source:
63
  name: Open LLM Leaderboard
64
+ url: >-
65
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
66
+ - task:
 
67
  type: text-generation
68
  name: Text Generation
69
  dataset:
 
73
  args:
74
  num_few_shot: 3
75
  metrics:
76
+ - type: f1
77
+ name: f1 score
78
+ value: 9.66243708053691
79
  source:
80
  name: Open LLM Leaderboard
81
+ url: >-
82
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
83
+ - task:
 
84
  type: text-generation
85
  name: Text Generation
86
  dataset:
 
91
  args:
92
  num_few_shot: 0
93
  metrics:
94
+ - type: mc2
95
+ value: 57.44916942762855
96
  source:
97
  name: Open LLM Leaderboard
98
+ url: >-
99
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
100
+ - task:
 
101
  type: text-generation
102
  name: Text Generation
103
  dataset:
 
108
  args:
109
  num_few_shot: 5
110
  metrics:
111
+ - type: acc
112
+ name: accuracy
113
+ value: 12.736921910538287
114
  source:
115
  name: Open LLM Leaderboard
116
+ url: >-
117
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
118
+ - task:
 
119
  type: text-generation
120
  name: Text Generation
121
  dataset:
 
126
  args:
127
  num_few_shot: 5
128
  metrics:
129
+ - type: acc
130
+ name: accuracy
131
+ value: 61.07
132
  source:
133
  name: Open LLM Leaderboard
134
+ url: >-
135
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
136
+ - task:
 
137
  type: text-generation
138
  name: Text Generation
139
  dataset:
 
144
  args:
145
  num_few_shot: 5
146
  metrics:
147
+ - type: acc
148
+ name: accuracy
149
+ value: 77.7426992896606
150
  source:
151
  name: Open LLM Leaderboard
152
+ url: >-
153
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
154
+ - task:
 
155
  type: text-generation
156
  name: Text Generation
157
  dataset:
158
  name: AlpacaEval
159
  type: tatsu-lab/alpaca_eval
160
  metrics:
161
+ - type: unknown
162
+ name: win rate
163
+ value: 0.906
164
  source:
165
  url: https://tatsu-lab.github.io/alpaca_eval/
166
+ - task:
 
 
167
  type: text-generation
168
  name: Text Generation
169
  dataset:
170
  name: MT-Bench
171
  type: unknown
172
  metrics:
173
+ - type: unknown
174
+ name: score
175
+ value: 7.34
176
  source:
177
  url: https://huggingface.co/spaces/lmsys/mt-bench
178
  ---