karrrr123456 committed on
Commit
ee07c31
1 Parent(s): 892b3d7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +64 -74
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  tags:
3
- - generated_from_trainer
4
  license: mit
5
  datasets:
6
  - HuggingFaceH4/ultrachat_200k
@@ -9,26 +9,25 @@ language:
9
  - en
10
  base_model: mistralai/Mistral-7B-v0.1
11
  widget:
12
- - example_title: Pirate!
13
- messages:
14
- - role: system
15
- content: You are a pirate chatbot who always responds with Arr!
16
- - role: user
17
- content: "There's a llama on my lawn, how can I get rid of him?"
18
- output:
19
- text: >-
20
- Arr! 'Tis a puzzlin' matter, me hearty! A llama on yer lawn be a rare
21
- sight, but I've got a plan that might help ye get rid of 'im. Ye'll need
22
- to gather some carrots and hay, and then lure the llama away with the
23
- promise of a tasty treat. Once he's gone, ye can clean up yer lawn and
24
- enjoy the peace and quiet once again. But beware, me hearty, for there
25
- may be more llamas where that one came from! Arr!
26
  pipeline_tag: text-generation
27
  model-index:
28
  - name: zephyr-7b-beta
29
  results:
30
- # AI2 Reasoning Challenge (25-Shot)
31
- - task:
32
  type: text-generation
33
  name: Text Generation
34
  dataset:
@@ -39,15 +38,14 @@ model-index:
39
  args:
40
  num_few_shot: 25
41
  metrics:
42
- - type: acc_norm
43
- name: normalized accuracy
44
- value: 62.03071672354948
45
  source:
46
  name: Open LLM Leaderboard
47
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
48
-
49
- # HellaSwag (10-shot)
50
- - task:
51
  type: text-generation
52
  name: Text Generation
53
  dataset:
@@ -57,15 +55,14 @@ model-index:
57
  args:
58
  num_few_shot: 10
59
  metrics:
60
- - type: acc_norm
61
- name: normalized accuracy
62
- value: 84.35570603465445
63
  source:
64
  name: Open LLM Leaderboard
65
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
66
-
67
- # DROP (3-shot)
68
- - task:
69
  type: text-generation
70
  name: Text Generation
71
  dataset:
@@ -75,15 +72,14 @@ model-index:
75
  args:
76
  num_few_shot: 3
77
  metrics:
78
- - type: f1
79
- name: f1 score
80
- value: 9.662437080536909
81
  source:
82
  name: Open LLM Leaderboard
83
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
84
-
85
- # TruthfulQA (0-shot)
86
- - task:
87
  type: text-generation
88
  name: Text Generation
89
  dataset:
@@ -94,14 +90,13 @@ model-index:
94
  args:
95
  num_few_shot: 0
96
  metrics:
97
- - type: mc2
98
- value: 57.44916942762855
99
  source:
100
  name: Open LLM Leaderboard
101
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
102
-
103
- # GSM8k (5-shot)
104
- - task:
105
  type: text-generation
106
  name: Text Generation
107
  dataset:
@@ -112,15 +107,14 @@ model-index:
112
  args:
113
  num_few_shot: 5
114
  metrics:
115
- - type: acc
116
- name: accuracy
117
- value: 12.736921910538287
118
  source:
119
  name: Open LLM Leaderboard
120
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
121
-
122
- # MMLU (5-Shot)
123
- - task:
124
  type: text-generation
125
  name: Text Generation
126
  dataset:
@@ -131,15 +125,14 @@ model-index:
131
  args:
132
  num_few_shot: 5
133
  metrics:
134
- - type: acc
135
- name: accuracy
136
- value: 61.07
137
  source:
138
  name: Open LLM Leaderboard
139
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
140
-
141
- # Winogrande (5-shot)
142
- - task:
143
  type: text-generation
144
  name: Text Generation
145
  dataset:
@@ -150,38 +143,35 @@ model-index:
150
  args:
151
  num_few_shot: 5
152
  metrics:
153
- - type: acc
154
- name: accuracy
155
- value: 77.74269928966061
156
  source:
157
  name: Open LLM Leaderboard
158
- url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
159
-
160
- # AlpacaEval (taken from model card)
161
- - task:
162
  type: text-generation
163
  name: Text Generation
164
  dataset:
165
  name: AlpacaEval
166
  type: tatsu-lab/alpaca_eval
167
  metrics:
168
- - type: unknown
169
- name: win rate
170
- value: 0.9060
171
  source:
172
  url: https://tatsu-lab.github.io/alpaca_eval/
173
-
174
- # MT-Bench (taken from model card)
175
- - task:
176
  type: text-generation
177
  name: Text Generation
178
  dataset:
179
  name: MT-Bench
180
  type: unknown
181
  metrics:
182
- - type: unknown
183
- name: score
184
- value: 7.34
185
  source:
186
  url: https://huggingface.co/spaces/lmsys/mt-bench
187
  ---
 
1
  ---
2
  tags:
3
+ - not-for-all-audiences
4
  license: mit
5
  datasets:
6
  - HuggingFaceH4/ultrachat_200k
 
9
  - en
10
  base_model: mistralai/Mistral-7B-v0.1
11
  widget:
12
+ - example_title: Pirate!
13
+ messages:
14
+ - role: system
15
+ content: You are a pirate chatbot who always responds with Arr!
16
+ - role: user
17
+ content: There's a llama on my lawn, how can I get rid of him?
18
+ output:
19
+ text: >-
20
+ Arr! 'Tis a puzzlin' matter, me hearty! A llama on yer lawn be a rare
21
+ sight, but I've got a plan that might help ye get rid of 'im. Ye'll need
22
+ to gather some carrots and hay, and then lure the llama away with the
23
+ promise of a tasty treat. Once he's gone, ye can clean up yer lawn and
24
+ enjoy the peace and quiet once again. But beware, me hearty, for there may
25
+ be more llamas where that one came from! Arr!
26
  pipeline_tag: text-generation
27
  model-index:
28
  - name: zephyr-7b-beta
29
  results:
30
+ - task:
 
31
  type: text-generation
32
  name: Text Generation
33
  dataset:
 
38
  args:
39
  num_few_shot: 25
40
  metrics:
41
+ - type: acc_norm
42
+ name: normalized accuracy
43
+ value: 62.03071672354948
44
  source:
45
  name: Open LLM Leaderboard
46
+ url: >-
47
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
48
+ - task:
 
49
  type: text-generation
50
  name: Text Generation
51
  dataset:
 
55
  args:
56
  num_few_shot: 10
57
  metrics:
58
+ - type: acc_norm
59
+ name: normalized accuracy
60
+ value: 84.35570603465445
61
  source:
62
  name: Open LLM Leaderboard
63
+ url: >-
64
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
65
+ - task:
 
66
  type: text-generation
67
  name: Text Generation
68
  dataset:
 
72
  args:
73
  num_few_shot: 3
74
  metrics:
75
+ - type: f1
76
+ name: f1 score
77
+ value: 9.66243708053691
78
  source:
79
  name: Open LLM Leaderboard
80
+ url: >-
81
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
82
+ - task:
 
83
  type: text-generation
84
  name: Text Generation
85
  dataset:
 
90
  args:
91
  num_few_shot: 0
92
  metrics:
93
+ - type: mc2
94
+ value: 57.44916942762855
95
  source:
96
  name: Open LLM Leaderboard
97
+ url: >-
98
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
99
+ - task:
 
100
  type: text-generation
101
  name: Text Generation
102
  dataset:
 
107
  args:
108
  num_few_shot: 5
109
  metrics:
110
+ - type: acc
111
+ name: accuracy
112
+ value: 12.736921910538287
113
  source:
114
  name: Open LLM Leaderboard
115
+ url: >-
116
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
117
+ - task:
 
118
  type: text-generation
119
  name: Text Generation
120
  dataset:
 
125
  args:
126
  num_few_shot: 5
127
  metrics:
128
+ - type: acc
129
+ name: accuracy
130
+ value: 61.07
131
  source:
132
  name: Open LLM Leaderboard
133
+ url: >-
134
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
135
+ - task:
 
136
  type: text-generation
137
  name: Text Generation
138
  dataset:
 
143
  args:
144
  num_few_shot: 5
145
  metrics:
146
+ - type: acc
147
+ name: accuracy
148
+ value: 77.7426992896606
149
  source:
150
  name: Open LLM Leaderboard
151
+ url: >-
152
+ https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-beta
153
+ - task:
 
154
  type: text-generation
155
  name: Text Generation
156
  dataset:
157
  name: AlpacaEval
158
  type: tatsu-lab/alpaca_eval
159
  metrics:
160
+ - type: unknown
161
+ name: win rate
162
+ value: 0.906
163
  source:
164
  url: https://tatsu-lab.github.io/alpaca_eval/
165
+ - task:
 
 
166
  type: text-generation
167
  name: Text Generation
168
  dataset:
169
  name: MT-Bench
170
  type: unknown
171
  metrics:
172
+ - type: unknown
173
+ name: score
174
+ value: 7.34
175
  source:
176
  url: https://huggingface.co/spaces/lmsys/mt-bench
177
  ---