Undi95 committed
Commit 0d204bf
Parent: fdb61be

Upload SolarMaid-v0.1.1.json

Files changed (1)
  1. SolarMaid-v0.1.1.json +347 -0
SolarMaid-v0.1.1.json ADDED
@@ -0,0 +1,347 @@
+ {
+   "results": {
+     "arc_challenge": {
+       "acc,none": 0.5930034129692833,
+       "acc_stderr,none": 0.014356399418009123,
+       "acc_norm,none": 0.6143344709897611,
+       "acc_norm_stderr,none": 0.014224250973257177,
+       "alias": "arc_challenge"
+     },
+     "arc_easy": {
+       "acc,none": 0.8379629629629629,
+       "acc_stderr,none": 0.007561148218715585,
+       "acc_norm,none": 0.8265993265993266,
+       "acc_norm_stderr,none": 0.007768570412816704,
+       "alias": "arc_easy"
+     },
+     "gsm8k": {
+       "exact_match,get-answer": 0.5890826383623957,
+       "exact_match_stderr,get-answer": 0.013552132901423226,
+       "alias": "gsm8k"
+     },
+     "hellaswag": {
+       "acc,none": 0.6665006970722963,
+       "acc_stderr,none": 0.00470499629414501,
+       "acc_norm,none": 0.8445528779127663,
+       "acc_norm_stderr,none": 0.003615898928269306,
+       "alias": "hellaswag"
+     },
+     "piqa": {
+       "acc,none": 0.8128400435255713,
+       "acc_stderr,none": 0.009100273290473547,
+       "acc_norm,none": 0.8264417845484222,
+       "acc_norm_stderr,none": 0.008836375101386922,
+       "alias": "piqa"
+     },
+     "truthfulqa_mc2": {
+       "acc,none": 0.6089171791978354,
+       "acc_stderr,none": 0.015669761019363578,
+       "alias": "truthfulqa_mc2"
+     },
+     "winogrande": {
+       "acc,none": 0.7426992896606156,
+       "acc_stderr,none": 0.012285989618865702,
+       "alias": "winogrande"
+     }
+   },
+   "configs": {
+     "arc_challenge": {
+       "task": "arc_challenge",
+       "group": [
+         "ai2_arc"
+       ],
+       "dataset_path": "ai2_arc",
+       "dataset_name": "ARC-Challenge",
+       "training_split": "train",
+       "validation_split": "validation",
+       "test_split": "test",
+       "doc_to_text": "Question: {{question}}\nAnswer:",
+       "doc_to_target": "{{choices.label.index(answerKey)}}",
+       "doc_to_choice": "{{choices.text}}",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         },
+         {
+           "metric": "acc_norm",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
+       "metadata": [
+         {
+           "version": 1.0
+         }
+       ]
+     },
+     "arc_easy": {
+       "task": "arc_easy",
+       "group": [
+         "ai2_arc"
+       ],
+       "dataset_path": "ai2_arc",
+       "dataset_name": "ARC-Easy",
+       "training_split": "train",
+       "validation_split": "validation",
+       "test_split": "test",
+       "doc_to_text": "Question: {{question}}\nAnswer:",
+       "doc_to_target": "{{choices.label.index(answerKey)}}",
+       "doc_to_choice": "{{choices.text}}",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         },
+         {
+           "metric": "acc_norm",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
+       "metadata": [
+         {
+           "version": 1.0
+         }
+       ]
+     },
+     "gsm8k": {
+       "task": "gsm8k",
+       "group": [
+         "math_word_problems"
+       ],
+       "dataset_path": "gsm8k",
+       "dataset_name": "main",
+       "training_split": "train",
+       "test_split": "test",
+       "fewshot_split": "train",
+       "doc_to_text": "Question: {{question}}\nAnswer:",
+       "doc_to_target": "{{answer}}",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "num_fewshot": 5,
+       "metric_list": [
+         {
+           "metric": "exact_match",
+           "aggregation": "mean",
+           "higher_is_better": true,
+           "ignore_case": true,
+           "ignore_punctuation": false,
+           "regexes_to_ignore": [
+             ",",
+             "\\$",
+             "(?s).*#### "
+           ]
+         }
+       ],
+       "output_type": "generate_until",
+       "generation_kwargs": {
+         "until": [
+           "\n\n",
+           "Question:"
+         ],
+         "do_sample": false,
+         "temperature": 0.0
+       },
+       "repeats": 1,
+       "filter_list": [
+         {
+           "name": "get-answer",
+           "filter": [
+             {
+               "function": "regex",
+               "regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
+             },
+             {
+               "function": "take_first"
+             }
+           ]
+         }
+       ],
+       "should_decontaminate": false,
+       "metadata": [
+         {
+           "version": 1.0
+         }
+       ]
+     },
+     "hellaswag": {
+       "task": "hellaswag",
+       "group": [
+         "multiple_choice"
+       ],
+       "dataset_path": "hellaswag",
+       "training_split": "train",
+       "validation_split": "validation",
+       "process_docs": "<function process_docs at 0x7fa89ca84ca0>",
+       "doc_to_text": "{{query}}",
+       "doc_to_target": "{{label}}",
+       "doc_to_choice": "choices",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         },
+         {
+           "metric": "acc_norm",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": [
+         {
+           "version": 1.0
+         }
+       ]
+     },
+     "piqa": {
+       "task": "piqa",
+       "dataset_path": "piqa",
+       "training_split": "train",
+       "validation_split": "validation",
+       "doc_to_text": "Question: {{goal}}\nAnswer:",
+       "doc_to_target": "label",
+       "doc_to_choice": "{{[sol1, sol2]}}",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         },
+         {
+           "metric": "acc_norm",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "goal",
+       "metadata": [
+         {
+           "version": 1.0
+         }
+       ]
+     },
+     "truthfulqa_mc2": {
+       "task": "truthfulqa_mc2",
+       "group": [
+         "truthfulqa"
+       ],
+       "dataset_path": "truthful_qa",
+       "dataset_name": "multiple_choice",
+       "validation_split": "validation",
+       "doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question + '\nA:'}}",
+       "doc_to_target": 0,
+       "doc_to_choice": "{{mc2_targets.choices}}",
+       "process_results": "<function process_results_mc2 at 0x7fa89df957e0>",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "question",
+       "metadata": [
+         {
+           "version": 2.0
+         }
+       ]
+     },
+     "winogrande": {
+       "task": "winogrande",
+       "dataset_path": "winogrande",
+       "dataset_name": "winogrande_xl",
+       "training_split": "train",
+       "validation_split": "validation",
+       "doc_to_text": "<function doc_to_text at 0x7fa89df46f80>",
+       "doc_to_target": "<function doc_to_target at 0x7fa89df94c10>",
+       "doc_to_choice": "<function doc_to_choice at 0x7fa89df94f70>",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "sentence",
+       "metadata": [
+         {
+           "version": 1.0
+         }
+       ]
+     }
+   },
+   "versions": {
+     "arc_challenge": "Yaml",
+     "arc_easy": "Yaml",
+     "gsm8k": "Yaml",
+     "hellaswag": "Yaml",
+     "piqa": "Yaml",
+     "truthfulqa_mc2": "Yaml",
+     "winogrande": "Yaml"
+   },
+   "n-shot": {
+     "arc_challenge": 0,
+     "arc_easy": 0,
+     "gsm8k": 5,
+     "hellaswag": 0,
+     "piqa": 0,
+     "truthfulqa_mc2": 0,
+     "winogrande": 0
+   },
+   "config": {
+     "model": "hf",
+     "model_args": "pretrained=Undi95/SolarMaid-v0.1.1",
+     "batch_size": "4",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "use_cache": null,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "gen_kwargs": null
+   },
+   "git_hash": "fcfc0c60"
+ }
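
For reference, this file uses the results layout written by EleutherAI's lm-evaluation-harness: per-task metrics under "results" (with matching "*_stderr" keys), shot counts under "n-shot", and the launch settings under "config". A minimal sketch of summarising the headline numbers from it, assuming the file is saved locally under the name shown above, could look like this:

    import json

    # Path is an assumption; point it at wherever the results file was downloaded.
    with open("SolarMaid-v0.1.1.json") as f:
        data = json.load(f)

    n_shot = data.get("n-shot", {})

    # One line per task and metric, pairing each value with its stderr
    # (e.g. "acc,none" pairs with "acc_stderr,none").
    for task, metrics in data["results"].items():
        shots = n_shot.get(task, "?")
        for key, value in metrics.items():
            if key == "alias" or "_stderr" in key:
                continue
            stderr = metrics.get(key.replace(",", "_stderr,", 1), float("nan"))
            print(f"{task} ({shots}-shot) {key}: {value:.4f} +/- {stderr:.4f}")

This only reads the keys that appear in the JSON above; it does not re-run any evaluation.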