amirali1985 commited on
Commit
09429c8
·
verified ·
1 Parent(s): 2cc8559

Upload add_sub_baseline_10K_1L3H510d

Browse files
add_sub_baseline_10K_1L3H510d/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention"
17
+ ],
18
+ "max_position_embeddings": 128,
19
+ "max_window_layers": 28,
20
+ "model_type": "qwen3",
21
+ "num_attention_heads": 3,
22
+ "num_hidden_layers": 1,
23
+ "num_key_value_heads": 3,
24
+ "pad_token_id": null,
25
+ "rms_norm_eps": 1e-06,
26
+ "rope_parameters": {
27
+ "rope_theta": 10000.0,
28
+ "rope_type": "default"
29
+ },
30
+ "sliding_window": null,
31
+ "tie_word_embeddings": false,
32
+ "transformers_version": "5.5.0",
33
+ "use_cache": true,
34
+ "use_sliding_window": false,
35
+ "vocab_size": 151645
36
+ }
add_sub_baseline_10K_1L3H510d/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_baseline_10K_1L3H510d/metrics.json ADDED
@@ -0,0 +1,831 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 200,
8
+ 250,
9
+ 300,
10
+ 350,
11
+ 400,
12
+ 450,
13
+ 500,
14
+ 550,
15
+ 600,
16
+ 650,
17
+ 700,
18
+ 750,
19
+ 800,
20
+ 850,
21
+ 900,
22
+ 950,
23
+ 1000,
24
+ 1050,
25
+ 1100,
26
+ 1150,
27
+ 1200,
28
+ 1250,
29
+ 1300,
30
+ 1350,
31
+ 1400,
32
+ 1450,
33
+ 1500,
34
+ 1550,
35
+ 1600,
36
+ 1650,
37
+ 1700,
38
+ 1750,
39
+ 1800,
40
+ 1850,
41
+ 1900,
42
+ 1950,
43
+ 2000,
44
+ 2050,
45
+ 2100,
46
+ 2150,
47
+ 2200,
48
+ 2250,
49
+ 2300,
50
+ 2350,
51
+ 2400,
52
+ 2450,
53
+ 2500,
54
+ 2550,
55
+ 2600,
56
+ 2650,
57
+ 2700,
58
+ 2750,
59
+ 2800,
60
+ 2850,
61
+ 2900,
62
+ 2950,
63
+ 3000,
64
+ 3050,
65
+ 3100
66
+ ],
67
+ "loss": [
68
+ 7.182804584503174,
69
+ 4.20333194732666,
70
+ 2.0677404403686523,
71
+ 1.829144835472107,
72
+ 1.8410581350326538,
73
+ 1.820699691772461,
74
+ 1.7079013586044312,
75
+ 1.655383825302124,
76
+ 1.6272556781768799,
77
+ 1.56789231300354,
78
+ 1.2935447692871094,
79
+ 0.8198140263557434,
80
+ 0.7896751761436462,
81
+ 0.7217035293579102,
82
+ 0.6449242830276489,
83
+ 0.6155810356140137,
84
+ 0.6273579001426697,
85
+ 0.5691636800765991,
86
+ 0.5195116400718689,
87
+ 0.5188413262367249,
88
+ 0.4571366012096405,
89
+ 0.46536850929260254,
90
+ 0.45766669511795044,
91
+ 0.4275817275047302,
92
+ 0.40487247705459595,
93
+ 0.4464508593082428,
94
+ 0.39323002099990845,
95
+ 0.39669889211654663,
96
+ 0.34110480546951294,
97
+ 0.3717530369758606,
98
+ 0.3865724503993988,
99
+ 0.36133044958114624,
100
+ 0.3357429802417755,
101
+ 0.3121393620967865,
102
+ 0.3670870363712311,
103
+ 0.2978810667991638,
104
+ 0.3120233118534088,
105
+ 0.34596773982048035,
106
+ 0.29005879163742065,
107
+ 0.34079042077064514,
108
+ 0.29599788784980774,
109
+ 0.30136188864707947,
110
+ 0.2756097614765167,
111
+ 0.3011065125465393,
112
+ 0.30379655957221985,
113
+ 0.2766466736793518,
114
+ 0.2690426707267761,
115
+ 0.3143273890018463,
116
+ 0.26247408986091614,
117
+ 0.27483493089675903,
118
+ 0.2900548279285431,
119
+ 0.2740040123462677,
120
+ 0.2515677511692047,
121
+ 0.2737451195716858,
122
+ 0.246206596493721,
123
+ 0.2460457980632782,
124
+ 0.2329309731721878,
125
+ 0.25465384125709534,
126
+ 0.2698599696159363,
127
+ 0.2485286146402359,
128
+ 0.24830453097820282,
129
+ 0.2897026538848877
130
+ ],
131
+ "base_loss": [
132
+ 7.182804584503174,
133
+ 4.20333194732666,
134
+ 2.0677404403686523,
135
+ 1.829144835472107,
136
+ 1.8410581350326538,
137
+ 1.820699691772461,
138
+ 1.7079013586044312,
139
+ 1.655383825302124,
140
+ 1.6272556781768799,
141
+ 1.56789231300354,
142
+ 1.2935447692871094,
143
+ 0.8198140263557434,
144
+ 0.7896751761436462,
145
+ 0.7217035293579102,
146
+ 0.6449242830276489,
147
+ 0.6155810356140137,
148
+ 0.6273579001426697,
149
+ 0.5691636800765991,
150
+ 0.5195116400718689,
151
+ 0.5188413262367249,
152
+ 0.4571366012096405,
153
+ 0.46536850929260254,
154
+ 0.45766669511795044,
155
+ 0.4275817275047302,
156
+ 0.40487247705459595,
157
+ 0.4464508593082428,
158
+ 0.39323002099990845,
159
+ 0.39669889211654663,
160
+ 0.34110480546951294,
161
+ 0.3717530369758606,
162
+ 0.3865724503993988,
163
+ 0.36133044958114624,
164
+ 0.3357429802417755,
165
+ 0.3121393620967865,
166
+ 0.3670870363712311,
167
+ 0.2978810667991638,
168
+ 0.3120233118534088,
169
+ 0.34596773982048035,
170
+ 0.29005879163742065,
171
+ 0.34079042077064514,
172
+ 0.29599788784980774,
173
+ 0.30136188864707947,
174
+ 0.2756097614765167,
175
+ 0.3011065125465393,
176
+ 0.30379655957221985,
177
+ 0.2766466736793518,
178
+ 0.2690426707267761,
179
+ 0.3143273890018463,
180
+ 0.26247408986091614,
181
+ 0.27483493089675903,
182
+ 0.2900548279285431,
183
+ 0.2740040123462677,
184
+ 0.2515677511692047,
185
+ 0.2737451195716858,
186
+ 0.246206596493721,
187
+ 0.2460457980632782,
188
+ 0.2329309731721878,
189
+ 0.25465384125709534,
190
+ 0.2698599696159363,
191
+ 0.2485286146402359,
192
+ 0.24830453097820282,
193
+ 0.2897026538848877
194
+ ],
195
+ "lr": [
196
+ 3.9200000000000004e-05,
197
+ 7.92e-05,
198
+ 7.994872780244471e-05,
199
+ 7.979084217550451e-05,
200
+ 7.952674320281786e-05,
201
+ 7.915713584145437e-05,
202
+ 7.868300668109943e-05,
203
+ 7.810562131055899e-05,
204
+ 7.742652093953451e-05,
205
+ 7.664751828468545e-05,
206
+ 7.57706927309605e-05,
207
+ 7.47983847811137e-05,
208
+ 7.373318980822093e-05,
209
+ 7.25779511278734e-05,
210
+ 7.133575240854014e-05,
211
+ 7.000990944035905e-05,
212
+ 6.860396128432721e-05,
213
+ 6.712166082551651e-05,
214
+ 6.556696475553043e-05,
215
+ 6.394402301094192e-05,
216
+ 6.225716769590408e-05,
217
+ 6.0510901518502626e-05,
218
+ 5.8709885771716645e-05,
219
+ 5.685892789107026e-05,
220
+ 5.4962968622187084e-05,
221
+ 5.3027068832501364e-05,
222
+ 5.1056396002328924e-05,
223
+ 4.9056210431357356e-05,
224
+ 4.703185119737419e-05,
225
+ 4.498872190471344e-05,
226
+ 4.293227626046202e-05,
227
+ 4.086800351692724e-05,
228
+ 3.880141381922381e-05,
229
+ 3.673802349709187e-05,
230
+ 3.468334034020644e-05,
231
+ 3.2642848896282794e-05,
232
+ 3.062199583122133e-05,
233
+ 2.862617539037015e-05,
234
+ 2.6660714999713274e-05,
235
+ 2.4730861045419232e-05,
236
+ 2.284176486970851e-05,
237
+ 2.099846902042102e-05,
238
+ 1.9205893790987304e-05,
239
+ 1.7468824086732586e-05,
240
+ 1.579189665257094e-05,
241
+ 1.4179587696182778e-05,
242
+ 1.2636200939713001e-05,
243
+ 1.1165856131883247e-05,
244
+ 9.772478051182794e-06,
245
+ 8.459786029491775e-06,
246
+ 7.231284024101261e-06,
247
+ 6.090251264630804e-06,
248
+ 5.039733499809587e-06,
249
+ 4.082534867486105e-06,
250
+ 3.221210409567612e-06,
251
+ 2.458059251869167e-06,
252
+ 1.7951184670772902e-06,
253
+ 1.2341576372097185e-06,
254
+ 7.766741300856728e-07,
255
+ 4.23889102415056e-07,
256
+ 1.7674424017557922e-07,
257
+ 3.58992449786566e-08
258
+ ],
259
+ "eval_step": [
260
+ 156,
261
+ 312,
262
+ 468,
263
+ 624,
264
+ 780,
265
+ 936,
266
+ 1092,
267
+ 1248,
268
+ 1404,
269
+ 1560,
270
+ 1716,
271
+ 1872,
272
+ 2028,
273
+ 2184,
274
+ 2340,
275
+ 2496,
276
+ 2652,
277
+ 2808,
278
+ 2964,
279
+ 3120
280
+ ],
281
+ "eval_epoch": [
282
+ 1,
283
+ 2,
284
+ 3,
285
+ 4,
286
+ 5,
287
+ 6,
288
+ 7,
289
+ 8,
290
+ 9,
291
+ 10,
292
+ 11,
293
+ 12,
294
+ 13,
295
+ 14,
296
+ 15,
297
+ 16,
298
+ 17,
299
+ 18,
300
+ 19,
301
+ 20
302
+ ],
303
+ "eval_accuracy": [
304
+ 0.0,
305
+ 0.0044444444444444444,
306
+ 0.006666666666666667,
307
+ 0.01888888888888889,
308
+ 0.057777777777777775,
309
+ 0.09444444444444444,
310
+ 0.11888888888888889,
311
+ 0.20222222222222222,
312
+ 0.24555555555555555,
313
+ 0.2611111111111111,
314
+ 0.32666666666666666,
315
+ 0.3011111111111111,
316
+ 0.33555555555555555,
317
+ 0.39222222222222225,
318
+ 0.41888888888888887,
319
+ 0.3888888888888889,
320
+ 0.4111111111111111,
321
+ 0.39666666666666667,
322
+ 0.41,
323
+ 0.42444444444444446
324
+ ]
325
+ },
326
+ "final_accuracy": 0.33625,
327
+ "sft_eval": {
328
+ "config": {
329
+ "ops": "add_sub",
330
+ "K": null,
331
+ "mode": "sft",
332
+ "n_digits": 6,
333
+ "n_per_split": 100
334
+ },
335
+ "splits": {
336
+ "add_S0": {
337
+ "full_accuracy": 0.65,
338
+ "n_examples": 100,
339
+ "per_subtask": {
340
+ "SA": {
341
+ "accuracy": 0.9404958677685951,
342
+ "count": 605
343
+ },
344
+ "SS": {
345
+ "accuracy": 0.9473684210526315,
346
+ "count": 95
347
+ }
348
+ }
349
+ },
350
+ "add_S1": {
351
+ "full_accuracy": 0.62,
352
+ "n_examples": 100,
353
+ "per_subtask": {
354
+ "SA": {
355
+ "accuracy": 0.946078431372549,
356
+ "count": 204
357
+ },
358
+ "SC": {
359
+ "accuracy": 0.9585798816568047,
360
+ "count": 169
361
+ },
362
+ "SS": {
363
+ "accuracy": 0.9354838709677419,
364
+ "count": 31
365
+ },
366
+ "UC": {
367
+ "accuracy": 0.9087837837837838,
368
+ "count": 296
369
+ }
370
+ }
371
+ },
372
+ "add_S2": {
373
+ "full_accuracy": 0.3,
374
+ "n_examples": 100,
375
+ "per_subtask": {
376
+ "SA": {
377
+ "accuracy": 0.9631901840490797,
378
+ "count": 163
379
+ },
380
+ "SC": {
381
+ "accuracy": 0.8923076923076924,
382
+ "count": 130
383
+ },
384
+ "SS": {
385
+ "accuracy": 0.8850574712643678,
386
+ "count": 87
387
+ },
388
+ "UC": {
389
+ "accuracy": 0.6945812807881774,
390
+ "count": 203
391
+ },
392
+ "US": {
393
+ "accuracy": 0.9401709401709402,
394
+ "count": 117
395
+ }
396
+ }
397
+ },
398
+ "add_S3": {
399
+ "full_accuracy": 0.2,
400
+ "n_examples": 100,
401
+ "per_subtask": {
402
+ "SA": {
403
+ "accuracy": 0.9834710743801653,
404
+ "count": 121
405
+ },
406
+ "SC": {
407
+ "accuracy": 0.9173553719008265,
408
+ "count": 121
409
+ },
410
+ "SS": {
411
+ "accuracy": 0.9795918367346939,
412
+ "count": 49
413
+ },
414
+ "UC": {
415
+ "accuracy": 0.6075268817204301,
416
+ "count": 186
417
+ },
418
+ "US": {
419
+ "accuracy": 0.6457399103139013,
420
+ "count": 223
421
+ }
422
+ }
423
+ },
424
+ "add_S4": {
425
+ "full_accuracy": 0.21,
426
+ "n_examples": 100,
427
+ "per_subtask": {
428
+ "SA": {
429
+ "accuracy": 0.9903846153846154,
430
+ "count": 104
431
+ },
432
+ "SC": {
433
+ "accuracy": 0.9622641509433962,
434
+ "count": 106
435
+ },
436
+ "SS": {
437
+ "accuracy": 1.0,
438
+ "count": 23
439
+ },
440
+ "UC": {
441
+ "accuracy": 0.675,
442
+ "count": 160
443
+ },
444
+ "US": {
445
+ "accuracy": 0.50814332247557,
446
+ "count": 307
447
+ }
448
+ }
449
+ },
450
+ "add_S5": {
451
+ "full_accuracy": 0.2,
452
+ "n_examples": 100,
453
+ "per_subtask": {
454
+ "SA": {
455
+ "accuracy": 1.0,
456
+ "count": 100
457
+ },
458
+ "SC": {
459
+ "accuracy": 0.96,
460
+ "count": 100
461
+ },
462
+ "UC": {
463
+ "accuracy": 0.39,
464
+ "count": 100
465
+ },
466
+ "US": {
467
+ "accuracy": 0.4575,
468
+ "count": 400
469
+ }
470
+ }
471
+ },
472
+ "add_S6": {
473
+ "full_accuracy": 0.32,
474
+ "n_examples": 100,
475
+ "per_subtask": {
476
+ "SC": {
477
+ "accuracy": 1.0,
478
+ "count": 100
479
+ },
480
+ "UC": {
481
+ "accuracy": 0.43,
482
+ "count": 100
483
+ },
484
+ "US": {
485
+ "accuracy": 0.424,
486
+ "count": 500
487
+ }
488
+ }
489
+ },
490
+ "add_random": {
491
+ "full_accuracy": 0.625,
492
+ "n_examples": 200,
493
+ "per_subtask": {
494
+ "SA": {
495
+ "accuracy": 0.9686800894854586,
496
+ "count": 447
497
+ },
498
+ "SC": {
499
+ "accuracy": 0.953125,
500
+ "count": 320
501
+ },
502
+ "SS": {
503
+ "accuracy": 0.9107142857142857,
504
+ "count": 56
505
+ },
506
+ "UC": {
507
+ "accuracy": 0.888468809073724,
508
+ "count": 529
509
+ },
510
+ "US": {
511
+ "accuracy": 0.8541666666666666,
512
+ "count": 48
513
+ }
514
+ }
515
+ },
516
+ "add_C3": {
517
+ "full_accuracy": 0.41,
518
+ "n_examples": 100,
519
+ "per_subtask": {
520
+ "SA": {
521
+ "accuracy": 0.98,
522
+ "count": 300
523
+ },
524
+ "SC": {
525
+ "accuracy": 1.0,
526
+ "count": 100
527
+ },
528
+ "UC": {
529
+ "accuracy": 0.689119170984456,
530
+ "count": 193
531
+ },
532
+ "US": {
533
+ "accuracy": 0.794392523364486,
534
+ "count": 107
535
+ }
536
+ }
537
+ },
538
+ "add_C4": {
539
+ "full_accuracy": 0.35,
540
+ "n_examples": 100,
541
+ "per_subtask": {
542
+ "SA": {
543
+ "accuracy": 0.985,
544
+ "count": 200
545
+ },
546
+ "SC": {
547
+ "accuracy": 0.97,
548
+ "count": 100
549
+ },
550
+ "UC": {
551
+ "accuracy": 0.70703125,
552
+ "count": 256
553
+ },
554
+ "US": {
555
+ "accuracy": 0.7222222222222222,
556
+ "count": 144
557
+ }
558
+ }
559
+ },
560
+ "add_C5": {
561
+ "full_accuracy": 0.24,
562
+ "n_examples": 100,
563
+ "per_subtask": {
564
+ "SA": {
565
+ "accuracy": 1.0,
566
+ "count": 100
567
+ },
568
+ "SC": {
569
+ "accuracy": 0.93,
570
+ "count": 100
571
+ },
572
+ "UC": {
573
+ "accuracy": 0.696078431372549,
574
+ "count": 306
575
+ },
576
+ "US": {
577
+ "accuracy": 0.7731958762886598,
578
+ "count": 194
579
+ }
580
+ }
581
+ },
582
+ "add_C6": {
583
+ "full_accuracy": 0.2,
584
+ "n_examples": 100,
585
+ "per_subtask": {
586
+ "SC": {
587
+ "accuracy": 1.0,
588
+ "count": 100
589
+ },
590
+ "UC": {
591
+ "accuracy": 0.7295081967213115,
592
+ "count": 366
593
+ },
594
+ "US": {
595
+ "accuracy": 0.8076923076923077,
596
+ "count": 234
597
+ }
598
+ }
599
+ },
600
+ "sub_M0": {
601
+ "full_accuracy": 0.74,
602
+ "n_examples": 100,
603
+ "per_subtask": {
604
+ "MD": {
605
+ "accuracy": 0.9500831946755408,
606
+ "count": 601
607
+ },
608
+ "ME": {
609
+ "accuracy": 1.0,
610
+ "count": 99
611
+ }
612
+ }
613
+ },
614
+ "sub_M1": {
615
+ "full_accuracy": 0.5,
616
+ "n_examples": 100,
617
+ "per_subtask": {
618
+ "MD": {
619
+ "accuracy": 0.9605734767025089,
620
+ "count": 279
621
+ },
622
+ "MB": {
623
+ "accuracy": 0.9655172413793104,
624
+ "count": 145
625
+ },
626
+ "ME": {
627
+ "accuracy": 0.875,
628
+ "count": 24
629
+ },
630
+ "UB": {
631
+ "accuracy": 0.8174603174603174,
632
+ "count": 252
633
+ }
634
+ }
635
+ },
636
+ "sub_M2": {
637
+ "full_accuracy": 0.23,
638
+ "n_examples": 100,
639
+ "per_subtask": {
640
+ "MD": {
641
+ "accuracy": 0.9671361502347418,
642
+ "count": 213
643
+ },
644
+ "MB": {
645
+ "accuracy": 0.9646017699115044,
646
+ "count": 113
647
+ },
648
+ "ME": {
649
+ "accuracy": 0.9176470588235294,
650
+ "count": 85
651
+ },
652
+ "UB": {
653
+ "accuracy": 0.6077348066298343,
654
+ "count": 181
655
+ },
656
+ "UD": {
657
+ "accuracy": 0.8425925925925926,
658
+ "count": 108
659
+ }
660
+ }
661
+ },
662
+ "sub_M3": {
663
+ "full_accuracy": 0.1,
664
+ "n_examples": 100,
665
+ "per_subtask": {
666
+ "MD": {
667
+ "accuracy": 0.994413407821229,
668
+ "count": 179
669
+ },
670
+ "MB": {
671
+ "accuracy": 0.9029126213592233,
672
+ "count": 103
673
+ },
674
+ "ME": {
675
+ "accuracy": 1.0,
676
+ "count": 56
677
+ },
678
+ "UB": {
679
+ "accuracy": 0.4966442953020134,
680
+ "count": 149
681
+ },
682
+ "UD": {
683
+ "accuracy": 0.5821596244131455,
684
+ "count": 213
685
+ }
686
+ }
687
+ },
688
+ "sub_M4": {
689
+ "full_accuracy": 0.11,
690
+ "n_examples": 100,
691
+ "per_subtask": {
692
+ "MD": {
693
+ "accuracy": 0.995,
694
+ "count": 200
695
+ },
696
+ "MB": {
697
+ "accuracy": 1.0,
698
+ "count": 100
699
+ },
700
+ "UB": {
701
+ "accuracy": 0.44,
702
+ "count": 100
703
+ },
704
+ "UD": {
705
+ "accuracy": 0.33666666666666667,
706
+ "count": 300
707
+ }
708
+ }
709
+ },
710
+ "sub_M5": {
711
+ "full_accuracy": 0.06,
712
+ "n_examples": 100,
713
+ "per_subtask": {
714
+ "MD": {
715
+ "accuracy": 1.0,
716
+ "count": 100
717
+ },
718
+ "MB": {
719
+ "accuracy": 0.99,
720
+ "count": 100
721
+ },
722
+ "UB": {
723
+ "accuracy": 0.47,
724
+ "count": 100
725
+ },
726
+ "UD": {
727
+ "accuracy": 0.27,
728
+ "count": 400
729
+ }
730
+ }
731
+ },
732
+ "sub_random": {
733
+ "full_accuracy": 0.47,
734
+ "n_examples": 200,
735
+ "per_subtask": {
736
+ "MD": {
737
+ "accuracy": 0.955,
738
+ "count": 600
739
+ },
740
+ "MB": {
741
+ "accuracy": 0.9400749063670412,
742
+ "count": 267
743
+ },
744
+ "ME": {
745
+ "accuracy": 0.9622641509433962,
746
+ "count": 53
747
+ },
748
+ "UB": {
749
+ "accuracy": 0.8223234624145785,
750
+ "count": 439
751
+ },
752
+ "UD": {
753
+ "accuracy": 0.9024390243902439,
754
+ "count": 41
755
+ }
756
+ }
757
+ },
758
+ "sub_B3": {
759
+ "full_accuracy": 0.2,
760
+ "n_examples": 100,
761
+ "per_subtask": {
762
+ "MD": {
763
+ "accuracy": 0.9833333333333333,
764
+ "count": 300
765
+ },
766
+ "MB": {
767
+ "accuracy": 0.98,
768
+ "count": 100
769
+ },
770
+ "UB": {
771
+ "accuracy": 0.5736040609137056,
772
+ "count": 197
773
+ },
774
+ "UD": {
775
+ "accuracy": 0.5825242718446602,
776
+ "count": 103
777
+ }
778
+ }
779
+ },
780
+ "sub_B4": {
781
+ "full_accuracy": 0.11,
782
+ "n_examples": 100,
783
+ "per_subtask": {
784
+ "MD": {
785
+ "accuracy": 0.985,
786
+ "count": 200
787
+ },
788
+ "MB": {
789
+ "accuracy": 0.99,
790
+ "count": 100
791
+ },
792
+ "UB": {
793
+ "accuracy": 0.6153846153846154,
794
+ "count": 247
795
+ },
796
+ "UD": {
797
+ "accuracy": 0.49673202614379086,
798
+ "count": 153
799
+ }
800
+ }
801
+ },
802
+ "sub_B5": {
803
+ "full_accuracy": 0.13,
804
+ "n_examples": 100,
805
+ "per_subtask": {
806
+ "MD": {
807
+ "accuracy": 1.0,
808
+ "count": 100
809
+ },
810
+ "MB": {
811
+ "accuracy": 0.99,
812
+ "count": 100
813
+ },
814
+ "UB": {
815
+ "accuracy": 0.6644295302013423,
816
+ "count": 298
817
+ },
818
+ "UD": {
819
+ "accuracy": 0.5247524752475248,
820
+ "count": 202
821
+ }
822
+ }
823
+ }
824
+ },
825
+ "summary": {
826
+ "overall_accuracy": 0.33625,
827
+ "total_examples": 2400,
828
+ "n_splits": 22
829
+ }
830
+ }
831
+ }
add_sub_baseline_10K_1L3H510d/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb08c950f588dd85bf054c11d3921a45bb77e8a8be21e8944ddd67270255396
3
+ size 634642298
add_sub_baseline_10K_1L3H510d/train_config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 4,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_baseline_10K_1L3H510d",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 1,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 0,
65
+ "dataset_size": 10000,
66
+ "mode": "baseline",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 158584246,
71
+ "run_name": "add_sub_baseline_10K_1L3H510d",
72
+ "git_commit": "7c8a203ce79a277a1e41a3ec0648cb73d5b2b760",
73
+ "timestamp": "2026-04-14T02:09:33.297198+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "model_repo": "thoughtworks/arithmetic-sorl",
78
+ "trainer_version": "sft",
79
+ "wandb_run_id": "6ykzfg1p",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/6ykzfg1p",
81
+ "final_accuracy": 0.33625,
82
+ "sft_accuracy": 0.33625,
83
+ "eval_method": "ArithmeticEvaluator"
84
+ }