Frikallo committed on
Commit
0f3e953
1 Parent(s): c4152be

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +547 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.530629653492193,
4
+ "train_runtime": 110.6495,
5
+ "train_samples": 435,
6
+ "train_samples_per_second": 3.931,
7
+ "train_steps_per_second": 3.931
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.530629653492193,
4
+ "train_runtime": 110.6495,
5
+ "train_samples": 435,
6
+ "train_samples_per_second": 3.931,
7
+ "train_steps_per_second": 3.931
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 435,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 0.00013562298850574713,
13
+ "loss": 4.7615,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.02,
18
+ "learning_rate": 0.00013404597701149426,
19
+ "loss": 3.6623,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.03,
24
+ "learning_rate": 0.0001324689655172414,
25
+ "loss": 4.0255,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.05,
30
+ "learning_rate": 0.0001308919540229885,
31
+ "loss": 4.4715,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.06,
36
+ "learning_rate": 0.00012931494252873562,
37
+ "loss": 3.5876,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.07,
42
+ "learning_rate": 0.00012773793103448275,
43
+ "loss": 2.8973,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.08,
48
+ "learning_rate": 0.00012616091954022987,
49
+ "loss": 3.3497,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.09,
54
+ "learning_rate": 0.000124583908045977,
55
+ "loss": 2.6265,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.1,
60
+ "learning_rate": 0.00012300689655172413,
61
+ "loss": 3.152,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.11,
66
+ "learning_rate": 0.00012142988505747127,
67
+ "loss": 2.5628,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.13,
72
+ "learning_rate": 0.0001198528735632184,
73
+ "loss": 1.9529,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.14,
78
+ "learning_rate": 0.00011827586206896552,
79
+ "loss": 3.2591,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.15,
84
+ "learning_rate": 0.00011669885057471265,
85
+ "loss": 1.6728,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.16,
90
+ "learning_rate": 0.00011512183908045976,
91
+ "loss": 2.6952,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.17,
96
+ "learning_rate": 0.00011354482758620689,
97
+ "loss": 2.9556,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.18,
102
+ "learning_rate": 0.00011196781609195402,
103
+ "loss": 2.195,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.2,
108
+ "learning_rate": 0.00011039080459770115,
109
+ "loss": 1.7066,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.21,
114
+ "learning_rate": 0.00010881379310344828,
115
+ "loss": 1.5754,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 0.22,
120
+ "learning_rate": 0.0001072367816091954,
121
+ "loss": 1.8903,
122
+ "step": 95
123
+ },
124
+ {
125
+ "epoch": 0.23,
126
+ "learning_rate": 0.00010565977011494252,
127
+ "loss": 2.3167,
128
+ "step": 100
129
+ },
130
+ {
131
+ "epoch": 0.24,
132
+ "learning_rate": 0.00010408275862068965,
133
+ "loss": 2.2342,
134
+ "step": 105
135
+ },
136
+ {
137
+ "epoch": 0.25,
138
+ "learning_rate": 0.00010250574712643678,
139
+ "loss": 2.1345,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 0.26,
144
+ "learning_rate": 0.0001009287356321839,
145
+ "loss": 1.7189,
146
+ "step": 115
147
+ },
148
+ {
149
+ "epoch": 0.28,
150
+ "learning_rate": 9.935172413793103e-05,
151
+ "loss": 1.1786,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.29,
156
+ "learning_rate": 9.777471264367816e-05,
157
+ "loss": 2.1098,
158
+ "step": 125
159
+ },
160
+ {
161
+ "epoch": 0.3,
162
+ "learning_rate": 9.619770114942529e-05,
163
+ "loss": 2.0657,
164
+ "step": 130
165
+ },
166
+ {
167
+ "epoch": 0.31,
168
+ "learning_rate": 9.462068965517242e-05,
169
+ "loss": 1.5821,
170
+ "step": 135
171
+ },
172
+ {
173
+ "epoch": 0.32,
174
+ "learning_rate": 9.304367816091953e-05,
175
+ "loss": 1.8502,
176
+ "step": 140
177
+ },
178
+ {
179
+ "epoch": 0.33,
180
+ "learning_rate": 9.146666666666666e-05,
181
+ "loss": 1.4641,
182
+ "step": 145
183
+ },
184
+ {
185
+ "epoch": 0.34,
186
+ "learning_rate": 8.988965517241379e-05,
187
+ "loss": 1.4556,
188
+ "step": 150
189
+ },
190
+ {
191
+ "epoch": 0.36,
192
+ "learning_rate": 8.831264367816092e-05,
193
+ "loss": 0.5807,
194
+ "step": 155
195
+ },
196
+ {
197
+ "epoch": 0.37,
198
+ "learning_rate": 8.673563218390805e-05,
199
+ "loss": 1.8225,
200
+ "step": 160
201
+ },
202
+ {
203
+ "epoch": 0.38,
204
+ "learning_rate": 8.515862068965518e-05,
205
+ "loss": 0.8647,
206
+ "step": 165
207
+ },
208
+ {
209
+ "epoch": 0.39,
210
+ "learning_rate": 8.35816091954023e-05,
211
+ "loss": 2.1795,
212
+ "step": 170
213
+ },
214
+ {
215
+ "epoch": 0.4,
216
+ "learning_rate": 8.200459770114943e-05,
217
+ "loss": 0.52,
218
+ "step": 175
219
+ },
220
+ {
221
+ "epoch": 0.41,
222
+ "learning_rate": 8.042758620689655e-05,
223
+ "loss": 1.325,
224
+ "step": 180
225
+ },
226
+ {
227
+ "epoch": 0.43,
228
+ "learning_rate": 7.885057471264368e-05,
229
+ "loss": 0.8714,
230
+ "step": 185
231
+ },
232
+ {
233
+ "epoch": 0.44,
234
+ "learning_rate": 7.72735632183908e-05,
235
+ "loss": 1.5474,
236
+ "step": 190
237
+ },
238
+ {
239
+ "epoch": 0.45,
240
+ "learning_rate": 7.569655172413794e-05,
241
+ "loss": 1.579,
242
+ "step": 195
243
+ },
244
+ {
245
+ "epoch": 0.46,
246
+ "learning_rate": 7.411954022988506e-05,
247
+ "loss": 0.8652,
248
+ "step": 200
249
+ },
250
+ {
251
+ "epoch": 0.47,
252
+ "learning_rate": 7.254252873563219e-05,
253
+ "loss": 1.6002,
254
+ "step": 205
255
+ },
256
+ {
257
+ "epoch": 0.48,
258
+ "learning_rate": 7.096551724137932e-05,
259
+ "loss": 1.5124,
260
+ "step": 210
261
+ },
262
+ {
263
+ "epoch": 0.49,
264
+ "learning_rate": 6.938850574712645e-05,
265
+ "loss": 0.8354,
266
+ "step": 215
267
+ },
268
+ {
269
+ "epoch": 0.51,
270
+ "learning_rate": 6.781149425287356e-05,
271
+ "loss": 1.1408,
272
+ "step": 220
273
+ },
274
+ {
275
+ "epoch": 0.52,
276
+ "learning_rate": 6.62344827586207e-05,
277
+ "loss": 0.7256,
278
+ "step": 225
279
+ },
280
+ {
281
+ "epoch": 0.53,
282
+ "learning_rate": 6.465747126436781e-05,
283
+ "loss": 1.1633,
284
+ "step": 230
285
+ },
286
+ {
287
+ "epoch": 0.54,
288
+ "learning_rate": 6.308045977011494e-05,
289
+ "loss": 1.0975,
290
+ "step": 235
291
+ },
292
+ {
293
+ "epoch": 0.55,
294
+ "learning_rate": 6.150344827586207e-05,
295
+ "loss": 0.7067,
296
+ "step": 240
297
+ },
298
+ {
299
+ "epoch": 0.56,
300
+ "learning_rate": 5.99264367816092e-05,
301
+ "loss": 1.6993,
302
+ "step": 245
303
+ },
304
+ {
305
+ "epoch": 0.57,
306
+ "learning_rate": 5.834942528735632e-05,
307
+ "loss": 1.0712,
308
+ "step": 250
309
+ },
310
+ {
311
+ "epoch": 0.59,
312
+ "learning_rate": 5.6772413793103445e-05,
313
+ "loss": 1.6506,
314
+ "step": 255
315
+ },
316
+ {
317
+ "epoch": 0.6,
318
+ "learning_rate": 5.519540229885057e-05,
319
+ "loss": 1.2777,
320
+ "step": 260
321
+ },
322
+ {
323
+ "epoch": 0.61,
324
+ "learning_rate": 5.36183908045977e-05,
325
+ "loss": 1.5069,
326
+ "step": 265
327
+ },
328
+ {
329
+ "epoch": 0.62,
330
+ "learning_rate": 5.2041379310344824e-05,
331
+ "loss": 0.6828,
332
+ "step": 270
333
+ },
334
+ {
335
+ "epoch": 0.63,
336
+ "learning_rate": 5.046436781609195e-05,
337
+ "loss": 0.9765,
338
+ "step": 275
339
+ },
340
+ {
341
+ "epoch": 0.64,
342
+ "learning_rate": 4.888735632183908e-05,
343
+ "loss": 0.9234,
344
+ "step": 280
345
+ },
346
+ {
347
+ "epoch": 0.66,
348
+ "learning_rate": 4.731034482758621e-05,
349
+ "loss": 1.2876,
350
+ "step": 285
351
+ },
352
+ {
353
+ "epoch": 0.67,
354
+ "learning_rate": 4.573333333333333e-05,
355
+ "loss": 0.9512,
356
+ "step": 290
357
+ },
358
+ {
359
+ "epoch": 0.68,
360
+ "learning_rate": 4.415632183908046e-05,
361
+ "loss": 0.8959,
362
+ "step": 295
363
+ },
364
+ {
365
+ "epoch": 0.69,
366
+ "learning_rate": 4.257931034482759e-05,
367
+ "loss": 0.8888,
368
+ "step": 300
369
+ },
370
+ {
371
+ "epoch": 0.7,
372
+ "learning_rate": 4.100229885057472e-05,
373
+ "loss": 1.4088,
374
+ "step": 305
375
+ },
376
+ {
377
+ "epoch": 0.71,
378
+ "learning_rate": 3.942528735632184e-05,
379
+ "loss": 1.1202,
380
+ "step": 310
381
+ },
382
+ {
383
+ "epoch": 0.72,
384
+ "learning_rate": 3.784827586206897e-05,
385
+ "loss": 1.1273,
386
+ "step": 315
387
+ },
388
+ {
389
+ "epoch": 0.74,
390
+ "learning_rate": 3.6271264367816096e-05,
391
+ "loss": 0.3291,
392
+ "step": 320
393
+ },
394
+ {
395
+ "epoch": 0.75,
396
+ "learning_rate": 3.4694252873563225e-05,
397
+ "loss": 1.4211,
398
+ "step": 325
399
+ },
400
+ {
401
+ "epoch": 0.76,
402
+ "learning_rate": 3.311724137931035e-05,
403
+ "loss": 0.8079,
404
+ "step": 330
405
+ },
406
+ {
407
+ "epoch": 0.77,
408
+ "learning_rate": 3.154022988505747e-05,
409
+ "loss": 0.8223,
410
+ "step": 335
411
+ },
412
+ {
413
+ "epoch": 0.78,
414
+ "learning_rate": 2.99632183908046e-05,
415
+ "loss": 1.0104,
416
+ "step": 340
417
+ },
418
+ {
419
+ "epoch": 0.79,
420
+ "learning_rate": 2.8386206896551722e-05,
421
+ "loss": 0.7821,
422
+ "step": 345
423
+ },
424
+ {
425
+ "epoch": 0.8,
426
+ "learning_rate": 2.680919540229885e-05,
427
+ "loss": 1.0688,
428
+ "step": 350
429
+ },
430
+ {
431
+ "epoch": 0.82,
432
+ "learning_rate": 2.5232183908045976e-05,
433
+ "loss": 1.0253,
434
+ "step": 355
435
+ },
436
+ {
437
+ "epoch": 0.83,
438
+ "learning_rate": 2.3655172413793105e-05,
439
+ "loss": 0.7701,
440
+ "step": 360
441
+ },
442
+ {
443
+ "epoch": 0.84,
444
+ "learning_rate": 2.207816091954023e-05,
445
+ "loss": 0.4998,
446
+ "step": 365
447
+ },
448
+ {
449
+ "epoch": 0.85,
450
+ "learning_rate": 2.050114942528736e-05,
451
+ "loss": 0.555,
452
+ "step": 370
453
+ },
454
+ {
455
+ "epoch": 0.86,
456
+ "learning_rate": 1.8924137931034484e-05,
457
+ "loss": 0.7096,
458
+ "step": 375
459
+ },
460
+ {
461
+ "epoch": 0.87,
462
+ "learning_rate": 1.7347126436781612e-05,
463
+ "loss": 0.5239,
464
+ "step": 380
465
+ },
466
+ {
467
+ "epoch": 0.89,
468
+ "learning_rate": 1.5770114942528734e-05,
469
+ "loss": 0.761,
470
+ "step": 385
471
+ },
472
+ {
473
+ "epoch": 0.9,
474
+ "learning_rate": 1.4193103448275861e-05,
475
+ "loss": 0.5269,
476
+ "step": 390
477
+ },
478
+ {
479
+ "epoch": 0.91,
480
+ "learning_rate": 1.2616091954022988e-05,
481
+ "loss": 0.5471,
482
+ "step": 395
483
+ },
484
+ {
485
+ "epoch": 0.92,
486
+ "learning_rate": 1.1039080459770115e-05,
487
+ "loss": 1.1833,
488
+ "step": 400
489
+ },
490
+ {
491
+ "epoch": 0.93,
492
+ "learning_rate": 9.462068965517242e-06,
493
+ "loss": 0.7521,
494
+ "step": 405
495
+ },
496
+ {
497
+ "epoch": 0.94,
498
+ "learning_rate": 7.885057471264367e-06,
499
+ "loss": 0.5307,
500
+ "step": 410
501
+ },
502
+ {
503
+ "epoch": 0.95,
504
+ "learning_rate": 6.308045977011494e-06,
505
+ "loss": 1.1967,
506
+ "step": 415
507
+ },
508
+ {
509
+ "epoch": 0.97,
510
+ "learning_rate": 4.731034482758621e-06,
511
+ "loss": 1.2001,
512
+ "step": 420
513
+ },
514
+ {
515
+ "epoch": 0.98,
516
+ "learning_rate": 3.154022988505747e-06,
517
+ "loss": 0.962,
518
+ "step": 425
519
+ },
520
+ {
521
+ "epoch": 0.99,
522
+ "learning_rate": 1.5770114942528735e-06,
523
+ "loss": 0.7309,
524
+ "step": 430
525
+ },
526
+ {
527
+ "epoch": 1.0,
528
+ "learning_rate": 0.0,
529
+ "loss": 0.9335,
530
+ "step": 435
531
+ },
532
+ {
533
+ "epoch": 1.0,
534
+ "step": 435,
535
+ "total_flos": 227324067840000.0,
536
+ "train_loss": 1.530629653492193,
537
+ "train_runtime": 110.6495,
538
+ "train_samples_per_second": 3.931,
539
+ "train_steps_per_second": 3.931
540
+ }
541
+ ],
542
+ "max_steps": 435,
543
+ "num_train_epochs": 1,
544
+ "total_flos": 227324067840000.0,
545
+ "trial_name": null,
546
+ "trial_params": null
547
+ }