maesneako commited on
Commit
d2ec230
1 Parent(s): a3ffcb5

Training in progress, step 2000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9920ab3b7fcf347744f3df6a4d73d6e5424b66f7f9cac3561d3623f5918f8297
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1edf4f86c6269a79267fb4ce7d98eb3f9f313b813ec70cc44a8efe5c7a9bdf8
3
  size 995604017
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20e6bbfaf75a65090cf93fcbf806e6c68aa1ba846abaa532e34d979a48354dd6
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be2cdd04b6fd3a2b0e3d3da36df5b1fc4508e842028d912c36c98523fa24cdd
3
  size 510396521
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac2d72c15bf5b0aed76c4f006a7cbdf876848f08a7f53ee5df9432db53697c40
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d28830c94d92b6d19326a0184936f2862087877e8af1a427622e66aefb96fa
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e96cb9ff72c5907038895fe00e41f20b04c0701ff63b53fc3111d5dcbacfe411
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef7fd4940c48846b401f583408aac64a7d546cbc01ca65e30e622a1b591fafa7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,592 +1,48 @@
1
  {
2
- "best_metric": 3.926462411880493,
3
  "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
4
- "epoch": 7.137192704203014,
5
- "global_step": 36000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.1,
12
- "learning_rate": 1.9880573248407645e-05,
13
- "loss": 4.1429,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.2,
18
- "learning_rate": 1.9681528662420383e-05,
19
- "loss": 3.7366,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 0.3,
24
- "learning_rate": 1.9482484076433124e-05,
25
- "loss": 3.5836,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 0.4,
30
- "learning_rate": 1.9283439490445862e-05,
31
- "loss": 3.4588,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 0.4,
36
- "eval_loss": 3.926462411880493,
37
- "eval_runtime": 958.5106,
38
- "eval_samples_per_second": 42.233,
39
- "eval_steps_per_second": 2.641,
40
- "step": 2000
41
- },
42
- {
43
- "epoch": 0.5,
44
- "learning_rate": 1.90843949044586e-05,
45
- "loss": 3.3359,
46
- "step": 2500
47
- },
48
- {
49
- "epoch": 0.59,
50
- "learning_rate": 1.8885350318471337e-05,
51
- "loss": 3.2161,
52
- "step": 3000
53
- },
54
- {
55
- "epoch": 0.69,
56
- "learning_rate": 1.868630573248408e-05,
57
- "loss": 3.122,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.79,
62
- "learning_rate": 1.8487261146496816e-05,
63
- "loss": 3.0293,
64
- "step": 4000
65
- },
66
- {
67
- "epoch": 0.79,
68
- "eval_loss": 4.006344795227051,
69
- "eval_runtime": 958.9036,
70
- "eval_samples_per_second": 42.216,
71
- "eval_steps_per_second": 2.639,
72
- "step": 4000
73
- },
74
- {
75
- "epoch": 0.89,
76
- "learning_rate": 1.8288216560509554e-05,
77
- "loss": 2.9397,
78
- "step": 4500
79
- },
80
- {
81
- "epoch": 0.99,
82
- "learning_rate": 1.8089171974522295e-05,
83
- "loss": 2.8495,
84
- "step": 5000
85
- },
86
- {
87
- "epoch": 1.09,
88
- "learning_rate": 1.7890127388535033e-05,
89
- "loss": 2.7033,
90
- "step": 5500
91
- },
92
- {
93
- "epoch": 1.19,
94
- "learning_rate": 1.769108280254777e-05,
95
- "loss": 2.6331,
96
- "step": 6000
97
- },
98
- {
99
- "epoch": 1.19,
100
- "eval_loss": 4.1308207511901855,
101
- "eval_runtime": 958.6456,
102
- "eval_samples_per_second": 42.227,
103
  "eval_steps_per_second": 2.64,
104
- "step": 6000
105
- },
106
- {
107
- "epoch": 1.29,
108
- "learning_rate": 1.7492038216560512e-05,
109
- "loss": 2.5481,
110
- "step": 6500
111
- },
112
- {
113
- "epoch": 1.39,
114
- "learning_rate": 1.729299363057325e-05,
115
- "loss": 2.4854,
116
- "step": 7000
117
- },
118
- {
119
- "epoch": 1.49,
120
- "learning_rate": 1.7093949044585988e-05,
121
- "loss": 2.4066,
122
- "step": 7500
123
- },
124
- {
125
- "epoch": 1.59,
126
- "learning_rate": 1.6894904458598726e-05,
127
- "loss": 2.3432,
128
- "step": 8000
129
- },
130
- {
131
- "epoch": 1.59,
132
- "eval_loss": 4.2555365562438965,
133
- "eval_runtime": 959.2942,
134
- "eval_samples_per_second": 42.199,
135
- "eval_steps_per_second": 2.638,
136
- "step": 8000
137
- },
138
- {
139
- "epoch": 1.69,
140
- "learning_rate": 1.6695859872611467e-05,
141
- "loss": 2.2699,
142
- "step": 8500
143
- },
144
- {
145
- "epoch": 1.78,
146
- "learning_rate": 1.6496815286624205e-05,
147
- "loss": 2.1978,
148
- "step": 9000
149
- },
150
- {
151
- "epoch": 1.88,
152
- "learning_rate": 1.6297770700636943e-05,
153
- "loss": 2.1452,
154
- "step": 9500
155
- },
156
- {
157
- "epoch": 1.98,
158
- "learning_rate": 1.6098726114649684e-05,
159
- "loss": 2.0791,
160
- "step": 10000
161
- },
162
- {
163
- "epoch": 1.98,
164
- "eval_loss": 4.388513088226318,
165
- "eval_runtime": 958.7919,
166
- "eval_samples_per_second": 42.221,
167
- "eval_steps_per_second": 2.64,
168
- "step": 10000
169
- },
170
- {
171
- "epoch": 2.08,
172
- "learning_rate": 1.589968152866242e-05,
173
- "loss": 1.9673,
174
- "step": 10500
175
- },
176
- {
177
- "epoch": 2.18,
178
- "learning_rate": 1.570063694267516e-05,
179
- "loss": 1.9147,
180
- "step": 11000
181
- },
182
- {
183
- "epoch": 2.28,
184
- "learning_rate": 1.55015923566879e-05,
185
- "loss": 1.8628,
186
- "step": 11500
187
- },
188
- {
189
- "epoch": 2.38,
190
- "learning_rate": 1.530254777070064e-05,
191
- "loss": 1.7991,
192
- "step": 12000
193
- },
194
- {
195
- "epoch": 2.38,
196
- "eval_loss": 4.536414623260498,
197
- "eval_runtime": 958.7918,
198
- "eval_samples_per_second": 42.221,
199
- "eval_steps_per_second": 2.64,
200
- "step": 12000
201
- },
202
- {
203
- "epoch": 2.48,
204
- "learning_rate": 1.5103503184713378e-05,
205
- "loss": 1.755,
206
- "step": 12500
207
- },
208
- {
209
- "epoch": 2.58,
210
- "learning_rate": 1.4904458598726114e-05,
211
- "loss": 1.699,
212
- "step": 13000
213
- },
214
- {
215
- "epoch": 2.68,
216
- "learning_rate": 1.4705414012738855e-05,
217
- "loss": 1.6565,
218
- "step": 13500
219
- },
220
- {
221
- "epoch": 2.78,
222
- "learning_rate": 1.4506369426751595e-05,
223
- "loss": 1.6005,
224
- "step": 14000
225
- },
226
- {
227
- "epoch": 2.78,
228
- "eval_loss": 4.665872097015381,
229
- "eval_runtime": 958.5203,
230
- "eval_samples_per_second": 42.233,
231
- "eval_steps_per_second": 2.641,
232
- "step": 14000
233
- },
234
- {
235
- "epoch": 2.87,
236
- "learning_rate": 1.4307324840764331e-05,
237
- "loss": 1.5739,
238
- "step": 14500
239
- },
240
- {
241
- "epoch": 2.97,
242
- "learning_rate": 1.4108280254777072e-05,
243
- "loss": 1.5312,
244
- "step": 15000
245
- },
246
- {
247
- "epoch": 3.07,
248
- "learning_rate": 1.3909235668789812e-05,
249
- "loss": 1.4352,
250
- "step": 15500
251
- },
252
- {
253
- "epoch": 3.17,
254
- "learning_rate": 1.371019108280255e-05,
255
- "loss": 1.4018,
256
- "step": 16000
257
- },
258
- {
259
- "epoch": 3.17,
260
- "eval_loss": 4.807626724243164,
261
- "eval_runtime": 959.3471,
262
- "eval_samples_per_second": 42.196,
263
- "eval_steps_per_second": 2.638,
264
- "step": 16000
265
- },
266
- {
267
- "epoch": 3.27,
268
- "learning_rate": 1.3511146496815289e-05,
269
- "loss": 1.362,
270
- "step": 16500
271
- },
272
- {
273
- "epoch": 3.37,
274
- "learning_rate": 1.3312101910828025e-05,
275
- "loss": 1.3312,
276
- "step": 17000
277
- },
278
- {
279
- "epoch": 3.47,
280
- "learning_rate": 1.3113057324840766e-05,
281
- "loss": 1.2996,
282
- "step": 17500
283
- },
284
- {
285
- "epoch": 3.57,
286
- "learning_rate": 1.2914012738853506e-05,
287
- "loss": 1.2584,
288
- "step": 18000
289
- },
290
- {
291
- "epoch": 3.57,
292
- "eval_loss": 4.938758373260498,
293
- "eval_runtime": 959.0042,
294
- "eval_samples_per_second": 42.211,
295
- "eval_steps_per_second": 2.639,
296
- "step": 18000
297
- },
298
- {
299
- "epoch": 3.67,
300
- "learning_rate": 1.2714968152866244e-05,
301
- "loss": 1.2251,
302
- "step": 18500
303
- },
304
- {
305
- "epoch": 3.77,
306
- "learning_rate": 1.2515923566878983e-05,
307
- "loss": 1.1985,
308
- "step": 19000
309
- },
310
- {
311
- "epoch": 3.87,
312
- "learning_rate": 1.231687898089172e-05,
313
- "loss": 1.1689,
314
- "step": 19500
315
- },
316
- {
317
- "epoch": 3.97,
318
- "learning_rate": 1.211783439490446e-05,
319
- "loss": 1.1476,
320
- "step": 20000
321
- },
322
- {
323
- "epoch": 3.97,
324
- "eval_loss": 5.052674293518066,
325
- "eval_runtime": 959.1305,
326
- "eval_samples_per_second": 42.206,
327
- "eval_steps_per_second": 2.639,
328
- "step": 20000
329
- },
330
- {
331
- "epoch": 4.06,
332
- "learning_rate": 1.19187898089172e-05,
333
- "loss": 1.0804,
334
- "step": 20500
335
- },
336
- {
337
- "epoch": 4.16,
338
- "learning_rate": 1.1719745222929938e-05,
339
- "loss": 1.0489,
340
- "step": 21000
341
- },
342
- {
343
- "epoch": 4.26,
344
- "learning_rate": 1.1520700636942677e-05,
345
- "loss": 1.0267,
346
- "step": 21500
347
- },
348
- {
349
- "epoch": 4.36,
350
- "learning_rate": 1.1321656050955413e-05,
351
- "loss": 1.0043,
352
- "step": 22000
353
- },
354
- {
355
- "epoch": 4.36,
356
- "eval_loss": 5.172101974487305,
357
- "eval_runtime": 958.8916,
358
- "eval_samples_per_second": 42.216,
359
- "eval_steps_per_second": 2.64,
360
- "step": 22000
361
- },
362
- {
363
- "epoch": 4.46,
364
- "learning_rate": 1.1122611464968155e-05,
365
- "loss": 0.989,
366
- "step": 22500
367
- },
368
- {
369
- "epoch": 4.56,
370
- "learning_rate": 1.0923566878980894e-05,
371
- "loss": 0.9617,
372
- "step": 23000
373
- },
374
- {
375
- "epoch": 4.66,
376
- "learning_rate": 1.0724522292993632e-05,
377
- "loss": 0.9621,
378
- "step": 23500
379
- },
380
- {
381
- "epoch": 4.76,
382
- "learning_rate": 1.0525477707006371e-05,
383
- "loss": 0.9271,
384
- "step": 24000
385
- },
386
- {
387
- "epoch": 4.76,
388
- "eval_loss": 5.2821364402771,
389
- "eval_runtime": 959.4888,
390
- "eval_samples_per_second": 42.19,
391
- "eval_steps_per_second": 2.638,
392
- "step": 24000
393
- },
394
- {
395
- "epoch": 4.86,
396
- "learning_rate": 1.0326433121019107e-05,
397
- "loss": 0.8991,
398
- "step": 24500
399
- },
400
- {
401
- "epoch": 4.96,
402
- "learning_rate": 1.0127388535031849e-05,
403
- "loss": 0.8971,
404
- "step": 25000
405
- },
406
- {
407
- "epoch": 5.06,
408
- "learning_rate": 9.928343949044586e-06,
409
- "loss": 0.8486,
410
- "step": 25500
411
- },
412
- {
413
- "epoch": 5.15,
414
- "learning_rate": 9.729299363057324e-06,
415
- "loss": 0.8291,
416
- "step": 26000
417
- },
418
- {
419
- "epoch": 5.15,
420
- "eval_loss": 5.384232521057129,
421
- "eval_runtime": 958.7754,
422
- "eval_samples_per_second": 42.222,
423
- "eval_steps_per_second": 2.64,
424
- "step": 26000
425
- },
426
- {
427
- "epoch": 5.25,
428
- "learning_rate": 9.530254777070066e-06,
429
- "loss": 0.8108,
430
- "step": 26500
431
- },
432
- {
433
- "epoch": 5.35,
434
- "learning_rate": 9.331210191082803e-06,
435
- "loss": 0.7972,
436
- "step": 27000
437
- },
438
- {
439
- "epoch": 5.45,
440
- "learning_rate": 9.132165605095543e-06,
441
- "loss": 0.7845,
442
- "step": 27500
443
- },
444
- {
445
- "epoch": 5.55,
446
- "learning_rate": 8.93312101910828e-06,
447
- "loss": 0.776,
448
- "step": 28000
449
- },
450
- {
451
- "epoch": 5.55,
452
- "eval_loss": 5.476274490356445,
453
- "eval_runtime": 959.1462,
454
- "eval_samples_per_second": 42.205,
455
- "eval_steps_per_second": 2.639,
456
- "step": 28000
457
- },
458
- {
459
- "epoch": 5.65,
460
- "learning_rate": 8.734076433121018e-06,
461
- "loss": 0.76,
462
- "step": 28500
463
- },
464
- {
465
- "epoch": 5.75,
466
- "learning_rate": 8.53503184713376e-06,
467
- "loss": 0.7445,
468
- "step": 29000
469
- },
470
- {
471
- "epoch": 5.85,
472
- "learning_rate": 8.335987261146497e-06,
473
- "loss": 0.731,
474
- "step": 29500
475
- },
476
- {
477
- "epoch": 5.95,
478
- "learning_rate": 8.136942675159237e-06,
479
- "loss": 0.7241,
480
- "step": 30000
481
- },
482
- {
483
- "epoch": 5.95,
484
- "eval_loss": 5.554164886474609,
485
- "eval_runtime": 958.7676,
486
- "eval_samples_per_second": 42.222,
487
- "eval_steps_per_second": 2.64,
488
- "step": 30000
489
- },
490
- {
491
- "epoch": 6.05,
492
- "learning_rate": 7.937898089171975e-06,
493
- "loss": 0.7027,
494
- "step": 30500
495
- },
496
- {
497
- "epoch": 6.15,
498
- "learning_rate": 7.738853503184714e-06,
499
- "loss": 0.6773,
500
- "step": 31000
501
- },
502
- {
503
- "epoch": 6.25,
504
- "learning_rate": 7.539808917197453e-06,
505
- "loss": 0.6782,
506
- "step": 31500
507
- },
508
- {
509
- "epoch": 6.34,
510
- "learning_rate": 7.340764331210192e-06,
511
- "loss": 0.6602,
512
- "step": 32000
513
- },
514
- {
515
- "epoch": 6.34,
516
- "eval_loss": 5.626761436462402,
517
- "eval_runtime": 959.3904,
518
- "eval_samples_per_second": 42.195,
519
- "eval_steps_per_second": 2.638,
520
- "step": 32000
521
- },
522
- {
523
- "epoch": 6.44,
524
- "learning_rate": 7.14171974522293e-06,
525
- "loss": 0.6511,
526
- "step": 32500
527
- },
528
- {
529
- "epoch": 6.54,
530
- "learning_rate": 6.942675159235669e-06,
531
- "loss": 0.6436,
532
- "step": 33000
533
- },
534
- {
535
- "epoch": 6.64,
536
- "learning_rate": 6.7436305732484085e-06,
537
- "loss": 0.6435,
538
- "step": 33500
539
- },
540
- {
541
- "epoch": 6.74,
542
- "learning_rate": 6.544585987261147e-06,
543
- "loss": 0.635,
544
- "step": 34000
545
- },
546
- {
547
- "epoch": 6.74,
548
- "eval_loss": 5.690598011016846,
549
- "eval_runtime": 958.9891,
550
- "eval_samples_per_second": 42.212,
551
- "eval_steps_per_second": 2.639,
552
- "step": 34000
553
- },
554
- {
555
- "epoch": 6.84,
556
- "learning_rate": 6.345541401273886e-06,
557
- "loss": 0.6306,
558
- "step": 34500
559
- },
560
- {
561
- "epoch": 6.94,
562
- "learning_rate": 6.1464968152866244e-06,
563
- "loss": 0.6146,
564
- "step": 35000
565
- },
566
- {
567
- "epoch": 7.04,
568
- "learning_rate": 5.947452229299363e-06,
569
- "loss": 0.5998,
570
- "step": 35500
571
- },
572
- {
573
- "epoch": 7.14,
574
- "learning_rate": 5.748407643312103e-06,
575
- "loss": 0.5837,
576
- "step": 36000
577
- },
578
- {
579
- "epoch": 7.14,
580
- "eval_loss": 5.752773761749268,
581
- "eval_runtime": 959.2742,
582
- "eval_samples_per_second": 42.2,
583
- "eval_steps_per_second": 2.638,
584
- "step": 36000
585
  }
586
  ],
587
  "max_steps": 50440,
588
  "num_train_epochs": 10,
589
- "total_flos": 4.40860643136e+16,
590
  "trial_name": null,
591
  "trial_params": null
592
  }
 
1
  {
2
+ "best_metric": 4.065090656280518,
3
  "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
4
+ "epoch": 0.3965107057890563,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.1,
12
+ "learning_rate": 9.940286624203822e-07,
13
+ "loss": 4.6573,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.2,
18
+ "learning_rate": 9.84076433121019e-07,
19
+ "loss": 4.1908,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 0.3,
24
+ "learning_rate": 9.74124203821656e-07,
25
+ "loss": 4.1363,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 0.4,
30
+ "learning_rate": 9.64171974522293e-07,
31
+ "loss": 4.0949,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 0.4,
36
+ "eval_loss": 4.065090656280518,
37
+ "eval_runtime": 958.7496,
38
+ "eval_samples_per_second": 42.223,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  "eval_steps_per_second": 2.64,
40
+ "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "max_steps": 50440,
44
  "num_train_epochs": 10,
45
+ "total_flos": 2449612800000000.0,
46
  "trial_name": null,
47
  "trial_params": null
48
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f92189c0b6c23d5f9d7c3d130adcc7471dc4c86b70cac88476f76de70c090a5a
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a5bfc0fa6a3aa2ab924b9d0fd18640137651b4b65a92e04e2fc725dbd4f332
3
  size 3055
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20e6bbfaf75a65090cf93fcbf806e6c68aa1ba846abaa532e34d979a48354dd6
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be2cdd04b6fd3a2b0e3d3da36df5b1fc4508e842028d912c36c98523fa24cdd
3
  size 510396521
stderr-2e-05.slurm ADDED
The diff for this file is too large to render. See raw diff
 
stderr.slurm CHANGED
The diff for this file is too large to render. See raw diff
 
stdout-2e-05.slurm ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'loss': 4.1429, 'learning_rate': 1.9880573248407645e-05, 'epoch': 0.1}
2
+ {'loss': 3.7366, 'learning_rate': 1.9681528662420383e-05, 'epoch': 0.2}
3
+ {'loss': 3.5836, 'learning_rate': 1.9482484076433124e-05, 'epoch': 0.3}
4
+ {'loss': 3.4588, 'learning_rate': 1.9283439490445862e-05, 'epoch': 0.4}
5
+ {'eval_loss': 3.926462411880493, 'eval_runtime': 958.5106, 'eval_samples_per_second': 42.233, 'eval_steps_per_second': 2.641, 'epoch': 0.4}
6
+ {'loss': 3.3359, 'learning_rate': 1.90843949044586e-05, 'epoch': 0.5}
7
+ {'loss': 3.2161, 'learning_rate': 1.8885350318471337e-05, 'epoch': 0.59}
8
+ {'loss': 3.122, 'learning_rate': 1.868630573248408e-05, 'epoch': 0.69}
9
+ {'loss': 3.0293, 'learning_rate': 1.8487261146496816e-05, 'epoch': 0.79}
10
+ {'eval_loss': 4.006344795227051, 'eval_runtime': 958.9036, 'eval_samples_per_second': 42.216, 'eval_steps_per_second': 2.639, 'epoch': 0.79}
11
+ {'loss': 2.9397, 'learning_rate': 1.8288216560509554e-05, 'epoch': 0.89}
12
+ {'loss': 2.8495, 'learning_rate': 1.8089171974522295e-05, 'epoch': 0.99}
13
+ {'loss': 2.7033, 'learning_rate': 1.7890127388535033e-05, 'epoch': 1.09}
14
+ {'loss': 2.6331, 'learning_rate': 1.769108280254777e-05, 'epoch': 1.19}
15
+ {'eval_loss': 4.1308207511901855, 'eval_runtime': 958.6456, 'eval_samples_per_second': 42.227, 'eval_steps_per_second': 2.64, 'epoch': 1.19}
16
+ {'loss': 2.5481, 'learning_rate': 1.7492038216560512e-05, 'epoch': 1.29}
17
+ {'loss': 2.4854, 'learning_rate': 1.729299363057325e-05, 'epoch': 1.39}
18
+ {'loss': 2.4066, 'learning_rate': 1.7093949044585988e-05, 'epoch': 1.49}
19
+ {'loss': 2.3432, 'learning_rate': 1.6894904458598726e-05, 'epoch': 1.59}
20
+ {'eval_loss': 4.2555365562438965, 'eval_runtime': 959.2942, 'eval_samples_per_second': 42.199, 'eval_steps_per_second': 2.638, 'epoch': 1.59}
21
+ {'loss': 2.2699, 'learning_rate': 1.6695859872611467e-05, 'epoch': 1.69}
22
+ {'loss': 2.1978, 'learning_rate': 1.6496815286624205e-05, 'epoch': 1.78}
23
+ {'loss': 2.1452, 'learning_rate': 1.6297770700636943e-05, 'epoch': 1.88}
24
+ {'loss': 2.0791, 'learning_rate': 1.6098726114649684e-05, 'epoch': 1.98}
25
+ {'eval_loss': 4.388513088226318, 'eval_runtime': 958.7919, 'eval_samples_per_second': 42.221, 'eval_steps_per_second': 2.64, 'epoch': 1.98}
26
+ {'loss': 1.9673, 'learning_rate': 1.589968152866242e-05, 'epoch': 2.08}
27
+ {'loss': 1.9147, 'learning_rate': 1.570063694267516e-05, 'epoch': 2.18}
28
+ {'loss': 1.8628, 'learning_rate': 1.55015923566879e-05, 'epoch': 2.28}
29
+ {'loss': 1.7991, 'learning_rate': 1.530254777070064e-05, 'epoch': 2.38}
30
+ {'eval_loss': 4.536414623260498, 'eval_runtime': 958.7918, 'eval_samples_per_second': 42.221, 'eval_steps_per_second': 2.64, 'epoch': 2.38}
31
+ {'loss': 1.755, 'learning_rate': 1.5103503184713378e-05, 'epoch': 2.48}
32
+ {'loss': 1.699, 'learning_rate': 1.4904458598726114e-05, 'epoch': 2.58}
33
+ {'loss': 1.6565, 'learning_rate': 1.4705414012738855e-05, 'epoch': 2.68}
34
+ {'loss': 1.6005, 'learning_rate': 1.4506369426751595e-05, 'epoch': 2.78}
35
+ {'eval_loss': 4.665872097015381, 'eval_runtime': 958.5203, 'eval_samples_per_second': 42.233, 'eval_steps_per_second': 2.641, 'epoch': 2.78}
36
+ {'loss': 1.5739, 'learning_rate': 1.4307324840764331e-05, 'epoch': 2.87}
37
+ {'loss': 1.5312, 'learning_rate': 1.4108280254777072e-05, 'epoch': 2.97}
38
+ {'loss': 1.4352, 'learning_rate': 1.3909235668789812e-05, 'epoch': 3.07}
39
+ {'loss': 1.4018, 'learning_rate': 1.371019108280255e-05, 'epoch': 3.17}
40
+ {'eval_loss': 4.807626724243164, 'eval_runtime': 959.3471, 'eval_samples_per_second': 42.196, 'eval_steps_per_second': 2.638, 'epoch': 3.17}
41
+ {'loss': 1.362, 'learning_rate': 1.3511146496815289e-05, 'epoch': 3.27}
42
+ {'loss': 1.3312, 'learning_rate': 1.3312101910828025e-05, 'epoch': 3.37}
43
+ {'loss': 1.2996, 'learning_rate': 1.3113057324840766e-05, 'epoch': 3.47}
44
+ {'loss': 1.2584, 'learning_rate': 1.2914012738853506e-05, 'epoch': 3.57}
45
+ {'eval_loss': 4.938758373260498, 'eval_runtime': 959.0042, 'eval_samples_per_second': 42.211, 'eval_steps_per_second': 2.639, 'epoch': 3.57}
46
+ {'loss': 1.2251, 'learning_rate': 1.2714968152866244e-05, 'epoch': 3.67}
47
+ {'loss': 1.1985, 'learning_rate': 1.2515923566878983e-05, 'epoch': 3.77}
48
+ {'loss': 1.1689, 'learning_rate': 1.231687898089172e-05, 'epoch': 3.87}
49
+ {'loss': 1.1476, 'learning_rate': 1.211783439490446e-05, 'epoch': 3.97}
50
+ {'eval_loss': 5.052674293518066, 'eval_runtime': 959.1305, 'eval_samples_per_second': 42.206, 'eval_steps_per_second': 2.639, 'epoch': 3.97}
51
+ {'loss': 1.0804, 'learning_rate': 1.19187898089172e-05, 'epoch': 4.06}
52
+ {'loss': 1.0489, 'learning_rate': 1.1719745222929938e-05, 'epoch': 4.16}
53
+ {'loss': 1.0267, 'learning_rate': 1.1520700636942677e-05, 'epoch': 4.26}
54
+ {'loss': 1.0043, 'learning_rate': 1.1321656050955413e-05, 'epoch': 4.36}
55
+ {'eval_loss': 5.172101974487305, 'eval_runtime': 958.8916, 'eval_samples_per_second': 42.216, 'eval_steps_per_second': 2.64, 'epoch': 4.36}
56
+ {'loss': 0.989, 'learning_rate': 1.1122611464968155e-05, 'epoch': 4.46}
57
+ {'loss': 0.9617, 'learning_rate': 1.0923566878980894e-05, 'epoch': 4.56}
58
+ {'loss': 0.9621, 'learning_rate': 1.0724522292993632e-05, 'epoch': 4.66}
59
+ {'loss': 0.9271, 'learning_rate': 1.0525477707006371e-05, 'epoch': 4.76}
60
+ {'eval_loss': 5.2821364402771, 'eval_runtime': 959.4888, 'eval_samples_per_second': 42.19, 'eval_steps_per_second': 2.638, 'epoch': 4.76}
61
+ {'loss': 0.8991, 'learning_rate': 1.0326433121019107e-05, 'epoch': 4.86}
62
+ {'loss': 0.8971, 'learning_rate': 1.0127388535031849e-05, 'epoch': 4.96}
63
+ {'loss': 0.8486, 'learning_rate': 9.928343949044586e-06, 'epoch': 5.06}
64
+ {'loss': 0.8291, 'learning_rate': 9.729299363057324e-06, 'epoch': 5.15}
65
+ {'eval_loss': 5.384232521057129, 'eval_runtime': 958.7754, 'eval_samples_per_second': 42.222, 'eval_steps_per_second': 2.64, 'epoch': 5.15}
66
+ {'loss': 0.8108, 'learning_rate': 9.530254777070066e-06, 'epoch': 5.25}
67
+ {'loss': 0.7972, 'learning_rate': 9.331210191082803e-06, 'epoch': 5.35}
68
+ {'loss': 0.7845, 'learning_rate': 9.132165605095543e-06, 'epoch': 5.45}
69
+ {'loss': 0.776, 'learning_rate': 8.93312101910828e-06, 'epoch': 5.55}
70
+ {'eval_loss': 5.476274490356445, 'eval_runtime': 959.1462, 'eval_samples_per_second': 42.205, 'eval_steps_per_second': 2.639, 'epoch': 5.55}
71
+ {'loss': 0.76, 'learning_rate': 8.734076433121018e-06, 'epoch': 5.65}
72
+ {'loss': 0.7445, 'learning_rate': 8.53503184713376e-06, 'epoch': 5.75}
73
+ {'loss': 0.731, 'learning_rate': 8.335987261146497e-06, 'epoch': 5.85}
74
+ {'loss': 0.7241, 'learning_rate': 8.136942675159237e-06, 'epoch': 5.95}
75
+ {'eval_loss': 5.554164886474609, 'eval_runtime': 958.7676, 'eval_samples_per_second': 42.222, 'eval_steps_per_second': 2.64, 'epoch': 5.95}
76
+ {'loss': 0.7027, 'learning_rate': 7.937898089171975e-06, 'epoch': 6.05}
77
+ {'loss': 0.6773, 'learning_rate': 7.738853503184714e-06, 'epoch': 6.15}
78
+ {'loss': 0.6782, 'learning_rate': 7.539808917197453e-06, 'epoch': 6.25}
79
+ {'loss': 0.6602, 'learning_rate': 7.340764331210192e-06, 'epoch': 6.34}
80
+ {'eval_loss': 5.626761436462402, 'eval_runtime': 959.3904, 'eval_samples_per_second': 42.195, 'eval_steps_per_second': 2.638, 'epoch': 6.34}
81
+ {'loss': 0.6511, 'learning_rate': 7.14171974522293e-06, 'epoch': 6.44}
82
+ {'loss': 0.6436, 'learning_rate': 6.942675159235669e-06, 'epoch': 6.54}
83
+ {'loss': 0.6435, 'learning_rate': 6.7436305732484085e-06, 'epoch': 6.64}
84
+ {'loss': 0.635, 'learning_rate': 6.544585987261147e-06, 'epoch': 6.74}
85
+ {'eval_loss': 5.690598011016846, 'eval_runtime': 958.9891, 'eval_samples_per_second': 42.212, 'eval_steps_per_second': 2.639, 'epoch': 6.74}
86
+ {'loss': 0.6306, 'learning_rate': 6.345541401273886e-06, 'epoch': 6.84}
87
+ {'loss': 0.6146, 'learning_rate': 6.1464968152866244e-06, 'epoch': 6.94}
88
+ {'loss': 0.5998, 'learning_rate': 5.947452229299363e-06, 'epoch': 7.04}
89
+ {'loss': 0.5837, 'learning_rate': 5.748407643312103e-06, 'epoch': 7.14}
stdout.slurm CHANGED
@@ -1,89 +1,4 @@
1
- {'loss': 4.1429, 'learning_rate': 1.9880573248407645e-05, 'epoch': 0.1}
2
- {'loss': 3.7366, 'learning_rate': 1.9681528662420383e-05, 'epoch': 0.2}
3
- {'loss': 3.5836, 'learning_rate': 1.9482484076433124e-05, 'epoch': 0.3}
4
- {'loss': 3.4588, 'learning_rate': 1.9283439490445862e-05, 'epoch': 0.4}
5
- {'eval_loss': 3.926462411880493, 'eval_runtime': 958.5106, 'eval_samples_per_second': 42.233, 'eval_steps_per_second': 2.641, 'epoch': 0.4}
6
- {'loss': 3.3359, 'learning_rate': 1.90843949044586e-05, 'epoch': 0.5}
7
- {'loss': 3.2161, 'learning_rate': 1.8885350318471337e-05, 'epoch': 0.59}
8
- {'loss': 3.122, 'learning_rate': 1.868630573248408e-05, 'epoch': 0.69}
9
- {'loss': 3.0293, 'learning_rate': 1.8487261146496816e-05, 'epoch': 0.79}
10
- {'eval_loss': 4.006344795227051, 'eval_runtime': 958.9036, 'eval_samples_per_second': 42.216, 'eval_steps_per_second': 2.639, 'epoch': 0.79}
11
- {'loss': 2.9397, 'learning_rate': 1.8288216560509554e-05, 'epoch': 0.89}
12
- {'loss': 2.8495, 'learning_rate': 1.8089171974522295e-05, 'epoch': 0.99}
13
- {'loss': 2.7033, 'learning_rate': 1.7890127388535033e-05, 'epoch': 1.09}
14
- {'loss': 2.6331, 'learning_rate': 1.769108280254777e-05, 'epoch': 1.19}
15
- {'eval_loss': 4.1308207511901855, 'eval_runtime': 958.6456, 'eval_samples_per_second': 42.227, 'eval_steps_per_second': 2.64, 'epoch': 1.19}
16
- {'loss': 2.5481, 'learning_rate': 1.7492038216560512e-05, 'epoch': 1.29}
17
- {'loss': 2.4854, 'learning_rate': 1.729299363057325e-05, 'epoch': 1.39}
18
- {'loss': 2.4066, 'learning_rate': 1.7093949044585988e-05, 'epoch': 1.49}
19
- {'loss': 2.3432, 'learning_rate': 1.6894904458598726e-05, 'epoch': 1.59}
20
- {'eval_loss': 4.2555365562438965, 'eval_runtime': 959.2942, 'eval_samples_per_second': 42.199, 'eval_steps_per_second': 2.638, 'epoch': 1.59}
21
- {'loss': 2.2699, 'learning_rate': 1.6695859872611467e-05, 'epoch': 1.69}
22
- {'loss': 2.1978, 'learning_rate': 1.6496815286624205e-05, 'epoch': 1.78}
23
- {'loss': 2.1452, 'learning_rate': 1.6297770700636943e-05, 'epoch': 1.88}
24
- {'loss': 2.0791, 'learning_rate': 1.6098726114649684e-05, 'epoch': 1.98}
25
- {'eval_loss': 4.388513088226318, 'eval_runtime': 958.7919, 'eval_samples_per_second': 42.221, 'eval_steps_per_second': 2.64, 'epoch': 1.98}
26
- {'loss': 1.9673, 'learning_rate': 1.589968152866242e-05, 'epoch': 2.08}
27
- {'loss': 1.9147, 'learning_rate': 1.570063694267516e-05, 'epoch': 2.18}
28
- {'loss': 1.8628, 'learning_rate': 1.55015923566879e-05, 'epoch': 2.28}
29
- {'loss': 1.7991, 'learning_rate': 1.530254777070064e-05, 'epoch': 2.38}
30
- {'eval_loss': 4.536414623260498, 'eval_runtime': 958.7918, 'eval_samples_per_second': 42.221, 'eval_steps_per_second': 2.64, 'epoch': 2.38}
31
- {'loss': 1.755, 'learning_rate': 1.5103503184713378e-05, 'epoch': 2.48}
32
- {'loss': 1.699, 'learning_rate': 1.4904458598726114e-05, 'epoch': 2.58}
33
- {'loss': 1.6565, 'learning_rate': 1.4705414012738855e-05, 'epoch': 2.68}
34
- {'loss': 1.6005, 'learning_rate': 1.4506369426751595e-05, 'epoch': 2.78}
35
- {'eval_loss': 4.665872097015381, 'eval_runtime': 958.5203, 'eval_samples_per_second': 42.233, 'eval_steps_per_second': 2.641, 'epoch': 2.78}
36
- {'loss': 1.5739, 'learning_rate': 1.4307324840764331e-05, 'epoch': 2.87}
37
- {'loss': 1.5312, 'learning_rate': 1.4108280254777072e-05, 'epoch': 2.97}
38
- {'loss': 1.4352, 'learning_rate': 1.3909235668789812e-05, 'epoch': 3.07}
39
- {'loss': 1.4018, 'learning_rate': 1.371019108280255e-05, 'epoch': 3.17}
40
- {'eval_loss': 4.807626724243164, 'eval_runtime': 959.3471, 'eval_samples_per_second': 42.196, 'eval_steps_per_second': 2.638, 'epoch': 3.17}
41
- {'loss': 1.362, 'learning_rate': 1.3511146496815289e-05, 'epoch': 3.27}
42
- {'loss': 1.3312, 'learning_rate': 1.3312101910828025e-05, 'epoch': 3.37}
43
- {'loss': 1.2996, 'learning_rate': 1.3113057324840766e-05, 'epoch': 3.47}
44
- {'loss': 1.2584, 'learning_rate': 1.2914012738853506e-05, 'epoch': 3.57}
45
- {'eval_loss': 4.938758373260498, 'eval_runtime': 959.0042, 'eval_samples_per_second': 42.211, 'eval_steps_per_second': 2.639, 'epoch': 3.57}
46
- {'loss': 1.2251, 'learning_rate': 1.2714968152866244e-05, 'epoch': 3.67}
47
- {'loss': 1.1985, 'learning_rate': 1.2515923566878983e-05, 'epoch': 3.77}
48
- {'loss': 1.1689, 'learning_rate': 1.231687898089172e-05, 'epoch': 3.87}
49
- {'loss': 1.1476, 'learning_rate': 1.211783439490446e-05, 'epoch': 3.97}
50
- {'eval_loss': 5.052674293518066, 'eval_runtime': 959.1305, 'eval_samples_per_second': 42.206, 'eval_steps_per_second': 2.639, 'epoch': 3.97}
51
- {'loss': 1.0804, 'learning_rate': 1.19187898089172e-05, 'epoch': 4.06}
52
- {'loss': 1.0489, 'learning_rate': 1.1719745222929938e-05, 'epoch': 4.16}
53
- {'loss': 1.0267, 'learning_rate': 1.1520700636942677e-05, 'epoch': 4.26}
54
- {'loss': 1.0043, 'learning_rate': 1.1321656050955413e-05, 'epoch': 4.36}
55
- {'eval_loss': 5.172101974487305, 'eval_runtime': 958.8916, 'eval_samples_per_second': 42.216, 'eval_steps_per_second': 2.64, 'epoch': 4.36}
56
- {'loss': 0.989, 'learning_rate': 1.1122611464968155e-05, 'epoch': 4.46}
57
- {'loss': 0.9617, 'learning_rate': 1.0923566878980894e-05, 'epoch': 4.56}
58
- {'loss': 0.9621, 'learning_rate': 1.0724522292993632e-05, 'epoch': 4.66}
59
- {'loss': 0.9271, 'learning_rate': 1.0525477707006371e-05, 'epoch': 4.76}
60
- {'eval_loss': 5.2821364402771, 'eval_runtime': 959.4888, 'eval_samples_per_second': 42.19, 'eval_steps_per_second': 2.638, 'epoch': 4.76}
61
- {'loss': 0.8991, 'learning_rate': 1.0326433121019107e-05, 'epoch': 4.86}
62
- {'loss': 0.8971, 'learning_rate': 1.0127388535031849e-05, 'epoch': 4.96}
63
- {'loss': 0.8486, 'learning_rate': 9.928343949044586e-06, 'epoch': 5.06}
64
- {'loss': 0.8291, 'learning_rate': 9.729299363057324e-06, 'epoch': 5.15}
65
- {'eval_loss': 5.384232521057129, 'eval_runtime': 958.7754, 'eval_samples_per_second': 42.222, 'eval_steps_per_second': 2.64, 'epoch': 5.15}
66
- {'loss': 0.8108, 'learning_rate': 9.530254777070066e-06, 'epoch': 5.25}
67
- {'loss': 0.7972, 'learning_rate': 9.331210191082803e-06, 'epoch': 5.35}
68
- {'loss': 0.7845, 'learning_rate': 9.132165605095543e-06, 'epoch': 5.45}
69
- {'loss': 0.776, 'learning_rate': 8.93312101910828e-06, 'epoch': 5.55}
70
- {'eval_loss': 5.476274490356445, 'eval_runtime': 959.1462, 'eval_samples_per_second': 42.205, 'eval_steps_per_second': 2.639, 'epoch': 5.55}
71
- {'loss': 0.76, 'learning_rate': 8.734076433121018e-06, 'epoch': 5.65}
72
- {'loss': 0.7445, 'learning_rate': 8.53503184713376e-06, 'epoch': 5.75}
73
- {'loss': 0.731, 'learning_rate': 8.335987261146497e-06, 'epoch': 5.85}
74
- {'loss': 0.7241, 'learning_rate': 8.136942675159237e-06, 'epoch': 5.95}
75
- {'eval_loss': 5.554164886474609, 'eval_runtime': 958.7676, 'eval_samples_per_second': 42.222, 'eval_steps_per_second': 2.64, 'epoch': 5.95}
76
- {'loss': 0.7027, 'learning_rate': 7.937898089171975e-06, 'epoch': 6.05}
77
- {'loss': 0.6773, 'learning_rate': 7.738853503184714e-06, 'epoch': 6.15}
78
- {'loss': 0.6782, 'learning_rate': 7.539808917197453e-06, 'epoch': 6.25}
79
- {'loss': 0.6602, 'learning_rate': 7.340764331210192e-06, 'epoch': 6.34}
80
- {'eval_loss': 5.626761436462402, 'eval_runtime': 959.3904, 'eval_samples_per_second': 42.195, 'eval_steps_per_second': 2.638, 'epoch': 6.34}
81
- {'loss': 0.6511, 'learning_rate': 7.14171974522293e-06, 'epoch': 6.44}
82
- {'loss': 0.6436, 'learning_rate': 6.942675159235669e-06, 'epoch': 6.54}
83
- {'loss': 0.6435, 'learning_rate': 6.7436305732484085e-06, 'epoch': 6.64}
84
- {'loss': 0.635, 'learning_rate': 6.544585987261147e-06, 'epoch': 6.74}
85
- {'eval_loss': 5.690598011016846, 'eval_runtime': 958.9891, 'eval_samples_per_second': 42.212, 'eval_steps_per_second': 2.639, 'epoch': 6.74}
86
- {'loss': 0.6306, 'learning_rate': 6.345541401273886e-06, 'epoch': 6.84}
87
- {'loss': 0.6146, 'learning_rate': 6.1464968152866244e-06, 'epoch': 6.94}
88
- {'loss': 0.5998, 'learning_rate': 5.947452229299363e-06, 'epoch': 7.04}
89
- {'loss': 0.5837, 'learning_rate': 5.748407643312103e-06, 'epoch': 7.14}
 
1
+ {'loss': 4.6573, 'learning_rate': 9.940286624203822e-07, 'epoch': 0.1}
2
+ {'loss': 4.1908, 'learning_rate': 9.84076433121019e-07, 'epoch': 0.2}
3
+ {'loss': 4.1363, 'learning_rate': 9.74124203821656e-07, 'epoch': 0.3}
4
+ {'loss': 4.0949, 'learning_rate': 9.64171974522293e-07, 'epoch': 0.4}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f92189c0b6c23d5f9d7c3d130adcc7471dc4c86b70cac88476f76de70c090a5a
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a5bfc0fa6a3aa2ab924b9d0fd18640137651b4b65a92e04e2fc725dbd4f332
3
  size 3055