AlekseyKorshuk commited on
Commit
e2d31cb
1 Parent(s): 79b47b3

huggingartists

Browse files
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.6784825325012207, "eval_runtime": 1.6409, "eval_samples_per_second": 40.831, "eval_steps_per_second": 5.485, "epoch": 10.0}
 
1
+ {"eval_loss": 1.5699154138565063, "eval_runtime": 1.6396, "eval_samples_per_second": 40.864, "eval_steps_per_second": 5.489, "epoch": 19.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fecd751b5700963f44c2ee402ff0b7a4c67e61624a859a934f710eb191c45a74
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c6d9455a9248044f8aa6d328cfc1cacfec66afa20d954f5b23c85da87bb276
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d097a87db35207ad3d05130da50e9ebb55fa07513c0df666ca52f27cb06b358
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0547fe049e205f9d46a99245e89e28e83958fd72a8d9fa5b43bf74f1ac59c7d0
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90d824a120c62c3e83dd6a73c662684e5dbd1b4b707b29c9ebf5e0acf99da375
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b234073b5cdc8b007569ead18a8e3db6e74c4f98109b5edb6969fffac2bc99
3
  size 510396521
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66929c1ab9e7989bb184f169bf7f2442be88e09dfd4b503a328757b1b59345f6
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48674dcd750e9b55c532cef24aef9f7d2e9eb9e19cbfa1a48641e0750f3f3e85
3
  size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48279aa48ac21cf8f06ef968c16ddaba90d867b569da938adf4544c7aea84cf0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425621844a56a29950ce1fbf1c301ca1c67a62f1420b3598db0e6910a94a3c8c
3
  size 623
trainer_state.json CHANGED
@@ -1,592 +1,338 @@
1
  {
2
- "best_metric": 1.6784825325012207,
3
- "best_model_checkpoint": "output/headie-one/checkpoint-423",
4
- "epoch": 9.0,
5
- "global_step": 423,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.11,
12
- "learning_rate": 0.00013340429968430322,
13
- "loss": 3.3897,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.21,
18
- "learning_rate": 0.00012243723783011615,
19
- "loss": 3.2804,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.32,
24
- "learning_rate": 0.00010551244942700737,
25
- "loss": 3.1867,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.43,
30
- "learning_rate": 8.450286230835245e-05,
31
- "loss": 3.1078,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 0.53,
36
- "learning_rate": 6.173343555458685e-05,
37
- "loss": 2.9997,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.64,
42
- "learning_rate": 3.972387529741623e-05,
43
- "loss": 3.037,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.74,
48
- "learning_rate": 2.090979946151954e-05,
49
- "loss": 2.8067,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.85,
54
- "learning_rate": 7.373207860012155e-06,
55
- "loss": 2.6461,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.96,
60
  "learning_rate": 6.1208431258209e-07,
61
- "loss": 2.7311,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 1.0,
66
- "eval_loss": 2.646137237548828,
67
- "eval_runtime": 1.4447,
68
- "eval_samples_per_second": 46.378,
69
- "eval_steps_per_second": 6.23,
70
  "step": 47
71
  },
72
  {
73
  "epoch": 1.06,
74
- "learning_rate": 1.3746270344901413e-06,
75
- "loss": 2.8414,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 1.17,
80
- "learning_rate": 9.576451662754438e-06,
81
- "loss": 2.7047,
82
  "step": 55
83
  },
84
  {
85
  "epoch": 1.28,
86
- "learning_rate": 2.4309929383066146e-05,
87
- "loss": 2.7129,
88
  "step": 60
89
  },
90
  {
91
  "epoch": 1.38,
92
- "learning_rate": 4.3944626783346644e-05,
93
- "loss": 2.7639,
94
  "step": 65
95
  },
96
  {
97
  "epoch": 1.49,
98
- "learning_rate": 6.630773257727353e-05,
99
- "loss": 2.67,
100
  "step": 70
101
  },
102
  {
103
  "epoch": 1.6,
104
- "learning_rate": 8.892450484875447e-05,
105
- "loss": 2.4349,
106
  "step": 75
107
  },
108
  {
109
  "epoch": 1.7,
110
- "learning_rate": 0.00010929213048843373,
111
- "loss": 2.6474,
112
  "step": 80
113
  },
114
  {
115
  "epoch": 1.81,
116
- "learning_rate": 0.00012515669103944476,
117
- "loss": 2.741,
118
  "step": 85
119
  },
120
  {
121
  "epoch": 1.91,
122
  "learning_rate": 0.00013476258540873022,
123
- "loss": 2.5921,
124
  "step": 90
125
  },
126
  {
127
  "epoch": 2.0,
128
- "eval_loss": 2.3871500492095947,
129
- "eval_runtime": 1.4491,
130
- "eval_samples_per_second": 46.235,
131
- "eval_steps_per_second": 6.211,
132
  "step": 94
133
  },
134
  {
135
  "epoch": 2.02,
136
  "learning_rate": 0.00013704680787354832,
137
- "loss": 2.4265,
138
  "step": 95
139
  },
140
  {
141
  "epoch": 2.13,
142
  "learning_rate": 0.00013175658222600302,
143
- "loss": 2.5165,
144
  "step": 100
145
  },
146
  {
147
  "epoch": 2.23,
148
- "learning_rate": 0.00011947733444744994,
149
- "loss": 2.1946,
150
  "step": 105
151
  },
152
  {
153
  "epoch": 2.34,
154
- "learning_rate": 0.0001015679084058065,
155
- "loss": 2.3182,
156
  "step": 110
157
  },
158
  {
159
  "epoch": 2.45,
160
- "learning_rate": 8.001019372440279e-05,
161
- "loss": 2.2781,
162
  "step": 115
163
  },
164
  {
165
  "epoch": 2.55,
166
- "learning_rate": 5.718980627559731e-05,
167
- "loss": 2.2824,
168
  "step": 120
169
  },
170
  {
171
  "epoch": 2.66,
172
- "learning_rate": 3.563209159419354e-05,
173
- "loss": 2.2139,
174
  "step": 125
175
  },
176
  {
177
  "epoch": 2.77,
178
- "learning_rate": 1.772266555255008e-05,
179
- "loss": 2.3554,
180
  "step": 130
181
  },
182
  {
183
  "epoch": 2.87,
184
- "learning_rate": 5.443417773996978e-06,
185
- "loss": 2.1634,
186
  "step": 135
187
  },
188
  {
189
  "epoch": 2.98,
190
- "learning_rate": 1.5319212645169297e-07,
191
- "loss": 2.2246,
192
  "step": 140
193
  },
194
  {
195
  "epoch": 3.0,
196
- "eval_loss": 2.2159550189971924,
197
- "eval_runtime": 1.505,
198
- "eval_samples_per_second": 44.519,
199
- "eval_steps_per_second": 5.98,
200
  "step": 141
201
  },
202
  {
203
  "epoch": 3.09,
204
- "learning_rate": 2.4374145912697595e-06,
205
- "loss": 2.0033,
206
  "step": 145
207
  },
208
  {
209
  "epoch": 3.19,
210
- "learning_rate": 1.204330896055522e-05,
211
- "loss": 2.1305,
212
  "step": 150
213
  },
214
  {
215
  "epoch": 3.3,
216
- "learning_rate": 2.790786951156628e-05,
217
- "loss": 2.0744,
218
  "step": 155
219
  },
220
  {
221
  "epoch": 3.4,
222
- "learning_rate": 4.8275495151245426e-05,
223
- "loss": 2.1473,
224
  "step": 160
225
  },
226
  {
227
  "epoch": 3.51,
228
- "learning_rate": 7.089226742272638e-05,
229
- "loss": 2.2506,
230
  "step": 165
231
  },
232
  {
233
  "epoch": 3.62,
234
- "learning_rate": 9.325537321665337e-05,
235
- "loss": 2.157,
236
  "step": 170
237
  },
238
  {
239
  "epoch": 3.72,
240
- "learning_rate": 0.00011289007061693382,
241
- "loss": 2.1313,
242
  "step": 175
243
  },
244
  {
245
  "epoch": 3.83,
246
- "learning_rate": 0.00012762354833724553,
247
- "loss": 2.0593,
248
  "step": 180
249
  },
250
  {
251
  "epoch": 3.94,
252
- "learning_rate": 0.00013582537296550986,
253
- "loss": 2.0425,
254
  "step": 185
255
  },
256
  {
257
  "epoch": 4.0,
258
- "eval_loss": 2.1005759239196777,
259
- "eval_runtime": 1.5665,
260
- "eval_samples_per_second": 42.771,
261
- "eval_steps_per_second": 5.745,
262
  "step": 188
263
  },
264
  {
265
  "epoch": 4.04,
266
- "learning_rate": 0.0001365879156874179,
267
- "loss": 1.9915,
268
  "step": 190
269
  },
270
  {
271
  "epoch": 4.15,
272
- "learning_rate": 0.00012982679213998792,
273
- "loss": 1.8713,
274
  "step": 195
275
  },
276
  {
277
  "epoch": 4.26,
278
- "learning_rate": 0.00011629020053848047,
279
- "loss": 1.8176,
280
  "step": 200
281
  },
282
  {
283
  "epoch": 4.36,
284
- "learning_rate": 9.747612470258382e-05,
285
- "loss": 1.8704,
286
  "step": 205
287
  },
288
  {
289
  "epoch": 4.47,
290
- "learning_rate": 7.546656444541333e-05,
291
- "loss": 1.9435,
292
  "step": 210
293
  },
294
  {
295
  "epoch": 4.57,
296
- "learning_rate": 5.2697137691647635e-05,
297
- "loss": 2.0704,
298
  "step": 215
299
  },
300
  {
301
  "epoch": 4.68,
302
- "learning_rate": 3.1687550572992616e-05,
303
- "loss": 2.0372,
304
  "step": 220
305
  },
306
  {
307
  "epoch": 4.79,
308
- "learning_rate": 1.4762762169883855e-05,
309
- "loss": 1.9286,
310
  "step": 225
311
  },
312
  {
313
  "epoch": 4.89,
314
- "learning_rate": 3.795700315696817e-06,
315
- "loss": 1.7845,
316
  "step": 230
317
  },
318
  {
319
  "epoch": 5.0,
320
  "learning_rate": 0.0,
321
- "loss": 1.7819,
322
  "step": 235
323
  },
324
  {
325
  "epoch": 5.0,
326
- "eval_loss": 1.9693368673324585,
327
- "eval_runtime": 1.5591,
328
- "eval_samples_per_second": 42.973,
329
- "eval_steps_per_second": 5.772,
330
  "step": 235
331
- },
332
- {
333
- "epoch": 5.11,
334
- "learning_rate": 3.7957003156967485e-06,
335
- "loss": 1.6773,
336
- "step": 240
337
- },
338
- {
339
- "epoch": 5.21,
340
- "learning_rate": 1.4762762169883802e-05,
341
- "loss": 1.9066,
342
- "step": 245
343
- },
344
- {
345
- "epoch": 5.32,
346
- "learning_rate": 3.168755057299255e-05,
347
- "loss": 1.7702,
348
- "step": 250
349
- },
350
- {
351
- "epoch": 5.43,
352
- "learning_rate": 5.269713769164743e-05,
353
- "loss": 1.7718,
354
- "step": 255
355
- },
356
- {
357
- "epoch": 5.53,
358
- "learning_rate": 7.546656444541325e-05,
359
- "loss": 1.6923,
360
- "step": 260
361
- },
362
- {
363
- "epoch": 5.64,
364
- "learning_rate": 9.747612470258363e-05,
365
- "loss": 1.8134,
366
- "step": 265
367
- },
368
- {
369
- "epoch": 5.74,
370
- "learning_rate": 0.0001162902005384805,
371
- "loss": 1.6943,
372
- "step": 270
373
- },
374
- {
375
- "epoch": 5.85,
376
- "learning_rate": 0.00012982679213998787,
377
- "loss": 1.7474,
378
- "step": 275
379
- },
380
- {
381
- "epoch": 5.96,
382
- "learning_rate": 0.00013658791568741792,
383
- "loss": 1.664,
384
- "step": 280
385
- },
386
- {
387
- "epoch": 6.0,
388
- "eval_loss": 1.8830074071884155,
389
- "eval_runtime": 1.6028,
390
- "eval_samples_per_second": 41.801,
391
- "eval_steps_per_second": 5.615,
392
- "step": 282
393
- },
394
- {
395
- "epoch": 6.06,
396
- "learning_rate": 0.00013582537296550986,
397
- "loss": 1.7593,
398
- "step": 285
399
- },
400
- {
401
- "epoch": 6.17,
402
- "learning_rate": 0.00012762354833724559,
403
- "loss": 1.68,
404
- "step": 290
405
- },
406
- {
407
- "epoch": 6.28,
408
- "learning_rate": 0.0001128900706169339,
409
- "loss": 1.5629,
410
- "step": 295
411
- },
412
- {
413
- "epoch": 6.38,
414
- "learning_rate": 9.325537321665346e-05,
415
- "loss": 1.5448,
416
- "step": 300
417
- },
418
- {
419
- "epoch": 6.49,
420
- "learning_rate": 7.089226742272658e-05,
421
- "loss": 1.5643,
422
- "step": 305
423
- },
424
- {
425
- "epoch": 6.6,
426
- "learning_rate": 4.827549515124539e-05,
427
- "loss": 1.5585,
428
- "step": 310
429
- },
430
- {
431
- "epoch": 6.7,
432
- "learning_rate": 2.7907869511566348e-05,
433
- "loss": 1.724,
434
- "step": 315
435
- },
436
- {
437
- "epoch": 6.81,
438
- "learning_rate": 1.2043308960555334e-05,
439
- "loss": 1.3896,
440
- "step": 320
441
- },
442
- {
443
- "epoch": 6.91,
444
- "learning_rate": 2.437414591269752e-06,
445
- "loss": 1.4656,
446
- "step": 325
447
- },
448
- {
449
- "epoch": 7.0,
450
- "eval_loss": 1.7848814725875854,
451
- "eval_runtime": 1.6359,
452
- "eval_samples_per_second": 40.956,
453
- "eval_steps_per_second": 5.502,
454
- "step": 329
455
- },
456
- {
457
- "epoch": 7.02,
458
- "learning_rate": 1.5319212645167772e-07,
459
- "loss": 1.4519,
460
- "step": 330
461
- },
462
- {
463
- "epoch": 7.13,
464
- "learning_rate": 5.443417773996994e-06,
465
- "loss": 1.5894,
466
- "step": 335
467
- },
468
- {
469
- "epoch": 7.23,
470
- "learning_rate": 1.772266555255011e-05,
471
- "loss": 1.377,
472
- "step": 340
473
- },
474
- {
475
- "epoch": 7.34,
476
- "learning_rate": 3.563209159419346e-05,
477
- "loss": 1.4357,
478
- "step": 345
479
- },
480
- {
481
- "epoch": 7.45,
482
- "learning_rate": 5.718980627559723e-05,
483
- "loss": 1.5269,
484
- "step": 350
485
- },
486
- {
487
- "epoch": 7.55,
488
- "learning_rate": 8.001019372440265e-05,
489
- "loss": 1.4501,
490
- "step": 355
491
- },
492
- {
493
- "epoch": 7.66,
494
- "learning_rate": 0.00010156790840580641,
495
- "loss": 1.2612,
496
- "step": 360
497
- },
498
- {
499
- "epoch": 7.77,
500
- "learning_rate": 0.0001194773344474498,
501
- "loss": 1.3453,
502
- "step": 365
503
- },
504
- {
505
- "epoch": 7.87,
506
- "learning_rate": 0.00013175658222600294,
507
- "loss": 1.4728,
508
- "step": 370
509
- },
510
- {
511
- "epoch": 7.98,
512
- "learning_rate": 0.00013704680787354832,
513
- "loss": 1.4687,
514
- "step": 375
515
- },
516
- {
517
- "epoch": 8.0,
518
- "eval_loss": 1.76621675491333,
519
- "eval_runtime": 1.6517,
520
- "eval_samples_per_second": 40.565,
521
- "eval_steps_per_second": 5.449,
522
- "step": 376
523
- },
524
- {
525
- "epoch": 8.09,
526
- "learning_rate": 0.00013476258540873022,
527
- "loss": 1.4856,
528
- "step": 380
529
- },
530
- {
531
- "epoch": 8.19,
532
- "learning_rate": 0.00012515669103944473,
533
- "loss": 1.2361,
534
- "step": 385
535
- },
536
- {
537
- "epoch": 8.3,
538
- "learning_rate": 0.00010929213048843395,
539
- "loss": 1.1886,
540
- "step": 390
541
- },
542
- {
543
- "epoch": 8.4,
544
- "learning_rate": 8.892450484875472e-05,
545
- "loss": 1.3658,
546
- "step": 395
547
- },
548
- {
549
- "epoch": 8.51,
550
- "learning_rate": 6.630773257727356e-05,
551
- "loss": 1.3489,
552
- "step": 400
553
- },
554
- {
555
- "epoch": 8.62,
556
- "learning_rate": 4.394462678334666e-05,
557
- "loss": 1.2336,
558
- "step": 405
559
- },
560
- {
561
- "epoch": 8.72,
562
- "learning_rate": 2.4309929383066207e-05,
563
- "loss": 1.2826,
564
- "step": 410
565
- },
566
- {
567
- "epoch": 8.83,
568
- "learning_rate": 9.576451662754362e-06,
569
- "loss": 1.381,
570
- "step": 415
571
- },
572
- {
573
- "epoch": 8.94,
574
- "learning_rate": 1.3746270344902175e-06,
575
- "loss": 1.2151,
576
- "step": 420
577
- },
578
- {
579
- "epoch": 9.0,
580
- "eval_loss": 1.6784825325012207,
581
- "eval_runtime": 1.678,
582
- "eval_samples_per_second": 39.927,
583
- "eval_steps_per_second": 5.363,
584
- "step": 423
585
  }
586
  ],
587
- "max_steps": 470,
588
- "num_train_epochs": 10,
589
- "total_flos": 439754489856000.0,
590
  "trial_name": null,
591
  "trial_params": null
592
  }
 
1
  {
2
+ "best_metric": 1.5699154138565063,
3
+ "best_model_checkpoint": "output/headie-one/checkpoint-235",
4
+ "epoch": 5.0,
5
+ "global_step": 235,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.11,
12
+ "learning_rate": 0.00013340429968430325,
13
+ "loss": 1.271,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.21,
18
+ "learning_rate": 0.00012243723783011623,
19
+ "loss": 1.1922,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.32,
24
+ "learning_rate": 0.00010551244942700729,
25
+ "loss": 1.0699,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.43,
30
+ "learning_rate": 8.450286230835261e-05,
31
+ "loss": 1.0247,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 0.53,
36
+ "learning_rate": 6.17334355545868e-05,
37
+ "loss": 0.9594,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.64,
42
+ "learning_rate": 3.9723875297416415e-05,
43
+ "loss": 0.9693,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.74,
48
+ "learning_rate": 2.090979946151953e-05,
49
+ "loss": 1.106,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.85,
54
+ "learning_rate": 7.373207860012262e-06,
55
+ "loss": 1.0108,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.96,
60
  "learning_rate": 6.1208431258209e-07,
61
+ "loss": 1.0614,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 1.0,
66
+ "eval_loss": 1.6016333103179932,
67
+ "eval_runtime": 1.5387,
68
+ "eval_samples_per_second": 43.543,
69
+ "eval_steps_per_second": 5.849,
70
  "step": 47
71
  },
72
  {
73
  "epoch": 1.06,
74
+ "learning_rate": 1.3746270344901794e-06,
75
+ "loss": 1.1841,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 1.17,
80
+ "learning_rate": 9.5764516627544e-06,
81
+ "loss": 1.0057,
82
  "step": 55
83
  },
84
  {
85
  "epoch": 1.28,
86
+ "learning_rate": 2.4309929383065895e-05,
87
+ "loss": 0.8867,
88
  "step": 60
89
  },
90
  {
91
  "epoch": 1.38,
92
+ "learning_rate": 4.394462678334651e-05,
93
+ "loss": 1.0264,
94
  "step": 65
95
  },
96
  {
97
  "epoch": 1.49,
98
+ "learning_rate": 6.630773257727362e-05,
99
+ "loss": 0.9567,
100
  "step": 70
101
  },
102
  {
103
  "epoch": 1.6,
104
+ "learning_rate": 8.892450484875433e-05,
105
+ "loss": 0.8565,
106
  "step": 75
107
  },
108
  {
109
  "epoch": 1.7,
110
+ "learning_rate": 0.00010929213048843382,
111
+ "loss": 1.0029,
112
  "step": 80
113
  },
114
  {
115
  "epoch": 1.81,
116
+ "learning_rate": 0.00012515669103944463,
117
+ "loss": 1.0274,
118
  "step": 85
119
  },
120
  {
121
  "epoch": 1.91,
122
  "learning_rate": 0.00013476258540873022,
123
+ "loss": 0.9839,
124
  "step": 90
125
  },
126
  {
127
  "epoch": 2.0,
128
+ "eval_loss": 1.6458494663238525,
129
+ "eval_runtime": 1.5583,
130
+ "eval_samples_per_second": 42.996,
131
+ "eval_steps_per_second": 5.776,
132
  "step": 94
133
  },
134
  {
135
  "epoch": 2.02,
136
  "learning_rate": 0.00013704680787354832,
137
+ "loss": 0.9767,
138
  "step": 95
139
  },
140
  {
141
  "epoch": 2.13,
142
  "learning_rate": 0.00013175658222600302,
143
+ "loss": 1.1312,
144
  "step": 100
145
  },
146
  {
147
  "epoch": 2.23,
148
+ "learning_rate": 0.00011947733444745008,
149
+ "loss": 0.7833,
150
  "step": 105
151
  },
152
  {
153
  "epoch": 2.34,
154
+ "learning_rate": 0.00010156790840580658,
155
+ "loss": 0.8376,
156
  "step": 110
157
  },
158
  {
159
  "epoch": 2.45,
160
+ "learning_rate": 8.001019372440257e-05,
161
+ "loss": 0.9393,
162
  "step": 115
163
  },
164
  {
165
  "epoch": 2.55,
166
+ "learning_rate": 5.71898062755974e-05,
167
+ "loss": 0.8203,
168
  "step": 120
169
  },
170
  {
171
  "epoch": 2.66,
172
+ "learning_rate": 3.5632091594193825e-05,
173
+ "loss": 0.9363,
174
  "step": 125
175
  },
176
  {
177
  "epoch": 2.77,
178
+ "learning_rate": 1.7722665552550217e-05,
179
+ "loss": 0.9023,
180
  "step": 130
181
  },
182
  {
183
  "epoch": 2.87,
184
+ "learning_rate": 5.443417773996963e-06,
185
+ "loss": 0.7999,
186
  "step": 135
187
  },
188
  {
189
  "epoch": 2.98,
190
+ "learning_rate": 1.5319212645170819e-07,
191
+ "loss": 0.8643,
192
  "step": 140
193
  },
194
  {
195
  "epoch": 3.0,
196
+ "eval_loss": 1.5993356704711914,
197
+ "eval_runtime": 1.5942,
198
+ "eval_samples_per_second": 42.027,
199
+ "eval_steps_per_second": 5.645,
200
  "step": 141
201
  },
202
  {
203
  "epoch": 3.09,
204
+ "learning_rate": 2.4374145912697747e-06,
205
+ "loss": 0.6501,
206
  "step": 145
207
  },
208
  {
209
  "epoch": 3.19,
210
+ "learning_rate": 1.204330896055538e-05,
211
+ "loss": 0.7052,
212
  "step": 150
213
  },
214
  {
215
  "epoch": 3.3,
216
+ "learning_rate": 2.7907869511566212e-05,
217
+ "loss": 0.9473,
218
  "step": 155
219
  },
220
  {
221
  "epoch": 3.4,
222
+ "learning_rate": 4.8275495151245236e-05,
223
+ "loss": 0.7318,
224
  "step": 160
225
  },
226
  {
227
  "epoch": 3.51,
228
+ "learning_rate": 7.08922674227264e-05,
229
+ "loss": 0.7595,
230
  "step": 165
231
  },
232
  {
233
  "epoch": 3.62,
234
+ "learning_rate": 9.325537321665307e-05,
235
+ "loss": 0.8141,
236
  "step": 170
237
  },
238
  {
239
  "epoch": 3.72,
240
+ "learning_rate": 0.00011289007061693377,
241
+ "loss": 0.7721,
242
  "step": 175
243
  },
244
  {
245
  "epoch": 3.83,
246
+ "learning_rate": 0.0001276235483372456,
247
+ "loss": 0.8703,
248
  "step": 180
249
  },
250
  {
251
  "epoch": 3.94,
252
+ "learning_rate": 0.00013582537296550983,
253
+ "loss": 0.8111,
254
  "step": 185
255
  },
256
  {
257
  "epoch": 4.0,
258
+ "eval_loss": 1.582382082939148,
259
+ "eval_runtime": 1.6284,
260
+ "eval_samples_per_second": 41.144,
261
+ "eval_steps_per_second": 5.527,
262
  "step": 188
263
  },
264
  {
265
  "epoch": 4.04,
266
+ "learning_rate": 0.00013658791568741795,
267
+ "loss": 0.8595,
268
  "step": 190
269
  },
270
  {
271
  "epoch": 4.15,
272
+ "learning_rate": 0.00012982679213998795,
273
+ "loss": 0.6313,
274
  "step": 195
275
  },
276
  {
277
  "epoch": 4.26,
278
+ "learning_rate": 0.00011629020053848043,
279
+ "loss": 0.8127,
280
  "step": 200
281
  },
282
  {
283
  "epoch": 4.36,
284
+ "learning_rate": 9.747612470258399e-05,
285
+ "loss": 0.8002,
286
  "step": 205
287
  },
288
  {
289
  "epoch": 4.47,
290
+ "learning_rate": 7.546656444541318e-05,
291
+ "loss": 0.7541,
292
  "step": 210
293
  },
294
  {
295
  "epoch": 4.57,
296
+ "learning_rate": 5.269713769164736e-05,
297
+ "loss": 0.7412,
298
  "step": 215
299
  },
300
  {
301
  "epoch": 4.68,
302
+ "learning_rate": 3.1687550572992684e-05,
303
+ "loss": 0.6833,
304
  "step": 220
305
  },
306
  {
307
  "epoch": 4.79,
308
+ "learning_rate": 1.4762762169884062e-05,
309
+ "loss": 0.7674,
310
  "step": 225
311
  },
312
  {
313
  "epoch": 4.89,
314
+ "learning_rate": 3.795700315696802e-06,
315
+ "loss": 0.6441,
316
  "step": 230
317
  },
318
  {
319
  "epoch": 5.0,
320
  "learning_rate": 0.0,
321
+ "loss": 0.761,
322
  "step": 235
323
  },
324
  {
325
  "epoch": 5.0,
326
+ "eval_loss": 1.5699154138565063,
327
+ "eval_runtime": 1.6286,
328
+ "eval_samples_per_second": 41.139,
329
+ "eval_steps_per_second": 5.526,
330
  "step": 235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  }
332
  ],
333
+ "max_steps": 893,
334
+ "num_train_epochs": 19,
335
+ "total_flos": 244308049920000.0,
336
  "trial_name": null,
337
  "trial_params": null
338
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaa944c20cbf502e00ccd36c3c1dbba26621b29241ccaca9e30360a750f15bf5
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069f57f162316ae065f1862c62495b43d5890dbecac1d9475fdbff65ca730f07
3
  size 3311