anhtrandc commited on
Commit
b3e36f9
1 Parent(s): 49c342a
phanthiet_expert_1000_steps/checkpoint-120/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5e3270eb38f5b931d4ddfc95f7861e9f7f588fbb8194e8a92bc881af805048b
3
- size 815418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c79467403667880b8dd7de3cd218fedbcd13d613cd918f2db785c3e1a6c6b3a8
3
+ size 815482
phanthiet_expert_1000_steps/checkpoint-120/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a17b4f6f8cb5c27490e265dad314b411c637b323ea4db8230d1820d76d5e63fd
3
  size 281733156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15c9bfa779e073110aed2fcbf57d3ec60d14f9f39ee94d2905469abfbdd688b
3
  size 281733156
phanthiet_expert_1000_steps/checkpoint-120/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02077e133ef8a3123ba4dbbb5f9833947bafbe71c53913b566680ee9b5c2193
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:111dc4206561b762e6dbb65e6bdfefd257af178f84c247f75215e194fc055a81
3
  size 13990
phanthiet_expert_1000_steps/checkpoint-120/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 4.078540325164795,
3
  "best_model_checkpoint": "phanthiet_expert_1000_steps/checkpoint-120",
4
  "epoch": 1.6666666666666665,
5
  "eval_steps": 120,
@@ -13,1079 +13,1079 @@
13
  "flops": 0.0,
14
  "iter_time": 0.0,
15
  "learning_rate": 1e-05,
16
- "loss": 4.1924,
17
  "remaining_time": 0.0,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 0.03,
22
- "flops": 757182085336.3389,
23
- "iter_time": 2.8997883796691895,
24
  "learning_rate": 9.989989989989992e-06,
25
- "loss": 4.19275,
26
- "remaining_time": 2893.988802909851,
27
  "step": 2
28
  },
29
  {
30
  "epoch": 0.04,
31
- "flops": 676747242321.8419,
32
- "iter_time": 3.244442939758301,
33
  "learning_rate": 9.979979979979981e-06,
34
- "loss": 4.1865745,
35
- "remaining_time": 3234.709610939026,
36
  "step": 3
37
  },
38
  {
39
  "epoch": 0.06,
40
- "flops": 727083157598.7356,
41
- "iter_time": 3.0198303858439126,
42
  "learning_rate": 9.96996996996997e-06,
43
- "loss": 4.178702755,
44
- "remaining_time": 3007.751064300537,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.07,
49
- "flops": 770273224897.2733,
50
- "iter_time": 2.8505051732063293,
51
  "learning_rate": 9.95995995995996e-06,
52
- "loss": 4.17005772745,
53
- "remaining_time": 2836.2526473402977,
54
  "step": 5
55
  },
56
  {
57
  "epoch": 0.08,
58
- "flops": 770617895373.6494,
59
- "iter_time": 2.849230241775513,
60
  "learning_rate": 9.949949949949951e-06,
61
- "loss": 4.1585081501755,
62
- "remaining_time": 2832.13486032486,
63
  "step": 6
64
  },
65
  {
66
  "epoch": 0.1,
67
- "flops": 772366303187.6844,
68
- "iter_time": 2.842780431111654,
69
  "learning_rate": 9.93993993993994e-06,
70
- "loss": 4.147722068673745,
71
- "remaining_time": 2822.880968093872,
72
  "step": 7
73
  },
74
  {
75
  "epoch": 0.11,
76
- "flops": 744854726618.0027,
77
- "iter_time": 2.9477799279349193,
78
  "learning_rate": 9.929929929929931e-06,
79
- "loss": 4.131777847987007,
80
- "remaining_time": 2924.19768851144,
81
  "step": 8
82
  },
83
  {
84
  "epoch": 0.12,
85
- "flops": 757775980856.7782,
86
- "iter_time": 2.8975157141685486,
87
  "learning_rate": 9.91991991991992e-06,
88
- "loss": 4.109818069507138,
89
- "remaining_time": 2871.4380727410316,
90
  "step": 9
91
  },
92
  {
93
  "epoch": 0.14,
94
- "flops": 772465453346.2623,
95
- "iter_time": 2.842415544721815,
96
  "learning_rate": 9.90990990990991e-06,
97
- "loss": 4.095491888812067,
98
- "remaining_time": 2813.991389274597,
99
  "step": 10
100
  },
101
  {
102
  "epoch": 0.15,
103
- "flops": 777000101550.8723,
104
- "iter_time": 2.8258269309997557,
105
  "learning_rate": 9.899899899899901e-06,
106
- "loss": 4.079873969923947,
107
- "remaining_time": 2794.7428347587584,
108
  "step": 11
109
  },
110
  {
111
  "epoch": 0.17,
112
- "flops": 778757974714.995,
113
- "iter_time": 2.8194482543251733,
114
  "learning_rate": 9.88988988988989e-06,
115
- "loss": 4.063009230224707,
116
- "remaining_time": 2785.614875273271,
117
  "step": 12
118
  },
119
  {
120
  "epoch": 0.18,
121
- "flops": 762857729109.9298,
122
- "iter_time": 2.8782140215237937,
123
  "learning_rate": 9.879879879879881e-06,
124
- "loss": 4.04228313792246,
125
- "remaining_time": 2840.797239243984,
126
  "step": 13
127
  },
128
  {
129
  "epoch": 0.19,
130
- "flops": 758247851432.5574,
131
- "iter_time": 2.8957125406998854,
132
  "learning_rate": 9.86986986986987e-06,
133
- "loss": 4.023953306543236,
134
- "remaining_time": 2855.172565130087,
135
  "step": 14
136
  },
137
  {
138
  "epoch": 0.21,
139
- "flops": 762440152593.9678,
140
- "iter_time": 2.8797903742109026,
141
  "learning_rate": 9.85985985985986e-06,
142
- "loss": 4.007716773477804,
143
- "remaining_time": 2836.593518597739,
144
  "step": 15
145
  },
146
  {
147
  "epoch": 0.22,
148
- "flops": 768251722267.9072,
149
- "iter_time": 2.858005714416504,
150
  "learning_rate": 9.849849849849851e-06,
151
- "loss": 3.9873606057430258,
152
- "remaining_time": 2812.27762298584,
153
  "step": 16
154
  },
155
  {
156
  "epoch": 0.24,
157
- "flops": 771504390688.7838,
158
- "iter_time": 2.845956340432167,
159
  "learning_rate": 9.83983983983984e-06,
160
- "loss": 3.9675569996855953,
161
- "remaining_time": 2797.57508264482,
162
  "step": 17
163
  },
164
  {
165
  "epoch": 0.25,
166
- "flops": 738418300379.48,
167
- "iter_time": 2.9734742641448975,
168
  "learning_rate": 9.829829829829831e-06,
169
- "loss": 3.9488584296887392,
170
- "remaining_time": 2919.9517273902893,
171
  "step": 18
172
  },
173
  {
174
  "epoch": 0.26,
175
- "flops": 720531917318.4235,
176
- "iter_time": 3.0472873714235096,
177
  "learning_rate": 9.81981981981982e-06,
178
- "loss": 3.9315318453918517,
179
- "remaining_time": 2989.3889113664627,
180
  "step": 19
181
  },
182
  {
183
  "epoch": 0.28,
184
- "flops": 705425117467.2936,
185
- "iter_time": 3.112545553006624,
186
  "learning_rate": 9.80980980980981e-06,
187
- "loss": 3.909895526937933,
188
- "remaining_time": 3050.2946419464915,
189
  "step": 20
190
  },
191
  {
192
  "epoch": 0.29,
193
- "flops": 706196359318.664,
194
- "iter_time": 3.1091463208198546,
195
  "learning_rate": 9.799799799799801e-06,
196
- "loss": 3.8902945716685537,
197
- "remaining_time": 3043.8542480826377,
198
  "step": 21
199
  },
200
  {
201
  "epoch": 0.31,
202
- "flops": 698413652665.8569,
203
- "iter_time": 3.1437927995409285,
204
  "learning_rate": 9.78978978978979e-06,
205
- "loss": 3.870555625951868,
206
- "remaining_time": 3074.629357951028,
207
  "step": 22
208
  },
209
  {
210
  "epoch": 0.32,
211
- "flops": 700378763760.74,
212
- "iter_time": 3.134971997954629,
213
  "learning_rate": 9.779779779779781e-06,
214
- "loss": 3.8589570696923494,
215
- "remaining_time": 3062.8676420016723,
216
  "step": 23
217
  },
218
  {
219
  "epoch": 0.33,
220
- "flops": 703456955238.3383,
221
- "iter_time": 3.1212539672851562,
222
  "learning_rate": 9.76976976976977e-06,
223
- "loss": 3.8414114989954258,
224
- "remaining_time": 3046.3438720703125,
225
  "step": 24
226
  },
227
  {
228
  "epoch": 0.35,
229
- "flops": 706839428283.1747,
230
- "iter_time": 3.106317679087321,
231
  "learning_rate": 9.75975975975976e-06,
232
- "loss": 3.8234383840054713,
233
- "remaining_time": 3028.659737110138,
234
  "step": 25
235
  },
236
  {
237
  "epoch": 0.36,
238
- "flops": 709694052505.4406,
239
- "iter_time": 3.0938230419158934,
240
  "learning_rate": 9.749749749749751e-06,
241
- "loss": 3.8077940001654165,
242
- "remaining_time": 3013.38364282608,
243
  "step": 26
244
  },
245
  {
246
  "epoch": 0.38,
247
- "flops": 708325686130.973,
248
- "iter_time": 3.099799788915194,
249
  "learning_rate": 9.73973973973974e-06,
250
- "loss": 3.791024060163762,
251
- "remaining_time": 3016.105194614484,
252
  "step": 27
253
  },
254
  {
255
  "epoch": 0.39,
256
- "flops": 710268332115.8358,
257
- "iter_time": 3.0913215654867665,
258
  "learning_rate": 9.729729729729732e-06,
259
- "loss": 3.767240819562124,
260
- "remaining_time": 3004.764561653137,
261
  "step": 28
262
  },
263
  {
264
  "epoch": 0.4,
265
- "flops": 716185064802.5332,
266
- "iter_time": 3.065782742840903,
267
  "learning_rate": 9.719719719719721e-06,
268
- "loss": 3.7443294113665027,
269
- "remaining_time": 2976.8750432985166,
270
  "step": 29
271
  },
272
  {
273
  "epoch": 0.42,
274
- "flops": 718525354369.5928,
275
- "iter_time": 3.055797264493745,
276
  "learning_rate": 9.70970970970971e-06,
277
- "loss": 3.7250901172528375,
278
- "remaining_time": 2964.1233465589326,
279
  "step": 30
280
  },
281
  {
282
  "epoch": 0.43,
283
- "flops": 724075264572.7759,
284
- "iter_time": 3.032375113169352,
285
  "learning_rate": 9.699699699699701e-06,
286
- "loss": 3.707127216080309,
287
- "remaining_time": 2938.3714846611024,
288
  "step": 31
289
  },
290
  {
291
  "epoch": 0.44,
292
- "flops": 721408600240.4366,
293
- "iter_time": 3.0435841929528022,
294
  "learning_rate": 9.68968968968969e-06,
295
- "loss": 3.685780943919506,
296
- "remaining_time": 2946.1894987783126,
297
  "step": 32
298
  },
299
  {
300
  "epoch": 0.46,
301
- "flops": 714269321692.9258,
302
- "iter_time": 3.0740054845809937,
303
  "learning_rate": 9.67967967967968e-06,
304
- "loss": 3.668101134480311,
305
- "remaining_time": 2972.563303589821,
306
  "step": 33
307
  },
308
  {
309
  "epoch": 0.47,
310
- "flops": 716119108432.6882,
311
- "iter_time": 3.066065109137333,
312
  "learning_rate": 9.669669669669671e-06,
313
- "loss": 3.651918123135508,
314
- "remaining_time": 2961.818895426664,
315
  "step": 34
316
  },
317
  {
318
  "epoch": 0.49,
319
- "flops": 721543850839.2638,
320
- "iter_time": 3.0430136848898495,
321
  "learning_rate": 9.65965965965966e-06,
322
- "loss": 3.632711941904153,
323
- "remaining_time": 2936.508205918705,
324
  "step": 35
325
  },
326
  {
327
  "epoch": 0.5,
328
- "flops": 725619778467.1793,
329
- "iter_time": 3.0259205681937082,
330
  "learning_rate": 9.649649649649651e-06,
331
- "loss": 3.613469822485111,
332
- "remaining_time": 2916.987427738735,
333
  "step": 36
334
  },
335
  {
336
  "epoch": 0.51,
337
- "flops": 723057841885.0858,
338
- "iter_time": 3.0366420017348394,
339
  "learning_rate": 9.63963963963964e-06,
340
- "loss": 3.59803012426026,
341
- "remaining_time": 2924.2862476706505,
342
  "step": 37
343
  },
344
  {
345
  "epoch": 0.53,
346
- "flops": 723865047067.9672,
347
- "iter_time": 3.0332557446247823,
348
  "learning_rate": 9.62962962962963e-06,
349
- "loss": 3.5883728230176573,
350
- "remaining_time": 2917.9920263290405,
351
  "step": 38
352
  },
353
  {
354
  "epoch": 0.54,
355
- "flops": 724833511698.737,
356
- "iter_time": 3.0292029506281803,
357
  "learning_rate": 9.61961961961962e-06,
358
- "loss": 3.5712650947874804,
359
- "remaining_time": 2911.064035553681,
360
  "step": 39
361
  },
362
  {
363
  "epoch": 0.56,
364
- "flops": 728609540585.4153,
365
- "iter_time": 3.013504065000094,
366
  "learning_rate": 9.60960960960961e-06,
367
- "loss": 3.5493454438396057,
368
- "remaining_time": 2892.9639024000903,
369
  "step": 40
370
  },
371
  {
372
  "epoch": 0.57,
373
- "flops": 730835767693.7731,
374
- "iter_time": 3.004324513673782,
375
  "learning_rate": 9.5995995995996e-06,
376
- "loss": 3.5320349894012097,
377
- "remaining_time": 2881.147208613157,
378
  "step": 41
379
  },
380
  {
381
  "epoch": 0.58,
382
- "flops": 731234413380.5944,
383
- "iter_time": 3.002686651741586,
384
  "learning_rate": 9.58958958958959e-06,
385
- "loss": 3.5149416395071977,
386
- "remaining_time": 2876.5738123684396,
387
  "step": 42
388
  },
389
  {
390
  "epoch": 0.6,
391
- "flops": 727368192337.5139,
392
- "iter_time": 3.018647000903175,
393
  "learning_rate": 9.57957957957958e-06,
394
- "loss": 3.498084223112126,
395
- "remaining_time": 2888.845179864338,
396
  "step": 43
397
  },
398
  {
399
  "epoch": 0.61,
400
- "flops": 705155915133.6782,
401
- "iter_time": 3.1137338072754615,
402
  "learning_rate": 9.56956956956957e-06,
403
- "loss": 3.4823423808810046,
404
- "remaining_time": 2976.7295197553412,
405
  "step": 44
406
  },
407
  {
408
  "epoch": 0.62,
409
- "flops": 687670387605.7764,
410
- "iter_time": 3.192907317118211,
411
  "learning_rate": 9.55955955955956e-06,
412
- "loss": 3.4651959570721944,
413
- "remaining_time": 3049.226487847892,
414
  "step": 45
415
  },
416
  {
417
  "epoch": 0.64,
418
- "flops": 687719642974.3138,
419
- "iter_time": 3.1926786369747586,
420
  "learning_rate": 9.54954954954955e-06,
421
- "loss": 3.4445809975014727,
422
- "remaining_time": 3045.8154196739197,
423
  "step": 46
424
  },
425
  {
426
  "epoch": 0.65,
427
- "flops": 688252847108.6973,
428
- "iter_time": 3.1902052008587383,
429
  "learning_rate": 9.53953953953954e-06,
430
- "loss": 3.425859187526458,
431
- "remaining_time": 3040.2655564183774,
432
  "step": 47
433
  },
434
  {
435
  "epoch": 0.67,
436
- "flops": 690645296211.3798,
437
- "iter_time": 3.179154081547514,
438
  "learning_rate": 9.52952952952953e-06,
439
- "loss": 3.413328595651193,
440
- "remaining_time": 3026.5546856332335,
441
  "step": 48
442
  },
443
  {
444
  "epoch": 0.68,
445
- "flops": 691543847203.3143,
446
- "iter_time": 3.1750232776006064,
447
  "learning_rate": 9.51951951951952e-06,
448
- "loss": 3.3954373096946813,
449
- "remaining_time": 3019.4471369981766,
450
  "step": 49
451
  },
452
  {
453
  "epoch": 0.69,
454
- "flops": 689714760670.2722,
455
- "iter_time": 3.1834432689511045,
456
  "learning_rate": 9.50950950950951e-06,
457
- "loss": 3.3732359365977347,
458
- "remaining_time": 3024.2711055035493,
459
  "step": 50
460
  },
461
  {
462
  "epoch": 0.71,
463
- "flops": 691448472024.5978,
464
- "iter_time": 3.1754612255096437,
465
  "learning_rate": 9.4994994994995e-06,
466
- "loss": 3.3588975772317573,
467
- "remaining_time": 3013.512703008652,
468
  "step": 51
469
  },
470
  {
471
  "epoch": 0.72,
472
- "flops": 692258843884.0684,
473
- "iter_time": 3.17174396795385,
474
  "learning_rate": 9.489489489489491e-06,
475
- "loss": 3.3495406014594398,
476
- "remaining_time": 3006.81328162025,
477
  "step": 52
478
  },
479
  {
480
  "epoch": 0.74,
481
- "flops": 694408407311.5348,
482
- "iter_time": 3.161925733089447,
483
  "learning_rate": 9.47947947947948e-06,
484
- "loss": 3.3294171954448455,
485
- "remaining_time": 2994.3436692357063,
486
  "step": 53
487
  },
488
  {
489
  "epoch": 0.75,
490
- "flops": 696442793706.463,
491
- "iter_time": 3.1526893984596684,
492
  "learning_rate": 9.46946946946947e-06,
493
- "loss": 3.315428023490397,
494
- "remaining_time": 2982.4441709428465,
495
  "step": 54
496
  },
497
  {
498
  "epoch": 0.76,
499
- "flops": 693614196554.918,
500
- "iter_time": 3.1655462406299733,
501
  "learning_rate": 9.45945945945946e-06,
502
- "loss": 3.296744743255493,
503
- "remaining_time": 2991.4411973953247,
504
  "step": 55
505
  },
506
  {
507
  "epoch": 0.78,
508
- "flops": 693836694069.1173,
509
- "iter_time": 3.164531122554432,
510
  "learning_rate": 9.44944944944945e-06,
511
- "loss": 3.2839522958229383,
512
- "remaining_time": 2987.317379691384,
513
  "step": 56
514
  },
515
  {
516
  "epoch": 0.79,
517
- "flops": 696555431701.5421,
518
- "iter_time": 3.1521795860358646,
519
  "learning_rate": 9.439439439439441e-06,
520
- "loss": 3.2731757728647093,
521
- "remaining_time": 2972.50534963182,
522
  "step": 57
523
  },
524
  {
525
  "epoch": 0.81,
526
- "flops": 698400801361.5933,
527
- "iter_time": 3.143850648612307,
528
  "learning_rate": 9.42942942942943e-06,
529
- "loss": 3.264027015136062,
530
- "remaining_time": 2961.507310992793,
531
  "step": 58
532
  },
533
  {
534
  "epoch": 0.82,
535
- "flops": 695816271468.8467,
536
- "iter_time": 3.1555281219811273,
537
  "learning_rate": 9.41941941941942e-06,
538
- "loss": 3.2479237449847016,
539
- "remaining_time": 2969.3519627842406,
540
  "step": 59
541
  },
542
  {
543
  "epoch": 0.83,
544
- "flops": 695139766846.9948,
545
- "iter_time": 3.1585990574400302,
546
  "learning_rate": 9.40940940940941e-06,
547
- "loss": 3.2323745075348542,
548
- "remaining_time": 2969.0831139936286,
549
  "step": 60
550
  },
551
  {
552
  "epoch": 0.85,
553
- "flops": 696063289567.6348,
554
- "iter_time": 3.154408291975657,
555
  "learning_rate": 9.3993993993994e-06,
556
- "loss": 3.217518762459506,
557
- "remaining_time": 2961.989386165142,
558
  "step": 61
559
  },
560
  {
561
  "epoch": 0.86,
562
- "flops": 698313041409.4874,
563
- "iter_time": 3.144245749614278,
564
  "learning_rate": 9.389389389389391e-06,
565
- "loss": 3.2017655748349108,
566
- "remaining_time": 2949.302513138193,
567
  "step": 62
568
  },
569
  {
570
  "epoch": 0.88,
571
- "flops": 700518921438.2297,
572
- "iter_time": 3.1343447623714322,
573
  "learning_rate": 9.37937937937938e-06,
574
- "loss": 3.1927049190865615,
575
- "remaining_time": 2936.881042342032,
576
  "step": 63
577
  },
578
  {
579
  "epoch": 0.89,
580
- "flops": 699955695586.3307,
581
- "iter_time": 3.1368668419974193,
582
  "learning_rate": 9.36936936936937e-06,
583
- "loss": 3.1761248698956956,
584
- "remaining_time": 2936.1073641095845,
585
  "step": 64
586
  },
587
  {
588
  "epoch": 0.9,
589
- "flops": 698475179757.6893,
590
- "iter_time": 3.14351586997509,
591
  "learning_rate": 9.35935935935936e-06,
592
- "loss": 3.1596636211967386,
593
- "remaining_time": 2939.187338426709,
594
  "step": 65
595
  },
596
  {
597
  "epoch": 0.92,
598
- "flops": 699134034968.3898,
599
- "iter_time": 3.140553459754357,
600
  "learning_rate": 9.34934934934935e-06,
601
- "loss": 3.142600984984771,
602
- "remaining_time": 2933.2769314105694,
603
  "step": 66
604
  },
605
  {
606
  "epoch": 0.93,
607
- "flops": 700150815873.6282,
608
- "iter_time": 3.135992649829749,
609
  "learning_rate": 9.339339339339341e-06,
610
- "loss": 3.1258319751349233,
611
- "remaining_time": 2925.881142291156,
612
  "step": 67
613
  },
614
  {
615
  "epoch": 0.94,
616
- "flops": 700424097005.2478,
617
- "iter_time": 3.1347690945240987,
618
  "learning_rate": 9.32932932932933e-06,
619
- "loss": 3.112117655383574,
620
- "remaining_time": 2921.60479609646,
621
  "step": 68
622
  },
623
  {
624
  "epoch": 0.96,
625
- "flops": 699008293411.1104,
626
- "iter_time": 3.1411184002371395,
627
  "learning_rate": 9.31931931931932e-06,
628
- "loss": 3.0978904788297386,
629
- "remaining_time": 2924.381230620777,
630
  "step": 69
631
  },
632
  {
633
  "epoch": 0.97,
634
- "flops": 697583367916.462,
635
- "iter_time": 3.1475346364836763,
636
  "learning_rate": 9.30930930930931e-06,
637
- "loss": 3.0854575740414414,
638
- "remaining_time": 2927.207211929819,
639
  "step": 70
640
  },
641
  {
642
  "epoch": 0.99,
643
- "flops": 697764677146.6968,
644
- "iter_time": 3.1467167718069895,
645
  "learning_rate": 9.2992992992993e-06,
646
- "loss": 3.075498998301027,
647
- "remaining_time": 2923.2998810086933,
648
  "step": 71
649
  },
650
  {
651
  "epoch": 1.0,
652
- "flops": 699444895699.4308,
653
- "iter_time": 3.1391576746819725,
654
  "learning_rate": 9.289289289289291e-06,
655
- "loss": 3.057908008318017,
656
- "remaining_time": 2913.1383221048704,
657
  "step": 72
658
  },
659
  {
660
  "epoch": 1.01,
661
- "flops": 699980437055.6069,
662
- "iter_time": 3.136755966477924,
663
  "learning_rate": 9.27927927927928e-06,
664
- "loss": 3.0401639282348367,
665
- "remaining_time": 2907.7727809250355,
666
  "step": 73
667
  },
668
  {
669
  "epoch": 1.03,
670
- "flops": 697627999489.0051,
671
- "iter_time": 3.147333269249903,
672
  "learning_rate": 9.26926926926927e-06,
673
- "loss": 3.0235682889524886,
674
- "remaining_time": 2914.43060732541,
675
  "step": 74
676
  },
677
  {
678
  "epoch": 1.04,
679
- "flops": 697874897509.5933,
680
- "iter_time": 3.1462197883709058,
681
  "learning_rate": 9.25925925925926e-06,
682
- "loss": 3.005071606062964,
683
- "remaining_time": 2910.2533042430878,
684
  "step": 75
685
  },
686
  {
687
  "epoch": 1.06,
688
- "flops": 698043278910.083,
689
- "iter_time": 3.145460859934489,
690
  "learning_rate": 9.24924924924925e-06,
691
- "loss": 2.988480890002334,
692
- "remaining_time": 2906.405834579468,
693
  "step": 76
694
  },
695
  {
696
  "epoch": 1.07,
697
- "flops": 698881768860.8909,
698
- "iter_time": 3.1416870637943872,
699
  "learning_rate": 9.239239239239241e-06,
700
- "loss": 2.970070081102311,
701
- "remaining_time": 2899.7771598822196,
702
  "step": 77
703
  },
704
  {
705
  "epoch": 1.08,
706
- "flops": 698044094641.9747,
707
- "iter_time": 3.1454571841599104,
708
  "learning_rate": 9.229229229229229e-06,
709
- "loss": 2.948763380291288,
710
- "remaining_time": 2900.1115237954373,
711
  "step": 78
712
  },
713
  {
714
  "epoch": 1.1,
715
- "flops": 696787120860.9131,
716
- "iter_time": 3.1511314526582375,
717
  "learning_rate": 9.21921921921922e-06,
718
- "loss": 2.926188746488375,
719
- "remaining_time": 2902.192067898237,
720
  "step": 79
721
  },
722
  {
723
  "epoch": 1.11,
724
- "flops": 698057060084.2756,
725
- "iter_time": 3.145398761652693,
726
  "learning_rate": 9.20920920920921e-06,
727
- "loss": 2.9093348590234913,
728
- "remaining_time": 2893.766860720478,
729
  "step": 80
730
  },
731
  {
732
  "epoch": 1.12,
733
- "flops": 699718176378.4906,
734
- "iter_time": 3.137931650876999,
735
  "learning_rate": 9.1991991991992e-06,
736
- "loss": 2.8889005104332566,
737
- "remaining_time": 2883.759187155962,
738
  "step": 81
739
  },
740
  {
741
  "epoch": 1.14,
742
- "flops": 701156882644.3876,
743
- "iter_time": 3.1314929179203363,
744
  "learning_rate": 9.189189189189191e-06,
745
- "loss": 2.872725505328924,
746
- "remaining_time": 2874.710498650869,
747
  "step": 82
748
  },
749
  {
750
  "epoch": 1.15,
751
- "flops": 700021975699.4868,
752
- "iter_time": 3.136569834337002,
753
  "learning_rate": 9.179179179179179e-06,
754
- "loss": 2.853570250275635,
755
- "remaining_time": 2876.2345380870306,
756
  "step": 83
757
  },
758
  {
759
  "epoch": 1.17,
760
- "flops": 700523623002.3307,
761
- "iter_time": 3.134323726217431,
762
  "learning_rate": 9.16916916916917e-06,
763
- "loss": 2.8393305477728785,
764
- "remaining_time": 2871.0405332151668,
765
  "step": 84
766
  },
767
  {
768
  "epoch": 1.18,
769
- "flops": 702234326687.0096,
770
- "iter_time": 3.126688241958618,
771
  "learning_rate": 9.15915915915916e-06,
772
- "loss": 2.8205122422951496,
773
- "remaining_time": 2860.9197413921356,
774
  "step": 85
775
  },
776
  {
777
  "epoch": 1.19,
778
- "flops": 703341707912.4298,
779
- "iter_time": 3.121765405991498,
780
  "learning_rate": 9.14914914914915e-06,
781
- "loss": 2.802283119872198,
782
- "remaining_time": 2853.293581076229,
783
  "step": 86
784
  },
785
  {
786
  "epoch": 1.21,
787
- "flops": 703877030298.328,
788
- "iter_time": 3.1193911973820176,
789
  "learning_rate": 9.13913913913914e-06,
790
- "loss": 2.7910102886734762,
791
- "remaining_time": 2848.0041632097823,
792
  "step": 87
793
  },
794
  {
795
  "epoch": 1.22,
796
- "flops": 702440090448.8597,
797
- "iter_time": 3.125772350135891,
798
  "learning_rate": 9.129129129129129e-06,
799
- "loss": 2.7787841857867415,
800
- "remaining_time": 2850.7043833239327,
801
  "step": 88
802
  },
803
  {
804
  "epoch": 1.24,
805
- "flops": 701763781931.5049,
806
- "iter_time": 3.128784740513021,
807
  "learning_rate": 9.11911911911912e-06,
808
- "loss": 2.761334343928874,
809
- "remaining_time": 2850.3228986073623,
810
  "step": 89
811
  },
812
  {
813
  "epoch": 1.25,
814
- "flops": 702356040935.5374,
815
- "iter_time": 3.1261464049307146,
816
  "learning_rate": 9.10910910910911e-06,
817
- "loss": 2.7461310004895854,
818
- "remaining_time": 2844.7932284869503,
819
  "step": 90
820
  },
821
  {
822
  "epoch": 1.26,
823
- "flops": 702489683649.3961,
824
- "iter_time": 3.125551682048374,
825
  "learning_rate": 9.0990990990991e-06,
826
- "loss": 2.7307716904846897,
827
- "remaining_time": 2841.126478981972,
828
  "step": 91
829
  },
830
  {
831
  "epoch": 1.28,
832
- "flops": 703261367387.8263,
833
- "iter_time": 3.122122036231743,
834
  "learning_rate": 9.08908908908909e-06,
835
- "loss": 2.7152229735798428,
836
- "remaining_time": 2834.8868088984227,
837
  "step": 92
838
  },
839
  {
840
  "epoch": 1.29,
841
- "flops": 702809610961.9054,
842
- "iter_time": 3.1241288936656453,
843
  "learning_rate": 9.079079079079079e-06,
844
- "loss": 2.6977357438440444,
845
- "remaining_time": 2833.5849065547404,
846
  "step": 93
847
  },
848
  {
849
  "epoch": 1.31,
850
- "flops": 702596630290.5204,
851
- "iter_time": 3.1250759222174205,
852
  "learning_rate": 9.06906906906907e-06,
853
- "loss": 2.6863803864056037,
854
- "remaining_time": 2831.318785528983,
855
  "step": 94
856
  },
857
  {
858
  "epoch": 1.32,
859
- "flops": 702763975976.5361,
860
- "iter_time": 3.1243317634501357,
861
  "learning_rate": 9.05905905905906e-06,
862
- "loss": 2.669635582541548,
863
- "remaining_time": 2827.520245922373,
864
  "step": 95
865
  },
866
  {
867
  "epoch": 1.33,
868
- "flops": 703784839651.5507,
869
- "iter_time": 3.119799814726177,
870
  "learning_rate": 9.04904904904905e-06,
871
- "loss": 2.6498242267161323,
872
- "remaining_time": 2820.2990325124642,
873
  "step": 96
874
  },
875
  {
876
  "epoch": 1.35,
877
- "flops": 704648669317.2474,
878
- "iter_time": 3.1159752483169236,
879
  "learning_rate": 9.03903903903904e-06,
880
- "loss": 2.6338019844489713,
881
- "remaining_time": 2813.725649230182,
882
  "step": 97
883
  },
884
  {
885
  "epoch": 1.36,
886
- "flops": 702609712192.6404,
887
- "iter_time": 3.125017736375946,
888
  "learning_rate": 9.029029029029029e-06,
889
- "loss": 2.6165949646044817,
890
- "remaining_time": 2818.7659982111036,
891
  "step": 98
892
  },
893
  {
894
  "epoch": 1.38,
895
- "flops": 702839676286.5919,
896
- "iter_time": 3.1239952530179704,
897
  "learning_rate": 9.01901901901902e-06,
898
- "loss": 2.599163014958437,
899
- "remaining_time": 2814.7197229691915,
900
  "step": 99
901
  },
902
  {
903
  "epoch": 1.39,
904
- "flops": 704200857841.23,
905
- "iter_time": 3.1179567418917262,
906
  "learning_rate": 9.00900900900901e-06,
907
- "loss": 2.587291384808853,
908
- "remaining_time": 2806.1610677025537,
909
  "step": 100
910
  },
911
  {
912
  "epoch": 1.4,
913
- "flops": 704419090417.5841,
914
- "iter_time": 3.116990783214569,
915
  "learning_rate": 8.998998998999e-06,
916
- "loss": 2.5779704709607643,
917
- "remaining_time": 2802.1747141098976,
918
  "step": 101
919
  },
920
  {
921
  "epoch": 1.42,
922
- "flops": 705240729490.4491,
923
- "iter_time": 3.113359340346686,
924
  "learning_rate": 8.98898898898899e-06,
925
- "loss": 2.5642657662511565,
926
- "remaining_time": 2795.796687631324,
927
  "step": 102
928
  },
929
  {
930
  "epoch": 1.43,
931
- "flops": 703480098987.9172,
932
- "iter_time": 3.121151281338112,
933
  "learning_rate": 8.97897897897898e-06,
934
- "loss": 2.549907108588645,
935
- "remaining_time": 2799.6726993602865,
936
  "step": 103
937
  },
938
  {
939
  "epoch": 1.44,
940
- "flops": 704343730843.7548,
941
- "iter_time": 3.117324278192613,
942
  "learning_rate": 8.96896896896897e-06,
943
- "loss": 2.5407680375027586,
944
- "remaining_time": 2793.1225532605813,
945
  "step": 104
946
  },
947
  {
948
  "epoch": 1.46,
949
- "flops": 704648916908.1643,
950
- "iter_time": 3.115974153463657,
951
  "learning_rate": 8.95895895895896e-06,
952
- "loss": 2.5316773571277307,
953
- "remaining_time": 2788.796867349973,
954
  "step": 105
955
  },
956
  {
957
  "epoch": 1.47,
958
- "flops": 705101351742.8137,
959
- "iter_time": 3.1139747596922374,
960
  "learning_rate": 8.94894894894895e-06,
961
- "loss": 2.516967583556453,
962
- "remaining_time": 2783.8934351648604,
963
  "step": 106
964
  },
965
  {
966
  "epoch": 1.49,
967
- "flops": 705340869042.1604,
968
- "iter_time": 3.11291732653132,
969
  "learning_rate": 8.93893893893894e-06,
970
- "loss": 2.505145907720889,
971
- "remaining_time": 2779.835172592469,
972
  "step": 107
973
  },
974
  {
975
  "epoch": 1.5,
976
- "flops": 703496820532.4734,
977
- "iter_time": 3.121077094122628,
978
  "learning_rate": 8.92892892892893e-06,
979
- "loss": 2.4915434486436796,
980
- "remaining_time": 2784.000767957384,
981
  "step": 108
982
  },
983
  {
984
  "epoch": 1.51,
985
- "flops": 704229250599.9259,
986
- "iter_time": 3.1178310336890043,
987
  "learning_rate": 8.91891891891892e-06,
988
- "loss": 2.479670014157243,
989
- "remaining_time": 2777.987451016903,
990
  "step": 109
991
  },
992
  {
993
  "epoch": 1.53,
994
- "flops": 704821287647.8053,
995
- "iter_time": 3.1152121123917604,
996
  "learning_rate": 8.90890890890891e-06,
997
- "loss": 2.4674833140156704,
998
- "remaining_time": 2772.5387800286667,
999
  "step": 110
1000
  },
1001
  {
1002
  "epoch": 1.54,
1003
- "flops": 705395174470.555,
1004
- "iter_time": 3.1126776760274715,
1005
  "learning_rate": 8.8988988988989e-06,
1006
- "loss": 2.451845480875514,
1007
- "remaining_time": 2767.170453988422,
1008
  "step": 111
1009
  },
1010
  {
1011
  "epoch": 1.56,
1012
- "flops": 704875976151.3732,
1013
- "iter_time": 3.114970415562123,
1014
  "learning_rate": 8.888888888888888e-06,
1015
- "loss": 2.441721026066759,
1016
- "remaining_time": 2766.093729019165,
1017
  "step": 112
1018
  },
1019
  {
1020
  "epoch": 1.57,
1021
- "flops": 704042134307.3656,
1022
- "iter_time": 3.11865967299257,
1023
  "learning_rate": 8.87887887887888e-06,
1024
- "loss": 2.432560815806091,
1025
- "remaining_time": 2766.25112994441,
1026
  "step": 113
1027
  },
1028
  {
1029
  "epoch": 1.58,
1030
- "flops": 704520929191.4874,
1031
- "iter_time": 3.1165402209864252,
1032
  "learning_rate": 8.86886886886887e-06,
1033
- "loss": 2.42014220764803,
1034
- "remaining_time": 2761.2546357939727,
1035
  "step": 114
1036
  },
1037
  {
1038
  "epoch": 1.6,
1039
- "flops": 704472464420.1289,
1040
- "iter_time": 3.1167546259729484,
1041
  "learning_rate": 8.85885885885886e-06,
1042
- "loss": 2.40947378557155,
1043
- "remaining_time": 2758.327843986059,
1044
  "step": 115
1045
  },
1046
  {
1047
  "epoch": 1.61,
1048
- "flops": 703188275695.0721,
1049
- "iter_time": 3.1224465598230777,
1050
  "learning_rate": 8.84884884884885e-06,
1051
- "loss": 2.3974270477158344,
1052
- "remaining_time": 2760.242758883601,
1053
  "step": 116
1054
  },
1055
  {
1056
  "epoch": 1.62,
1057
- "flops": 701310643375.6836,
1058
- "iter_time": 3.1308063453641433,
1059
  "learning_rate": 8.838838838838838e-06,
1060
- "loss": 2.384735777238676,
1061
- "remaining_time": 2764.5020029565385,
1062
  "step": 117
1063
  },
1064
  {
1065
  "epoch": 1.64,
1066
- "flops": 700980644419.746,
1067
- "iter_time": 3.1322802274655075,
1068
  "learning_rate": 8.82882882882883e-06,
1069
- "loss": 2.3705114194662893,
1070
- "remaining_time": 2762.6711606245776,
1071
  "step": 118
1072
  },
1073
  {
1074
  "epoch": 1.65,
1075
- "flops": 701384488909.6548,
1076
- "iter_time": 3.130476717221535,
1077
  "learning_rate": 8.818818818818819e-06,
1078
- "loss": 2.360955305271627,
1079
- "remaining_time": 2757.949987872172,
1080
  "step": 119
1081
  },
1082
  {
1083
  "epoch": 1.67,
1084
- "flops": 701597135004.8392,
1085
- "iter_time": 3.129527905408074,
1086
  "learning_rate": 8.80880880880881e-06,
1087
- "loss": 2.34846775221891,
1088
- "remaining_time": 2753.984556759105,
1089
  "step": 120
1090
  }
1091
  ],
@@ -1093,7 +1093,7 @@
1093
  "max_steps": 1000,
1094
  "num_train_epochs": 14,
1095
  "save_steps": 120,
1096
- "total_flos": 2886102417408.0,
1097
  "trial_name": null,
1098
  "trial_params": null
1099
  }
 
1
  {
2
+ "best_metric": 5.269857883453369,
3
  "best_model_checkpoint": "phanthiet_expert_1000_steps/checkpoint-120",
4
  "epoch": 1.6666666666666665,
5
  "eval_steps": 120,
 
13
  "flops": 0.0,
14
  "iter_time": 0.0,
15
  "learning_rate": 1e-05,
16
+ "loss": 0.5456,
17
  "remaining_time": 0.0,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 0.03,
22
+ "flops": 337708474800.72833,
23
+ "iter_time": 6.501666307449341,
24
  "learning_rate": 9.989989989989992e-06,
25
+ "loss": 0.550402,
26
+ "remaining_time": 6488.662974834442,
27
  "step": 2
28
  },
29
  {
30
  "epoch": 0.04,
31
+ "flops": 313913153791.8662,
32
+ "iter_time": 6.994507193565369,
33
  "learning_rate": 9.979979979979981e-06,
34
+ "loss": 0.5577669799999999,
35
+ "remaining_time": 6973.523671984673,
36
  "step": 3
37
  },
38
  {
39
  "epoch": 0.06,
40
+ "flops": 369395348722.3119,
41
+ "iter_time": 5.943950891494751,
42
  "learning_rate": 9.96996996996997e-06,
43
+ "loss": 0.5604453102,
44
+ "remaining_time": 5920.175087928772,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.07,
49
+ "flops": 396889279569.81177,
50
+ "iter_time": 5.532192289829254,
51
  "learning_rate": 9.95995995995996e-06,
52
+ "loss": 0.5664098570980001,
53
+ "remaining_time": 5504.531328380108,
54
  "step": 5
55
  },
56
  {
57
  "epoch": 0.08,
58
+ "flops": 421384081763.41815,
59
+ "iter_time": 5.210609292984008,
60
  "learning_rate": 9.949949949949951e-06,
61
+ "loss": 0.5698937585270201,
62
+ "remaining_time": 5179.345637226104,
63
  "step": 6
64
  },
65
  {
66
  "epoch": 0.1,
67
+ "flops": 446295807257.3362,
68
+ "iter_time": 4.9197589953740435,
69
  "learning_rate": 9.93993993993994e-06,
70
+ "loss": 0.5781998209417499,
71
+ "remaining_time": 4885.3206824064255,
72
  "step": 7
73
  },
74
  {
75
  "epoch": 0.11,
76
+ "flops": 472428549895.4602,
77
+ "iter_time": 4.647618804659162,
78
  "learning_rate": 9.929929929929931e-06,
79
+ "loss": 0.5806858227323325,
80
+ "remaining_time": 4610.437854221888,
81
  "step": 8
82
  },
83
  {
84
  "epoch": 0.12,
85
+ "flops": 496630158877.83734,
86
+ "iter_time": 4.421132653951645,
87
  "learning_rate": 9.91991991991992e-06,
88
+ "loss": 0.5819999645050092,
89
+ "remaining_time": 4381.34246006608,
90
  "step": 9
91
  },
92
  {
93
  "epoch": 0.14,
94
+ "flops": 520012760024.9253,
95
+ "iter_time": 4.222334490882026,
96
  "learning_rate": 9.90990990990991e-06,
97
+ "loss": 0.5885829648599592,
98
+ "remaining_time": 4180.111145973206,
99
  "step": 10
100
  },
101
  {
102
  "epoch": 0.15,
103
+ "flops": 522981934249.4519,
104
+ "iter_time": 4.198362636566162,
105
  "learning_rate": 9.899899899899901e-06,
106
+ "loss": 0.5966651352113596,
107
+ "remaining_time": 4152.180647563934,
108
  "step": 11
109
  },
110
  {
111
  "epoch": 0.17,
112
+ "flops": 533220236875.68567,
113
+ "iter_time": 4.11775034124201,
114
  "learning_rate": 9.88988988988989e-06,
115
+ "loss": 0.6031984838592459,
116
+ "remaining_time": 4068.3373371471057,
117
  "step": 12
118
  },
119
  {
120
  "epoch": 0.18,
121
+ "flops": 544160301238.77454,
122
+ "iter_time": 4.034965078035991,
123
  "learning_rate": 9.879879879879881e-06,
124
+ "loss": 0.6074144990206535,
125
+ "remaining_time": 3982.510532021523,
126
  "step": 13
127
  },
128
  {
129
  "epoch": 0.19,
130
+ "flops": 559425240592.3275,
131
+ "iter_time": 3.9248636869283824,
132
  "learning_rate": 9.86986986986987e-06,
133
+ "loss": 0.6109523540304469,
134
+ "remaining_time": 3869.915595311385,
135
  "step": 14
136
  },
137
  {
138
  "epoch": 0.21,
139
+ "flops": 565694919686.4559,
140
+ "iter_time": 3.8813638516834805,
141
  "learning_rate": 9.85985985985986e-06,
142
+ "loss": 0.6137828304901424,
143
+ "remaining_time": 3823.1433939082285,
144
  "step": 15
145
  },
146
  {
147
  "epoch": 0.22,
148
+ "flops": 565811601772.6144,
149
+ "iter_time": 3.880563433965047,
150
  "learning_rate": 9.849849849849851e-06,
151
+ "loss": 0.621022002185241,
152
+ "remaining_time": 3818.474419021606,
153
  "step": 16
154
  },
155
  {
156
  "epoch": 0.24,
157
+ "flops": 575500361634.0349,
158
+ "iter_time": 3.8152327239513397,
159
  "learning_rate": 9.83983983983984e-06,
160
+ "loss": 0.6294617821633886,
161
+ "remaining_time": 3750.373767644167,
162
  "step": 17
163
  },
164
  {
165
  "epoch": 0.25,
166
+ "flops": 586043194731.8378,
167
+ "iter_time": 3.746597233940573,
168
  "learning_rate": 9.829829829829831e-06,
169
+ "loss": 0.6354701643417546,
170
+ "remaining_time": 3679.1584837296427,
171
  "step": 18
172
  },
173
  {
174
  "epoch": 0.26,
175
+ "flops": 595681113605.7532,
176
+ "iter_time": 3.685978558328417,
177
  "learning_rate": 9.81981981981982e-06,
178
+ "loss": 0.637550462698337,
179
+ "remaining_time": 3615.9449657201767,
180
  "step": 19
181
  },
182
  {
183
  "epoch": 0.28,
184
+ "flops": 599125830319.7644,
185
+ "iter_time": 3.664785761582224,
186
  "learning_rate": 9.80980980980981e-06,
187
+ "loss": 0.6389089580713537,
188
+ "remaining_time": 3591.4900463505796,
189
  "step": 20
190
  },
191
  {
192
  "epoch": 0.29,
193
+ "flops": 601663120837.0624,
194
+ "iter_time": 3.6493308901786805,
195
  "learning_rate": 9.799799799799801e-06,
196
+ "loss": 0.6430028684906403,
197
+ "remaining_time": 3572.694941484928,
198
  "step": 21
199
  },
200
  {
201
  "epoch": 0.31,
202
+ "flops": 608973680084.2296,
203
+ "iter_time": 3.6055216902778264,
204
  "learning_rate": 9.78978978978979e-06,
205
+ "loss": 0.6420408398057339,
206
+ "remaining_time": 3526.2002130917144,
207
  "step": 22
208
  },
209
  {
210
  "epoch": 0.32,
211
+ "flops": 614982690907.9432,
212
+ "iter_time": 3.570292050188238,
213
  "learning_rate": 9.779779779779781e-06,
214
+ "loss": 0.6430324314076765,
215
+ "remaining_time": 3488.1753330339084,
216
  "step": 23
217
  },
218
  {
219
  "epoch": 0.33,
220
+ "flops": 623008450190.2443,
221
+ "iter_time": 3.524298605711564,
222
  "learning_rate": 9.76976976976977e-06,
223
+ "loss": 0.6464111070935997,
224
+ "remaining_time": 3439.7154391744866,
225
  "step": 24
226
  },
227
  {
228
  "epoch": 0.35,
229
+ "flops": 623147799603.7585,
230
+ "iter_time": 3.523510495821635,
231
  "learning_rate": 9.75975975975976e-06,
232
+ "loss": 0.6485699960226637,
233
+ "remaining_time": 3435.422733426094,
234
  "step": 25
235
  },
236
  {
237
  "epoch": 0.36,
238
+ "flops": 624205117442.8488,
239
+ "iter_time": 3.517542152404785,
240
  "learning_rate": 9.749749749749751e-06,
241
+ "loss": 0.6560532960624371,
242
+ "remaining_time": 3426.0860564422605,
243
  "step": 26
244
  },
245
  {
246
  "epoch": 0.38,
247
+ "flops": 629478739319.5417,
248
+ "iter_time": 3.488073028050936,
249
  "learning_rate": 9.73973973973974e-06,
250
+ "loss": 0.6632047631018126,
251
+ "remaining_time": 3393.895056293561,
252
  "step": 27
253
  },
254
  {
255
  "epoch": 0.39,
256
+ "flops": 635024865150.7354,
257
+ "iter_time": 3.457609194296378,
258
  "learning_rate": 9.729729729729732e-06,
259
+ "loss": 0.6693847154707946,
260
+ "remaining_time": 3360.796136856079,
261
  "step": 28
262
  },
263
  {
264
  "epoch": 0.4,
265
+ "flops": 641848997477.1898,
266
+ "iter_time": 3.4208479268210277,
267
  "learning_rate": 9.719719719719721e-06,
268
+ "loss": 0.6715838683160866,
269
+ "remaining_time": 3321.643336943218,
270
  "step": 29
271
  },
272
  {
273
  "epoch": 0.42,
274
+ "flops": 642585125419.3646,
275
+ "iter_time": 3.4169290970111716,
276
  "learning_rate": 9.70970970970971e-06,
277
+ "loss": 0.6747270296329257,
278
+ "remaining_time": 3314.4212241008363,
279
  "step": 30
280
  },
281
  {
282
  "epoch": 0.43,
283
+ "flops": 640577790197.2817,
284
+ "iter_time": 3.427636496225993,
285
  "learning_rate": 9.699699699699701e-06,
286
+ "loss": 0.6797027593365965,
287
+ "remaining_time": 3321.3797648429872,
288
  "step": 31
289
  },
290
  {
291
  "epoch": 0.44,
292
+ "flops": 644293722794.7263,
293
+ "iter_time": 3.407867770041189,
294
  "learning_rate": 9.68968968968969e-06,
295
+ "loss": 0.6856877317432305,
296
+ "remaining_time": 3298.816001399871,
297
  "step": 32
298
  },
299
  {
300
  "epoch": 0.46,
301
+ "flops": 648971809710.5692,
302
+ "iter_time": 3.383302293717861,
303
  "learning_rate": 9.67967967967968e-06,
304
+ "loss": 0.6910388544257982,
305
+ "remaining_time": 3271.6533180251718,
306
  "step": 33
307
  },
308
  {
309
  "epoch": 0.47,
310
+ "flops": 651911653670.7534,
311
+ "iter_time": 3.36804504105539,
312
  "learning_rate": 9.669669669669671e-06,
313
+ "loss": 0.6991184658815401,
314
+ "remaining_time": 3253.531509659507,
315
  "step": 34
316
  },
317
  {
318
  "epoch": 0.49,
319
+ "flops": 650742782498.6014,
320
+ "iter_time": 3.3740947597167072,
321
  "learning_rate": 9.65965965965966e-06,
322
+ "loss": 0.7053602812227248,
323
+ "remaining_time": 3256.0014431266227,
324
  "step": 35
325
  },
326
  {
327
  "epoch": 0.5,
328
+ "flops": 651580763461.6589,
329
+ "iter_time": 3.36975542477199,
330
  "learning_rate": 9.649649649649651e-06,
331
+ "loss": 0.7058286784104976,
332
+ "remaining_time": 3248.4442294801984,
333
  "step": 36
334
  },
335
  {
336
  "epoch": 0.51,
337
+ "flops": 654602944372.9254,
338
+ "iter_time": 3.3541978862550526,
339
  "learning_rate": 9.63963963963964e-06,
340
+ "loss": 0.7075003916263926,
341
+ "remaining_time": 3230.0925644636154,
342
  "step": 37
343
  },
344
  {
345
  "epoch": 0.53,
346
+ "flops": 657540450120.0518,
347
+ "iter_time": 3.339213293951911,
348
  "learning_rate": 9.62962962962963e-06,
349
+ "loss": 0.7157003877101287,
350
+ "remaining_time": 3212.3231887817383,
351
  "step": 38
352
  },
353
  {
354
  "epoch": 0.54,
355
+ "flops": 659049867218.4541,
356
+ "iter_time": 3.3315655181282446,
357
  "learning_rate": 9.61961961961962e-06,
358
+ "loss": 0.7184363838330274,
359
+ "remaining_time": 3201.634462921243,
360
  "step": 39
361
  },
362
  {
363
  "epoch": 0.56,
364
+ "flops": 659793717459.9181,
365
+ "iter_time": 3.3278095172001767,
366
  "learning_rate": 9.60960960960961e-06,
367
+ "loss": 0.7198080199946971,
368
+ "remaining_time": 3194.6971365121694,
369
  "step": 40
370
  },
371
  {
372
  "epoch": 0.57,
373
+ "flops": 658401523052.3262,
374
+ "iter_time": 3.334846192598343,
375
  "learning_rate": 9.5995995995996e-06,
376
+ "loss": 0.7265189397947501,
377
+ "remaining_time": 3198.1174987018107,
378
  "step": 41
379
  },
380
  {
381
  "epoch": 0.58,
382
+ "flops": 661223398293.8737,
383
+ "iter_time": 3.320614209989222,
384
  "learning_rate": 9.58958958958959e-06,
385
+ "loss": 0.7362167503968026,
386
+ "remaining_time": 3181.1484131696748,
387
  "step": 42
388
  },
389
  {
390
  "epoch": 0.6,
391
+ "flops": 665436077713.07,
392
+ "iter_time": 3.299592381431943,
393
  "learning_rate": 9.57957957957958e-06,
394
+ "loss": 0.7387895828928346,
395
+ "remaining_time": 3157.7099090303695,
396
  "step": 43
397
  },
398
  {
399
  "epoch": 0.61,
400
+ "flops": 668037055965.658,
401
+ "iter_time": 3.286745537159055,
402
  "learning_rate": 9.56956956956957e-06,
403
+ "loss": 0.7402816870639062,
404
+ "remaining_time": 3142.1287335240563,
405
  "step": 44
406
  },
407
  {
408
  "epoch": 0.62,
409
+ "flops": 668578505149.5889,
410
+ "iter_time": 3.28408376195214,
411
  "learning_rate": 9.55955955955956e-06,
412
+ "loss": 0.7429708701932671,
413
+ "remaining_time": 3136.2999926642938,
414
  "step": 45
415
  },
416
  {
417
  "epoch": 0.64,
418
+ "flops": 668465991995.3077,
419
+ "iter_time": 3.284636523988512,
420
  "learning_rate": 9.54954954954955e-06,
421
+ "loss": 0.7488811614913344,
422
+ "remaining_time": 3133.5432438850403,
423
  "step": 46
424
  },
425
  {
426
  "epoch": 0.65,
427
+ "flops": 669750945011.7385,
428
+ "iter_time": 3.2783347731051236,
429
  "learning_rate": 9.53953953953954e-06,
430
+ "loss": 0.7512013498764211,
431
+ "remaining_time": 3124.2530387691827,
432
  "step": 47
433
  },
434
  {
435
  "epoch": 0.67,
436
+ "flops": 672507374605.0092,
437
+ "iter_time": 3.2648977472427045,
438
  "learning_rate": 9.52952952952953e-06,
439
+ "loss": 0.7504163363776569,
440
+ "remaining_time": 3108.1826553750548,
441
  "step": 48
442
  },
443
  {
444
  "epoch": 0.68,
445
+ "flops": 674897955624.8076,
446
+ "iter_time": 3.253333032131195,
447
  "learning_rate": 9.51951951951952e-06,
448
+ "loss": 0.7520791730138804,
449
+ "remaining_time": 3093.9197135567665,
450
  "step": 49
451
  },
452
  {
453
  "epoch": 0.69,
454
+ "flops": 672054534580.0323,
455
+ "iter_time": 3.267097682368999,
456
  "learning_rate": 9.50950950950951e-06,
457
+ "loss": 0.7538383812837415,
458
+ "remaining_time": 3103.7427982505487,
459
  "step": 50
460
  },
461
  {
462
  "epoch": 0.71,
463
+ "flops": 671547792110.8484,
464
+ "iter_time": 3.269562997817993,
465
  "learning_rate": 9.4994994994995e-06,
466
+ "loss": 0.759731997470904,
467
+ "remaining_time": 3102.8152849292755,
468
  "step": 51
469
  },
470
  {
471
  "epoch": 0.72,
472
+ "flops": 674273217333.7708,
473
+ "iter_time": 3.2563473617329315,
474
  "learning_rate": 9.489489489489491e-06,
475
+ "loss": 0.759546677496195,
476
+ "remaining_time": 3087.017298922819,
477
  "step": 52
478
  },
479
  {
480
  "epoch": 0.74,
481
+ "flops": 676641652332.1632,
482
+ "iter_time": 3.244949235365941,
483
  "learning_rate": 9.47947947947948e-06,
484
+ "loss": 0.762359210721233,
485
+ "remaining_time": 3072.966925891546,
486
  "step": 53
487
  },
488
  {
489
  "epoch": 0.75,
490
+ "flops": 678842240110.5048,
491
+ "iter_time": 3.23443015566412,
492
  "learning_rate": 9.46946946946947e-06,
493
+ "loss": 0.7682846186140208,
494
+ "remaining_time": 3059.7709272582574,
495
  "step": 54
496
  },
497
  {
498
  "epoch": 0.76,
499
+ "flops": 676107675069.3893,
500
+ "iter_time": 3.247512035899692,
501
  "learning_rate": 9.45945945945946e-06,
502
+ "loss": 0.7732457724278805,
503
+ "remaining_time": 3068.898873925209,
504
  "step": 55
505
  },
506
  {
507
  "epoch": 0.78,
508
+ "flops": 676584884527.398,
509
+ "iter_time": 3.2452214977957987,
510
  "learning_rate": 9.44944944944945e-06,
511
+ "loss": 0.7792853147036017,
512
+ "remaining_time": 3063.489093919234,
513
  "step": 56
514
  },
515
  {
516
  "epoch": 0.79,
517
+ "flops": 679219614722.3761,
518
+ "iter_time": 3.232633105346135,
519
  "learning_rate": 9.439439439439441e-06,
520
+ "loss": 0.7805744615565657,
521
+ "remaining_time": 3048.373018341405,
522
  "step": 57
523
  },
524
  {
525
  "epoch": 0.81,
526
+ "flops": 681611637353.8478,
527
+ "iter_time": 3.221288622471324,
528
  "learning_rate": 9.42942942942943e-06,
529
+ "loss": 0.782551716941,
530
+ "remaining_time": 3034.4538823679873,
531
  "step": 58
532
  },
533
  {
534
  "epoch": 0.82,
535
+ "flops": 683329725813.7307,
536
+ "iter_time": 3.2131893717009445,
537
  "learning_rate": 9.41941941941942e-06,
538
+ "loss": 0.7841451997715899,
539
+ "remaining_time": 3023.611198770589,
540
  "step": 59
541
  },
542
  {
543
  "epoch": 0.83,
544
+ "flops": 680966931813.5386,
545
+ "iter_time": 3.224338377936412,
546
  "learning_rate": 9.40940940940941e-06,
547
+ "loss": 0.783524747773874,
548
+ "remaining_time": 3030.878075260227,
549
  "step": 60
550
  },
551
  {
552
  "epoch": 0.85,
553
+ "flops": 681524289786.793,
554
+ "iter_time": 3.22170147895813,
555
  "learning_rate": 9.3993993993994e-06,
556
+ "loss": 0.7836325002961353,
557
+ "remaining_time": 3025.177688741684,
558
  "step": 61
559
  },
560
  {
561
  "epoch": 0.86,
562
+ "flops": 684239548468.1614,
563
+ "iter_time": 3.208916843914595,
564
  "learning_rate": 9.389389389389391e-06,
565
+ "loss": 0.7845871752931739,
566
+ "remaining_time": 3009.96399959189,
567
  "step": 62
568
  },
569
  {
570
  "epoch": 0.88,
571
+ "flops": 686128573400.2554,
572
+ "iter_time": 3.2000821675023725,
573
  "learning_rate": 9.37937937937938e-06,
574
+ "loss": 0.7834133035402422,
575
+ "remaining_time": 2998.476990949723,
576
  "step": 63
577
  },
578
  {
579
  "epoch": 0.89,
580
+ "flops": 687890959106.9581,
581
+ "iter_time": 3.1918835147978766,
582
  "learning_rate": 9.36936936936937e-06,
583
+ "loss": 0.7835721705048397,
584
+ "remaining_time": 2987.6029698508123,
585
  "step": 64
586
  },
587
  {
588
  "epoch": 0.9,
589
+ "flops": 685411659215.782,
590
+ "iter_time": 3.2034293301403522,
591
  "learning_rate": 9.35935935935936e-06,
592
+ "loss": 0.7846484487997913,
593
+ "remaining_time": 2995.2064236812294,
594
  "step": 65
595
  },
596
  {
597
  "epoch": 0.92,
598
+ "flops": 686396551783.2611,
599
+ "iter_time": 3.198832812676063,
600
  "learning_rate": 9.34934934934935e-06,
601
+ "loss": 0.7844019643117934,
602
+ "remaining_time": 2987.7098470394426,
603
  "step": 66
604
  },
605
  {
606
  "epoch": 0.93,
607
+ "flops": 687757907590.6332,
608
+ "iter_time": 3.192501006704388,
609
  "learning_rate": 9.339339339339341e-06,
610
+ "loss": 0.7828959446686754,
611
+ "remaining_time": 2978.603439255194,
612
  "step": 67
613
  },
614
  {
615
  "epoch": 0.94,
616
+ "flops": 689337825846.0652,
617
+ "iter_time": 3.185183998364121,
618
  "learning_rate": 9.32932932932933e-06,
619
+ "loss": 0.7902479852219887,
620
+ "remaining_time": 2968.591486475361,
621
  "step": 68
622
  },
623
  {
624
  "epoch": 0.96,
625
+ "flops": 690507579500.5035,
626
+ "iter_time": 3.1797881406896256,
627
  "learning_rate": 9.31931931931932e-06,
628
+ "loss": 0.7887315053697688,
629
+ "remaining_time": 2960.382758982041,
630
  "step": 69
631
  },
632
  {
633
  "epoch": 0.97,
634
+ "flops": 688434808155.4791,
635
+ "iter_time": 3.1893619938173154,
636
  "learning_rate": 9.30930930930931e-06,
637
+ "loss": 0.7890811903160712,
638
+ "remaining_time": 2966.1066542501035,
639
  "step": 70
640
  },
641
  {
642
  "epoch": 0.99,
643
+ "flops": 688905124395.0935,
644
+ "iter_time": 3.187184613091605,
645
  "learning_rate": 9.2992992992993e-06,
646
+ "loss": 0.7906293784129104,
647
+ "remaining_time": 2960.8945055621007,
648
  "step": 71
649
  },
650
  {
651
  "epoch": 1.0,
652
+ "flops": 687042581818.5326,
653
+ "iter_time": 3.1958249320446606,
654
  "learning_rate": 9.289289289289291e-06,
655
+ "loss": 0.7891110846287812,
656
+ "remaining_time": 2965.725536937445,
657
  "step": 72
658
  },
659
  {
660
  "epoch": 1.01,
661
+ "flops": 687525348077.9441,
662
+ "iter_time": 3.1935808890395694,
663
  "learning_rate": 9.27927927927928e-06,
664
+ "loss": 0.7882049737824934,
665
+ "remaining_time": 2960.449484139681,
666
  "step": 73
667
  },
668
  {
669
  "epoch": 1.03,
670
+ "flops": 686602655595.3418,
671
+ "iter_time": 3.1978725897122735,
672
  "learning_rate": 9.26926926926927e-06,
673
+ "loss": 0.7864959240446685,
674
+ "remaining_time": 2961.230018073565,
675
  "step": 74
676
  },
677
  {
678
  "epoch": 1.04,
679
+ "flops": 685415546203.5184,
680
+ "iter_time": 3.203411163510503,
681
  "learning_rate": 9.25925925925926e-06,
682
+ "loss": 0.7873949648042218,
683
+ "remaining_time": 2963.1553262472153,
684
  "step": 75
685
  },
686
  {
687
  "epoch": 1.06,
688
+ "flops": 687363654440.5881,
689
+ "iter_time": 3.1943321386973063,
690
  "learning_rate": 9.24924924924925e-06,
691
+ "loss": 0.7858130151561795,
692
+ "remaining_time": 2951.562896156311,
693
  "step": 76
694
  },
695
  {
696
  "epoch": 1.07,
697
+ "flops": 688269975843.3333,
698
+ "iter_time": 3.190125807335502,
699
  "learning_rate": 9.239239239239241e-06,
700
+ "loss": 0.7877598850046177,
701
+ "remaining_time": 2944.4861201706685,
702
  "step": 77
703
  },
704
  {
705
  "epoch": 1.08,
706
+ "flops": 688972485141.1887,
707
+ "iter_time": 3.1868730024857954,
708
  "learning_rate": 9.229229229229229e-06,
709
+ "loss": 0.7854022861545715,
710
+ "remaining_time": 2938.2969082919035,
711
  "step": 78
712
  },
713
  {
714
  "epoch": 1.1,
715
+ "flops": 687824528280.4768,
716
+ "iter_time": 3.1921917903117643,
717
  "learning_rate": 9.21921921921922e-06,
718
+ "loss": 0.7832412632930258,
719
+ "remaining_time": 2940.0086388771347,
720
  "step": 79
721
  },
722
  {
723
  "epoch": 1.11,
724
+ "flops": 688041534544.1229,
725
+ "iter_time": 3.19118498246881,
726
  "learning_rate": 9.20920920920921e-06,
727
+ "loss": 0.7834378506600955,
728
+ "remaining_time": 2935.8901838713055,
729
  "step": 80
730
  },
731
  {
732
  "epoch": 1.12,
733
+ "flops": 689740241054.4022,
734
+ "iter_time": 3.1833256661891935,
735
  "learning_rate": 9.1991991991992e-06,
736
+ "loss": 0.7835284721534945,
737
+ "remaining_time": 2925.476287227869,
738
  "step": 81
739
  },
740
  {
741
  "epoch": 1.14,
742
+ "flops": 691339865745.1554,
743
+ "iter_time": 3.1759600757080833,
744
  "learning_rate": 9.189189189189191e-06,
745
+ "loss": 0.7841371874319596,
746
+ "remaining_time": 2915.5313495000205,
747
  "step": 82
748
  },
749
  {
750
  "epoch": 1.15,
751
+ "flops": 691831676037.651,
752
+ "iter_time": 3.173702344661806,
753
  "learning_rate": 9.179179179179179e-06,
754
+ "loss": 0.7866218155576399,
755
+ "remaining_time": 2910.2850500548757,
756
  "step": 83
757
  },
758
  {
759
  "epoch": 1.17,
760
+ "flops": 692165627939.7103,
761
+ "iter_time": 3.172171115875244,
762
  "learning_rate": 9.16916916916917e-06,
763
+ "loss": 0.7885055974020635,
764
+ "remaining_time": 2905.7087421417236,
765
  "step": 84
766
  },
767
  {
768
  "epoch": 1.18,
769
+ "flops": 691298445724.3085,
770
+ "iter_time": 3.176150367373512,
771
  "learning_rate": 9.15915915915916e-06,
772
+ "loss": 0.7916795414280429,
773
+ "remaining_time": 2906.177586146763,
774
  "step": 85
775
  },
776
  {
777
  "epoch": 1.19,
778
+ "flops": 691887707830.7711,
779
+ "iter_time": 3.1734453257392436,
780
  "learning_rate": 9.14914914914915e-06,
781
+ "loss": 0.7927517460137624,
782
+ "remaining_time": 2900.5290277256686,
783
  "step": 86
784
  },
785
  {
786
  "epoch": 1.21,
787
+ "flops": 693016770807.2743,
788
+ "iter_time": 3.168275148369545,
789
  "learning_rate": 9.13913913913914e-06,
790
+ "loss": 0.7958262285536247,
791
+ "remaining_time": 2892.6352104613948,
792
  "step": 87
793
  },
794
  {
795
  "epoch": 1.22,
796
+ "flops": 694962781760.9435,
797
+ "iter_time": 3.159403452928039,
798
  "learning_rate": 9.129129129129129e-06,
799
+ "loss": 0.7947019662680885,
800
+ "remaining_time": 2881.3759490703715,
801
  "step": 88
802
  },
803
  {
804
  "epoch": 1.24,
805
+ "flops": 694885706163.1102,
806
+ "iter_time": 3.1597538888454437,
807
  "learning_rate": 9.11911911911912e-06,
808
+ "loss": 0.7951459466054077,
809
+ "remaining_time": 2878.5357927381992,
810
  "step": 89
811
  },
812
  {
813
  "epoch": 1.25,
814
+ "flops": 693268991974.0264,
815
+ "iter_time": 3.1671224845929093,
816
  "learning_rate": 9.10910910910911e-06,
817
+ "loss": 0.7985334871393536,
818
+ "remaining_time": 2882.0814609795475,
819
  "step": 90
820
  },
821
  {
822
  "epoch": 1.26,
823
+ "flops": 694390411780.8718,
824
+ "iter_time": 3.1620076762305365,
825
  "learning_rate": 9.0990990990991e-06,
826
+ "loss": 0.79848215226796,
827
+ "remaining_time": 2874.2649776935577,
828
  "step": 91
829
  },
830
  {
831
  "epoch": 1.28,
832
+ "flops": 695010526544.7205,
833
+ "iter_time": 3.1591864129999183,
834
  "learning_rate": 9.08908908908909e-06,
835
+ "loss": 0.8021223307452804,
836
+ "remaining_time": 2868.541263003926,
837
  "step": 92
838
  },
839
  {
840
  "epoch": 1.29,
841
+ "flops": 695355769452.5133,
842
+ "iter_time": 3.157617882541988,
843
  "learning_rate": 9.079079079079079e-06,
844
+ "loss": 0.8028781074378276,
845
+ "remaining_time": 2863.9594194655833,
846
  "step": 93
847
  },
848
  {
849
  "epoch": 1.31,
850
+ "flops": 693781086979.3446,
851
+ "iter_time": 3.1647847621671614,
852
  "learning_rate": 9.06906906906907e-06,
853
+ "loss": 0.8010433263634493,
854
+ "remaining_time": 2867.294994523448,
855
  "step": 94
856
  },
857
  {
858
  "epoch": 1.32,
859
+ "flops": 685750809540.4008,
860
+ "iter_time": 3.201845016885311,
861
  "learning_rate": 9.05905905905906e-06,
862
+ "loss": 0.7989738930998148,
863
+ "remaining_time": 2897.6697402812065,
864
  "step": 95
865
  },
866
  {
867
  "epoch": 1.33,
868
+ "flops": 681989994532.0168,
869
+ "iter_time": 3.2195015028903358,
870
  "learning_rate": 9.04904904904905e-06,
871
+ "loss": 0.7985111541688166,
872
+ "remaining_time": 2910.4293586128633,
873
  "step": 96
874
  },
875
  {
876
  "epoch": 1.35,
877
+ "flops": 679967679761.6364,
878
+ "iter_time": 3.229076730708281,
879
  "learning_rate": 9.03903903903904e-06,
880
+ "loss": 0.7966190426271285,
881
+ "remaining_time": 2915.856287829578,
882
  "step": 97
883
  },
884
  {
885
  "epoch": 1.36,
886
+ "flops": 679499608244.4629,
887
+ "iter_time": 3.231301071717567,
888
  "learning_rate": 9.029029029029029e-06,
889
+ "loss": 0.8009978522008572,
890
+ "remaining_time": 2914.6335666892455,
891
  "step": 98
892
  },
893
  {
894
  "epoch": 1.38,
895
+ "flops": 679556703403.7747,
896
+ "iter_time": 3.231029583483326,
897
  "learning_rate": 9.01901901901902e-06,
898
+ "loss": 0.8004848736788486,
899
+ "remaining_time": 2911.157654718477,
900
  "step": 99
901
  },
902
  {
903
  "epoch": 1.39,
904
+ "flops": 680088449306.3125,
905
+ "iter_time": 3.2285033139315518,
906
  "learning_rate": 9.00900900900901e-06,
907
+ "loss": 0.80179202494206,
908
+ "remaining_time": 2905.6529825383964,
909
  "step": 100
910
  },
911
  {
912
  "epoch": 1.4,
913
+ "flops": 679305263544.2017,
914
+ "iter_time": 3.2322255253791807,
915
  "learning_rate": 8.998998998999e-06,
916
+ "loss": 0.8054241046926395,
917
+ "remaining_time": 2905.7707473158835,
918
  "step": 101
919
  },
920
  {
921
  "epoch": 1.42,
922
+ "flops": 678219678345.323,
923
+ "iter_time": 3.237399153190084,
924
  "learning_rate": 8.98898898898899e-06,
925
+ "loss": 0.806240863645713,
926
+ "remaining_time": 2907.1844395646954,
927
  "step": 102
928
  },
929
  {
930
  "epoch": 1.43,
931
+ "flops": 678601241599.5226,
932
+ "iter_time": 3.235578831504373,
933
  "learning_rate": 8.97897897897898e-06,
934
+ "loss": 0.8044394550092558,
935
+ "remaining_time": 2902.314211859423,
936
  "step": 103
937
  },
938
  {
939
  "epoch": 1.44,
940
+ "flops": 679619159001.5583,
941
+ "iter_time": 3.230732658534374,
942
  "learning_rate": 8.96896896896897e-06,
943
+ "loss": 0.8030870604591633,
944
+ "remaining_time": 2894.736462046799,
945
  "step": 104
946
  },
947
  {
948
  "epoch": 1.46,
949
+ "flops": 679886315597.0814,
950
+ "iter_time": 3.2294631646229672,
951
  "learning_rate": 8.95895895895896e-06,
952
+ "loss": 0.8051971898545717,
953
+ "remaining_time": 2890.3695323375555,
954
  "step": 105
955
  },
956
  {
957
  "epoch": 1.47,
958
+ "flops": 678310913818.5634,
959
+ "iter_time": 3.2369637103307816,
960
  "learning_rate": 8.94894894894895e-06,
961
+ "loss": 0.802898217956026,
962
+ "remaining_time": 2893.8455570357187,
963
  "step": 106
964
  },
965
  {
966
  "epoch": 1.49,
967
+ "flops": 678205029462.8219,
968
+ "iter_time": 3.2374690793595224,
969
  "learning_rate": 8.93893893893894e-06,
970
+ "loss": 0.7999242357764658,
971
+ "remaining_time": 2891.0598878680535,
972
  "step": 107
973
  },
974
  {
975
  "epoch": 1.5,
976
+ "flops": 679183332554.648,
977
+ "iter_time": 3.2328057935304733,
978
  "learning_rate": 8.92892892892893e-06,
979
+ "loss": 0.7983449934187011,
980
+ "remaining_time": 2883.662767829182,
981
  "step": 108
982
  },
983
  {
984
  "epoch": 1.51,
985
+ "flops": 679660594016.6174,
986
+ "iter_time": 3.2305356992615595,
987
  "learning_rate": 8.91891891891892e-06,
988
+ "loss": 0.7975135434845142,
989
+ "remaining_time": 2878.4073080420494,
990
  "step": 109
991
  },
992
  {
993
  "epoch": 1.53,
994
+ "flops": 679917326982.2216,
995
+ "iter_time": 3.2293158671177857,
996
  "learning_rate": 8.90890890890891e-06,
997
+ "loss": 0.800260408049669,
998
+ "remaining_time": 2874.0911217348294,
999
  "step": 110
1000
  },
1001
  {
1002
  "epoch": 1.54,
1003
+ "flops": 678570796524.842,
1004
+ "iter_time": 3.235724000497298,
1005
  "learning_rate": 8.8988988988989e-06,
1006
+ "loss": 0.8001778039691724,
1007
+ "remaining_time": 2876.558636442098,
1008
  "step": 111
1009
  },
1010
  {
1011
  "epoch": 1.56,
1012
+ "flops": 679294181801.9117,
1013
+ "iter_time": 3.2322782546550304,
1014
  "learning_rate": 8.888888888888888e-06,
1015
+ "loss": 0.8043540259294807,
1016
+ "remaining_time": 2870.263090133667,
1017
  "step": 112
1018
  },
1019
  {
1020
  "epoch": 1.57,
1021
+ "flops": 680076936162.0037,
1022
+ "iter_time": 3.228557969842638,
1023
  "learning_rate": 8.87887887887888e-06,
1024
+ "loss": 0.803361485670186,
1025
+ "remaining_time": 2863.73091925042,
1026
  "step": 113
1027
  },
1028
  {
1029
  "epoch": 1.58,
1030
+ "flops": 680394854540.8124,
1031
+ "iter_time": 3.2270494077057963,
1032
  "learning_rate": 8.86886886886887e-06,
1033
+ "loss": 0.8037768708134841,
1034
+ "remaining_time": 2859.1657752273354,
1035
  "step": 114
1036
  },
1037
  {
1038
  "epoch": 1.6,
1039
+ "flops": 679642088482.3146,
1040
+ "iter_time": 3.2306236614260757,
1041
  "learning_rate": 8.85885885885886e-06,
1042
+ "loss": 0.8032941021053492,
1043
+ "remaining_time": 2859.101940362077,
1044
  "step": 115
1045
  },
1046
  {
1047
  "epoch": 1.61,
1048
+ "flops": 678366377394.2675,
1049
+ "iter_time": 3.2366990545521612,
1050
  "learning_rate": 8.84884884884885e-06,
1051
+ "loss": 0.8076981610842958,
1052
+ "remaining_time": 2861.2419642241107,
1053
  "step": 116
1054
  },
1055
  {
1056
  "epoch": 1.62,
1057
+ "flops": 678587964691.6045,
1058
+ "iter_time": 3.235642137198613,
1059
  "learning_rate": 8.838838838838838e-06,
1060
+ "loss": 0.8075931794734529,
1061
+ "remaining_time": 2857.072007146375,
1062
  "step": 117
1063
  },
1064
  {
1065
  "epoch": 1.64,
1066
+ "flops": 679167916745.1449,
1067
+ "iter_time": 3.232879172023545,
1068
  "learning_rate": 8.82882882882883e-06,
1069
+ "loss": 0.8091662476787184,
1070
+ "remaining_time": 2851.3994297247664,
1071
  "step": 118
1072
  },
1073
  {
1074
  "epoch": 1.65,
1075
+ "flops": 679510361033.5404,
1076
+ "iter_time": 3.2312499385769082,
1077
  "learning_rate": 8.818818818818819e-06,
1078
+ "loss": 0.8079495852019312,
1079
+ "remaining_time": 2846.731195886256,
1080
  "step": 119
1081
  },
1082
  {
1083
  "epoch": 1.67,
1084
+ "flops": 678073089164.8191,
1085
+ "iter_time": 3.2380990300859724,
1086
  "learning_rate": 8.80880880880881e-06,
1087
+ "loss": 0.8078880893499119,
1088
+ "remaining_time": 2849.5271464756556,
1089
  "step": 120
1090
  }
1091
  ],
 
1093
  "max_steps": 1000,
1094
  "num_train_epochs": 14,
1095
  "save_steps": 120,
1096
+ "total_flos": 2899503710208.0,
1097
  "trial_name": null,
1098
  "trial_params": null
1099
  }
phanthiet_expert_1000_steps/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EleutherAI/pythia-70m",
3
+ "architectures": [
4
+ "GPTNeoXForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": 0.1,
9
+ "eos_token_id": 0,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2048,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 2048,
17
+ "model_type": "gpt_neox",
18
+ "num_attention_heads": 8,
19
+ "num_hidden_layers": 6,
20
+ "rope_scaling": null,
21
+ "rotary_emb_base": 10000,
22
+ "rotary_pct": 0.25,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.32.1",
26
+ "use_cache": true,
27
+ "use_parallel_residual": true,
28
+ "vocab_size": 50304
29
+ }
phanthiet_expert_1000_steps/final/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EleutherAI/pythia-70m",
3
+ "architectures": [
4
+ "GPTNeoXForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": 0.1,
9
+ "eos_token_id": 0,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2048,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 2048,
17
+ "model_type": "gpt_neox",
18
+ "num_attention_heads": 8,
19
+ "num_hidden_layers": 6,
20
+ "rope_scaling": null,
21
+ "rotary_emb_base": 10000,
22
+ "rotary_pct": 0.25,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.32.1",
26
+ "use_cache": true,
27
+ "use_parallel_residual": true,
28
+ "vocab_size": 50304
29
+ }
phanthiet_expert_1000_steps/final/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "transformers_version": "4.32.1"
6
+ }
phanthiet_expert_1000_steps/final/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17b4f6f8cb5c27490e265dad314b411c637b323ea4db8230d1820d76d5e63fd
3
+ size 281733156
phanthiet_expert_1000_steps/final/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e6a836e449aa3425df1cbc481e8b21d36ee757d8f6d487a88ac995418a8332
3
+ size 4472
phanthiet_expert_1000_steps/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "transformers_version": "4.32.1"
6
+ }
phanthiet_expert_1000_steps/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17b4f6f8cb5c27490e265dad314b411c637b323ea4db8230d1820d76d5e63fd
3
+ size 281733156
phanthiet_expert_1000_steps/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e6a836e449aa3425df1cbc481e8b21d36ee757d8f6d487a88ac995418a8332
3
+ size 4472