diaenra commited on
Commit
b650269
·
verified ·
1 Parent(s): 923b58e

Training in progress, step 7324, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7a4ea1a45e75153ffc20b849fd0f67baf44b9e333ad68b95de3d2463f5c5694
3
  size 1623800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c593d71f7557076e37e0b4b9917c8094c81179d69d7d4e534db17f9dedbe2e92
3
  size 1623800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f6c71fb7d10555fde02594837f1b5b1c72e8adfae4621fe35a1c33a68bddeb8
3
  size 3255543
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7641b338727bb3540ed834aa0894224334616dfda3481784c4699ee547c005c7
3
  size 3255543
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae9da71b074f9b06c32d4d58a28d832767dfab2dd4cba0eca26064fe80f0ba0c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db01e641880e948c7c9c41c9c379ab112d117b55ad5f878dd649990157c93f3a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55a73bde68c0da7bbe678db55f0d085fc9f5de86ed520bfa6e44439f3fa9f996
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:108c408d30709d05ff4c72fd6b80731b42d0c024721a42b380443f36c1af49ff
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9789064099938563,
5
  "eval_steps": 500,
6
- "global_step": 7170,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -50197,6 +50197,1084 @@
50197
  "learning_rate": 1.1208910173183817e-07,
50198
  "loss": 46.0056,
50199
  "step": 7170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50200
  }
50201
  ],
50202
  "logging_steps": 1,
@@ -50211,12 +51289,12 @@
50211
  "should_evaluate": false,
50212
  "should_log": false,
50213
  "should_save": true,
50214
- "should_training_stop": false
50215
  },
50216
  "attributes": {}
50217
  }
50218
  },
50219
- "total_flos": 673475530850304.0,
50220
  "train_batch_size": 4,
50221
  "trial_name": null,
50222
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9999317359546727,
5
  "eval_steps": 500,
6
+ "global_step": 7324,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
50197
  "learning_rate": 1.1208910173183817e-07,
50198
  "loss": 46.0056,
50199
  "step": 7170
50200
+ },
50201
+ {
50202
+ "epoch": 0.9790429380845109,
50203
+ "grad_norm": 0.08908099681138992,
50204
+ "learning_rate": 1.1063866077125618e-07,
50205
+ "loss": 46.0037,
50206
+ "step": 7171
50207
+ },
50208
+ {
50209
+ "epoch": 0.9791794661751655,
50210
+ "grad_norm": 0.06807133555412292,
50211
+ "learning_rate": 1.0919765503453195e-07,
50212
+ "loss": 46.0035,
50213
+ "step": 7172
50214
+ },
50215
+ {
50216
+ "epoch": 0.9793159942658202,
50217
+ "grad_norm": 0.08383559435606003,
50218
+ "learning_rate": 1.0776608479418082e-07,
50219
+ "loss": 46.0052,
50220
+ "step": 7173
50221
+ },
50222
+ {
50223
+ "epoch": 0.9794525223564748,
50224
+ "grad_norm": 0.07002658396959305,
50225
+ "learning_rate": 1.063439503209529e-07,
50226
+ "loss": 46.0028,
50227
+ "step": 7174
50228
+ },
50229
+ {
50230
+ "epoch": 0.9795890504471295,
50231
+ "grad_norm": 0.0548606738448143,
50232
+ "learning_rate": 1.0493125188379971e-07,
50233
+ "loss": 46.0023,
50234
+ "step": 7175
50235
+ },
50236
+ {
50237
+ "epoch": 0.9797255785377842,
50238
+ "grad_norm": 0.16602098941802979,
50239
+ "learning_rate": 1.0352798974990752e-07,
50240
+ "loss": 46.0117,
50241
+ "step": 7176
50242
+ },
50243
+ {
50244
+ "epoch": 0.9798621066284388,
50245
+ "grad_norm": 0.10093922168016434,
50246
+ "learning_rate": 1.0213416418465294e-07,
50247
+ "loss": 46.0034,
50248
+ "step": 7177
50249
+ },
50250
+ {
50251
+ "epoch": 0.9799986347190934,
50252
+ "grad_norm": 0.08100040256977081,
50253
+ "learning_rate": 1.0074977545164177e-07,
50254
+ "loss": 46.011,
50255
+ "step": 7178
50256
+ },
50257
+ {
50258
+ "epoch": 0.9801351628097481,
50259
+ "grad_norm": 0.046713173389434814,
50260
+ "learning_rate": 9.937482381270346e-08,
50261
+ "loss": 46.0018,
50262
+ "step": 7179
50263
+ },
50264
+ {
50265
+ "epoch": 0.9802716909004028,
50266
+ "grad_norm": 0.06428004056215286,
50267
+ "learning_rate": 9.800930952786336e-08,
50268
+ "loss": 46.0027,
50269
+ "step": 7180
50270
+ },
50271
+ {
50272
+ "epoch": 0.9804082189910575,
50273
+ "grad_norm": 0.09634747356176376,
50274
+ "learning_rate": 9.665323285537598e-08,
50275
+ "loss": 46.0029,
50276
+ "step": 7181
50277
+ },
50278
+ {
50279
+ "epoch": 0.980544747081712,
50280
+ "grad_norm": 0.10261856019496918,
50281
+ "learning_rate": 9.530659405169728e-08,
50282
+ "loss": 46.0024,
50283
+ "step": 7182
50284
+ },
50285
+ {
50286
+ "epoch": 0.9806812751723667,
50287
+ "grad_norm": 0.08636260032653809,
50288
+ "learning_rate": 9.396939337152355e-08,
50289
+ "loss": 46.016,
50290
+ "step": 7183
50291
+ },
50292
+ {
50293
+ "epoch": 0.9808178032630214,
50294
+ "grad_norm": 0.0685448870062828,
50295
+ "learning_rate": 9.264163106774137e-08,
50296
+ "loss": 46.0086,
50297
+ "step": 7184
50298
+ },
50299
+ {
50300
+ "epoch": 0.980954331353676,
50301
+ "grad_norm": 0.10635057091712952,
50302
+ "learning_rate": 9.132330739145545e-08,
50303
+ "loss": 46.0068,
50304
+ "step": 7185
50305
+ },
50306
+ {
50307
+ "epoch": 0.9810908594443307,
50308
+ "grad_norm": 0.04230385273694992,
50309
+ "learning_rate": 9.001442259200521e-08,
50310
+ "loss": 46.0053,
50311
+ "step": 7186
50312
+ },
50313
+ {
50314
+ "epoch": 0.9812273875349853,
50315
+ "grad_norm": 0.07328132539987564,
50316
+ "learning_rate": 8.871497691691489e-08,
50317
+ "loss": 46.0056,
50318
+ "step": 7187
50319
+ },
50320
+ {
50321
+ "epoch": 0.98136391562564,
50322
+ "grad_norm": 0.09978116303682327,
50323
+ "learning_rate": 8.742497061195454e-08,
50324
+ "loss": 46.0054,
50325
+ "step": 7188
50326
+ },
50327
+ {
50328
+ "epoch": 0.9815004437162946,
50329
+ "grad_norm": 0.05884576961398125,
50330
+ "learning_rate": 8.614440392108458e-08,
50331
+ "loss": 46.0009,
50332
+ "step": 7189
50333
+ },
50334
+ {
50335
+ "epoch": 0.9816369718069493,
50336
+ "grad_norm": 0.03834863379597664,
50337
+ "learning_rate": 8.487327708648907e-08,
50338
+ "loss": 46.0009,
50339
+ "step": 7190
50340
+ },
50341
+ {
50342
+ "epoch": 0.981773499897604,
50343
+ "grad_norm": 0.03978651016950607,
50344
+ "learning_rate": 8.361159034857569e-08,
50345
+ "loss": 46.0047,
50346
+ "step": 7191
50347
+ },
50348
+ {
50349
+ "epoch": 0.9819100279882585,
50350
+ "grad_norm": 0.07296937704086304,
50351
+ "learning_rate": 8.235934394594802e-08,
50352
+ "loss": 46.0005,
50353
+ "step": 7192
50354
+ },
50355
+ {
50356
+ "epoch": 0.9820465560789132,
50357
+ "grad_norm": 0.03498697653412819,
50358
+ "learning_rate": 8.11165381154444e-08,
50359
+ "loss": 46.0101,
50360
+ "step": 7193
50361
+ },
50362
+ {
50363
+ "epoch": 0.9821830841695679,
50364
+ "grad_norm": 0.038850992918014526,
50365
+ "learning_rate": 7.988317309209902e-08,
50366
+ "loss": 46.0043,
50367
+ "step": 7194
50368
+ },
50369
+ {
50370
+ "epoch": 0.9823196122602226,
50371
+ "grad_norm": 0.10181787610054016,
50372
+ "learning_rate": 7.865924910916977e-08,
50373
+ "loss": 46.001,
50374
+ "step": 7195
50375
+ },
50376
+ {
50377
+ "epoch": 0.9824561403508771,
50378
+ "grad_norm": 0.4068288803100586,
50379
+ "learning_rate": 7.744476639813814e-08,
50380
+ "loss": 46.0063,
50381
+ "step": 7196
50382
+ },
50383
+ {
50384
+ "epoch": 0.9825926684415318,
50385
+ "grad_norm": 0.17444883286952972,
50386
+ "learning_rate": 7.623972518868705e-08,
50387
+ "loss": 46.006,
50388
+ "step": 7197
50389
+ },
50390
+ {
50391
+ "epoch": 0.9827291965321865,
50392
+ "grad_norm": 0.11943278461694717,
50393
+ "learning_rate": 7.5044125708712e-08,
50394
+ "loss": 46.0,
50395
+ "step": 7198
50396
+ },
50397
+ {
50398
+ "epoch": 0.9828657246228412,
50399
+ "grad_norm": 0.13627932965755463,
50400
+ "learning_rate": 7.38579681843321e-08,
50401
+ "loss": 46.0006,
50402
+ "step": 7199
50403
+ },
50404
+ {
50405
+ "epoch": 0.9830022527134958,
50406
+ "grad_norm": 0.09473912417888641,
50407
+ "learning_rate": 7.268125283987348e-08,
50408
+ "loss": 46.0,
50409
+ "step": 7200
50410
+ },
50411
+ {
50412
+ "epoch": 0.9831387808041504,
50413
+ "grad_norm": 0.08604719489812851,
50414
+ "learning_rate": 7.151397989788588e-08,
50415
+ "loss": 46.0028,
50416
+ "step": 7201
50417
+ },
50418
+ {
50419
+ "epoch": 0.9832753088948051,
50420
+ "grad_norm": 0.03411213681101799,
50421
+ "learning_rate": 7.035614957912606e-08,
50422
+ "loss": 46.0001,
50423
+ "step": 7202
50424
+ },
50425
+ {
50426
+ "epoch": 0.9834118369854598,
50427
+ "grad_norm": 0.039187826216220856,
50428
+ "learning_rate": 6.92077621025633e-08,
50429
+ "loss": 46.0053,
50430
+ "step": 7203
50431
+ },
50432
+ {
50433
+ "epoch": 0.9835483650761144,
50434
+ "grad_norm": 0.059648871421813965,
50435
+ "learning_rate": 6.806881768539052e-08,
50436
+ "loss": 46.0022,
50437
+ "step": 7204
50438
+ },
50439
+ {
50440
+ "epoch": 0.9836848931667691,
50441
+ "grad_norm": 0.041936662048101425,
50442
+ "learning_rate": 6.693931654299657e-08,
50443
+ "loss": 46.0009,
50444
+ "step": 7205
50445
+ },
50446
+ {
50447
+ "epoch": 0.9838214212574237,
50448
+ "grad_norm": 0.08943246304988861,
50449
+ "learning_rate": 6.581925888900498e-08,
50450
+ "loss": 46.0064,
50451
+ "step": 7206
50452
+ },
50453
+ {
50454
+ "epoch": 0.9839579493480783,
50455
+ "grad_norm": 0.09488219022750854,
50456
+ "learning_rate": 6.470864493524075e-08,
50457
+ "loss": 46.0043,
50458
+ "step": 7207
50459
+ },
50460
+ {
50461
+ "epoch": 0.984094477438733,
50462
+ "grad_norm": 0.05033031851053238,
50463
+ "learning_rate": 6.360747489175256e-08,
50464
+ "loss": 46.0046,
50465
+ "step": 7208
50466
+ },
50467
+ {
50468
+ "epoch": 0.9842310055293877,
50469
+ "grad_norm": 0.1277073323726654,
50470
+ "learning_rate": 6.251574896679046e-08,
50471
+ "loss": 46.0052,
50472
+ "step": 7209
50473
+ },
50474
+ {
50475
+ "epoch": 0.9843675336200424,
50476
+ "grad_norm": 0.05453884229063988,
50477
+ "learning_rate": 6.14334673668282e-08,
50478
+ "loss": 46.0048,
50479
+ "step": 7210
50480
+ },
50481
+ {
50482
+ "epoch": 0.9845040617106969,
50483
+ "grad_norm": 0.061872418969869614,
50484
+ "learning_rate": 6.036063029654649e-08,
50485
+ "loss": 46.0076,
50486
+ "step": 7211
50487
+ },
50488
+ {
50489
+ "epoch": 0.9846405898013516,
50490
+ "grad_norm": 0.053107887506484985,
50491
+ "learning_rate": 5.929723795884967e-08,
50492
+ "loss": 46.0111,
50493
+ "step": 7212
50494
+ },
50495
+ {
50496
+ "epoch": 0.9847771178920063,
50497
+ "grad_norm": 0.0883575901389122,
50498
+ "learning_rate": 5.8243290554838014e-08,
50499
+ "loss": 46.01,
50500
+ "step": 7213
50501
+ },
50502
+ {
50503
+ "epoch": 0.984913645982661,
50504
+ "grad_norm": 0.10030897706747055,
50505
+ "learning_rate": 5.7198788283852057e-08,
50506
+ "loss": 46.0021,
50507
+ "step": 7214
50508
+ },
50509
+ {
50510
+ "epoch": 0.9850501740733156,
50511
+ "grad_norm": 0.07091116905212402,
50512
+ "learning_rate": 5.6163731343422675e-08,
50513
+ "loss": 46.0035,
50514
+ "step": 7215
50515
+ },
50516
+ {
50517
+ "epoch": 0.9851867021639702,
50518
+ "grad_norm": 0.07437314093112946,
50519
+ "learning_rate": 5.51381199293044e-08,
50520
+ "loss": 46.0071,
50521
+ "step": 7216
50522
+ },
50523
+ {
50524
+ "epoch": 0.9853232302546249,
50525
+ "grad_norm": 0.055116403847932816,
50526
+ "learning_rate": 5.412195423545874e-08,
50527
+ "loss": 46.0039,
50528
+ "step": 7217
50529
+ },
50530
+ {
50531
+ "epoch": 0.9854597583452795,
50532
+ "grad_norm": 0.10955941677093506,
50533
+ "learning_rate": 5.31152344540764e-08,
50534
+ "loss": 46.0068,
50535
+ "step": 7218
50536
+ },
50537
+ {
50538
+ "epoch": 0.9855962864359342,
50539
+ "grad_norm": 0.03393147885799408,
50540
+ "learning_rate": 5.2117960775543986e-08,
50541
+ "loss": 46.0066,
50542
+ "step": 7219
50543
+ },
50544
+ {
50545
+ "epoch": 0.9857328145265889,
50546
+ "grad_norm": 0.04151192307472229,
50547
+ "learning_rate": 5.1130133388471724e-08,
50548
+ "loss": 46.0126,
50549
+ "step": 7220
50550
+ },
50551
+ {
50552
+ "epoch": 0.9858693426172435,
50553
+ "grad_norm": 0.10087965428829193,
50554
+ "learning_rate": 5.015175247967685e-08,
50555
+ "loss": 46.0049,
50556
+ "step": 7221
50557
+ },
50558
+ {
50559
+ "epoch": 0.9860058707078981,
50560
+ "grad_norm": 0.07426264137029648,
50561
+ "learning_rate": 4.9182818234200235e-08,
50562
+ "loss": 46.0075,
50563
+ "step": 7222
50564
+ },
50565
+ {
50566
+ "epoch": 0.9861423987985528,
50567
+ "grad_norm": 0.059685610234737396,
50568
+ "learning_rate": 4.8223330835284184e-08,
50569
+ "loss": 46.0048,
50570
+ "step": 7223
50571
+ },
50572
+ {
50573
+ "epoch": 0.9862789268892075,
50574
+ "grad_norm": 0.06613564491271973,
50575
+ "learning_rate": 4.727329046438911e-08,
50576
+ "loss": 46.002,
50577
+ "step": 7224
50578
+ },
50579
+ {
50580
+ "epoch": 0.9864154549798622,
50581
+ "grad_norm": 0.07357048243284225,
50582
+ "learning_rate": 4.6332697301193496e-08,
50583
+ "loss": 46.0064,
50584
+ "step": 7225
50585
+ },
50586
+ {
50587
+ "epoch": 0.9865519830705167,
50588
+ "grad_norm": 0.19530758261680603,
50589
+ "learning_rate": 4.540155152358283e-08,
50590
+ "loss": 46.0062,
50591
+ "step": 7226
50592
+ },
50593
+ {
50594
+ "epoch": 0.9866885111611714,
50595
+ "grad_norm": 0.09539202600717545,
50596
+ "learning_rate": 4.447985330765514e-08,
50597
+ "loss": 46.0043,
50598
+ "step": 7227
50599
+ },
50600
+ {
50601
+ "epoch": 0.9868250392518261,
50602
+ "grad_norm": 0.04313179850578308,
50603
+ "learning_rate": 4.356760282773209e-08,
50604
+ "loss": 46.0006,
50605
+ "step": 7228
50606
+ },
50607
+ {
50608
+ "epoch": 0.9869615673424808,
50609
+ "grad_norm": 0.04633820801973343,
50610
+ "learning_rate": 4.266480025633679e-08,
50611
+ "loss": 46.0093,
50612
+ "step": 7229
50613
+ },
50614
+ {
50615
+ "epoch": 0.9870980954331353,
50616
+ "grad_norm": 0.04100847616791725,
50617
+ "learning_rate": 4.177144576420489e-08,
50618
+ "loss": 46.0027,
50619
+ "step": 7230
50620
+ },
50621
+ {
50622
+ "epoch": 0.98723462352379,
50623
+ "grad_norm": 0.1251312494277954,
50624
+ "learning_rate": 4.088753952030122e-08,
50625
+ "loss": 46.0043,
50626
+ "step": 7231
50627
+ },
50628
+ {
50629
+ "epoch": 0.9873711516144447,
50630
+ "grad_norm": 0.04459778219461441,
50631
+ "learning_rate": 4.0013081691786524e-08,
50632
+ "loss": 46.0116,
50633
+ "step": 7232
50634
+ },
50635
+ {
50636
+ "epoch": 0.9875076797050993,
50637
+ "grad_norm": 0.19127972424030304,
50638
+ "learning_rate": 3.9148072444039616e-08,
50639
+ "loss": 46.0086,
50640
+ "step": 7233
50641
+ },
50642
+ {
50643
+ "epoch": 0.987644207795754,
50644
+ "grad_norm": 0.11693168431520462,
50645
+ "learning_rate": 3.8292511940657415e-08,
50646
+ "loss": 46.0036,
50647
+ "step": 7234
50648
+ },
50649
+ {
50650
+ "epoch": 0.9877807358864086,
50651
+ "grad_norm": 0.05080500990152359,
50652
+ "learning_rate": 3.744640034344382e-08,
50653
+ "loss": 46.0033,
50654
+ "step": 7235
50655
+ },
50656
+ {
50657
+ "epoch": 0.9879172639770633,
50658
+ "grad_norm": 0.18874023854732513,
50659
+ "learning_rate": 3.660973781242083e-08,
50660
+ "loss": 46.0089,
50661
+ "step": 7236
50662
+ },
50663
+ {
50664
+ "epoch": 0.9880537920677179,
50665
+ "grad_norm": 0.06047139689326286,
50666
+ "learning_rate": 3.5782524505811876e-08,
50667
+ "loss": 46.009,
50668
+ "step": 7237
50669
+ },
50670
+ {
50671
+ "epoch": 0.9881903201583726,
50672
+ "grad_norm": 0.05019622668623924,
50673
+ "learning_rate": 3.496476058006959e-08,
50674
+ "loss": 46.0073,
50675
+ "step": 7238
50676
+ },
50677
+ {
50678
+ "epoch": 0.9883268482490273,
50679
+ "grad_norm": 0.05337301269173622,
50680
+ "learning_rate": 3.415644618985359e-08,
50681
+ "loss": 46.003,
50682
+ "step": 7239
50683
+ },
50684
+ {
50685
+ "epoch": 0.9884633763396818,
50686
+ "grad_norm": 0.06359133124351501,
50687
+ "learning_rate": 3.3357581488030475e-08,
50688
+ "loss": 46.0037,
50689
+ "step": 7240
50690
+ },
50691
+ {
50692
+ "epoch": 0.9885999044303365,
50693
+ "grad_norm": 0.05159619078040123,
50694
+ "learning_rate": 3.256816662568496e-08,
50695
+ "loss": 46.011,
50696
+ "step": 7241
50697
+ },
50698
+ {
50699
+ "epoch": 0.9887364325209912,
50700
+ "grad_norm": 0.07998304069042206,
50701
+ "learning_rate": 3.178820175211428e-08,
50702
+ "loss": 46.0029,
50703
+ "step": 7242
50704
+ },
50705
+ {
50706
+ "epoch": 0.9888729606116459,
50707
+ "grad_norm": 0.03504487872123718,
50708
+ "learning_rate": 3.1017687014828215e-08,
50709
+ "loss": 46.004,
50710
+ "step": 7243
50711
+ },
50712
+ {
50713
+ "epoch": 0.9890094887023005,
50714
+ "grad_norm": 0.0823051929473877,
50715
+ "learning_rate": 3.0256622559543536e-08,
50716
+ "loss": 46.0024,
50717
+ "step": 7244
50718
+ },
50719
+ {
50720
+ "epoch": 0.9891460167929551,
50721
+ "grad_norm": 0.422367662191391,
50722
+ "learning_rate": 2.950500853020066e-08,
50723
+ "loss": 46.0013,
50724
+ "step": 7245
50725
+ },
50726
+ {
50727
+ "epoch": 0.9892825448836098,
50728
+ "grad_norm": 0.1806757152080536,
50729
+ "learning_rate": 2.8762845068941445e-08,
50730
+ "loss": 46.0088,
50731
+ "step": 7246
50732
+ },
50733
+ {
50734
+ "epoch": 0.9894190729742645,
50735
+ "grad_norm": 0.17935673892498016,
50736
+ "learning_rate": 2.8030132316136938e-08,
50737
+ "loss": 46.0132,
50738
+ "step": 7247
50739
+ },
50740
+ {
50741
+ "epoch": 0.9895556010649191,
50742
+ "grad_norm": 0.2891080975532532,
50743
+ "learning_rate": 2.730687041034852e-08,
50744
+ "loss": 46.0057,
50745
+ "step": 7248
50746
+ },
50747
+ {
50748
+ "epoch": 0.9896921291555738,
50749
+ "grad_norm": 0.04798796772956848,
50750
+ "learning_rate": 2.6593059488366766e-08,
50751
+ "loss": 46.0012,
50752
+ "step": 7249
50753
+ },
50754
+ {
50755
+ "epoch": 0.9898286572462284,
50756
+ "grad_norm": 0.425402969121933,
50757
+ "learning_rate": 2.5888699685189255e-08,
50758
+ "loss": 46.0015,
50759
+ "step": 7250
50760
+ },
50761
+ {
50762
+ "epoch": 0.989965185336883,
50763
+ "grad_norm": 0.1112116277217865,
50764
+ "learning_rate": 2.519379113402609e-08,
50765
+ "loss": 46.0062,
50766
+ "step": 7251
50767
+ },
50768
+ {
50769
+ "epoch": 0.9901017134275377,
50770
+ "grad_norm": 0.053855422884225845,
50771
+ "learning_rate": 2.4508333966305473e-08,
50772
+ "loss": 46.0038,
50773
+ "step": 7252
50774
+ },
50775
+ {
50776
+ "epoch": 0.9902382415181924,
50777
+ "grad_norm": 0.03137766197323799,
50778
+ "learning_rate": 2.3832328311651496e-08,
50779
+ "loss": 46.006,
50780
+ "step": 7253
50781
+ },
50782
+ {
50783
+ "epoch": 0.9903747696088471,
50784
+ "grad_norm": 0.10431533306837082,
50785
+ "learning_rate": 2.3165774297922992e-08,
50786
+ "loss": 46.0022,
50787
+ "step": 7254
50788
+ },
50789
+ {
50790
+ "epoch": 0.9905112976995016,
50791
+ "grad_norm": 0.04208254814147949,
50792
+ "learning_rate": 2.2508672051174685e-08,
50793
+ "loss": 46.0001,
50794
+ "step": 7255
50795
+ },
50796
+ {
50797
+ "epoch": 0.9906478257901563,
50798
+ "grad_norm": 0.045661814510822296,
50799
+ "learning_rate": 2.1861021695684935e-08,
50800
+ "loss": 46.0032,
50801
+ "step": 7256
50802
+ },
50803
+ {
50804
+ "epoch": 0.990784353880811,
50805
+ "grad_norm": 0.030271239578723907,
50806
+ "learning_rate": 2.122282335393355e-08,
50807
+ "loss": 46.0025,
50808
+ "step": 7257
50809
+ },
50810
+ {
50811
+ "epoch": 0.9909208819714657,
50812
+ "grad_norm": 0.0907672792673111,
50813
+ "learning_rate": 2.059407714662398e-08,
50814
+ "loss": 46.0016,
50815
+ "step": 7258
50816
+ },
50817
+ {
50818
+ "epoch": 0.9910574100621203,
50819
+ "grad_norm": 0.08112984895706177,
50820
+ "learning_rate": 1.9974783192661107e-08,
50821
+ "loss": 46.0013,
50822
+ "step": 7259
50823
+ },
50824
+ {
50825
+ "epoch": 0.9911939381527749,
50826
+ "grad_norm": 0.13127990067005157,
50827
+ "learning_rate": 1.9364941609167907e-08,
50828
+ "loss": 46.0027,
50829
+ "step": 7260
50830
+ },
50831
+ {
50832
+ "epoch": 0.9913304662434296,
50833
+ "grad_norm": 0.07560381293296814,
50834
+ "learning_rate": 1.8764552511485457e-08,
50835
+ "loss": 46.0118,
50836
+ "step": 7261
50837
+ },
50838
+ {
50839
+ "epoch": 0.9914669943340843,
50840
+ "grad_norm": 0.03843251243233681,
50841
+ "learning_rate": 1.817361601315626e-08,
50842
+ "loss": 46.0048,
50843
+ "step": 7262
50844
+ },
50845
+ {
50846
+ "epoch": 0.9916035224247389,
50847
+ "grad_norm": 0.03968435525894165,
50848
+ "learning_rate": 1.7592132225946468e-08,
50849
+ "loss": 46.0057,
50850
+ "step": 7263
50851
+ },
50852
+ {
50853
+ "epoch": 0.9917400505153935,
50854
+ "grad_norm": 0.054471638053655624,
50855
+ "learning_rate": 1.702010125981812e-08,
50856
+ "loss": 46.0006,
50857
+ "step": 7264
50858
+ },
50859
+ {
50860
+ "epoch": 0.9918765786060482,
50861
+ "grad_norm": 0.1916276067495346,
50862
+ "learning_rate": 1.6457523222956907e-08,
50863
+ "loss": 46.0005,
50864
+ "step": 7265
50865
+ },
50866
+ {
50867
+ "epoch": 0.9920131066967028,
50868
+ "grad_norm": 0.2849438786506653,
50869
+ "learning_rate": 1.5904398221766592e-08,
50870
+ "loss": 46.0049,
50871
+ "step": 7266
50872
+ },
50873
+ {
50874
+ "epoch": 0.9921496347873575,
50875
+ "grad_norm": 0.12652307748794556,
50876
+ "learning_rate": 1.5360726360852397e-08,
50877
+ "loss": 46.0085,
50878
+ "step": 7267
50879
+ },
50880
+ {
50881
+ "epoch": 0.9922861628780122,
50882
+ "grad_norm": 0.11404263973236084,
50883
+ "learning_rate": 1.482650774303207e-08,
50884
+ "loss": 46.0012,
50885
+ "step": 7268
50886
+ },
50887
+ {
50888
+ "epoch": 0.9924226909686668,
50889
+ "grad_norm": 0.055355679243803024,
50890
+ "learning_rate": 1.430174246934146e-08,
50891
+ "loss": 46.0068,
50892
+ "step": 7269
50893
+ },
50894
+ {
50895
+ "epoch": 0.9925592190593214,
50896
+ "grad_norm": 0.09978245943784714,
50897
+ "learning_rate": 1.3786430639023407e-08,
50898
+ "loss": 46.0037,
50899
+ "step": 7270
50900
+ },
50901
+ {
50902
+ "epoch": 0.9926957471499761,
50903
+ "grad_norm": 0.05256585776805878,
50904
+ "learning_rate": 1.3280572349538834e-08,
50905
+ "loss": 46.0084,
50906
+ "step": 7271
50907
+ },
50908
+ {
50909
+ "epoch": 0.9928322752406308,
50910
+ "grad_norm": 0.1046704575419426,
50911
+ "learning_rate": 1.278416769655566e-08,
50912
+ "loss": 46.0039,
50913
+ "step": 7272
50914
+ },
50915
+ {
50916
+ "epoch": 0.9929688033312855,
50917
+ "grad_norm": 0.10620303452014923,
50918
+ "learning_rate": 1.2297216773954346e-08,
50919
+ "loss": 46.0041,
50920
+ "step": 7273
50921
+ },
50922
+ {
50923
+ "epoch": 0.99310533142194,
50924
+ "grad_norm": 0.11976180225610733,
50925
+ "learning_rate": 1.1819719673827889e-08,
50926
+ "loss": 46.0065,
50927
+ "step": 7274
50928
+ },
50929
+ {
50930
+ "epoch": 0.9932418595125947,
50931
+ "grad_norm": 0.28400561213493347,
50932
+ "learning_rate": 1.1351676486487383e-08,
50933
+ "loss": 46.0087,
50934
+ "step": 7275
50935
+ },
50936
+ {
50937
+ "epoch": 0.9933783876032494,
50938
+ "grad_norm": 0.07839032262563705,
50939
+ "learning_rate": 1.0893087300439809e-08,
50940
+ "loss": 46.0029,
50941
+ "step": 7276
50942
+ },
50943
+ {
50944
+ "epoch": 0.993514915693904,
50945
+ "grad_norm": 0.1288733035326004,
50946
+ "learning_rate": 1.0443952202426887e-08,
50947
+ "loss": 46.0086,
50948
+ "step": 7277
50949
+ },
50950
+ {
50951
+ "epoch": 0.9936514437845587,
50952
+ "grad_norm": 0.05590146407485008,
50953
+ "learning_rate": 1.0004271277386234e-08,
50954
+ "loss": 46.0053,
50955
+ "step": 7278
50956
+ },
50957
+ {
50958
+ "epoch": 0.9937879718752133,
50959
+ "grad_norm": 0.04046904668211937,
50960
+ "learning_rate": 9.574044608468003e-09,
50961
+ "loss": 46.0029,
50962
+ "step": 7279
50963
+ },
50964
+ {
50965
+ "epoch": 0.993924499965868,
50966
+ "grad_norm": 0.05340801179409027,
50967
+ "learning_rate": 9.153272277040436e-09,
50968
+ "loss": 46.0055,
50969
+ "step": 7280
50970
+ },
50971
+ {
50972
+ "epoch": 0.9940610280565226,
50973
+ "grad_norm": 0.0846848338842392,
50974
+ "learning_rate": 8.741954362678772e-09,
50975
+ "loss": 46.0034,
50976
+ "step": 7281
50977
+ },
50978
+ {
50979
+ "epoch": 0.9941975561471773,
50980
+ "grad_norm": 0.08063561469316483,
50981
+ "learning_rate": 8.340090943176338e-09,
50982
+ "loss": 46.0035,
50983
+ "step": 7282
50984
+ },
50985
+ {
50986
+ "epoch": 0.994334084237832,
50987
+ "grad_norm": 0.049741119146347046,
50988
+ "learning_rate": 7.947682094533449e-09,
50989
+ "loss": 46.0018,
50990
+ "step": 7283
50991
+ },
50992
+ {
50993
+ "epoch": 0.9944706123284865,
50994
+ "grad_norm": 0.0649300292134285,
50995
+ "learning_rate": 7.564727890968514e-09,
50996
+ "loss": 46.0059,
50997
+ "step": 7284
50998
+ },
50999
+ {
51000
+ "epoch": 0.9946071404191412,
51001
+ "grad_norm": 0.06503751128911972,
51002
+ "learning_rate": 7.1912284048958336e-09,
51003
+ "loss": 46.0024,
51004
+ "step": 7285
51005
+ },
51006
+ {
51007
+ "epoch": 0.9947436685097959,
51008
+ "grad_norm": 0.08364452421665192,
51009
+ "learning_rate": 6.8271837069588955e-09,
51010
+ "loss": 46.0006,
51011
+ "step": 7286
51012
+ },
51013
+ {
51014
+ "epoch": 0.9948801966004506,
51015
+ "grad_norm": 0.06732216477394104,
51016
+ "learning_rate": 6.472593866013732e-09,
51017
+ "loss": 46.0,
51018
+ "step": 7287
51019
+ },
51020
+ {
51021
+ "epoch": 0.9950167246911052,
51022
+ "grad_norm": 0.08693568408489227,
51023
+ "learning_rate": 6.127458949106713e-09,
51024
+ "loss": 46.005,
51025
+ "step": 7288
51026
+ },
51027
+ {
51028
+ "epoch": 0.9951532527817598,
51029
+ "grad_norm": 0.053449422121047974,
51030
+ "learning_rate": 5.7917790215245015e-09,
51031
+ "loss": 46.0021,
51032
+ "step": 7289
51033
+ },
51034
+ {
51035
+ "epoch": 0.9952897808724145,
51036
+ "grad_norm": 0.05752718821167946,
51037
+ "learning_rate": 5.4655541467441006e-09,
51038
+ "loss": 46.0032,
51039
+ "step": 7290
51040
+ },
51041
+ {
51042
+ "epoch": 0.9954263089630692,
51043
+ "grad_norm": 0.04035717621445656,
51044
+ "learning_rate": 5.148784386460604e-09,
51045
+ "loss": 46.001,
51046
+ "step": 7291
51047
+ },
51048
+ {
51049
+ "epoch": 0.9955628370537238,
51050
+ "grad_norm": 0.05446084216237068,
51051
+ "learning_rate": 4.841469800592746e-09,
51052
+ "loss": 46.008,
51053
+ "step": 7292
51054
+ },
51055
+ {
51056
+ "epoch": 0.9956993651443784,
51057
+ "grad_norm": 0.05863227695226669,
51058
+ "learning_rate": 4.543610447249602e-09,
51059
+ "loss": 46.0042,
51060
+ "step": 7293
51061
+ },
51062
+ {
51063
+ "epoch": 0.9958358932350331,
51064
+ "grad_norm": 0.05528897047042847,
51065
+ "learning_rate": 4.2552063827694386e-09,
51066
+ "loss": 46.0031,
51067
+ "step": 7294
51068
+ },
51069
+ {
51070
+ "epoch": 0.9959724213256878,
51071
+ "grad_norm": 0.07325014472007751,
51072
+ "learning_rate": 3.976257661691962e-09,
51073
+ "loss": 46.0055,
51074
+ "step": 7295
51075
+ },
51076
+ {
51077
+ "epoch": 0.9961089494163424,
51078
+ "grad_norm": 0.20819850265979767,
51079
+ "learning_rate": 3.7067643367749705e-09,
51080
+ "loss": 46.0083,
51081
+ "step": 7296
51082
+ },
51083
+ {
51084
+ "epoch": 0.9962454775069971,
51085
+ "grad_norm": 0.16535024344921112,
51086
+ "learning_rate": 3.446726458988803e-09,
51087
+ "loss": 46.0058,
51088
+ "step": 7297
51089
+ },
51090
+ {
51091
+ "epoch": 0.9963820055976517,
51092
+ "grad_norm": 0.06618311256170273,
51093
+ "learning_rate": 3.1961440775107878e-09,
51094
+ "loss": 46.0018,
51095
+ "step": 7298
51096
+ },
51097
+ {
51098
+ "epoch": 0.9965185336883063,
51099
+ "grad_norm": 0.20235642790794373,
51100
+ "learning_rate": 2.9550172397252442e-09,
51101
+ "loss": 46.0093,
51102
+ "step": 7299
51103
+ },
51104
+ {
51105
+ "epoch": 0.996655061778961,
51106
+ "grad_norm": 0.04973575845360756,
51107
+ "learning_rate": 2.723345991245685e-09,
51108
+ "loss": 46.0,
51109
+ "step": 7300
51110
+ },
51111
+ {
51112
+ "epoch": 0.9967915898696157,
51113
+ "grad_norm": 0.07193689048290253,
51114
+ "learning_rate": 2.5011303758759596e-09,
51115
+ "loss": 46.0096,
51116
+ "step": 7301
51117
+ },
51118
+ {
51119
+ "epoch": 0.9969281179602704,
51120
+ "grad_norm": 0.057107098400592804,
51121
+ "learning_rate": 2.288370435654663e-09,
51122
+ "loss": 46.0006,
51123
+ "step": 7302
51124
+ },
51125
+ {
51126
+ "epoch": 0.9970646460509249,
51127
+ "grad_norm": 0.10516843944787979,
51128
+ "learning_rate": 2.0850662108051755e-09,
51129
+ "loss": 46.0045,
51130
+ "step": 7303
51131
+ },
51132
+ {
51133
+ "epoch": 0.9972011741415796,
51134
+ "grad_norm": 0.15865033864974976,
51135
+ "learning_rate": 1.8912177397856224e-09,
51136
+ "loss": 46.0095,
51137
+ "step": 7304
51138
+ },
51139
+ {
51140
+ "epoch": 0.9973377022322343,
51141
+ "grad_norm": 0.07561293244361877,
51142
+ "learning_rate": 1.706825059255568e-09,
51143
+ "loss": 46.0012,
51144
+ "step": 7305
51145
+ },
51146
+ {
51147
+ "epoch": 0.997474230322889,
51148
+ "grad_norm": 0.08446773886680603,
51149
+ "learning_rate": 1.5318882040926686e-09,
51150
+ "loss": 46.0098,
51151
+ "step": 7306
51152
+ },
51153
+ {
51154
+ "epoch": 0.9976107584135436,
51155
+ "grad_norm": 0.11638659983873367,
51156
+ "learning_rate": 1.3664072073704681e-09,
51157
+ "loss": 46.0044,
51158
+ "step": 7307
51159
+ },
51160
+ {
51161
+ "epoch": 0.9977472865041982,
51162
+ "grad_norm": 0.049882251769304276,
51163
+ "learning_rate": 1.210382100397256e-09,
51164
+ "loss": 46.0102,
51165
+ "step": 7308
51166
+ },
51167
+ {
51168
+ "epoch": 0.9978838145948529,
51169
+ "grad_norm": 0.08385684341192245,
51170
+ "learning_rate": 1.063812912671658e-09,
51171
+ "loss": 46.0012,
51172
+ "step": 7309
51173
+ },
51174
+ {
51175
+ "epoch": 0.9980203426855075,
51176
+ "grad_norm": 0.15993371605873108,
51177
+ "learning_rate": 9.266996719159426e-10,
51178
+ "loss": 46.003,
51179
+ "step": 7310
51180
+ },
51181
+ {
51182
+ "epoch": 0.9981568707761622,
51183
+ "grad_norm": 0.04674162715673447,
51184
+ "learning_rate": 7.990424040649202e-10,
51185
+ "loss": 46.0063,
51186
+ "step": 7311
51187
+ },
51188
+ {
51189
+ "epoch": 0.9982933988668169,
51190
+ "grad_norm": 0.17500679194927216,
51191
+ "learning_rate": 6.808411332548393e-10,
51192
+ "loss": 46.0085,
51193
+ "step": 7312
51194
+ },
51195
+ {
51196
+ "epoch": 0.9984299269574715,
51197
+ "grad_norm": 0.11019917577505112,
51198
+ "learning_rate": 5.720958818511424e-10,
51199
+ "loss": 46.0036,
51200
+ "step": 7313
51201
+ },
51202
+ {
51203
+ "epoch": 0.9985664550481261,
51204
+ "grad_norm": 0.09337335079908371,
51205
+ "learning_rate": 4.72806670409609e-10,
51206
+ "loss": 46.0022,
51207
+ "step": 7314
51208
+ },
51209
+ {
51210
+ "epoch": 0.9987029831387808,
51211
+ "grad_norm": 0.09085293859243393,
51212
+ "learning_rate": 3.829735177096616e-10,
51213
+ "loss": 46.0003,
51214
+ "step": 7315
51215
+ },
51216
+ {
51217
+ "epoch": 0.9988395112294355,
51218
+ "grad_norm": 0.1213827133178711,
51219
+ "learning_rate": 3.0259644074326353e-10,
51220
+ "loss": 46.0042,
51221
+ "step": 7316
51222
+ },
51223
+ {
51224
+ "epoch": 0.9989760393200902,
51225
+ "grad_norm": 0.07254913449287415,
51226
+ "learning_rate": 2.3167545471491914e-10,
51227
+ "loss": 46.0039,
51228
+ "step": 7317
51229
+ },
51230
+ {
51231
+ "epoch": 0.9991125674107447,
51232
+ "grad_norm": 0.05589550361037254,
51233
+ "learning_rate": 1.7021057303057142e-10,
51234
+ "loss": 46.0002,
51235
+ "step": 7318
51236
+ },
51237
+ {
51238
+ "epoch": 0.9992490955013994,
51239
+ "grad_norm": 0.0650186687707901,
51240
+ "learning_rate": 1.1820180731980656e-10,
51241
+ "loss": 46.0024,
51242
+ "step": 7319
51243
+ },
51244
+ {
51245
+ "epoch": 0.9993856235920541,
51246
+ "grad_norm": 0.07707148045301437,
51247
+ "learning_rate": 7.564916741364947e-11,
51248
+ "loss": 46.0048,
51249
+ "step": 7320
51250
+ },
51251
+ {
51252
+ "epoch": 0.9995221516827087,
51253
+ "grad_norm": 0.06899863481521606,
51254
+ "learning_rate": 4.2552661366768164e-11,
51255
+ "loss": 46.0075,
51256
+ "step": 7321
51257
+ },
51258
+ {
51259
+ "epoch": 0.9996586797733634,
51260
+ "grad_norm": 0.13619117438793182,
51261
+ "learning_rate": 1.8912295429718285e-11,
51262
+ "loss": 46.0019,
51263
+ "step": 7322
51264
+ },
51265
+ {
51266
+ "epoch": 0.999795207864018,
51267
+ "grad_norm": 0.06259801238775253,
51268
+ "learning_rate": 4.728074082249734e-12,
51269
+ "loss": 46.0084,
51270
+ "step": 7323
51271
+ },
51272
+ {
51273
+ "epoch": 0.9999317359546727,
51274
+ "grad_norm": 0.11005455255508423,
51275
+ "learning_rate": 0.0,
51276
+ "loss": 46.0028,
51277
+ "step": 7324
51278
  }
51279
  ],
51280
  "logging_steps": 1,
 
51289
  "should_evaluate": false,
51290
  "should_log": false,
51291
  "should_save": true,
51292
+ "should_training_stop": true
51293
  },
51294
  "attributes": {}
51295
  }
51296
  },
51297
+ "total_flos": 687479785488384.0,
51298
  "train_batch_size": 4,
51299
  "trial_name": null,
51300
  "trial_params": null