gabrielaltay commited on
Commit
a22c09b
1 Parent(s): d647c4a

Training in progress, step 9288, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8555c6c30b01b7b518e204262ad49bdfc8a647ffac30864d51ee8c8057b5b58b
3
  size 439648328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0015c35a2eddaec815db15463fdc85043d66de0a9c2962eda74d561b81414b6f
3
  size 439648328
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65419cfd6a880659ccadbe3db88894c9a0bc93ac16d9c1b7a0c6cf2cbc2b395
3
  size 879415866
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eabcb53a0ad24de4121adf17313188b0c1f3343621a0e7c0c20bdb33797bd83f
3
  size 879415866
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5528a8a5438254c67bb6f375f3876eeca26717fef489265e3b041c5387c9fb8f
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1f62733ff0d8690c2a8797d153e8ec399303cbc92322dd118c7b88915a04be
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1d87868b3d95ab9fb053bc3e7b7216c1360a2d6ef559d5a4f71fdb1eb48e41
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da252e7b08747075dc2ad5c781c5deb49e64dd87681bbc7b4aaa89a30b6f9d5f
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c699c45754dba9f295f88b976126b3ed2ecc4605b1af134d5e1f2b88049fd75b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a78424df3b6c3f885135b5bf0deeb48c09ab08dbdc18e3e5b017f82ee6747c9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8004653868528214,
5
  "eval_steps": 500,
6
- "global_step": 8256,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14455,6 +14455,1812 @@
14455
  "learning_rate": 9.98157843707582e-06,
14456
  "loss": 5.2778,
14457
  "step": 8256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14458
  }
14459
  ],
14460
  "logging_steps": 4,
@@ -14462,7 +16268,7 @@
14462
  "num_input_tokens_seen": 0,
14463
  "num_train_epochs": 1,
14464
  "save_steps": 1032,
14465
- "total_flos": 6.953660644432282e+16,
14466
  "train_batch_size": 8,
14467
  "trial_name": null,
14468
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.900523560209424,
5
  "eval_steps": 500,
6
+ "global_step": 9288,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14455
  "learning_rate": 9.98157843707582e-06,
14456
  "loss": 5.2778,
14457
  "step": 8256
14458
+ },
14459
+ {
14460
+ "epoch": 0.8,
14461
+ "grad_norm": 1.036868691444397,
14462
+ "learning_rate": 9.962187318208263e-06,
14463
+ "loss": 5.31,
14464
+ "step": 8260
14465
+ },
14466
+ {
14467
+ "epoch": 0.8,
14468
+ "grad_norm": 1.0464235544204712,
14469
+ "learning_rate": 9.942796199340703e-06,
14470
+ "loss": 5.3512,
14471
+ "step": 8264
14472
+ },
14473
+ {
14474
+ "epoch": 0.8,
14475
+ "grad_norm": 1.001470685005188,
14476
+ "learning_rate": 9.923405080473144e-06,
14477
+ "loss": 5.2698,
14478
+ "step": 8268
14479
+ },
14480
+ {
14481
+ "epoch": 0.8,
14482
+ "grad_norm": 1.0910736322402954,
14483
+ "learning_rate": 9.904013961605584e-06,
14484
+ "loss": 5.363,
14485
+ "step": 8272
14486
+ },
14487
+ {
14488
+ "epoch": 0.8,
14489
+ "grad_norm": 1.087928056716919,
14490
+ "learning_rate": 9.884622842738026e-06,
14491
+ "loss": 5.3454,
14492
+ "step": 8276
14493
+ },
14494
+ {
14495
+ "epoch": 0.8,
14496
+ "grad_norm": 1.055014967918396,
14497
+ "learning_rate": 9.865231723870467e-06,
14498
+ "loss": 5.3134,
14499
+ "step": 8280
14500
+ },
14501
+ {
14502
+ "epoch": 0.8,
14503
+ "grad_norm": 1.1186180114746094,
14504
+ "learning_rate": 9.84584060500291e-06,
14505
+ "loss": 5.3016,
14506
+ "step": 8284
14507
+ },
14508
+ {
14509
+ "epoch": 0.8,
14510
+ "grad_norm": 1.0159074068069458,
14511
+ "learning_rate": 9.82644948613535e-06,
14512
+ "loss": 5.3974,
14513
+ "step": 8288
14514
+ },
14515
+ {
14516
+ "epoch": 0.8,
14517
+ "grad_norm": 1.1419733762741089,
14518
+ "learning_rate": 9.807058367267792e-06,
14519
+ "loss": 5.333,
14520
+ "step": 8292
14521
+ },
14522
+ {
14523
+ "epoch": 0.8,
14524
+ "grad_norm": 1.078598976135254,
14525
+ "learning_rate": 9.787667248400232e-06,
14526
+ "loss": 5.2157,
14527
+ "step": 8296
14528
+ },
14529
+ {
14530
+ "epoch": 0.8,
14531
+ "grad_norm": 1.0978525876998901,
14532
+ "learning_rate": 9.768276129532675e-06,
14533
+ "loss": 5.3084,
14534
+ "step": 8300
14535
+ },
14536
+ {
14537
+ "epoch": 0.81,
14538
+ "grad_norm": 1.0307817459106445,
14539
+ "learning_rate": 9.748885010665115e-06,
14540
+ "loss": 5.2962,
14541
+ "step": 8304
14542
+ },
14543
+ {
14544
+ "epoch": 0.81,
14545
+ "grad_norm": 0.9767160415649414,
14546
+ "learning_rate": 9.729493891797557e-06,
14547
+ "loss": 5.3765,
14548
+ "step": 8308
14549
+ },
14550
+ {
14551
+ "epoch": 0.81,
14552
+ "grad_norm": 1.048744797706604,
14553
+ "learning_rate": 9.710102772929998e-06,
14554
+ "loss": 5.3491,
14555
+ "step": 8312
14556
+ },
14557
+ {
14558
+ "epoch": 0.81,
14559
+ "grad_norm": 1.1452877521514893,
14560
+ "learning_rate": 9.69071165406244e-06,
14561
+ "loss": 5.3348,
14562
+ "step": 8316
14563
+ },
14564
+ {
14565
+ "epoch": 0.81,
14566
+ "grad_norm": 1.0742149353027344,
14567
+ "learning_rate": 9.671320535194882e-06,
14568
+ "loss": 5.4119,
14569
+ "step": 8320
14570
+ },
14571
+ {
14572
+ "epoch": 0.81,
14573
+ "grad_norm": 1.016554594039917,
14574
+ "learning_rate": 9.651929416327323e-06,
14575
+ "loss": 5.2832,
14576
+ "step": 8324
14577
+ },
14578
+ {
14579
+ "epoch": 0.81,
14580
+ "grad_norm": 1.1286094188690186,
14581
+ "learning_rate": 9.632538297459765e-06,
14582
+ "loss": 5.3436,
14583
+ "step": 8328
14584
+ },
14585
+ {
14586
+ "epoch": 0.81,
14587
+ "grad_norm": 1.0719375610351562,
14588
+ "learning_rate": 9.613147178592206e-06,
14589
+ "loss": 5.2561,
14590
+ "step": 8332
14591
+ },
14592
+ {
14593
+ "epoch": 0.81,
14594
+ "grad_norm": 1.0239946842193604,
14595
+ "learning_rate": 9.593756059724646e-06,
14596
+ "loss": 5.3411,
14597
+ "step": 8336
14598
+ },
14599
+ {
14600
+ "epoch": 0.81,
14601
+ "grad_norm": 1.16642165184021,
14602
+ "learning_rate": 9.574364940857087e-06,
14603
+ "loss": 5.2173,
14604
+ "step": 8340
14605
+ },
14606
+ {
14607
+ "epoch": 0.81,
14608
+ "grad_norm": 1.056943655014038,
14609
+ "learning_rate": 9.554973821989529e-06,
14610
+ "loss": 5.3138,
14611
+ "step": 8344
14612
+ },
14613
+ {
14614
+ "epoch": 0.81,
14615
+ "grad_norm": 1.0310717821121216,
14616
+ "learning_rate": 9.53558270312197e-06,
14617
+ "loss": 5.2147,
14618
+ "step": 8348
14619
+ },
14620
+ {
14621
+ "epoch": 0.81,
14622
+ "grad_norm": 1.0939549207687378,
14623
+ "learning_rate": 9.516191584254412e-06,
14624
+ "loss": 5.3849,
14625
+ "step": 8352
14626
+ },
14627
+ {
14628
+ "epoch": 0.81,
14629
+ "grad_norm": 1.0846009254455566,
14630
+ "learning_rate": 9.496800465386852e-06,
14631
+ "loss": 5.2793,
14632
+ "step": 8356
14633
+ },
14634
+ {
14635
+ "epoch": 0.81,
14636
+ "grad_norm": 1.0984148979187012,
14637
+ "learning_rate": 9.477409346519295e-06,
14638
+ "loss": 5.3142,
14639
+ "step": 8360
14640
+ },
14641
+ {
14642
+ "epoch": 0.81,
14643
+ "grad_norm": 1.035758376121521,
14644
+ "learning_rate": 9.458018227651735e-06,
14645
+ "loss": 5.264,
14646
+ "step": 8364
14647
+ },
14648
+ {
14649
+ "epoch": 0.81,
14650
+ "grad_norm": 1.0837132930755615,
14651
+ "learning_rate": 9.438627108784177e-06,
14652
+ "loss": 5.432,
14653
+ "step": 8368
14654
+ },
14655
+ {
14656
+ "epoch": 0.81,
14657
+ "grad_norm": 1.0333995819091797,
14658
+ "learning_rate": 9.419235989916618e-06,
14659
+ "loss": 5.2622,
14660
+ "step": 8372
14661
+ },
14662
+ {
14663
+ "epoch": 0.81,
14664
+ "grad_norm": 1.054474949836731,
14665
+ "learning_rate": 9.39984487104906e-06,
14666
+ "loss": 5.2721,
14667
+ "step": 8376
14668
+ },
14669
+ {
14670
+ "epoch": 0.81,
14671
+ "grad_norm": 1.0750809907913208,
14672
+ "learning_rate": 9.380453752181502e-06,
14673
+ "loss": 5.2433,
14674
+ "step": 8380
14675
+ },
14676
+ {
14677
+ "epoch": 0.81,
14678
+ "grad_norm": 1.054914116859436,
14679
+ "learning_rate": 9.361062633313943e-06,
14680
+ "loss": 5.3407,
14681
+ "step": 8384
14682
+ },
14683
+ {
14684
+ "epoch": 0.81,
14685
+ "grad_norm": 1.1101247072219849,
14686
+ "learning_rate": 9.341671514446385e-06,
14687
+ "loss": 5.2774,
14688
+ "step": 8388
14689
+ },
14690
+ {
14691
+ "epoch": 0.81,
14692
+ "grad_norm": 1.1054069995880127,
14693
+ "learning_rate": 9.322280395578826e-06,
14694
+ "loss": 5.3978,
14695
+ "step": 8392
14696
+ },
14697
+ {
14698
+ "epoch": 0.81,
14699
+ "grad_norm": 1.0813637971878052,
14700
+ "learning_rate": 9.302889276711268e-06,
14701
+ "loss": 5.3735,
14702
+ "step": 8396
14703
+ },
14704
+ {
14705
+ "epoch": 0.81,
14706
+ "grad_norm": 1.151734709739685,
14707
+ "learning_rate": 9.283498157843708e-06,
14708
+ "loss": 5.4109,
14709
+ "step": 8400
14710
+ },
14711
+ {
14712
+ "epoch": 0.81,
14713
+ "grad_norm": 1.0774791240692139,
14714
+ "learning_rate": 9.264107038976149e-06,
14715
+ "loss": 5.2963,
14716
+ "step": 8404
14717
+ },
14718
+ {
14719
+ "epoch": 0.82,
14720
+ "grad_norm": 1.0468578338623047,
14721
+ "learning_rate": 9.24471592010859e-06,
14722
+ "loss": 5.334,
14723
+ "step": 8408
14724
+ },
14725
+ {
14726
+ "epoch": 0.82,
14727
+ "grad_norm": 1.0247350931167603,
14728
+ "learning_rate": 9.225324801241032e-06,
14729
+ "loss": 5.3307,
14730
+ "step": 8412
14731
+ },
14732
+ {
14733
+ "epoch": 0.82,
14734
+ "grad_norm": 1.021700143814087,
14735
+ "learning_rate": 9.205933682373472e-06,
14736
+ "loss": 5.3315,
14737
+ "step": 8416
14738
+ },
14739
+ {
14740
+ "epoch": 0.82,
14741
+ "grad_norm": 1.0852890014648438,
14742
+ "learning_rate": 9.186542563505915e-06,
14743
+ "loss": 5.23,
14744
+ "step": 8420
14745
+ },
14746
+ {
14747
+ "epoch": 0.82,
14748
+ "grad_norm": 1.0435699224472046,
14749
+ "learning_rate": 9.167151444638355e-06,
14750
+ "loss": 5.3353,
14751
+ "step": 8424
14752
+ },
14753
+ {
14754
+ "epoch": 0.82,
14755
+ "grad_norm": 1.0124831199645996,
14756
+ "learning_rate": 9.147760325770797e-06,
14757
+ "loss": 5.3257,
14758
+ "step": 8428
14759
+ },
14760
+ {
14761
+ "epoch": 0.82,
14762
+ "grad_norm": 1.0061957836151123,
14763
+ "learning_rate": 9.128369206903238e-06,
14764
+ "loss": 5.3437,
14765
+ "step": 8432
14766
+ },
14767
+ {
14768
+ "epoch": 0.82,
14769
+ "grad_norm": 1.0675718784332275,
14770
+ "learning_rate": 9.10897808803568e-06,
14771
+ "loss": 5.3543,
14772
+ "step": 8436
14773
+ },
14774
+ {
14775
+ "epoch": 0.82,
14776
+ "grad_norm": 1.0940582752227783,
14777
+ "learning_rate": 9.089586969168122e-06,
14778
+ "loss": 5.3205,
14779
+ "step": 8440
14780
+ },
14781
+ {
14782
+ "epoch": 0.82,
14783
+ "grad_norm": 1.0253922939300537,
14784
+ "learning_rate": 9.070195850300563e-06,
14785
+ "loss": 5.2836,
14786
+ "step": 8444
14787
+ },
14788
+ {
14789
+ "epoch": 0.82,
14790
+ "grad_norm": 1.0632801055908203,
14791
+ "learning_rate": 9.050804731433005e-06,
14792
+ "loss": 5.3634,
14793
+ "step": 8448
14794
+ },
14795
+ {
14796
+ "epoch": 0.82,
14797
+ "grad_norm": 1.151405692100525,
14798
+ "learning_rate": 9.031413612565446e-06,
14799
+ "loss": 5.266,
14800
+ "step": 8452
14801
+ },
14802
+ {
14803
+ "epoch": 0.82,
14804
+ "grad_norm": 1.1215803623199463,
14805
+ "learning_rate": 9.012022493697888e-06,
14806
+ "loss": 5.3605,
14807
+ "step": 8456
14808
+ },
14809
+ {
14810
+ "epoch": 0.82,
14811
+ "grad_norm": 1.0406687259674072,
14812
+ "learning_rate": 8.992631374830328e-06,
14813
+ "loss": 5.3037,
14814
+ "step": 8460
14815
+ },
14816
+ {
14817
+ "epoch": 0.82,
14818
+ "grad_norm": 1.0305143594741821,
14819
+ "learning_rate": 8.97324025596277e-06,
14820
+ "loss": 5.2419,
14821
+ "step": 8464
14822
+ },
14823
+ {
14824
+ "epoch": 0.82,
14825
+ "grad_norm": 1.0159248113632202,
14826
+ "learning_rate": 8.953849137095211e-06,
14827
+ "loss": 5.2576,
14828
+ "step": 8468
14829
+ },
14830
+ {
14831
+ "epoch": 0.82,
14832
+ "grad_norm": 1.0444971323013306,
14833
+ "learning_rate": 8.934458018227652e-06,
14834
+ "loss": 5.2461,
14835
+ "step": 8472
14836
+ },
14837
+ {
14838
+ "epoch": 0.82,
14839
+ "grad_norm": 1.062738299369812,
14840
+ "learning_rate": 8.915066899360092e-06,
14841
+ "loss": 5.2934,
14842
+ "step": 8476
14843
+ },
14844
+ {
14845
+ "epoch": 0.82,
14846
+ "grad_norm": 1.0445396900177002,
14847
+ "learning_rate": 8.895675780492534e-06,
14848
+ "loss": 5.3011,
14849
+ "step": 8480
14850
+ },
14851
+ {
14852
+ "epoch": 0.82,
14853
+ "grad_norm": 1.1299471855163574,
14854
+ "learning_rate": 8.876284661624975e-06,
14855
+ "loss": 5.1977,
14856
+ "step": 8484
14857
+ },
14858
+ {
14859
+ "epoch": 0.82,
14860
+ "grad_norm": 1.0241918563842773,
14861
+ "learning_rate": 8.856893542757417e-06,
14862
+ "loss": 5.2821,
14863
+ "step": 8488
14864
+ },
14865
+ {
14866
+ "epoch": 0.82,
14867
+ "grad_norm": 1.0978903770446777,
14868
+ "learning_rate": 8.837502423889858e-06,
14869
+ "loss": 5.3411,
14870
+ "step": 8492
14871
+ },
14872
+ {
14873
+ "epoch": 0.82,
14874
+ "grad_norm": 1.0630090236663818,
14875
+ "learning_rate": 8.8181113050223e-06,
14876
+ "loss": 5.2487,
14877
+ "step": 8496
14878
+ },
14879
+ {
14880
+ "epoch": 0.82,
14881
+ "grad_norm": 1.0468335151672363,
14882
+ "learning_rate": 8.798720186154742e-06,
14883
+ "loss": 5.2238,
14884
+ "step": 8500
14885
+ },
14886
+ {
14887
+ "epoch": 0.82,
14888
+ "grad_norm": 1.0701797008514404,
14889
+ "learning_rate": 8.779329067287183e-06,
14890
+ "loss": 5.2461,
14891
+ "step": 8504
14892
+ },
14893
+ {
14894
+ "epoch": 0.82,
14895
+ "grad_norm": 1.0644505023956299,
14896
+ "learning_rate": 8.759937948419625e-06,
14897
+ "loss": 5.2462,
14898
+ "step": 8508
14899
+ },
14900
+ {
14901
+ "epoch": 0.83,
14902
+ "grad_norm": 1.0857114791870117,
14903
+ "learning_rate": 8.740546829552066e-06,
14904
+ "loss": 5.3715,
14905
+ "step": 8512
14906
+ },
14907
+ {
14908
+ "epoch": 0.83,
14909
+ "grad_norm": 1.0911511182785034,
14910
+ "learning_rate": 8.721155710684508e-06,
14911
+ "loss": 5.3341,
14912
+ "step": 8516
14913
+ },
14914
+ {
14915
+ "epoch": 0.83,
14916
+ "grad_norm": 1.066658854484558,
14917
+ "learning_rate": 8.701764591816948e-06,
14918
+ "loss": 5.2962,
14919
+ "step": 8520
14920
+ },
14921
+ {
14922
+ "epoch": 0.83,
14923
+ "grad_norm": 1.0484204292297363,
14924
+ "learning_rate": 8.68237347294939e-06,
14925
+ "loss": 5.2937,
14926
+ "step": 8524
14927
+ },
14928
+ {
14929
+ "epoch": 0.83,
14930
+ "grad_norm": 1.1011348962783813,
14931
+ "learning_rate": 8.662982354081831e-06,
14932
+ "loss": 5.3372,
14933
+ "step": 8528
14934
+ },
14935
+ {
14936
+ "epoch": 0.83,
14937
+ "grad_norm": 1.1982569694519043,
14938
+ "learning_rate": 8.643591235214273e-06,
14939
+ "loss": 5.1987,
14940
+ "step": 8532
14941
+ },
14942
+ {
14943
+ "epoch": 0.83,
14944
+ "grad_norm": 1.1187928915023804,
14945
+ "learning_rate": 8.624200116346714e-06,
14946
+ "loss": 5.3326,
14947
+ "step": 8536
14948
+ },
14949
+ {
14950
+ "epoch": 0.83,
14951
+ "grad_norm": 1.0191898345947266,
14952
+ "learning_rate": 8.604808997479154e-06,
14953
+ "loss": 5.384,
14954
+ "step": 8540
14955
+ },
14956
+ {
14957
+ "epoch": 0.83,
14958
+ "grad_norm": 1.1834492683410645,
14959
+ "learning_rate": 8.585417878611597e-06,
14960
+ "loss": 5.3774,
14961
+ "step": 8544
14962
+ },
14963
+ {
14964
+ "epoch": 0.83,
14965
+ "grad_norm": 1.0459861755371094,
14966
+ "learning_rate": 8.566026759744037e-06,
14967
+ "loss": 5.3309,
14968
+ "step": 8548
14969
+ },
14970
+ {
14971
+ "epoch": 0.83,
14972
+ "grad_norm": 1.019656777381897,
14973
+ "learning_rate": 8.546635640876478e-06,
14974
+ "loss": 5.3024,
14975
+ "step": 8552
14976
+ },
14977
+ {
14978
+ "epoch": 0.83,
14979
+ "grad_norm": 1.1104713678359985,
14980
+ "learning_rate": 8.52724452200892e-06,
14981
+ "loss": 5.2309,
14982
+ "step": 8556
14983
+ },
14984
+ {
14985
+ "epoch": 0.83,
14986
+ "grad_norm": 1.0480828285217285,
14987
+ "learning_rate": 8.507853403141362e-06,
14988
+ "loss": 5.256,
14989
+ "step": 8560
14990
+ },
14991
+ {
14992
+ "epoch": 0.83,
14993
+ "grad_norm": 1.0698785781860352,
14994
+ "learning_rate": 8.488462284273803e-06,
14995
+ "loss": 5.2322,
14996
+ "step": 8564
14997
+ },
14998
+ {
14999
+ "epoch": 0.83,
15000
+ "grad_norm": 1.0998084545135498,
15001
+ "learning_rate": 8.469071165406245e-06,
15002
+ "loss": 5.2919,
15003
+ "step": 8568
15004
+ },
15005
+ {
15006
+ "epoch": 0.83,
15007
+ "grad_norm": 1.0241094827651978,
15008
+ "learning_rate": 8.449680046538685e-06,
15009
+ "loss": 5.3195,
15010
+ "step": 8572
15011
+ },
15012
+ {
15013
+ "epoch": 0.83,
15014
+ "grad_norm": 1.1492643356323242,
15015
+ "learning_rate": 8.430288927671128e-06,
15016
+ "loss": 5.2311,
15017
+ "step": 8576
15018
+ },
15019
+ {
15020
+ "epoch": 0.83,
15021
+ "grad_norm": 1.1048632860183716,
15022
+ "learning_rate": 8.410897808803568e-06,
15023
+ "loss": 5.3227,
15024
+ "step": 8580
15025
+ },
15026
+ {
15027
+ "epoch": 0.83,
15028
+ "grad_norm": 1.0702450275421143,
15029
+ "learning_rate": 8.39150668993601e-06,
15030
+ "loss": 5.3173,
15031
+ "step": 8584
15032
+ },
15033
+ {
15034
+ "epoch": 0.83,
15035
+ "grad_norm": 1.0409200191497803,
15036
+ "learning_rate": 8.372115571068451e-06,
15037
+ "loss": 5.3185,
15038
+ "step": 8588
15039
+ },
15040
+ {
15041
+ "epoch": 0.83,
15042
+ "grad_norm": 1.046713948249817,
15043
+ "learning_rate": 8.352724452200893e-06,
15044
+ "loss": 5.3996,
15045
+ "step": 8592
15046
+ },
15047
+ {
15048
+ "epoch": 0.83,
15049
+ "grad_norm": 1.039919376373291,
15050
+ "learning_rate": 8.333333333333334e-06,
15051
+ "loss": 5.2483,
15052
+ "step": 8596
15053
+ },
15054
+ {
15055
+ "epoch": 0.83,
15056
+ "grad_norm": 1.0520331859588623,
15057
+ "learning_rate": 8.313942214465776e-06,
15058
+ "loss": 5.3082,
15059
+ "step": 8600
15060
+ },
15061
+ {
15062
+ "epoch": 0.83,
15063
+ "grad_norm": 1.0794312953948975,
15064
+ "learning_rate": 8.294551095598217e-06,
15065
+ "loss": 5.382,
15066
+ "step": 8604
15067
+ },
15068
+ {
15069
+ "epoch": 0.83,
15070
+ "grad_norm": 1.0222445726394653,
15071
+ "learning_rate": 8.275159976730657e-06,
15072
+ "loss": 5.2186,
15073
+ "step": 8608
15074
+ },
15075
+ {
15076
+ "epoch": 0.83,
15077
+ "grad_norm": 1.043550729751587,
15078
+ "learning_rate": 8.2557688578631e-06,
15079
+ "loss": 5.2447,
15080
+ "step": 8612
15081
+ },
15082
+ {
15083
+ "epoch": 0.84,
15084
+ "grad_norm": 1.065027117729187,
15085
+ "learning_rate": 8.23637773899554e-06,
15086
+ "loss": 5.3149,
15087
+ "step": 8616
15088
+ },
15089
+ {
15090
+ "epoch": 0.84,
15091
+ "grad_norm": 1.089449405670166,
15092
+ "learning_rate": 8.216986620127982e-06,
15093
+ "loss": 5.231,
15094
+ "step": 8620
15095
+ },
15096
+ {
15097
+ "epoch": 0.84,
15098
+ "grad_norm": 1.0330005884170532,
15099
+ "learning_rate": 8.197595501260423e-06,
15100
+ "loss": 5.3099,
15101
+ "step": 8624
15102
+ },
15103
+ {
15104
+ "epoch": 0.84,
15105
+ "grad_norm": 1.088131070137024,
15106
+ "learning_rate": 8.178204382392865e-06,
15107
+ "loss": 5.3941,
15108
+ "step": 8628
15109
+ },
15110
+ {
15111
+ "epoch": 0.84,
15112
+ "grad_norm": 1.0130773782730103,
15113
+ "learning_rate": 8.158813263525305e-06,
15114
+ "loss": 5.2442,
15115
+ "step": 8632
15116
+ },
15117
+ {
15118
+ "epoch": 0.84,
15119
+ "grad_norm": 1.035882592201233,
15120
+ "learning_rate": 8.139422144657748e-06,
15121
+ "loss": 5.21,
15122
+ "step": 8636
15123
+ },
15124
+ {
15125
+ "epoch": 0.84,
15126
+ "grad_norm": 1.0550565719604492,
15127
+ "learning_rate": 8.120031025790188e-06,
15128
+ "loss": 5.2888,
15129
+ "step": 8640
15130
+ },
15131
+ {
15132
+ "epoch": 0.84,
15133
+ "grad_norm": 1.1436634063720703,
15134
+ "learning_rate": 8.10063990692263e-06,
15135
+ "loss": 5.3587,
15136
+ "step": 8644
15137
+ },
15138
+ {
15139
+ "epoch": 0.84,
15140
+ "grad_norm": 1.1211497783660889,
15141
+ "learning_rate": 8.081248788055071e-06,
15142
+ "loss": 5.3188,
15143
+ "step": 8648
15144
+ },
15145
+ {
15146
+ "epoch": 0.84,
15147
+ "grad_norm": 1.1205918788909912,
15148
+ "learning_rate": 8.061857669187513e-06,
15149
+ "loss": 5.2688,
15150
+ "step": 8652
15151
+ },
15152
+ {
15153
+ "epoch": 0.84,
15154
+ "grad_norm": 1.0907244682312012,
15155
+ "learning_rate": 8.042466550319954e-06,
15156
+ "loss": 5.2518,
15157
+ "step": 8656
15158
+ },
15159
+ {
15160
+ "epoch": 0.84,
15161
+ "grad_norm": 1.0856692790985107,
15162
+ "learning_rate": 8.023075431452396e-06,
15163
+ "loss": 5.3384,
15164
+ "step": 8660
15165
+ },
15166
+ {
15167
+ "epoch": 0.84,
15168
+ "grad_norm": 1.0303173065185547,
15169
+ "learning_rate": 8.003684312584836e-06,
15170
+ "loss": 5.2906,
15171
+ "step": 8664
15172
+ },
15173
+ {
15174
+ "epoch": 0.84,
15175
+ "grad_norm": 1.0908282995224,
15176
+ "learning_rate": 7.984293193717279e-06,
15177
+ "loss": 5.3331,
15178
+ "step": 8668
15179
+ },
15180
+ {
15181
+ "epoch": 0.84,
15182
+ "grad_norm": 1.088040828704834,
15183
+ "learning_rate": 7.96490207484972e-06,
15184
+ "loss": 5.3521,
15185
+ "step": 8672
15186
+ },
15187
+ {
15188
+ "epoch": 0.84,
15189
+ "grad_norm": 1.0086363554000854,
15190
+ "learning_rate": 7.94551095598216e-06,
15191
+ "loss": 5.3294,
15192
+ "step": 8676
15193
+ },
15194
+ {
15195
+ "epoch": 0.84,
15196
+ "grad_norm": 1.1544169187545776,
15197
+ "learning_rate": 7.926119837114602e-06,
15198
+ "loss": 5.3184,
15199
+ "step": 8680
15200
+ },
15201
+ {
15202
+ "epoch": 0.84,
15203
+ "grad_norm": 1.0530931949615479,
15204
+ "learning_rate": 7.906728718247042e-06,
15205
+ "loss": 5.2866,
15206
+ "step": 8684
15207
+ },
15208
+ {
15209
+ "epoch": 0.84,
15210
+ "grad_norm": 1.080121397972107,
15211
+ "learning_rate": 7.887337599379485e-06,
15212
+ "loss": 5.2793,
15213
+ "step": 8688
15214
+ },
15215
+ {
15216
+ "epoch": 0.84,
15217
+ "grad_norm": 1.0175666809082031,
15218
+ "learning_rate": 7.867946480511925e-06,
15219
+ "loss": 5.3282,
15220
+ "step": 8692
15221
+ },
15222
+ {
15223
+ "epoch": 0.84,
15224
+ "grad_norm": 1.0283890962600708,
15225
+ "learning_rate": 7.848555361644367e-06,
15226
+ "loss": 5.3439,
15227
+ "step": 8696
15228
+ },
15229
+ {
15230
+ "epoch": 0.84,
15231
+ "grad_norm": 1.0480095148086548,
15232
+ "learning_rate": 7.829164242776808e-06,
15233
+ "loss": 5.3042,
15234
+ "step": 8700
15235
+ },
15236
+ {
15237
+ "epoch": 0.84,
15238
+ "grad_norm": 0.9947773814201355,
15239
+ "learning_rate": 7.80977312390925e-06,
15240
+ "loss": 5.251,
15241
+ "step": 8704
15242
+ },
15243
+ {
15244
+ "epoch": 0.84,
15245
+ "grad_norm": 1.033823847770691,
15246
+ "learning_rate": 7.79038200504169e-06,
15247
+ "loss": 5.4016,
15248
+ "step": 8708
15249
+ },
15250
+ {
15251
+ "epoch": 0.84,
15252
+ "grad_norm": 1.129824161529541,
15253
+ "learning_rate": 7.770990886174133e-06,
15254
+ "loss": 5.2176,
15255
+ "step": 8712
15256
+ },
15257
+ {
15258
+ "epoch": 0.85,
15259
+ "grad_norm": 1.0202304124832153,
15260
+ "learning_rate": 7.751599767306574e-06,
15261
+ "loss": 5.3219,
15262
+ "step": 8716
15263
+ },
15264
+ {
15265
+ "epoch": 0.85,
15266
+ "grad_norm": 1.0748639106750488,
15267
+ "learning_rate": 7.732208648439016e-06,
15268
+ "loss": 5.2945,
15269
+ "step": 8720
15270
+ },
15271
+ {
15272
+ "epoch": 0.85,
15273
+ "grad_norm": 1.0026463270187378,
15274
+ "learning_rate": 7.712817529571456e-06,
15275
+ "loss": 5.3474,
15276
+ "step": 8724
15277
+ },
15278
+ {
15279
+ "epoch": 0.85,
15280
+ "grad_norm": 0.98891282081604,
15281
+ "learning_rate": 7.693426410703899e-06,
15282
+ "loss": 5.2612,
15283
+ "step": 8728
15284
+ },
15285
+ {
15286
+ "epoch": 0.85,
15287
+ "grad_norm": 1.079750418663025,
15288
+ "learning_rate": 7.674035291836339e-06,
15289
+ "loss": 5.2398,
15290
+ "step": 8732
15291
+ },
15292
+ {
15293
+ "epoch": 0.85,
15294
+ "grad_norm": 1.057255744934082,
15295
+ "learning_rate": 7.654644172968781e-06,
15296
+ "loss": 5.4364,
15297
+ "step": 8736
15298
+ },
15299
+ {
15300
+ "epoch": 0.85,
15301
+ "grad_norm": 1.0570470094680786,
15302
+ "learning_rate": 7.635253054101222e-06,
15303
+ "loss": 5.2695,
15304
+ "step": 8740
15305
+ },
15306
+ {
15307
+ "epoch": 0.85,
15308
+ "grad_norm": 1.0664699077606201,
15309
+ "learning_rate": 7.615861935233663e-06,
15310
+ "loss": 5.3378,
15311
+ "step": 8744
15312
+ },
15313
+ {
15314
+ "epoch": 0.85,
15315
+ "grad_norm": 1.0890289545059204,
15316
+ "learning_rate": 7.5964708163661055e-06,
15317
+ "loss": 5.375,
15318
+ "step": 8748
15319
+ },
15320
+ {
15321
+ "epoch": 0.85,
15322
+ "grad_norm": 1.0607807636260986,
15323
+ "learning_rate": 7.577079697498546e-06,
15324
+ "loss": 5.3258,
15325
+ "step": 8752
15326
+ },
15327
+ {
15328
+ "epoch": 0.85,
15329
+ "grad_norm": 1.0911225080490112,
15330
+ "learning_rate": 7.557688578630988e-06,
15331
+ "loss": 5.242,
15332
+ "step": 8756
15333
+ },
15334
+ {
15335
+ "epoch": 0.85,
15336
+ "grad_norm": 1.1005817651748657,
15337
+ "learning_rate": 7.538297459763429e-06,
15338
+ "loss": 5.2734,
15339
+ "step": 8760
15340
+ },
15341
+ {
15342
+ "epoch": 0.85,
15343
+ "grad_norm": 1.0650907754898071,
15344
+ "learning_rate": 7.51890634089587e-06,
15345
+ "loss": 5.2939,
15346
+ "step": 8764
15347
+ },
15348
+ {
15349
+ "epoch": 0.85,
15350
+ "grad_norm": 1.0343334674835205,
15351
+ "learning_rate": 7.499515222028311e-06,
15352
+ "loss": 5.2601,
15353
+ "step": 8768
15354
+ },
15355
+ {
15356
+ "epoch": 0.85,
15357
+ "grad_norm": 1.092239260673523,
15358
+ "learning_rate": 7.480124103160753e-06,
15359
+ "loss": 5.266,
15360
+ "step": 8772
15361
+ },
15362
+ {
15363
+ "epoch": 0.85,
15364
+ "grad_norm": 1.140648603439331,
15365
+ "learning_rate": 7.4607329842931935e-06,
15366
+ "loss": 5.2353,
15367
+ "step": 8776
15368
+ },
15369
+ {
15370
+ "epoch": 0.85,
15371
+ "grad_norm": 1.0743423700332642,
15372
+ "learning_rate": 7.441341865425636e-06,
15373
+ "loss": 5.389,
15374
+ "step": 8780
15375
+ },
15376
+ {
15377
+ "epoch": 0.85,
15378
+ "grad_norm": 1.0870285034179688,
15379
+ "learning_rate": 7.421950746558076e-06,
15380
+ "loss": 5.3212,
15381
+ "step": 8784
15382
+ },
15383
+ {
15384
+ "epoch": 0.85,
15385
+ "grad_norm": 1.0318245887756348,
15386
+ "learning_rate": 7.4025596276905185e-06,
15387
+ "loss": 5.2854,
15388
+ "step": 8788
15389
+ },
15390
+ {
15391
+ "epoch": 0.85,
15392
+ "grad_norm": 1.0597593784332275,
15393
+ "learning_rate": 7.383168508822959e-06,
15394
+ "loss": 5.4204,
15395
+ "step": 8792
15396
+ },
15397
+ {
15398
+ "epoch": 0.85,
15399
+ "grad_norm": 1.0621132850646973,
15400
+ "learning_rate": 7.3637773899554e-06,
15401
+ "loss": 5.2903,
15402
+ "step": 8796
15403
+ },
15404
+ {
15405
+ "epoch": 0.85,
15406
+ "grad_norm": 1.057024598121643,
15407
+ "learning_rate": 7.344386271087843e-06,
15408
+ "loss": 5.3975,
15409
+ "step": 8800
15410
+ },
15411
+ {
15412
+ "epoch": 0.85,
15413
+ "grad_norm": 1.1537240743637085,
15414
+ "learning_rate": 7.324995152220283e-06,
15415
+ "loss": 5.3118,
15416
+ "step": 8804
15417
+ },
15418
+ {
15419
+ "epoch": 0.85,
15420
+ "grad_norm": 1.161657691001892,
15421
+ "learning_rate": 7.305604033352725e-06,
15422
+ "loss": 5.3516,
15423
+ "step": 8808
15424
+ },
15425
+ {
15426
+ "epoch": 0.85,
15427
+ "grad_norm": 1.0824769735336304,
15428
+ "learning_rate": 7.286212914485166e-06,
15429
+ "loss": 5.3601,
15430
+ "step": 8812
15431
+ },
15432
+ {
15433
+ "epoch": 0.85,
15434
+ "grad_norm": 1.0471476316452026,
15435
+ "learning_rate": 7.266821795617608e-06,
15436
+ "loss": 5.2858,
15437
+ "step": 8816
15438
+ },
15439
+ {
15440
+ "epoch": 0.86,
15441
+ "grad_norm": 1.0438990592956543,
15442
+ "learning_rate": 7.247430676750049e-06,
15443
+ "loss": 5.2688,
15444
+ "step": 8820
15445
+ },
15446
+ {
15447
+ "epoch": 0.86,
15448
+ "grad_norm": 1.0383694171905518,
15449
+ "learning_rate": 7.228039557882491e-06,
15450
+ "loss": 5.3404,
15451
+ "step": 8824
15452
+ },
15453
+ {
15454
+ "epoch": 0.86,
15455
+ "grad_norm": 1.039699673652649,
15456
+ "learning_rate": 7.2086484390149315e-06,
15457
+ "loss": 5.2846,
15458
+ "step": 8828
15459
+ },
15460
+ {
15461
+ "epoch": 0.86,
15462
+ "grad_norm": 1.1256271600723267,
15463
+ "learning_rate": 7.189257320147373e-06,
15464
+ "loss": 5.327,
15465
+ "step": 8832
15466
+ },
15467
+ {
15468
+ "epoch": 0.86,
15469
+ "grad_norm": 0.9789720177650452,
15470
+ "learning_rate": 7.169866201279813e-06,
15471
+ "loss": 5.3369,
15472
+ "step": 8836
15473
+ },
15474
+ {
15475
+ "epoch": 0.86,
15476
+ "grad_norm": 1.0387988090515137,
15477
+ "learning_rate": 7.150475082412256e-06,
15478
+ "loss": 5.3402,
15479
+ "step": 8840
15480
+ },
15481
+ {
15482
+ "epoch": 0.86,
15483
+ "grad_norm": 1.0737075805664062,
15484
+ "learning_rate": 7.131083963544696e-06,
15485
+ "loss": 5.2837,
15486
+ "step": 8844
15487
+ },
15488
+ {
15489
+ "epoch": 0.86,
15490
+ "grad_norm": 1.1215327978134155,
15491
+ "learning_rate": 7.111692844677138e-06,
15492
+ "loss": 5.2237,
15493
+ "step": 8848
15494
+ },
15495
+ {
15496
+ "epoch": 0.86,
15497
+ "grad_norm": 1.0533177852630615,
15498
+ "learning_rate": 7.092301725809579e-06,
15499
+ "loss": 5.3783,
15500
+ "step": 8852
15501
+ },
15502
+ {
15503
+ "epoch": 0.86,
15504
+ "grad_norm": 1.1658439636230469,
15505
+ "learning_rate": 7.072910606942021e-06,
15506
+ "loss": 5.3175,
15507
+ "step": 8856
15508
+ },
15509
+ {
15510
+ "epoch": 0.86,
15511
+ "grad_norm": 1.0966906547546387,
15512
+ "learning_rate": 7.0535194880744625e-06,
15513
+ "loss": 5.4216,
15514
+ "step": 8860
15515
+ },
15516
+ {
15517
+ "epoch": 0.86,
15518
+ "grad_norm": 1.0952768325805664,
15519
+ "learning_rate": 7.034128369206904e-06,
15520
+ "loss": 5.2748,
15521
+ "step": 8864
15522
+ },
15523
+ {
15524
+ "epoch": 0.86,
15525
+ "grad_norm": 1.096529483795166,
15526
+ "learning_rate": 7.014737250339345e-06,
15527
+ "loss": 5.1666,
15528
+ "step": 8868
15529
+ },
15530
+ {
15531
+ "epoch": 0.86,
15532
+ "grad_norm": 1.0736936330795288,
15533
+ "learning_rate": 6.995346131471786e-06,
15534
+ "loss": 5.24,
15535
+ "step": 8872
15536
+ },
15537
+ {
15538
+ "epoch": 0.86,
15539
+ "grad_norm": 1.0133376121520996,
15540
+ "learning_rate": 6.975955012604228e-06,
15541
+ "loss": 5.2827,
15542
+ "step": 8876
15543
+ },
15544
+ {
15545
+ "epoch": 0.86,
15546
+ "grad_norm": 1.0580708980560303,
15547
+ "learning_rate": 6.956563893736669e-06,
15548
+ "loss": 5.3801,
15549
+ "step": 8880
15550
+ },
15551
+ {
15552
+ "epoch": 0.86,
15553
+ "grad_norm": 1.1220327615737915,
15554
+ "learning_rate": 6.937172774869111e-06,
15555
+ "loss": 5.2949,
15556
+ "step": 8884
15557
+ },
15558
+ {
15559
+ "epoch": 0.86,
15560
+ "grad_norm": 1.136806845664978,
15561
+ "learning_rate": 6.917781656001551e-06,
15562
+ "loss": 5.347,
15563
+ "step": 8888
15564
+ },
15565
+ {
15566
+ "epoch": 0.86,
15567
+ "grad_norm": 1.1147714853286743,
15568
+ "learning_rate": 6.898390537133994e-06,
15569
+ "loss": 5.2648,
15570
+ "step": 8892
15571
+ },
15572
+ {
15573
+ "epoch": 0.86,
15574
+ "grad_norm": 1.0525692701339722,
15575
+ "learning_rate": 6.878999418266434e-06,
15576
+ "loss": 5.2665,
15577
+ "step": 8896
15578
+ },
15579
+ {
15580
+ "epoch": 0.86,
15581
+ "grad_norm": 1.0400636196136475,
15582
+ "learning_rate": 6.8596082993988755e-06,
15583
+ "loss": 5.3571,
15584
+ "step": 8900
15585
+ },
15586
+ {
15587
+ "epoch": 0.86,
15588
+ "grad_norm": 1.0699836015701294,
15589
+ "learning_rate": 6.840217180531316e-06,
15590
+ "loss": 5.3079,
15591
+ "step": 8904
15592
+ },
15593
+ {
15594
+ "epoch": 0.86,
15595
+ "grad_norm": 1.0171644687652588,
15596
+ "learning_rate": 6.820826061663758e-06,
15597
+ "loss": 5.3438,
15598
+ "step": 8908
15599
+ },
15600
+ {
15601
+ "epoch": 0.86,
15602
+ "grad_norm": 1.0426756143569946,
15603
+ "learning_rate": 6.801434942796199e-06,
15604
+ "loss": 5.417,
15605
+ "step": 8912
15606
+ },
15607
+ {
15608
+ "epoch": 0.86,
15609
+ "grad_norm": 1.1138461828231812,
15610
+ "learning_rate": 6.782043823928641e-06,
15611
+ "loss": 5.2769,
15612
+ "step": 8916
15613
+ },
15614
+ {
15615
+ "epoch": 0.86,
15616
+ "grad_norm": 1.0219694375991821,
15617
+ "learning_rate": 6.762652705061083e-06,
15618
+ "loss": 5.2811,
15619
+ "step": 8920
15620
+ },
15621
+ {
15622
+ "epoch": 0.87,
15623
+ "grad_norm": 1.1686511039733887,
15624
+ "learning_rate": 6.743261586193524e-06,
15625
+ "loss": 5.3218,
15626
+ "step": 8924
15627
+ },
15628
+ {
15629
+ "epoch": 0.87,
15630
+ "grad_norm": 1.0407147407531738,
15631
+ "learning_rate": 6.723870467325965e-06,
15632
+ "loss": 5.339,
15633
+ "step": 8928
15634
+ },
15635
+ {
15636
+ "epoch": 0.87,
15637
+ "grad_norm": 1.1056681871414185,
15638
+ "learning_rate": 6.704479348458407e-06,
15639
+ "loss": 5.2758,
15640
+ "step": 8932
15641
+ },
15642
+ {
15643
+ "epoch": 0.87,
15644
+ "grad_norm": 1.0969740152359009,
15645
+ "learning_rate": 6.685088229590848e-06,
15646
+ "loss": 5.2432,
15647
+ "step": 8936
15648
+ },
15649
+ {
15650
+ "epoch": 0.87,
15651
+ "grad_norm": 0.9841113090515137,
15652
+ "learning_rate": 6.6656971107232885e-06,
15653
+ "loss": 5.2596,
15654
+ "step": 8940
15655
+ },
15656
+ {
15657
+ "epoch": 0.87,
15658
+ "grad_norm": 1.1172292232513428,
15659
+ "learning_rate": 6.646305991855731e-06,
15660
+ "loss": 5.2884,
15661
+ "step": 8944
15662
+ },
15663
+ {
15664
+ "epoch": 0.87,
15665
+ "grad_norm": 0.9936596155166626,
15666
+ "learning_rate": 6.626914872988171e-06,
15667
+ "loss": 5.2968,
15668
+ "step": 8948
15669
+ },
15670
+ {
15671
+ "epoch": 0.87,
15672
+ "grad_norm": 1.0389301776885986,
15673
+ "learning_rate": 6.6075237541206135e-06,
15674
+ "loss": 5.2827,
15675
+ "step": 8952
15676
+ },
15677
+ {
15678
+ "epoch": 0.87,
15679
+ "grad_norm": 1.020494818687439,
15680
+ "learning_rate": 6.588132635253054e-06,
15681
+ "loss": 5.3882,
15682
+ "step": 8956
15683
+ },
15684
+ {
15685
+ "epoch": 0.87,
15686
+ "grad_norm": 1.0391160249710083,
15687
+ "learning_rate": 6.568741516385496e-06,
15688
+ "loss": 5.2172,
15689
+ "step": 8960
15690
+ },
15691
+ {
15692
+ "epoch": 0.87,
15693
+ "grad_norm": 1.0213825702667236,
15694
+ "learning_rate": 6.549350397517937e-06,
15695
+ "loss": 5.3307,
15696
+ "step": 8964
15697
+ },
15698
+ {
15699
+ "epoch": 0.87,
15700
+ "grad_norm": 1.0745649337768555,
15701
+ "learning_rate": 6.529959278650378e-06,
15702
+ "loss": 5.2638,
15703
+ "step": 8968
15704
+ },
15705
+ {
15706
+ "epoch": 0.87,
15707
+ "grad_norm": 1.0567609071731567,
15708
+ "learning_rate": 6.510568159782819e-06,
15709
+ "loss": 5.3164,
15710
+ "step": 8972
15711
+ },
15712
+ {
15713
+ "epoch": 0.87,
15714
+ "grad_norm": 1.0450811386108398,
15715
+ "learning_rate": 6.491177040915261e-06,
15716
+ "loss": 5.3648,
15717
+ "step": 8976
15718
+ },
15719
+ {
15720
+ "epoch": 0.87,
15721
+ "grad_norm": 1.0880790948867798,
15722
+ "learning_rate": 6.471785922047703e-06,
15723
+ "loss": 5.3617,
15724
+ "step": 8980
15725
+ },
15726
+ {
15727
+ "epoch": 0.87,
15728
+ "grad_norm": 1.0606417655944824,
15729
+ "learning_rate": 6.452394803180144e-06,
15730
+ "loss": 5.2201,
15731
+ "step": 8984
15732
+ },
15733
+ {
15734
+ "epoch": 0.87,
15735
+ "grad_norm": 1.0124664306640625,
15736
+ "learning_rate": 6.433003684312586e-06,
15737
+ "loss": 5.2499,
15738
+ "step": 8988
15739
+ },
15740
+ {
15741
+ "epoch": 0.87,
15742
+ "grad_norm": 1.1352604627609253,
15743
+ "learning_rate": 6.4136125654450265e-06,
15744
+ "loss": 5.3782,
15745
+ "step": 8992
15746
+ },
15747
+ {
15748
+ "epoch": 0.87,
15749
+ "grad_norm": 1.1061619520187378,
15750
+ "learning_rate": 6.394221446577468e-06,
15751
+ "loss": 5.3408,
15752
+ "step": 8996
15753
+ },
15754
+ {
15755
+ "epoch": 0.87,
15756
+ "grad_norm": 1.1135718822479248,
15757
+ "learning_rate": 6.374830327709909e-06,
15758
+ "loss": 5.2323,
15759
+ "step": 9000
15760
+ },
15761
+ {
15762
+ "epoch": 0.87,
15763
+ "grad_norm": 1.0155010223388672,
15764
+ "learning_rate": 6.355439208842351e-06,
15765
+ "loss": 5.2942,
15766
+ "step": 9004
15767
+ },
15768
+ {
15769
+ "epoch": 0.87,
15770
+ "grad_norm": 1.044931173324585,
15771
+ "learning_rate": 6.336048089974791e-06,
15772
+ "loss": 5.3267,
15773
+ "step": 9008
15774
+ },
15775
+ {
15776
+ "epoch": 0.87,
15777
+ "grad_norm": 1.0643398761749268,
15778
+ "learning_rate": 6.316656971107233e-06,
15779
+ "loss": 5.2651,
15780
+ "step": 9012
15781
+ },
15782
+ {
15783
+ "epoch": 0.87,
15784
+ "grad_norm": 1.0203381776809692,
15785
+ "learning_rate": 6.297265852239674e-06,
15786
+ "loss": 5.3493,
15787
+ "step": 9016
15788
+ },
15789
+ {
15790
+ "epoch": 0.87,
15791
+ "grad_norm": 1.0563126802444458,
15792
+ "learning_rate": 6.277874733372116e-06,
15793
+ "loss": 5.362,
15794
+ "step": 9020
15795
+ },
15796
+ {
15797
+ "epoch": 0.87,
15798
+ "grad_norm": 1.0667084455490112,
15799
+ "learning_rate": 6.258483614504557e-06,
15800
+ "loss": 5.2151,
15801
+ "step": 9024
15802
+ },
15803
+ {
15804
+ "epoch": 0.88,
15805
+ "grad_norm": 1.0885251760482788,
15806
+ "learning_rate": 6.239092495636999e-06,
15807
+ "loss": 5.3404,
15808
+ "step": 9028
15809
+ },
15810
+ {
15811
+ "epoch": 0.88,
15812
+ "grad_norm": 1.0210630893707275,
15813
+ "learning_rate": 6.21970137676944e-06,
15814
+ "loss": 5.2937,
15815
+ "step": 9032
15816
+ },
15817
+ {
15818
+ "epoch": 0.88,
15819
+ "grad_norm": 1.1303844451904297,
15820
+ "learning_rate": 6.200310257901881e-06,
15821
+ "loss": 5.3373,
15822
+ "step": 9036
15823
+ },
15824
+ {
15825
+ "epoch": 0.88,
15826
+ "grad_norm": 1.1023499965667725,
15827
+ "learning_rate": 6.180919139034322e-06,
15828
+ "loss": 5.2946,
15829
+ "step": 9040
15830
+ },
15831
+ {
15832
+ "epoch": 0.88,
15833
+ "grad_norm": 1.0469759702682495,
15834
+ "learning_rate": 6.161528020166764e-06,
15835
+ "loss": 5.3265,
15836
+ "step": 9044
15837
+ },
15838
+ {
15839
+ "epoch": 0.88,
15840
+ "grad_norm": 0.9917576313018799,
15841
+ "learning_rate": 6.142136901299205e-06,
15842
+ "loss": 5.2984,
15843
+ "step": 9048
15844
+ },
15845
+ {
15846
+ "epoch": 0.88,
15847
+ "grad_norm": 1.0630229711532593,
15848
+ "learning_rate": 6.122745782431646e-06,
15849
+ "loss": 5.2025,
15850
+ "step": 9052
15851
+ },
15852
+ {
15853
+ "epoch": 0.88,
15854
+ "grad_norm": 1.0385984182357788,
15855
+ "learning_rate": 6.103354663564088e-06,
15856
+ "loss": 5.2934,
15857
+ "step": 9056
15858
+ },
15859
+ {
15860
+ "epoch": 0.88,
15861
+ "grad_norm": 1.0480278730392456,
15862
+ "learning_rate": 6.083963544696529e-06,
15863
+ "loss": 5.2933,
15864
+ "step": 9060
15865
+ },
15866
+ {
15867
+ "epoch": 0.88,
15868
+ "grad_norm": 1.0395824909210205,
15869
+ "learning_rate": 6.0645724258289706e-06,
15870
+ "loss": 5.267,
15871
+ "step": 9064
15872
+ },
15873
+ {
15874
+ "epoch": 0.88,
15875
+ "grad_norm": 1.0996421575546265,
15876
+ "learning_rate": 6.045181306961412e-06,
15877
+ "loss": 5.3497,
15878
+ "step": 9068
15879
+ },
15880
+ {
15881
+ "epoch": 0.88,
15882
+ "grad_norm": 1.0920591354370117,
15883
+ "learning_rate": 6.025790188093853e-06,
15884
+ "loss": 5.2752,
15885
+ "step": 9072
15886
+ },
15887
+ {
15888
+ "epoch": 0.88,
15889
+ "grad_norm": 1.0730946063995361,
15890
+ "learning_rate": 6.006399069226295e-06,
15891
+ "loss": 5.3234,
15892
+ "step": 9076
15893
+ },
15894
+ {
15895
+ "epoch": 0.88,
15896
+ "grad_norm": 1.077646017074585,
15897
+ "learning_rate": 5.987007950358736e-06,
15898
+ "loss": 5.3009,
15899
+ "step": 9080
15900
+ },
15901
+ {
15902
+ "epoch": 0.88,
15903
+ "grad_norm": 1.0901986360549927,
15904
+ "learning_rate": 5.9676168314911775e-06,
15905
+ "loss": 5.2653,
15906
+ "step": 9084
15907
+ },
15908
+ {
15909
+ "epoch": 0.88,
15910
+ "grad_norm": 1.1307499408721924,
15911
+ "learning_rate": 5.948225712623619e-06,
15912
+ "loss": 5.393,
15913
+ "step": 9088
15914
+ },
15915
+ {
15916
+ "epoch": 0.88,
15917
+ "grad_norm": 1.1002899408340454,
15918
+ "learning_rate": 5.92883459375606e-06,
15919
+ "loss": 5.2785,
15920
+ "step": 9092
15921
+ },
15922
+ {
15923
+ "epoch": 0.88,
15924
+ "grad_norm": 1.1348730325698853,
15925
+ "learning_rate": 5.909443474888502e-06,
15926
+ "loss": 5.3749,
15927
+ "step": 9096
15928
+ },
15929
+ {
15930
+ "epoch": 0.88,
15931
+ "grad_norm": 1.2404577732086182,
15932
+ "learning_rate": 5.890052356020943e-06,
15933
+ "loss": 5.2968,
15934
+ "step": 9100
15935
+ },
15936
+ {
15937
+ "epoch": 0.88,
15938
+ "grad_norm": 1.1860145330429077,
15939
+ "learning_rate": 5.8706612371533835e-06,
15940
+ "loss": 5.2951,
15941
+ "step": 9104
15942
+ },
15943
+ {
15944
+ "epoch": 0.88,
15945
+ "grad_norm": 1.0747588872909546,
15946
+ "learning_rate": 5.851270118285825e-06,
15947
+ "loss": 5.2725,
15948
+ "step": 9108
15949
+ },
15950
+ {
15951
+ "epoch": 0.88,
15952
+ "grad_norm": 1.0642809867858887,
15953
+ "learning_rate": 5.831878999418266e-06,
15954
+ "loss": 5.3347,
15955
+ "step": 9112
15956
+ },
15957
+ {
15958
+ "epoch": 0.88,
15959
+ "grad_norm": 1.0361077785491943,
15960
+ "learning_rate": 5.812487880550708e-06,
15961
+ "loss": 5.2899,
15962
+ "step": 9116
15963
+ },
15964
+ {
15965
+ "epoch": 0.88,
15966
+ "grad_norm": 1.027950406074524,
15967
+ "learning_rate": 5.793096761683149e-06,
15968
+ "loss": 5.3016,
15969
+ "step": 9120
15970
+ },
15971
+ {
15972
+ "epoch": 0.88,
15973
+ "grad_norm": 1.0341978073120117,
15974
+ "learning_rate": 5.7737056428155905e-06,
15975
+ "loss": 5.3066,
15976
+ "step": 9124
15977
+ },
15978
+ {
15979
+ "epoch": 0.89,
15980
+ "grad_norm": 1.0980345010757446,
15981
+ "learning_rate": 5.754314523948033e-06,
15982
+ "loss": 5.2685,
15983
+ "step": 9128
15984
+ },
15985
+ {
15986
+ "epoch": 0.89,
15987
+ "grad_norm": 1.0036580562591553,
15988
+ "learning_rate": 5.734923405080474e-06,
15989
+ "loss": 5.2972,
15990
+ "step": 9132
15991
+ },
15992
+ {
15993
+ "epoch": 0.89,
15994
+ "grad_norm": 1.0951625108718872,
15995
+ "learning_rate": 5.715532286212915e-06,
15996
+ "loss": 5.2363,
15997
+ "step": 9136
15998
+ },
15999
+ {
16000
+ "epoch": 0.89,
16001
+ "grad_norm": 1.010358214378357,
16002
+ "learning_rate": 5.696141167345356e-06,
16003
+ "loss": 5.2715,
16004
+ "step": 9140
16005
+ },
16006
+ {
16007
+ "epoch": 0.89,
16008
+ "grad_norm": 1.0721516609191895,
16009
+ "learning_rate": 5.676750048477797e-06,
16010
+ "loss": 5.3006,
16011
+ "step": 9144
16012
+ },
16013
+ {
16014
+ "epoch": 0.89,
16015
+ "grad_norm": 0.9922645688056946,
16016
+ "learning_rate": 5.657358929610239e-06,
16017
+ "loss": 5.2985,
16018
+ "step": 9148
16019
+ },
16020
+ {
16021
+ "epoch": 0.89,
16022
+ "grad_norm": 1.0958447456359863,
16023
+ "learning_rate": 5.63796781074268e-06,
16024
+ "loss": 5.2388,
16025
+ "step": 9152
16026
+ },
16027
+ {
16028
+ "epoch": 0.89,
16029
+ "grad_norm": 0.9977266788482666,
16030
+ "learning_rate": 5.6185766918751215e-06,
16031
+ "loss": 5.4273,
16032
+ "step": 9156
16033
+ },
16034
+ {
16035
+ "epoch": 0.89,
16036
+ "grad_norm": 1.1025915145874023,
16037
+ "learning_rate": 5.599185573007563e-06,
16038
+ "loss": 5.3768,
16039
+ "step": 9160
16040
+ },
16041
+ {
16042
+ "epoch": 0.89,
16043
+ "grad_norm": 1.0521866083145142,
16044
+ "learning_rate": 5.579794454140004e-06,
16045
+ "loss": 5.2555,
16046
+ "step": 9164
16047
+ },
16048
+ {
16049
+ "epoch": 0.89,
16050
+ "grad_norm": 1.0546320676803589,
16051
+ "learning_rate": 5.560403335272446e-06,
16052
+ "loss": 5.259,
16053
+ "step": 9168
16054
+ },
16055
+ {
16056
+ "epoch": 0.89,
16057
+ "grad_norm": 1.084153413772583,
16058
+ "learning_rate": 5.541012216404887e-06,
16059
+ "loss": 5.2733,
16060
+ "step": 9172
16061
+ },
16062
+ {
16063
+ "epoch": 0.89,
16064
+ "grad_norm": 1.1298420429229736,
16065
+ "learning_rate": 5.521621097537328e-06,
16066
+ "loss": 5.2499,
16067
+ "step": 9176
16068
+ },
16069
+ {
16070
+ "epoch": 0.89,
16071
+ "grad_norm": 1.0296047925949097,
16072
+ "learning_rate": 5.502229978669769e-06,
16073
+ "loss": 5.3293,
16074
+ "step": 9180
16075
+ },
16076
+ {
16077
+ "epoch": 0.89,
16078
+ "grad_norm": 1.0231281518936157,
16079
+ "learning_rate": 5.48283885980221e-06,
16080
+ "loss": 5.2614,
16081
+ "step": 9184
16082
+ },
16083
+ {
16084
+ "epoch": 0.89,
16085
+ "grad_norm": 1.1063759326934814,
16086
+ "learning_rate": 5.463447740934653e-06,
16087
+ "loss": 5.3569,
16088
+ "step": 9188
16089
+ },
16090
+ {
16091
+ "epoch": 0.89,
16092
+ "grad_norm": 1.0762827396392822,
16093
+ "learning_rate": 5.444056622067094e-06,
16094
+ "loss": 5.3734,
16095
+ "step": 9192
16096
+ },
16097
+ {
16098
+ "epoch": 0.89,
16099
+ "grad_norm": 1.0667394399642944,
16100
+ "learning_rate": 5.424665503199535e-06,
16101
+ "loss": 5.3136,
16102
+ "step": 9196
16103
+ },
16104
+ {
16105
+ "epoch": 0.89,
16106
+ "grad_norm": 1.0663567781448364,
16107
+ "learning_rate": 5.405274384331977e-06,
16108
+ "loss": 5.2178,
16109
+ "step": 9200
16110
+ },
16111
+ {
16112
+ "epoch": 0.89,
16113
+ "grad_norm": 1.1014020442962646,
16114
+ "learning_rate": 5.385883265464417e-06,
16115
+ "loss": 5.3121,
16116
+ "step": 9204
16117
+ },
16118
+ {
16119
+ "epoch": 0.89,
16120
+ "grad_norm": 1.0944761037826538,
16121
+ "learning_rate": 5.366492146596859e-06,
16122
+ "loss": 5.3321,
16123
+ "step": 9208
16124
+ },
16125
+ {
16126
+ "epoch": 0.89,
16127
+ "grad_norm": 1.0576825141906738,
16128
+ "learning_rate": 5.3471010277293e-06,
16129
+ "loss": 5.187,
16130
+ "step": 9212
16131
+ },
16132
+ {
16133
+ "epoch": 0.89,
16134
+ "grad_norm": 1.102414608001709,
16135
+ "learning_rate": 5.3277099088617414e-06,
16136
+ "loss": 5.2198,
16137
+ "step": 9216
16138
+ },
16139
+ {
16140
+ "epoch": 0.89,
16141
+ "grad_norm": 1.0515443086624146,
16142
+ "learning_rate": 5.308318789994183e-06,
16143
+ "loss": 5.3509,
16144
+ "step": 9220
16145
+ },
16146
+ {
16147
+ "epoch": 0.89,
16148
+ "grad_norm": 1.0225639343261719,
16149
+ "learning_rate": 5.288927671126624e-06,
16150
+ "loss": 5.3264,
16151
+ "step": 9224
16152
+ },
16153
+ {
16154
+ "epoch": 0.89,
16155
+ "grad_norm": 1.0482409000396729,
16156
+ "learning_rate": 5.269536552259066e-06,
16157
+ "loss": 5.314,
16158
+ "step": 9228
16159
+ },
16160
+ {
16161
+ "epoch": 0.9,
16162
+ "grad_norm": 1.0829366445541382,
16163
+ "learning_rate": 5.250145433391507e-06,
16164
+ "loss": 5.3288,
16165
+ "step": 9232
16166
+ },
16167
+ {
16168
+ "epoch": 0.9,
16169
+ "grad_norm": 1.018813967704773,
16170
+ "learning_rate": 5.230754314523948e-06,
16171
+ "loss": 5.245,
16172
+ "step": 9236
16173
+ },
16174
+ {
16175
+ "epoch": 0.9,
16176
+ "grad_norm": 1.1042280197143555,
16177
+ "learning_rate": 5.21136319565639e-06,
16178
+ "loss": 5.3871,
16179
+ "step": 9240
16180
+ },
16181
+ {
16182
+ "epoch": 0.9,
16183
+ "grad_norm": 1.0457403659820557,
16184
+ "learning_rate": 5.19197207678883e-06,
16185
+ "loss": 5.3858,
16186
+ "step": 9244
16187
+ },
16188
+ {
16189
+ "epoch": 0.9,
16190
+ "grad_norm": 1.0873547792434692,
16191
+ "learning_rate": 5.1725809579212725e-06,
16192
+ "loss": 5.2907,
16193
+ "step": 9248
16194
+ },
16195
+ {
16196
+ "epoch": 0.9,
16197
+ "grad_norm": 1.0615798234939575,
16198
+ "learning_rate": 5.153189839053714e-06,
16199
+ "loss": 5.2729,
16200
+ "step": 9252
16201
+ },
16202
+ {
16203
+ "epoch": 0.9,
16204
+ "grad_norm": 1.1086784601211548,
16205
+ "learning_rate": 5.133798720186155e-06,
16206
+ "loss": 5.388,
16207
+ "step": 9256
16208
+ },
16209
+ {
16210
+ "epoch": 0.9,
16211
+ "grad_norm": 1.0895764827728271,
16212
+ "learning_rate": 5.114407601318597e-06,
16213
+ "loss": 5.2892,
16214
+ "step": 9260
16215
+ },
16216
+ {
16217
+ "epoch": 0.9,
16218
+ "grad_norm": 1.0369954109191895,
16219
+ "learning_rate": 5.095016482451038e-06,
16220
+ "loss": 5.2989,
16221
+ "step": 9264
16222
+ },
16223
+ {
16224
+ "epoch": 0.9,
16225
+ "grad_norm": 1.049892544746399,
16226
+ "learning_rate": 5.0756253635834794e-06,
16227
+ "loss": 5.3419,
16228
+ "step": 9268
16229
+ },
16230
+ {
16231
+ "epoch": 0.9,
16232
+ "grad_norm": 1.0920242071151733,
16233
+ "learning_rate": 5.056234244715921e-06,
16234
+ "loss": 5.3116,
16235
+ "step": 9272
16236
+ },
16237
+ {
16238
+ "epoch": 0.9,
16239
+ "grad_norm": 1.0999177694320679,
16240
+ "learning_rate": 5.036843125848361e-06,
16241
+ "loss": 5.2779,
16242
+ "step": 9276
16243
+ },
16244
+ {
16245
+ "epoch": 0.9,
16246
+ "grad_norm": 1.0239474773406982,
16247
+ "learning_rate": 5.017452006980803e-06,
16248
+ "loss": 5.35,
16249
+ "step": 9280
16250
+ },
16251
+ {
16252
+ "epoch": 0.9,
16253
+ "grad_norm": 1.0571128129959106,
16254
+ "learning_rate": 4.998060888113244e-06,
16255
+ "loss": 5.3139,
16256
+ "step": 9284
16257
+ },
16258
+ {
16259
+ "epoch": 0.9,
16260
+ "grad_norm": 1.1082773208618164,
16261
+ "learning_rate": 4.9786697692456855e-06,
16262
+ "loss": 5.2779,
16263
+ "step": 9288
16264
  }
16265
  ],
16266
  "logging_steps": 4,
 
16268
  "num_input_tokens_seen": 0,
16269
  "num_train_epochs": 1,
16270
  "save_steps": 1032,
16271
+ "total_flos": 7.822868224986317e+16,
16272
  "train_batch_size": 8,
16273
  "trial_name": null,
16274
  "trial_params": null