diaenra commited on
Commit
f7006cb
·
verified ·
1 Parent(s): 9d7c41c

Training in progress, step 2868, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09161988ab35182d0f172df37ed2df3516cbe35067a0869c0d073c78b0f1e3a1
3
  size 2145944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a96170c4566c1e7efa993538ecb2474b7056c80c01787e9d312ce31d75111369
3
  size 2145944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:254f73a33601897761011a4368934a7338dc1d9d6a504f80f066e6406bcbb939
3
  size 4310020
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:635f9699d60977f7f49fef0fac4d8dd2e9540f390cee9cca0ae8d57bdec47c46
3
  size 4310020
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05e51ecfacb21da1acc5735448bc8ed70887cf2b3c5d1710aa9335fefd25428b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9966c454e255711c675f388ff89a77f36c50534180a271dc2d40e7f43b870bf
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78bb6a385f1d33d78b10d1602eb1b4b8e3b002f2928cfd515cb8f96e488f678a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237b866b1fce3ebf6f30679e5de802610141e1fcea25e04865362188175cac93
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9068644360124181,
5
  "eval_steps": 500,
6
- "global_step": 2629,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18410,6 +18410,1679 @@
18410
  "learning_rate": 2.2784275177278934e-06,
18411
  "loss": 9.3997,
18412
  "step": 2629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18413
  }
18414
  ],
18415
  "logging_steps": 1,
@@ -18429,7 +20102,7 @@
18429
  "attributes": {}
18430
  }
18431
  },
18432
- "total_flos": 545736876883968.0,
18433
  "train_batch_size": 4,
18434
  "trial_name": null,
18435
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9893066574680924,
5
  "eval_steps": 500,
6
+ "global_step": 2868,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18410
  "learning_rate": 2.2784275177278934e-06,
18411
  "loss": 9.3997,
18412
  "step": 2629
18413
+ },
18414
+ {
18415
+ "epoch": 0.9072093825457054,
18416
+ "grad_norm": 1.3021107912063599,
18417
+ "learning_rate": 2.2617097018509613e-06,
18418
+ "loss": 9.5147,
18419
+ "step": 2630
18420
+ },
18421
+ {
18422
+ "epoch": 0.9075543290789928,
18423
+ "grad_norm": 1.280267357826233,
18424
+ "learning_rate": 2.2450520256057038e-06,
18425
+ "loss": 9.5056,
18426
+ "step": 2631
18427
+ },
18428
+ {
18429
+ "epoch": 0.9078992756122801,
18430
+ "grad_norm": 1.3371871709823608,
18431
+ "learning_rate": 2.22845450997709e-06,
18432
+ "loss": 9.521,
18433
+ "step": 2632
18434
+ },
18435
+ {
18436
+ "epoch": 0.9082442221455674,
18437
+ "grad_norm": 1.3242119550704956,
18438
+ "learning_rate": 2.2119171758743117e-06,
18439
+ "loss": 9.4992,
18440
+ "step": 2633
18441
+ },
18442
+ {
18443
+ "epoch": 0.9085891686788548,
18444
+ "grad_norm": 1.357699990272522,
18445
+ "learning_rate": 2.19544004413072e-06,
18446
+ "loss": 9.4417,
18447
+ "step": 2634
18448
+ },
18449
+ {
18450
+ "epoch": 0.9089341152121421,
18451
+ "grad_norm": 1.3349027633666992,
18452
+ "learning_rate": 2.1790231355038495e-06,
18453
+ "loss": 9.462,
18454
+ "step": 2635
18455
+ },
18456
+ {
18457
+ "epoch": 0.9092790617454295,
18458
+ "grad_norm": 1.4189931154251099,
18459
+ "learning_rate": 2.162666470675334e-06,
18460
+ "loss": 9.4083,
18461
+ "step": 2636
18462
+ },
18463
+ {
18464
+ "epoch": 0.9096240082787168,
18465
+ "grad_norm": 1.5067139863967896,
18466
+ "learning_rate": 2.146370070250958e-06,
18467
+ "loss": 9.4424,
18468
+ "step": 2637
18469
+ },
18470
+ {
18471
+ "epoch": 0.9099689548120041,
18472
+ "grad_norm": 1.5032390356063843,
18473
+ "learning_rate": 2.130133954760538e-06,
18474
+ "loss": 9.4019,
18475
+ "step": 2638
18476
+ },
18477
+ {
18478
+ "epoch": 0.9103139013452914,
18479
+ "grad_norm": 1.3871209621429443,
18480
+ "learning_rate": 2.1139581446580017e-06,
18481
+ "loss": 9.5032,
18482
+ "step": 2639
18483
+ },
18484
+ {
18485
+ "epoch": 0.9106588478785789,
18486
+ "grad_norm": 1.5128953456878662,
18487
+ "learning_rate": 2.097842660321242e-06,
18488
+ "loss": 9.3989,
18489
+ "step": 2640
18490
+ },
18491
+ {
18492
+ "epoch": 0.9110037944118662,
18493
+ "grad_norm": 1.3251484632492065,
18494
+ "learning_rate": 2.081787522052203e-06,
18495
+ "loss": 9.5056,
18496
+ "step": 2641
18497
+ },
18498
+ {
18499
+ "epoch": 0.9113487409451535,
18500
+ "grad_norm": 1.3176518678665161,
18501
+ "learning_rate": 2.0657927500767894e-06,
18502
+ "loss": 9.4636,
18503
+ "step": 2642
18504
+ },
18505
+ {
18506
+ "epoch": 0.9116936874784408,
18507
+ "grad_norm": 1.5699663162231445,
18508
+ "learning_rate": 2.0498583645448487e-06,
18509
+ "loss": 9.4639,
18510
+ "step": 2643
18511
+ },
18512
+ {
18513
+ "epoch": 0.9120386340117281,
18514
+ "grad_norm": 1.4477311372756958,
18515
+ "learning_rate": 2.0339843855301744e-06,
18516
+ "loss": 9.4008,
18517
+ "step": 2644
18518
+ },
18519
+ {
18520
+ "epoch": 0.9123835805450156,
18521
+ "grad_norm": 1.6083184480667114,
18522
+ "learning_rate": 2.018170833030436e-06,
18523
+ "loss": 9.4396,
18524
+ "step": 2645
18525
+ },
18526
+ {
18527
+ "epoch": 0.9127285270783029,
18528
+ "grad_norm": 1.6080248355865479,
18529
+ "learning_rate": 2.0024177269672094e-06,
18530
+ "loss": 9.4432,
18531
+ "step": 2646
18532
+ },
18533
+ {
18534
+ "epoch": 0.9130734736115902,
18535
+ "grad_norm": 1.4727885723114014,
18536
+ "learning_rate": 1.986725087185898e-06,
18537
+ "loss": 9.4662,
18538
+ "step": 2647
18539
+ },
18540
+ {
18541
+ "epoch": 0.9134184201448775,
18542
+ "grad_norm": 1.6938202381134033,
18543
+ "learning_rate": 1.9710929334557484e-06,
18544
+ "loss": 9.332,
18545
+ "step": 2648
18546
+ },
18547
+ {
18548
+ "epoch": 0.9137633666781649,
18549
+ "grad_norm": 1.6540645360946655,
18550
+ "learning_rate": 1.9555212854697803e-06,
18551
+ "loss": 9.3841,
18552
+ "step": 2649
18553
+ },
18554
+ {
18555
+ "epoch": 0.9141083132114523,
18556
+ "grad_norm": 1.7224019765853882,
18557
+ "learning_rate": 1.940010162844824e-06,
18558
+ "loss": 9.3694,
18559
+ "step": 2650
18560
+ },
18561
+ {
18562
+ "epoch": 0.9144532597447396,
18563
+ "grad_norm": 0.7851664423942566,
18564
+ "learning_rate": 1.9245595851214328e-06,
18565
+ "loss": 9.6529,
18566
+ "step": 2651
18567
+ },
18568
+ {
18569
+ "epoch": 0.9147982062780269,
18570
+ "grad_norm": 0.9760227203369141,
18571
+ "learning_rate": 1.909169571763908e-06,
18572
+ "loss": 9.5619,
18573
+ "step": 2652
18574
+ },
18575
+ {
18576
+ "epoch": 0.9151431528113142,
18577
+ "grad_norm": 1.0906972885131836,
18578
+ "learning_rate": 1.8938401421602359e-06,
18579
+ "loss": 9.4804,
18580
+ "step": 2653
18581
+ },
18582
+ {
18583
+ "epoch": 0.9154880993446016,
18584
+ "grad_norm": 1.1999458074569702,
18585
+ "learning_rate": 1.8785713156221018e-06,
18586
+ "loss": 9.527,
18587
+ "step": 2654
18588
+ },
18589
+ {
18590
+ "epoch": 0.9158330458778889,
18591
+ "grad_norm": 1.1160448789596558,
18592
+ "learning_rate": 1.863363111384836e-06,
18593
+ "loss": 9.5069,
18594
+ "step": 2655
18595
+ },
18596
+ {
18597
+ "epoch": 0.9161779924111763,
18598
+ "grad_norm": 1.0647850036621094,
18599
+ "learning_rate": 1.8482155486073739e-06,
18600
+ "loss": 9.5693,
18601
+ "step": 2656
18602
+ },
18603
+ {
18604
+ "epoch": 0.9165229389444636,
18605
+ "grad_norm": 1.2221078872680664,
18606
+ "learning_rate": 1.8331286463722951e-06,
18607
+ "loss": 9.4765,
18608
+ "step": 2657
18609
+ },
18610
+ {
18611
+ "epoch": 0.916867885477751,
18612
+ "grad_norm": 1.1700892448425293,
18613
+ "learning_rate": 1.8181024236857246e-06,
18614
+ "loss": 9.5458,
18615
+ "step": 2658
18616
+ },
18617
+ {
18618
+ "epoch": 0.9172128320110383,
18619
+ "grad_norm": 1.2416577339172363,
18620
+ "learning_rate": 1.8031368994773756e-06,
18621
+ "loss": 9.5379,
18622
+ "step": 2659
18623
+ },
18624
+ {
18625
+ "epoch": 0.9175577785443256,
18626
+ "grad_norm": 1.1829198598861694,
18627
+ "learning_rate": 1.788232092600478e-06,
18628
+ "loss": 9.5421,
18629
+ "step": 2660
18630
+ },
18631
+ {
18632
+ "epoch": 0.917902725077613,
18633
+ "grad_norm": 1.1688567399978638,
18634
+ "learning_rate": 1.7733880218317788e-06,
18635
+ "loss": 9.4909,
18636
+ "step": 2661
18637
+ },
18638
+ {
18639
+ "epoch": 0.9182476716109003,
18640
+ "grad_norm": 1.1145144701004028,
18641
+ "learning_rate": 1.7586047058714972e-06,
18642
+ "loss": 9.5701,
18643
+ "step": 2662
18644
+ },
18645
+ {
18646
+ "epoch": 0.9185926181441877,
18647
+ "grad_norm": 1.1775919198989868,
18648
+ "learning_rate": 1.74388216334333e-06,
18649
+ "loss": 9.5376,
18650
+ "step": 2663
18651
+ },
18652
+ {
18653
+ "epoch": 0.918937564677475,
18654
+ "grad_norm": 1.2331610918045044,
18655
+ "learning_rate": 1.7292204127944134e-06,
18656
+ "loss": 9.4995,
18657
+ "step": 2664
18658
+ },
18659
+ {
18660
+ "epoch": 0.9192825112107623,
18661
+ "grad_norm": 1.2776384353637695,
18662
+ "learning_rate": 1.714619472695278e-06,
18663
+ "loss": 9.5589,
18664
+ "step": 2665
18665
+ },
18666
+ {
18667
+ "epoch": 0.9196274577440496,
18668
+ "grad_norm": 1.1205222606658936,
18669
+ "learning_rate": 1.7000793614398714e-06,
18670
+ "loss": 9.586,
18671
+ "step": 2666
18672
+ },
18673
+ {
18674
+ "epoch": 0.9199724042773371,
18675
+ "grad_norm": 1.172733187675476,
18676
+ "learning_rate": 1.6856000973455022e-06,
18677
+ "loss": 9.5626,
18678
+ "step": 2667
18679
+ },
18680
+ {
18681
+ "epoch": 0.9203173508106244,
18682
+ "grad_norm": 1.1725226640701294,
18683
+ "learning_rate": 1.6711816986528238e-06,
18684
+ "loss": 9.5409,
18685
+ "step": 2668
18686
+ },
18687
+ {
18688
+ "epoch": 0.9206622973439117,
18689
+ "grad_norm": 1.3246572017669678,
18690
+ "learning_rate": 1.6568241835258068e-06,
18691
+ "loss": 9.5028,
18692
+ "step": 2669
18693
+ },
18694
+ {
18695
+ "epoch": 0.921007243877199,
18696
+ "grad_norm": 1.4405431747436523,
18697
+ "learning_rate": 1.6425275700517385e-06,
18698
+ "loss": 9.4395,
18699
+ "step": 2670
18700
+ },
18701
+ {
18702
+ "epoch": 0.9213521904104863,
18703
+ "grad_norm": 1.2822954654693604,
18704
+ "learning_rate": 1.6282918762411614e-06,
18705
+ "loss": 9.5234,
18706
+ "step": 2671
18707
+ },
18708
+ {
18709
+ "epoch": 0.9216971369437738,
18710
+ "grad_norm": 1.3265615701675415,
18711
+ "learning_rate": 1.614117120027886e-06,
18712
+ "loss": 9.4913,
18713
+ "step": 2672
18714
+ },
18715
+ {
18716
+ "epoch": 0.9220420834770611,
18717
+ "grad_norm": 1.2790005207061768,
18718
+ "learning_rate": 1.6000033192689611e-06,
18719
+ "loss": 9.4907,
18720
+ "step": 2673
18721
+ },
18722
+ {
18723
+ "epoch": 0.9223870300103484,
18724
+ "grad_norm": 1.3700817823410034,
18725
+ "learning_rate": 1.5859504917446366e-06,
18726
+ "loss": 9.48,
18727
+ "step": 2674
18728
+ },
18729
+ {
18730
+ "epoch": 0.9227319765436357,
18731
+ "grad_norm": 1.3636354207992554,
18732
+ "learning_rate": 1.5719586551583454e-06,
18733
+ "loss": 9.4867,
18734
+ "step": 2675
18735
+ },
18736
+ {
18737
+ "epoch": 0.9230769230769231,
18738
+ "grad_norm": 1.2950481176376343,
18739
+ "learning_rate": 1.5580278271366878e-06,
18740
+ "loss": 9.4936,
18741
+ "step": 2676
18742
+ },
18743
+ {
18744
+ "epoch": 0.9234218696102104,
18745
+ "grad_norm": 1.3804364204406738,
18746
+ "learning_rate": 1.5441580252294253e-06,
18747
+ "loss": 9.4613,
18748
+ "step": 2677
18749
+ },
18750
+ {
18751
+ "epoch": 0.9237668161434978,
18752
+ "grad_norm": 1.369942307472229,
18753
+ "learning_rate": 1.5303492669094089e-06,
18754
+ "loss": 9.5052,
18755
+ "step": 2678
18756
+ },
18757
+ {
18758
+ "epoch": 0.9241117626767851,
18759
+ "grad_norm": 1.2750253677368164,
18760
+ "learning_rate": 1.5166015695726122e-06,
18761
+ "loss": 9.4562,
18762
+ "step": 2679
18763
+ },
18764
+ {
18765
+ "epoch": 0.9244567092100724,
18766
+ "grad_norm": 1.348370909690857,
18767
+ "learning_rate": 1.5029149505380646e-06,
18768
+ "loss": 9.4602,
18769
+ "step": 2680
18770
+ },
18771
+ {
18772
+ "epoch": 0.9248016557433598,
18773
+ "grad_norm": 1.2539187669754028,
18774
+ "learning_rate": 1.4892894270478853e-06,
18775
+ "loss": 9.525,
18776
+ "step": 2681
18777
+ },
18778
+ {
18779
+ "epoch": 0.9251466022766471,
18780
+ "grad_norm": 1.474637508392334,
18781
+ "learning_rate": 1.4757250162671822e-06,
18782
+ "loss": 9.449,
18783
+ "step": 2682
18784
+ },
18785
+ {
18786
+ "epoch": 0.9254915488099345,
18787
+ "grad_norm": 1.484521508216858,
18788
+ "learning_rate": 1.4622217352841138e-06,
18789
+ "loss": 9.3875,
18790
+ "step": 2683
18791
+ },
18792
+ {
18793
+ "epoch": 0.9258364953432218,
18794
+ "grad_norm": 1.2464600801467896,
18795
+ "learning_rate": 1.448779601109801e-06,
18796
+ "loss": 9.4846,
18797
+ "step": 2684
18798
+ },
18799
+ {
18800
+ "epoch": 0.9261814418765092,
18801
+ "grad_norm": 1.404441237449646,
18802
+ "learning_rate": 1.4353986306783418e-06,
18803
+ "loss": 9.516,
18804
+ "step": 2685
18805
+ },
18806
+ {
18807
+ "epoch": 0.9265263884097965,
18808
+ "grad_norm": 1.3945449590682983,
18809
+ "learning_rate": 1.4220788408468021e-06,
18810
+ "loss": 9.4727,
18811
+ "step": 2686
18812
+ },
18813
+ {
18814
+ "epoch": 0.9268713349430838,
18815
+ "grad_norm": 1.3472886085510254,
18816
+ "learning_rate": 1.4088202483951374e-06,
18817
+ "loss": 9.4367,
18818
+ "step": 2687
18819
+ },
18820
+ {
18821
+ "epoch": 0.9272162814763711,
18822
+ "grad_norm": 1.377700686454773,
18823
+ "learning_rate": 1.3956228700262252e-06,
18824
+ "loss": 9.5139,
18825
+ "step": 2688
18826
+ },
18827
+ {
18828
+ "epoch": 0.9275612280096585,
18829
+ "grad_norm": 1.389609456062317,
18830
+ "learning_rate": 1.3824867223658388e-06,
18831
+ "loss": 9.4322,
18832
+ "step": 2689
18833
+ },
18834
+ {
18835
+ "epoch": 0.9279061745429459,
18836
+ "grad_norm": 1.4232558012008667,
18837
+ "learning_rate": 1.3694118219626074e-06,
18838
+ "loss": 9.5109,
18839
+ "step": 2690
18840
+ },
18841
+ {
18842
+ "epoch": 0.9282511210762332,
18843
+ "grad_norm": 1.3743571043014526,
18844
+ "learning_rate": 1.3563981852879827e-06,
18845
+ "loss": 9.4921,
18846
+ "step": 2691
18847
+ },
18848
+ {
18849
+ "epoch": 0.9285960676095205,
18850
+ "grad_norm": 1.4875822067260742,
18851
+ "learning_rate": 1.3434458287362672e-06,
18852
+ "loss": 9.463,
18853
+ "step": 2692
18854
+ },
18855
+ {
18856
+ "epoch": 0.9289410141428078,
18857
+ "grad_norm": 1.3445839881896973,
18858
+ "learning_rate": 1.3305547686245422e-06,
18859
+ "loss": 9.4212,
18860
+ "step": 2693
18861
+ },
18862
+ {
18863
+ "epoch": 0.9292859606760953,
18864
+ "grad_norm": 1.6044926643371582,
18865
+ "learning_rate": 1.3177250211926728e-06,
18866
+ "loss": 9.4379,
18867
+ "step": 2694
18868
+ },
18869
+ {
18870
+ "epoch": 0.9296309072093826,
18871
+ "grad_norm": 1.5688241720199585,
18872
+ "learning_rate": 1.3049566026033023e-06,
18873
+ "loss": 9.4778,
18874
+ "step": 2695
18875
+ },
18876
+ {
18877
+ "epoch": 0.9299758537426699,
18878
+ "grad_norm": 1.6058199405670166,
18879
+ "learning_rate": 1.2922495289417913e-06,
18880
+ "loss": 9.421,
18881
+ "step": 2696
18882
+ },
18883
+ {
18884
+ "epoch": 0.9303208002759572,
18885
+ "grad_norm": 1.693510890007019,
18886
+ "learning_rate": 1.2796038162162239e-06,
18887
+ "loss": 9.354,
18888
+ "step": 2697
18889
+ },
18890
+ {
18891
+ "epoch": 0.9306657468092445,
18892
+ "grad_norm": 1.5772984027862549,
18893
+ "learning_rate": 1.2670194803573954e-06,
18894
+ "loss": 9.3735,
18895
+ "step": 2698
18896
+ },
18897
+ {
18898
+ "epoch": 0.931010693342532,
18899
+ "grad_norm": 1.614292860031128,
18900
+ "learning_rate": 1.2544965372187635e-06,
18901
+ "loss": 9.3702,
18902
+ "step": 2699
18903
+ },
18904
+ {
18905
+ "epoch": 0.9313556398758193,
18906
+ "grad_norm": 1.9061168432235718,
18907
+ "learning_rate": 1.2420350025764528e-06,
18908
+ "loss": 9.2773,
18909
+ "step": 2700
18910
+ },
18911
+ {
18912
+ "epoch": 0.9317005864091066,
18913
+ "grad_norm": 0.8620436787605286,
18914
+ "learning_rate": 1.2296348921292333e-06,
18915
+ "loss": 9.6332,
18916
+ "step": 2701
18917
+ },
18918
+ {
18919
+ "epoch": 0.9320455329423939,
18920
+ "grad_norm": 0.8850879073143005,
18921
+ "learning_rate": 1.2172962214984763e-06,
18922
+ "loss": 9.6099,
18923
+ "step": 2702
18924
+ },
18925
+ {
18926
+ "epoch": 0.9323904794756813,
18927
+ "grad_norm": 1.0009554624557495,
18928
+ "learning_rate": 1.2050190062281752e-06,
18929
+ "loss": 9.5471,
18930
+ "step": 2703
18931
+ },
18932
+ {
18933
+ "epoch": 0.9327354260089686,
18934
+ "grad_norm": 1.0762373208999634,
18935
+ "learning_rate": 1.1928032617848805e-06,
18936
+ "loss": 9.5482,
18937
+ "step": 2704
18938
+ },
18939
+ {
18940
+ "epoch": 0.933080372542256,
18941
+ "grad_norm": 1.0936650037765503,
18942
+ "learning_rate": 1.1806490035577267e-06,
18943
+ "loss": 9.5399,
18944
+ "step": 2705
18945
+ },
18946
+ {
18947
+ "epoch": 0.9334253190755433,
18948
+ "grad_norm": 1.043621301651001,
18949
+ "learning_rate": 1.16855624685836e-06,
18950
+ "loss": 9.5344,
18951
+ "step": 2706
18952
+ },
18953
+ {
18954
+ "epoch": 0.9337702656088306,
18955
+ "grad_norm": 1.2308648824691772,
18956
+ "learning_rate": 1.1565250069209776e-06,
18957
+ "loss": 9.5064,
18958
+ "step": 2707
18959
+ },
18960
+ {
18961
+ "epoch": 0.934115212142118,
18962
+ "grad_norm": 1.162726640701294,
18963
+ "learning_rate": 1.1445552989022668e-06,
18964
+ "loss": 9.5275,
18965
+ "step": 2708
18966
+ },
18967
+ {
18968
+ "epoch": 0.9344601586754053,
18969
+ "grad_norm": 1.1094176769256592,
18970
+ "learning_rate": 1.132647137881393e-06,
18971
+ "loss": 9.5538,
18972
+ "step": 2709
18973
+ },
18974
+ {
18975
+ "epoch": 0.9348051052086926,
18976
+ "grad_norm": 1.255376935005188,
18977
+ "learning_rate": 1.120800538859995e-06,
18978
+ "loss": 9.463,
18979
+ "step": 2710
18980
+ },
18981
+ {
18982
+ "epoch": 0.93515005174198,
18983
+ "grad_norm": 1.1070146560668945,
18984
+ "learning_rate": 1.1090155167621518e-06,
18985
+ "loss": 9.5253,
18986
+ "step": 2711
18987
+ },
18988
+ {
18989
+ "epoch": 0.9354949982752674,
18990
+ "grad_norm": 1.3558119535446167,
18991
+ "learning_rate": 1.0972920864343705e-06,
18992
+ "loss": 9.4959,
18993
+ "step": 2712
18994
+ },
18995
+ {
18996
+ "epoch": 0.9358399448085547,
18997
+ "grad_norm": 1.2252329587936401,
18998
+ "learning_rate": 1.085630262645565e-06,
18999
+ "loss": 9.5362,
19000
+ "step": 2713
19001
+ },
19002
+ {
19003
+ "epoch": 0.936184891341842,
19004
+ "grad_norm": 1.1986970901489258,
19005
+ "learning_rate": 1.07403006008705e-06,
19006
+ "loss": 9.4938,
19007
+ "step": 2714
19008
+ },
19009
+ {
19010
+ "epoch": 0.9365298378751293,
19011
+ "grad_norm": 1.1853275299072266,
19012
+ "learning_rate": 1.062491493372486e-06,
19013
+ "loss": 9.5321,
19014
+ "step": 2715
19015
+ },
19016
+ {
19017
+ "epoch": 0.9368747844084166,
19018
+ "grad_norm": 1.169337511062622,
19019
+ "learning_rate": 1.0510145770379177e-06,
19020
+ "loss": 9.5066,
19021
+ "step": 2716
19022
+ },
19023
+ {
19024
+ "epoch": 0.9372197309417041,
19025
+ "grad_norm": 1.2148215770721436,
19026
+ "learning_rate": 1.0395993255416957e-06,
19027
+ "loss": 9.5333,
19028
+ "step": 2717
19029
+ },
19030
+ {
19031
+ "epoch": 0.9375646774749914,
19032
+ "grad_norm": 1.3632956743240356,
19033
+ "learning_rate": 1.0282457532645119e-06,
19034
+ "loss": 9.4924,
19035
+ "step": 2718
19036
+ },
19037
+ {
19038
+ "epoch": 0.9379096240082787,
19039
+ "grad_norm": 1.1406636238098145,
19040
+ "learning_rate": 1.0169538745093242e-06,
19041
+ "loss": 9.5274,
19042
+ "step": 2719
19043
+ },
19044
+ {
19045
+ "epoch": 0.938254570541566,
19046
+ "grad_norm": 1.1914732456207275,
19047
+ "learning_rate": 1.0057237035014044e-06,
19048
+ "loss": 9.4695,
19049
+ "step": 2720
19050
+ },
19051
+ {
19052
+ "epoch": 0.9385995170748535,
19053
+ "grad_norm": 1.4516353607177734,
19054
+ "learning_rate": 9.945552543882685e-07,
19055
+ "loss": 9.4253,
19056
+ "step": 2721
19057
+ },
19058
+ {
19059
+ "epoch": 0.9389444636081408,
19060
+ "grad_norm": 1.3653576374053955,
19061
+ "learning_rate": 9.834485412396677e-07,
19062
+ "loss": 9.4781,
19063
+ "step": 2722
19064
+ },
19065
+ {
19066
+ "epoch": 0.9392894101414281,
19067
+ "grad_norm": 1.1982650756835938,
19068
+ "learning_rate": 9.724035780476092e-07,
19069
+ "loss": 9.599,
19070
+ "step": 2723
19071
+ },
19072
+ {
19073
+ "epoch": 0.9396343566747154,
19074
+ "grad_norm": 1.244680643081665,
19075
+ "learning_rate": 9.6142037872628e-07,
19076
+ "loss": 9.5415,
19077
+ "step": 2724
19078
+ },
19079
+ {
19080
+ "epoch": 0.9399793032080027,
19081
+ "grad_norm": 1.288313388824463,
19082
+ "learning_rate": 9.504989571120726e-07,
19083
+ "loss": 9.5385,
19084
+ "step": 2725
19085
+ },
19086
+ {
19087
+ "epoch": 0.9403242497412901,
19088
+ "grad_norm": 1.185012698173523,
19089
+ "learning_rate": 9.396393269635484e-07,
19090
+ "loss": 9.5646,
19091
+ "step": 2726
19092
+ },
19093
+ {
19094
+ "epoch": 0.9406691962745775,
19095
+ "grad_norm": 1.2785496711730957,
19096
+ "learning_rate": 9.28841501961425e-07,
19097
+ "loss": 9.496,
19098
+ "step": 2727
19099
+ },
19100
+ {
19101
+ "epoch": 0.9410141428078648,
19102
+ "grad_norm": 1.2989834547042847,
19103
+ "learning_rate": 9.1810549570856e-07,
19104
+ "loss": 9.5301,
19105
+ "step": 2728
19106
+ },
19107
+ {
19108
+ "epoch": 0.9413590893411521,
19109
+ "grad_norm": 1.2136149406433105,
19110
+ "learning_rate": 9.074313217299457e-07,
19111
+ "loss": 9.5197,
19112
+ "step": 2729
19113
+ },
19114
+ {
19115
+ "epoch": 0.9417040358744395,
19116
+ "grad_norm": 1.4056475162506104,
19117
+ "learning_rate": 8.968189934726534e-07,
19118
+ "loss": 9.5294,
19119
+ "step": 2730
19120
+ },
19121
+ {
19122
+ "epoch": 0.9420489824077268,
19123
+ "grad_norm": 1.2778812646865845,
19124
+ "learning_rate": 8.862685243058666e-07,
19125
+ "loss": 9.455,
19126
+ "step": 2731
19127
+ },
19128
+ {
19129
+ "epoch": 0.9423939289410141,
19130
+ "grad_norm": 1.3463318347930908,
19131
+ "learning_rate": 8.757799275208311e-07,
19132
+ "loss": 9.5346,
19133
+ "step": 2732
19134
+ },
19135
+ {
19136
+ "epoch": 0.9427388754743015,
19137
+ "grad_norm": 1.2876229286193848,
19138
+ "learning_rate": 8.653532163308387e-07,
19139
+ "loss": 9.5036,
19140
+ "step": 2733
19141
+ },
19142
+ {
19143
+ "epoch": 0.9430838220075888,
19144
+ "grad_norm": 1.3723164796829224,
19145
+ "learning_rate": 8.549884038712375e-07,
19146
+ "loss": 9.5081,
19147
+ "step": 2734
19148
+ },
19149
+ {
19150
+ "epoch": 0.9434287685408762,
19151
+ "grad_norm": 1.4172887802124023,
19152
+ "learning_rate": 8.446855031993717e-07,
19153
+ "loss": 9.4852,
19154
+ "step": 2735
19155
+ },
19156
+ {
19157
+ "epoch": 0.9437737150741635,
19158
+ "grad_norm": 1.4235061407089233,
19159
+ "learning_rate": 8.344445272946199e-07,
19160
+ "loss": 9.486,
19161
+ "step": 2736
19162
+ },
19163
+ {
19164
+ "epoch": 0.9441186616074508,
19165
+ "grad_norm": 1.449661374092102,
19166
+ "learning_rate": 8.24265489058329e-07,
19167
+ "loss": 9.4349,
19168
+ "step": 2737
19169
+ },
19170
+ {
19171
+ "epoch": 0.9444636081407382,
19172
+ "grad_norm": 1.4169197082519531,
19173
+ "learning_rate": 8.1414840131383e-07,
19174
+ "loss": 9.4753,
19175
+ "step": 2738
19176
+ },
19177
+ {
19178
+ "epoch": 0.9448085546740256,
19179
+ "grad_norm": 1.4273600578308105,
19180
+ "learning_rate": 8.040932768063947e-07,
19181
+ "loss": 9.4446,
19182
+ "step": 2739
19183
+ },
19184
+ {
19185
+ "epoch": 0.9451535012073129,
19186
+ "grad_norm": 1.4585797786712646,
19187
+ "learning_rate": 7.941001282032512e-07,
19188
+ "loss": 9.4683,
19189
+ "step": 2740
19190
+ },
19191
+ {
19192
+ "epoch": 0.9454984477406002,
19193
+ "grad_norm": 1.433382272720337,
19194
+ "learning_rate": 7.841689680935349e-07,
19195
+ "loss": 9.4656,
19196
+ "step": 2741
19197
+ },
19198
+ {
19199
+ "epoch": 0.9458433942738875,
19200
+ "grad_norm": 1.298423409461975,
19201
+ "learning_rate": 7.742998089883102e-07,
19202
+ "loss": 9.4483,
19203
+ "step": 2742
19204
+ },
19205
+ {
19206
+ "epoch": 0.9461883408071748,
19207
+ "grad_norm": 1.4828376770019531,
19208
+ "learning_rate": 7.644926633205208e-07,
19209
+ "loss": 9.4068,
19210
+ "step": 2743
19211
+ },
19212
+ {
19213
+ "epoch": 0.9465332873404623,
19214
+ "grad_norm": 1.4661272764205933,
19215
+ "learning_rate": 7.547475434449835e-07,
19216
+ "loss": 9.4218,
19217
+ "step": 2744
19218
+ },
19219
+ {
19220
+ "epoch": 0.9468782338737496,
19221
+ "grad_norm": 1.4254372119903564,
19222
+ "learning_rate": 7.450644616383951e-07,
19223
+ "loss": 9.4599,
19224
+ "step": 2745
19225
+ },
19226
+ {
19227
+ "epoch": 0.9472231804070369,
19228
+ "grad_norm": 1.6096229553222656,
19229
+ "learning_rate": 7.354434300992752e-07,
19230
+ "loss": 9.4529,
19231
+ "step": 2746
19232
+ },
19233
+ {
19234
+ "epoch": 0.9475681269403242,
19235
+ "grad_norm": 1.6057524681091309,
19236
+ "learning_rate": 7.258844609479953e-07,
19237
+ "loss": 9.3971,
19238
+ "step": 2747
19239
+ },
19240
+ {
19241
+ "epoch": 0.9479130734736116,
19242
+ "grad_norm": 1.5125082731246948,
19243
+ "learning_rate": 7.163875662267117e-07,
19244
+ "loss": 9.417,
19245
+ "step": 2748
19246
+ },
19247
+ {
19248
+ "epoch": 0.948258020006899,
19249
+ "grad_norm": 1.7600480318069458,
19250
+ "learning_rate": 7.069527578994151e-07,
19251
+ "loss": 9.2774,
19252
+ "step": 2749
19253
+ },
19254
+ {
19255
+ "epoch": 0.9486029665401863,
19256
+ "grad_norm": 1.6905750036239624,
19257
+ "learning_rate": 6.975800478518646e-07,
19258
+ "loss": 9.3521,
19259
+ "step": 2750
19260
+ },
19261
+ {
19262
+ "epoch": 0.9489479130734736,
19263
+ "grad_norm": 0.99737948179245,
19264
+ "learning_rate": 6.88269447891593e-07,
19265
+ "loss": 9.533,
19266
+ "step": 2751
19267
+ },
19268
+ {
19269
+ "epoch": 0.9492928596067609,
19270
+ "grad_norm": 0.9953950643539429,
19271
+ "learning_rate": 6.790209697478789e-07,
19272
+ "loss": 9.5993,
19273
+ "step": 2752
19274
+ },
19275
+ {
19276
+ "epoch": 0.9496378061400483,
19277
+ "grad_norm": 1.0281240940093994,
19278
+ "learning_rate": 6.698346250717524e-07,
19279
+ "loss": 9.5267,
19280
+ "step": 2753
19281
+ },
19282
+ {
19283
+ "epoch": 0.9499827526733357,
19284
+ "grad_norm": 1.020925760269165,
19285
+ "learning_rate": 6.607104254359675e-07,
19286
+ "loss": 9.5643,
19287
+ "step": 2754
19288
+ },
19289
+ {
19290
+ "epoch": 0.950327699206623,
19291
+ "grad_norm": 1.0711194276809692,
19292
+ "learning_rate": 6.516483823349795e-07,
19293
+ "loss": 9.5639,
19294
+ "step": 2755
19295
+ },
19296
+ {
19297
+ "epoch": 0.9506726457399103,
19298
+ "grad_norm": 1.0857421159744263,
19299
+ "learning_rate": 6.426485071849564e-07,
19300
+ "loss": 9.4995,
19301
+ "step": 2756
19302
+ },
19303
+ {
19304
+ "epoch": 0.9510175922731977,
19305
+ "grad_norm": 1.262204885482788,
19306
+ "learning_rate": 6.337108113237344e-07,
19307
+ "loss": 9.4864,
19308
+ "step": 2757
19309
+ },
19310
+ {
19311
+ "epoch": 0.951362538806485,
19312
+ "grad_norm": 1.0766891241073608,
19313
+ "learning_rate": 6.248353060108292e-07,
19314
+ "loss": 9.5252,
19315
+ "step": 2758
19316
+ },
19317
+ {
19318
+ "epoch": 0.9517074853397723,
19319
+ "grad_norm": 1.153833031654358,
19320
+ "learning_rate": 6.160220024273966e-07,
19321
+ "loss": 9.5178,
19322
+ "step": 2759
19323
+ },
19324
+ {
19325
+ "epoch": 0.9520524318730597,
19326
+ "grad_norm": 1.1806145906448364,
19327
+ "learning_rate": 6.072709116762442e-07,
19328
+ "loss": 9.538,
19329
+ "step": 2760
19330
+ },
19331
+ {
19332
+ "epoch": 0.952397378406347,
19333
+ "grad_norm": 1.146377682685852,
19334
+ "learning_rate": 5.98582044781798e-07,
19335
+ "loss": 9.534,
19336
+ "step": 2761
19337
+ },
19338
+ {
19339
+ "epoch": 0.9527423249396344,
19340
+ "grad_norm": 1.1945569515228271,
19341
+ "learning_rate": 5.899554126901075e-07,
19342
+ "loss": 9.5208,
19343
+ "step": 2762
19344
+ },
19345
+ {
19346
+ "epoch": 0.9530872714729217,
19347
+ "grad_norm": 1.286086916923523,
19348
+ "learning_rate": 5.813910262687905e-07,
19349
+ "loss": 9.4976,
19350
+ "step": 2763
19351
+ },
19352
+ {
19353
+ "epoch": 0.953432218006209,
19354
+ "grad_norm": 1.2750720977783203,
19355
+ "learning_rate": 5.728888963070945e-07,
19356
+ "loss": 9.5186,
19357
+ "step": 2764
19358
+ },
19359
+ {
19360
+ "epoch": 0.9537771645394963,
19361
+ "grad_norm": 1.2140814065933228,
19362
+ "learning_rate": 5.644490335157959e-07,
19363
+ "loss": 9.4896,
19364
+ "step": 2765
19365
+ },
19366
+ {
19367
+ "epoch": 0.9541221110727838,
19368
+ "grad_norm": 1.1700353622436523,
19369
+ "learning_rate": 5.560714485272512e-07,
19370
+ "loss": 9.6212,
19371
+ "step": 2766
19372
+ },
19373
+ {
19374
+ "epoch": 0.9544670576060711,
19375
+ "grad_norm": 1.3271950483322144,
19376
+ "learning_rate": 5.477561518953566e-07,
19377
+ "loss": 9.4967,
19378
+ "step": 2767
19379
+ },
19380
+ {
19381
+ "epoch": 0.9548120041393584,
19382
+ "grad_norm": 1.1459087133407593,
19383
+ "learning_rate": 5.395031540955275e-07,
19384
+ "loss": 9.5539,
19385
+ "step": 2768
19386
+ },
19387
+ {
19388
+ "epoch": 0.9551569506726457,
19389
+ "grad_norm": 1.122616171836853,
19390
+ "learning_rate": 5.313124655247192e-07,
19391
+ "loss": 9.5894,
19392
+ "step": 2769
19393
+ },
19394
+ {
19395
+ "epoch": 0.955501897205933,
19396
+ "grad_norm": 1.3789931535720825,
19397
+ "learning_rate": 5.231840965013668e-07,
19398
+ "loss": 9.4521,
19399
+ "step": 2770
19400
+ },
19401
+ {
19402
+ "epoch": 0.9558468437392205,
19403
+ "grad_norm": 1.2291425466537476,
19404
+ "learning_rate": 5.151180572654235e-07,
19405
+ "loss": 9.5169,
19406
+ "step": 2771
19407
+ },
19408
+ {
19409
+ "epoch": 0.9561917902725078,
19410
+ "grad_norm": 1.1856980323791504,
19411
+ "learning_rate": 5.071143579782889e-07,
19412
+ "loss": 9.4942,
19413
+ "step": 2772
19414
+ },
19415
+ {
19416
+ "epoch": 0.9565367368057951,
19417
+ "grad_norm": 1.2218326330184937,
19418
+ "learning_rate": 4.99173008722853e-07,
19419
+ "loss": 9.55,
19420
+ "step": 2773
19421
+ },
19422
+ {
19423
+ "epoch": 0.9568816833390824,
19424
+ "grad_norm": 1.2583118677139282,
19425
+ "learning_rate": 4.91294019503441e-07,
19426
+ "loss": 9.538,
19427
+ "step": 2774
19428
+ },
19429
+ {
19430
+ "epoch": 0.9572266298723697,
19431
+ "grad_norm": 1.3726310729980469,
19432
+ "learning_rate": 4.834774002458409e-07,
19433
+ "loss": 9.4831,
19434
+ "step": 2775
19435
+ },
19436
+ {
19437
+ "epoch": 0.9575715764056572,
19438
+ "grad_norm": 1.3480925559997559,
19439
+ "learning_rate": 4.757231607972534e-07,
19440
+ "loss": 9.4674,
19441
+ "step": 2776
19442
+ },
19443
+ {
19444
+ "epoch": 0.9579165229389445,
19445
+ "grad_norm": 1.2078763246536255,
19446
+ "learning_rate": 4.680313109262813e-07,
19447
+ "loss": 9.4641,
19448
+ "step": 2777
19449
+ },
19450
+ {
19451
+ "epoch": 0.9582614694722318,
19452
+ "grad_norm": 1.2556449174880981,
19453
+ "learning_rate": 4.6040186032296206e-07,
19454
+ "loss": 9.4801,
19455
+ "step": 2778
19456
+ },
19457
+ {
19458
+ "epoch": 0.9586064160055191,
19459
+ "grad_norm": 1.2832894325256348,
19460
+ "learning_rate": 4.5283481859869635e-07,
19461
+ "loss": 9.5116,
19462
+ "step": 2779
19463
+ },
19464
+ {
19465
+ "epoch": 0.9589513625388065,
19466
+ "grad_norm": 1.3231313228607178,
19467
+ "learning_rate": 4.4533019528628093e-07,
19468
+ "loss": 9.4913,
19469
+ "step": 2780
19470
+ },
19471
+ {
19472
+ "epoch": 0.9592963090720938,
19473
+ "grad_norm": 1.2611724138259888,
19474
+ "learning_rate": 4.3788799983986997e-07,
19475
+ "loss": 9.521,
19476
+ "step": 2781
19477
+ },
19478
+ {
19479
+ "epoch": 0.9596412556053812,
19480
+ "grad_norm": 1.2805002927780151,
19481
+ "learning_rate": 4.305082416349804e-07,
19482
+ "loss": 9.502,
19483
+ "step": 2782
19484
+ },
19485
+ {
19486
+ "epoch": 0.9599862021386685,
19487
+ "grad_norm": 1.3639451265335083,
19488
+ "learning_rate": 4.231909299684533e-07,
19489
+ "loss": 9.4736,
19490
+ "step": 2783
19491
+ },
19492
+ {
19493
+ "epoch": 0.9603311486719558,
19494
+ "grad_norm": 1.4248766899108887,
19495
+ "learning_rate": 4.159360740584817e-07,
19496
+ "loss": 9.5154,
19497
+ "step": 2784
19498
+ },
19499
+ {
19500
+ "epoch": 0.9606760952052432,
19501
+ "grad_norm": 1.5050346851348877,
19502
+ "learning_rate": 4.0874368304457676e-07,
19503
+ "loss": 9.4806,
19504
+ "step": 2785
19505
+ },
19506
+ {
19507
+ "epoch": 0.9610210417385305,
19508
+ "grad_norm": 1.28929603099823,
19509
+ "learning_rate": 4.016137659875463e-07,
19510
+ "loss": 9.5005,
19511
+ "step": 2786
19512
+ },
19513
+ {
19514
+ "epoch": 0.9613659882718179,
19515
+ "grad_norm": 1.1755974292755127,
19516
+ "learning_rate": 3.945463318695053e-07,
19517
+ "loss": 9.4845,
19518
+ "step": 2787
19519
+ },
19520
+ {
19521
+ "epoch": 0.9617109348051052,
19522
+ "grad_norm": 1.4888191223144531,
19523
+ "learning_rate": 3.8754138959383733e-07,
19524
+ "loss": 9.4333,
19525
+ "step": 2788
19526
+ },
19527
+ {
19528
+ "epoch": 0.9620558813383926,
19529
+ "grad_norm": 1.3838120698928833,
19530
+ "learning_rate": 3.805989479852279e-07,
19531
+ "loss": 9.4622,
19532
+ "step": 2789
19533
+ },
19534
+ {
19535
+ "epoch": 0.9624008278716799,
19536
+ "grad_norm": 1.3965057134628296,
19537
+ "learning_rate": 3.7371901578959756e-07,
19538
+ "loss": 9.4135,
19539
+ "step": 2790
19540
+ },
19541
+ {
19542
+ "epoch": 0.9627457744049672,
19543
+ "grad_norm": 1.3354548215866089,
19544
+ "learning_rate": 3.6690160167413554e-07,
19545
+ "loss": 9.4748,
19546
+ "step": 2791
19547
+ },
19548
+ {
19549
+ "epoch": 0.9630907209382545,
19550
+ "grad_norm": 1.4212675094604492,
19551
+ "learning_rate": 3.6014671422727185e-07,
19552
+ "loss": 9.4848,
19553
+ "step": 2792
19554
+ },
19555
+ {
19556
+ "epoch": 0.9634356674715419,
19557
+ "grad_norm": 1.4828577041625977,
19558
+ "learning_rate": 3.5345436195866053e-07,
19559
+ "loss": 9.4245,
19560
+ "step": 2793
19561
+ },
19562
+ {
19563
+ "epoch": 0.9637806140048293,
19564
+ "grad_norm": 1.4509541988372803,
19565
+ "learning_rate": 3.468245532991743e-07,
19566
+ "loss": 9.4091,
19567
+ "step": 2794
19568
+ },
19569
+ {
19570
+ "epoch": 0.9641255605381166,
19571
+ "grad_norm": 1.5361698865890503,
19572
+ "learning_rate": 3.4025729660089877e-07,
19573
+ "loss": 9.4698,
19574
+ "step": 2795
19575
+ },
19576
+ {
19577
+ "epoch": 0.9644705070714039,
19578
+ "grad_norm": 1.5707025527954102,
19579
+ "learning_rate": 3.3375260013711604e-07,
19580
+ "loss": 9.4206,
19581
+ "step": 2796
19582
+ },
19583
+ {
19584
+ "epoch": 0.9648154536046912,
19585
+ "grad_norm": 1.47184157371521,
19586
+ "learning_rate": 3.273104721023046e-07,
19587
+ "loss": 9.3935,
19588
+ "step": 2797
19589
+ },
19590
+ {
19591
+ "epoch": 0.9651604001379787,
19592
+ "grad_norm": 1.5686029195785522,
19593
+ "learning_rate": 3.209309206121058e-07,
19594
+ "loss": 9.3636,
19595
+ "step": 2798
19596
+ },
19597
+ {
19598
+ "epoch": 0.965505346671266,
19599
+ "grad_norm": 1.5438534021377563,
19600
+ "learning_rate": 3.1461395370334104e-07,
19601
+ "loss": 9.3861,
19602
+ "step": 2799
19603
+ },
19604
+ {
19605
+ "epoch": 0.9658502932045533,
19606
+ "grad_norm": 1.6539479494094849,
19607
+ "learning_rate": 3.0835957933397773e-07,
19608
+ "loss": 9.398,
19609
+ "step": 2800
19610
+ },
19611
+ {
19612
+ "epoch": 0.9661952397378406,
19613
+ "grad_norm": 0.774260401725769,
19614
+ "learning_rate": 3.0216780538314116e-07,
19615
+ "loss": 9.6004,
19616
+ "step": 2801
19617
+ },
19618
+ {
19619
+ "epoch": 0.9665401862711279,
19620
+ "grad_norm": 0.9886148571968079,
19621
+ "learning_rate": 2.960386396510972e-07,
19622
+ "loss": 9.5417,
19623
+ "step": 2802
19624
+ },
19625
+ {
19626
+ "epoch": 0.9668851328044153,
19627
+ "grad_norm": 1.0178803205490112,
19628
+ "learning_rate": 2.8997208985921953e-07,
19629
+ "loss": 9.5666,
19630
+ "step": 2803
19631
+ },
19632
+ {
19633
+ "epoch": 0.9672300793377027,
19634
+ "grad_norm": 1.0676816701889038,
19635
+ "learning_rate": 2.8396816365001687e-07,
19636
+ "loss": 9.499,
19637
+ "step": 2804
19638
+ },
19639
+ {
19640
+ "epoch": 0.96757502587099,
19641
+ "grad_norm": 1.0291966199874878,
19642
+ "learning_rate": 2.7802686858710016e-07,
19643
+ "loss": 9.5364,
19644
+ "step": 2805
19645
+ },
19646
+ {
19647
+ "epoch": 0.9679199724042773,
19648
+ "grad_norm": 1.100340723991394,
19649
+ "learning_rate": 2.7214821215518214e-07,
19650
+ "loss": 9.5105,
19651
+ "step": 2806
19652
+ },
19653
+ {
19654
+ "epoch": 0.9682649189375647,
19655
+ "grad_norm": 1.1685614585876465,
19656
+ "learning_rate": 2.6633220176006667e-07,
19657
+ "loss": 9.5038,
19658
+ "step": 2807
19659
+ },
19660
+ {
19661
+ "epoch": 0.968609865470852,
19662
+ "grad_norm": 1.1775288581848145,
19663
+ "learning_rate": 2.6057884472862617e-07,
19664
+ "loss": 9.5383,
19665
+ "step": 2808
19666
+ },
19667
+ {
19668
+ "epoch": 0.9689548120041394,
19669
+ "grad_norm": 1.2399282455444336,
19670
+ "learning_rate": 2.548881483088128e-07,
19671
+ "loss": 9.4643,
19672
+ "step": 2809
19673
+ },
19674
+ {
19675
+ "epoch": 0.9692997585374267,
19676
+ "grad_norm": 1.2756638526916504,
19677
+ "learning_rate": 2.49260119669642e-07,
19678
+ "loss": 9.4916,
19679
+ "step": 2810
19680
+ },
19681
+ {
19682
+ "epoch": 0.969644705070714,
19683
+ "grad_norm": 1.3724730014801025,
19684
+ "learning_rate": 2.4369476590118123e-07,
19685
+ "loss": 9.4872,
19686
+ "step": 2811
19687
+ },
19688
+ {
19689
+ "epoch": 0.9699896516040014,
19690
+ "grad_norm": 1.1623852252960205,
19691
+ "learning_rate": 2.381920940145277e-07,
19692
+ "loss": 9.5659,
19693
+ "step": 2812
19694
+ },
19695
+ {
19696
+ "epoch": 0.9703345981372887,
19697
+ "grad_norm": 1.3232759237289429,
19698
+ "learning_rate": 2.3275211094183623e-07,
19699
+ "loss": 9.4726,
19700
+ "step": 2813
19701
+ },
19702
+ {
19703
+ "epoch": 0.970679544670576,
19704
+ "grad_norm": 1.105460286140442,
19705
+ "learning_rate": 2.2737482353626937e-07,
19706
+ "loss": 9.5364,
19707
+ "step": 2814
19708
+ },
19709
+ {
19710
+ "epoch": 0.9710244912038634,
19711
+ "grad_norm": 1.130436897277832,
19712
+ "learning_rate": 2.2206023857201385e-07,
19713
+ "loss": 9.5565,
19714
+ "step": 2815
19715
+ },
19716
+ {
19717
+ "epoch": 0.9713694377371508,
19718
+ "grad_norm": 1.2147849798202515,
19719
+ "learning_rate": 2.1680836274426962e-07,
19720
+ "loss": 9.5194,
19721
+ "step": 2816
19722
+ },
19723
+ {
19724
+ "epoch": 0.9717143842704381,
19725
+ "grad_norm": 1.2784674167633057,
19726
+ "learning_rate": 2.1161920266922763e-07,
19727
+ "loss": 9.4549,
19728
+ "step": 2817
19729
+ },
19730
+ {
19731
+ "epoch": 0.9720593308037254,
19732
+ "grad_norm": 1.3010627031326294,
19733
+ "learning_rate": 2.0649276488408086e-07,
19734
+ "loss": 9.4602,
19735
+ "step": 2818
19736
+ },
19737
+ {
19738
+ "epoch": 0.9724042773370127,
19739
+ "grad_norm": 1.2807101011276245,
19740
+ "learning_rate": 2.014290558469911e-07,
19741
+ "loss": 9.5456,
19742
+ "step": 2819
19743
+ },
19744
+ {
19745
+ "epoch": 0.9727492238703,
19746
+ "grad_norm": 1.301635503768921,
19747
+ "learning_rate": 1.964280819371167e-07,
19748
+ "loss": 9.5044,
19749
+ "step": 2820
19750
+ },
19751
+ {
19752
+ "epoch": 0.9730941704035875,
19753
+ "grad_norm": 1.1405683755874634,
19754
+ "learning_rate": 1.914898494545736e-07,
19755
+ "loss": 9.5604,
19756
+ "step": 2821
19757
+ },
19758
+ {
19759
+ "epoch": 0.9734391169368748,
19760
+ "grad_norm": 1.261732816696167,
19761
+ "learning_rate": 1.8661436462042437e-07,
19762
+ "loss": 9.4851,
19763
+ "step": 2822
19764
+ },
19765
+ {
19766
+ "epoch": 0.9737840634701621,
19767
+ "grad_norm": 1.2781423330307007,
19768
+ "learning_rate": 1.8180163357671143e-07,
19769
+ "loss": 9.5201,
19770
+ "step": 2823
19771
+ },
19772
+ {
19773
+ "epoch": 0.9741290100034494,
19774
+ "grad_norm": 1.2555526494979858,
19775
+ "learning_rate": 1.7705166238639047e-07,
19776
+ "loss": 9.4532,
19777
+ "step": 2824
19778
+ },
19779
+ {
19780
+ "epoch": 0.9744739565367369,
19781
+ "grad_norm": 1.2804704904556274,
19782
+ "learning_rate": 1.7236445703338044e-07,
19783
+ "loss": 9.5293,
19784
+ "step": 2825
19785
+ },
19786
+ {
19787
+ "epoch": 0.9748189030700242,
19788
+ "grad_norm": 1.3036941289901733,
19789
+ "learning_rate": 1.677400234225135e-07,
19790
+ "loss": 9.536,
19791
+ "step": 2826
19792
+ },
19793
+ {
19794
+ "epoch": 0.9751638496033115,
19795
+ "grad_norm": 1.2297347784042358,
19796
+ "learning_rate": 1.6317836737955172e-07,
19797
+ "loss": 9.496,
19798
+ "step": 2827
19799
+ },
19800
+ {
19801
+ "epoch": 0.9755087961365988,
19802
+ "grad_norm": 1.2707699537277222,
19803
+ "learning_rate": 1.586794946511594e-07,
19804
+ "loss": 9.4632,
19805
+ "step": 2828
19806
+ },
19807
+ {
19808
+ "epoch": 0.9758537426698861,
19809
+ "grad_norm": 1.3694789409637451,
19810
+ "learning_rate": 1.542434109049251e-07,
19811
+ "loss": 9.4874,
19812
+ "step": 2829
19813
+ },
19814
+ {
19815
+ "epoch": 0.9761986892031735,
19816
+ "grad_norm": 1.3027503490447998,
19817
+ "learning_rate": 1.4987012172932302e-07,
19818
+ "loss": 9.4417,
19819
+ "step": 2830
19820
+ },
19821
+ {
19822
+ "epoch": 0.9765436357364609,
19823
+ "grad_norm": 1.300711750984192,
19824
+ "learning_rate": 1.4555963263372385e-07,
19825
+ "loss": 9.5223,
19826
+ "step": 2831
19827
+ },
19828
+ {
19829
+ "epoch": 0.9768885822697482,
19830
+ "grad_norm": 1.29239821434021,
19831
+ "learning_rate": 1.413119490483894e-07,
19832
+ "loss": 9.4713,
19833
+ "step": 2832
19834
+ },
19835
+ {
19836
+ "epoch": 0.9772335288030355,
19837
+ "grad_norm": 1.3773186206817627,
19838
+ "learning_rate": 1.3712707632445032e-07,
19839
+ "loss": 9.4819,
19840
+ "step": 2833
19841
+ },
19842
+ {
19843
+ "epoch": 0.9775784753363229,
19844
+ "grad_norm": 1.4483076333999634,
19845
+ "learning_rate": 1.3300501973392277e-07,
19846
+ "loss": 9.4047,
19847
+ "step": 2834
19848
+ },
19849
+ {
19850
+ "epoch": 0.9779234218696102,
19851
+ "grad_norm": 1.4170414209365845,
19852
+ "learning_rate": 1.2894578446968065e-07,
19853
+ "loss": 9.5096,
19854
+ "step": 2835
19855
+ },
19856
+ {
19857
+ "epoch": 0.9782683684028975,
19858
+ "grad_norm": 1.5087085962295532,
19859
+ "learning_rate": 1.2494937564545562e-07,
19860
+ "loss": 9.4874,
19861
+ "step": 2836
19862
+ },
19863
+ {
19864
+ "epoch": 0.9786133149361849,
19865
+ "grad_norm": 1.5026702880859375,
19866
+ "learning_rate": 1.2101579829583154e-07,
19867
+ "loss": 9.4319,
19868
+ "step": 2837
19869
+ },
19870
+ {
19871
+ "epoch": 0.9789582614694722,
19872
+ "grad_norm": 1.3315801620483398,
19873
+ "learning_rate": 1.1714505737625004e-07,
19874
+ "loss": 9.4616,
19875
+ "step": 2838
19876
+ },
19877
+ {
19878
+ "epoch": 0.9793032080027596,
19879
+ "grad_norm": 1.5373655557632446,
19880
+ "learning_rate": 1.133371577629716e-07,
19881
+ "loss": 9.4924,
19882
+ "step": 2839
19883
+ },
19884
+ {
19885
+ "epoch": 0.9796481545360469,
19886
+ "grad_norm": 1.4027314186096191,
19887
+ "learning_rate": 1.095921042531145e-07,
19888
+ "loss": 9.3959,
19889
+ "step": 2840
19890
+ },
19891
+ {
19892
+ "epoch": 0.9799931010693342,
19893
+ "grad_norm": 1.4827989339828491,
19894
+ "learning_rate": 1.0590990156461034e-07,
19895
+ "loss": 9.4222,
19896
+ "step": 2841
19897
+ },
19898
+ {
19899
+ "epoch": 0.9803380476026216,
19900
+ "grad_norm": 1.365110158920288,
19901
+ "learning_rate": 1.022905543362096e-07,
19902
+ "loss": 9.4457,
19903
+ "step": 2842
19904
+ },
19905
+ {
19906
+ "epoch": 0.980682994135909,
19907
+ "grad_norm": 1.4800422191619873,
19908
+ "learning_rate": 9.873406712749279e-08,
19909
+ "loss": 9.4845,
19910
+ "step": 2843
19911
+ },
19912
+ {
19913
+ "epoch": 0.9810279406691963,
19914
+ "grad_norm": 1.482553243637085,
19915
+ "learning_rate": 9.524044441883706e-08,
19916
+ "loss": 9.4586,
19917
+ "step": 2844
19918
+ },
19919
+ {
19920
+ "epoch": 0.9813728872024836,
19921
+ "grad_norm": 1.5155751705169678,
19922
+ "learning_rate": 9.180969061143852e-08,
19923
+ "loss": 9.4402,
19924
+ "step": 2845
19925
+ },
19926
+ {
19927
+ "epoch": 0.9817178337357709,
19928
+ "grad_norm": 1.5355010032653809,
19929
+ "learning_rate": 8.844181002727325e-08,
19930
+ "loss": 9.4685,
19931
+ "step": 2846
19932
+ },
19933
+ {
19934
+ "epoch": 0.9820627802690582,
19935
+ "grad_norm": 1.5655204057693481,
19936
+ "learning_rate": 8.513680690913073e-08,
19937
+ "loss": 9.4197,
19938
+ "step": 2847
19939
+ },
19940
+ {
19941
+ "epoch": 0.9824077268023457,
19942
+ "grad_norm": 1.5435409545898438,
19943
+ "learning_rate": 8.189468542057488e-08,
19944
+ "loss": 9.4517,
19945
+ "step": 2848
19946
+ },
19947
+ {
19948
+ "epoch": 0.982752673335633,
19949
+ "grad_norm": 1.6485852003097534,
19950
+ "learning_rate": 7.871544964596633e-08,
19951
+ "loss": 9.4181,
19952
+ "step": 2849
19953
+ },
19954
+ {
19955
+ "epoch": 0.9830976198689203,
19956
+ "grad_norm": 1.6750664710998535,
19957
+ "learning_rate": 7.559910359042355e-08,
19958
+ "loss": 9.361,
19959
+ "step": 2850
19960
+ },
19961
+ {
19962
+ "epoch": 0.9834425664022076,
19963
+ "grad_norm": 0.9129282832145691,
19964
+ "learning_rate": 7.254565117985613e-08,
19965
+ "loss": 9.5804,
19966
+ "step": 2851
19967
+ },
19968
+ {
19969
+ "epoch": 0.983787512935495,
19970
+ "grad_norm": 1.0424609184265137,
19971
+ "learning_rate": 6.955509626093703e-08,
19972
+ "loss": 9.5371,
19973
+ "step": 2852
19974
+ },
19975
+ {
19976
+ "epoch": 0.9841324594687824,
19977
+ "grad_norm": 0.9472970366477966,
19978
+ "learning_rate": 6.662744260109155e-08,
19979
+ "loss": 9.6361,
19980
+ "step": 2853
19981
+ },
19982
+ {
19983
+ "epoch": 0.9844774060020697,
19984
+ "grad_norm": 1.1693047285079956,
19985
+ "learning_rate": 6.376269388852496e-08,
19986
+ "loss": 9.4819,
19987
+ "step": 2854
19988
+ },
19989
+ {
19990
+ "epoch": 0.984822352535357,
19991
+ "grad_norm": 0.99554044008255,
19992
+ "learning_rate": 6.096085373217264e-08,
19993
+ "loss": 9.5079,
19994
+ "step": 2855
19995
+ },
19996
+ {
19997
+ "epoch": 0.9851672990686443,
19998
+ "grad_norm": 0.9872909188270569,
19999
+ "learning_rate": 5.822192566173334e-08,
20000
+ "loss": 9.5455,
20001
+ "step": 2856
20002
+ },
20003
+ {
20004
+ "epoch": 0.9855122456019317,
20005
+ "grad_norm": 1.08110773563385,
20006
+ "learning_rate": 5.554591312765811e-08,
20007
+ "loss": 9.5288,
20008
+ "step": 2857
20009
+ },
20010
+ {
20011
+ "epoch": 0.985857192135219,
20012
+ "grad_norm": 1.1271926164627075,
20013
+ "learning_rate": 5.2932819501111395e-08,
20014
+ "loss": 9.5112,
20015
+ "step": 2858
20016
+ },
20017
+ {
20018
+ "epoch": 0.9862021386685064,
20019
+ "grad_norm": 1.2304562330245972,
20020
+ "learning_rate": 5.038264807402105e-08,
20021
+ "loss": 9.5027,
20022
+ "step": 2859
20023
+ },
20024
+ {
20025
+ "epoch": 0.9865470852017937,
20026
+ "grad_norm": 1.0815626382827759,
20027
+ "learning_rate": 4.789540205902832e-08,
20028
+ "loss": 9.5418,
20029
+ "step": 2860
20030
+ },
20031
+ {
20032
+ "epoch": 0.9868920317350811,
20033
+ "grad_norm": 1.279129147529602,
20034
+ "learning_rate": 4.547108458951566e-08,
20035
+ "loss": 9.5028,
20036
+ "step": 2861
20037
+ },
20038
+ {
20039
+ "epoch": 0.9872369782683684,
20040
+ "grad_norm": 1.1527676582336426,
20041
+ "learning_rate": 4.310969871958448e-08,
20042
+ "loss": 9.541,
20043
+ "step": 2862
20044
+ },
20045
+ {
20046
+ "epoch": 0.9875819248016557,
20047
+ "grad_norm": 1.260764241218567,
20048
+ "learning_rate": 4.0811247424049625e-08,
20049
+ "loss": 9.4835,
20050
+ "step": 2863
20051
+ },
20052
+ {
20053
+ "epoch": 0.9879268713349431,
20054
+ "grad_norm": 1.153011679649353,
20055
+ "learning_rate": 3.857573359845601e-08,
20056
+ "loss": 9.5011,
20057
+ "step": 2864
20058
+ },
20059
+ {
20060
+ "epoch": 0.9882718178682304,
20061
+ "grad_norm": 1.124871015548706,
20062
+ "learning_rate": 3.6403160059050865e-08,
20063
+ "loss": 9.5657,
20064
+ "step": 2865
20065
+ },
20066
+ {
20067
+ "epoch": 0.9886167644015178,
20068
+ "grad_norm": 1.1694023609161377,
20069
+ "learning_rate": 3.4293529542800406e-08,
20070
+ "loss": 9.4759,
20071
+ "step": 2866
20072
+ },
20073
+ {
20074
+ "epoch": 0.9889617109348051,
20075
+ "grad_norm": 1.1712218523025513,
20076
+ "learning_rate": 3.224684470735651e-08,
20077
+ "loss": 9.5351,
20078
+ "step": 2867
20079
+ },
20080
+ {
20081
+ "epoch": 0.9893066574680924,
20082
+ "grad_norm": 1.2313039302825928,
20083
+ "learning_rate": 3.0263108131095566e-08,
20084
+ "loss": 9.5316,
20085
+ "step": 2868
20086
  }
20087
  ],
20088
  "logging_steps": 1,
 
20102
  "attributes": {}
20103
  }
20104
  },
20105
+ "total_flos": 595349320237056.0,
20106
  "train_batch_size": 4,
20107
  "trial_name": null,
20108
  "trial_params": null