MyBGE_Model_Contrast / trainer_state.json
FlySulfur's picture
Upload 15 files
f77a04f verified
raw
history blame contribute delete
No virus
188 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.387568555758683,
"eval_steps": 500,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003656307129798903,
"grad_norm": 0.0,
"learning_rate": 2.9978062157221207e-05,
"loss": 0.0015,
"step": 10
},
{
"epoch": 0.007312614259597806,
"grad_norm": 0.0,
"learning_rate": 2.9956124314442413e-05,
"loss": 0.0,
"step": 20
},
{
"epoch": 0.010968921389396709,
"grad_norm": 0.0,
"learning_rate": 2.9934186471663623e-05,
"loss": 0.0,
"step": 30
},
{
"epoch": 0.014625228519195612,
"grad_norm": 0.0,
"learning_rate": 2.991224862888483e-05,
"loss": 0.0001,
"step": 40
},
{
"epoch": 0.018281535648994516,
"grad_norm": 0.0,
"learning_rate": 2.9890310786106035e-05,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.021937842778793418,
"grad_norm": 0.0,
"learning_rate": 2.9868372943327238e-05,
"loss": 0.0,
"step": 60
},
{
"epoch": 0.025594149908592323,
"grad_norm": 0.0,
"learning_rate": 2.9846435100548447e-05,
"loss": 0.0004,
"step": 70
},
{
"epoch": 0.029250457038391225,
"grad_norm": 0.0,
"learning_rate": 2.9824497257769653e-05,
"loss": 0.0,
"step": 80
},
{
"epoch": 0.03290676416819013,
"grad_norm": 0.0,
"learning_rate": 2.980255941499086e-05,
"loss": 0.0007,
"step": 90
},
{
"epoch": 0.03656307129798903,
"grad_norm": 0.0,
"learning_rate": 2.9780621572212066e-05,
"loss": 0.0042,
"step": 100
},
{
"epoch": 0.04021937842778794,
"grad_norm": 0.0,
"learning_rate": 2.9758683729433275e-05,
"loss": 0.0,
"step": 110
},
{
"epoch": 0.043875685557586835,
"grad_norm": 1.104537010192871,
"learning_rate": 2.973674588665448e-05,
"loss": 0.0002,
"step": 120
},
{
"epoch": 0.04753199268738574,
"grad_norm": 0.0,
"learning_rate": 2.9714808043875687e-05,
"loss": 0.0,
"step": 130
},
{
"epoch": 0.051188299817184646,
"grad_norm": 0.0,
"learning_rate": 2.9692870201096894e-05,
"loss": 0.0,
"step": 140
},
{
"epoch": 0.054844606946983544,
"grad_norm": 0.0,
"learning_rate": 2.96709323583181e-05,
"loss": 0.0,
"step": 150
},
{
"epoch": 0.05850091407678245,
"grad_norm": 0.0,
"learning_rate": 2.9648994515539306e-05,
"loss": 0.0,
"step": 160
},
{
"epoch": 0.062157221206581355,
"grad_norm": 0.0,
"learning_rate": 2.9627056672760512e-05,
"loss": 0.0,
"step": 170
},
{
"epoch": 0.06581352833638025,
"grad_norm": 0.0,
"learning_rate": 2.9605118829981718e-05,
"loss": 0.0,
"step": 180
},
{
"epoch": 0.06946983546617916,
"grad_norm": 0.0,
"learning_rate": 2.9583180987202924e-05,
"loss": 0.0,
"step": 190
},
{
"epoch": 0.07312614259597806,
"grad_norm": 0.0,
"learning_rate": 2.9561243144424134e-05,
"loss": 0.0,
"step": 200
},
{
"epoch": 0.07678244972577697,
"grad_norm": 0.0,
"learning_rate": 2.953930530164534e-05,
"loss": 0.0,
"step": 210
},
{
"epoch": 0.08043875685557587,
"grad_norm": 0.0,
"learning_rate": 2.9517367458866546e-05,
"loss": 0.0,
"step": 220
},
{
"epoch": 0.08409506398537477,
"grad_norm": 0.0,
"learning_rate": 2.9495429616087752e-05,
"loss": 0.0,
"step": 230
},
{
"epoch": 0.08775137111517367,
"grad_norm": 0.0,
"learning_rate": 2.947349177330896e-05,
"loss": 0.0,
"step": 240
},
{
"epoch": 0.09140767824497258,
"grad_norm": 0.0,
"learning_rate": 2.9451553930530168e-05,
"loss": 0.0099,
"step": 250
},
{
"epoch": 0.09506398537477148,
"grad_norm": 0.0,
"learning_rate": 2.942961608775137e-05,
"loss": 0.0,
"step": 260
},
{
"epoch": 0.09872029250457039,
"grad_norm": 0.0,
"learning_rate": 2.9407678244972577e-05,
"loss": 0.0,
"step": 270
},
{
"epoch": 0.10237659963436929,
"grad_norm": 0.0,
"learning_rate": 2.9385740402193783e-05,
"loss": 0.0,
"step": 280
},
{
"epoch": 0.10603290676416818,
"grad_norm": 0.0,
"learning_rate": 2.9363802559414992e-05,
"loss": 0.0,
"step": 290
},
{
"epoch": 0.10968921389396709,
"grad_norm": 0.0,
"learning_rate": 2.93418647166362e-05,
"loss": 0.0,
"step": 300
},
{
"epoch": 0.113345521023766,
"grad_norm": 0.0,
"learning_rate": 2.9319926873857405e-05,
"loss": 0.0,
"step": 310
},
{
"epoch": 0.1170018281535649,
"grad_norm": 0.0,
"learning_rate": 2.929798903107861e-05,
"loss": 0.0001,
"step": 320
},
{
"epoch": 0.1206581352833638,
"grad_norm": 0.0,
"learning_rate": 2.927605118829982e-05,
"loss": 0.0,
"step": 330
},
{
"epoch": 0.12431444241316271,
"grad_norm": 0.0,
"learning_rate": 2.9254113345521026e-05,
"loss": 0.0,
"step": 340
},
{
"epoch": 0.12797074954296161,
"grad_norm": 0.0,
"learning_rate": 2.9232175502742233e-05,
"loss": 0.0,
"step": 350
},
{
"epoch": 0.1316270566727605,
"grad_norm": 0.0,
"learning_rate": 2.9210237659963435e-05,
"loss": 0.0,
"step": 360
},
{
"epoch": 0.13528336380255943,
"grad_norm": 0.0,
"learning_rate": 2.918829981718464e-05,
"loss": 0.0,
"step": 370
},
{
"epoch": 0.13893967093235832,
"grad_norm": 0.0,
"learning_rate": 2.916636197440585e-05,
"loss": 0.0,
"step": 380
},
{
"epoch": 0.1425959780621572,
"grad_norm": 0.0,
"learning_rate": 2.9144424131627057e-05,
"loss": 0.0,
"step": 390
},
{
"epoch": 0.14625228519195613,
"grad_norm": 0.0,
"learning_rate": 2.9122486288848263e-05,
"loss": 0.0,
"step": 400
},
{
"epoch": 0.14990859232175502,
"grad_norm": 0.0,
"learning_rate": 2.910054844606947e-05,
"loss": 0.0001,
"step": 410
},
{
"epoch": 0.15356489945155394,
"grad_norm": 0.0,
"learning_rate": 2.907861060329068e-05,
"loss": 0.0,
"step": 420
},
{
"epoch": 0.15722120658135283,
"grad_norm": 0.0,
"learning_rate": 2.9056672760511885e-05,
"loss": 0.0,
"step": 430
},
{
"epoch": 0.16087751371115175,
"grad_norm": 0.0,
"learning_rate": 2.903473491773309e-05,
"loss": 0.0,
"step": 440
},
{
"epoch": 0.16453382084095064,
"grad_norm": 0.0,
"learning_rate": 2.9012797074954297e-05,
"loss": 0.0,
"step": 450
},
{
"epoch": 0.16819012797074953,
"grad_norm": 0.0,
"learning_rate": 2.8990859232175504e-05,
"loss": 0.0,
"step": 460
},
{
"epoch": 0.17184643510054845,
"grad_norm": 0.0,
"learning_rate": 2.896892138939671e-05,
"loss": 0.0,
"step": 470
},
{
"epoch": 0.17550274223034734,
"grad_norm": 0.0,
"learning_rate": 2.8946983546617916e-05,
"loss": 0.0,
"step": 480
},
{
"epoch": 0.17915904936014626,
"grad_norm": 0.0,
"learning_rate": 2.8925045703839122e-05,
"loss": 0.0,
"step": 490
},
{
"epoch": 0.18281535648994515,
"grad_norm": 0.0,
"learning_rate": 2.8903107861060328e-05,
"loss": 0.0,
"step": 500
},
{
"epoch": 0.18647166361974407,
"grad_norm": 0.0,
"learning_rate": 2.8881170018281538e-05,
"loss": 0.0,
"step": 510
},
{
"epoch": 0.19012797074954296,
"grad_norm": 0.0,
"learning_rate": 2.8859232175502744e-05,
"loss": 0.0,
"step": 520
},
{
"epoch": 0.19378427787934185,
"grad_norm": 0.0,
"learning_rate": 2.883729433272395e-05,
"loss": 0.0,
"step": 530
},
{
"epoch": 0.19744058500914077,
"grad_norm": 0.0,
"learning_rate": 2.8815356489945156e-05,
"loss": 0.0,
"step": 540
},
{
"epoch": 0.20109689213893966,
"grad_norm": 0.0,
"learning_rate": 2.8793418647166366e-05,
"loss": 0.0,
"step": 550
},
{
"epoch": 0.20475319926873858,
"grad_norm": 0.0,
"learning_rate": 2.877148080438757e-05,
"loss": 0.0,
"step": 560
},
{
"epoch": 0.20840950639853748,
"grad_norm": 0.0,
"learning_rate": 2.8749542961608775e-05,
"loss": 0.0,
"step": 570
},
{
"epoch": 0.21206581352833637,
"grad_norm": 0.0,
"learning_rate": 2.872760511882998e-05,
"loss": 0.0,
"step": 580
},
{
"epoch": 0.21572212065813529,
"grad_norm": 0.0,
"learning_rate": 2.870566727605119e-05,
"loss": 0.0,
"step": 590
},
{
"epoch": 0.21937842778793418,
"grad_norm": 0.0,
"learning_rate": 2.8683729433272396e-05,
"loss": 0.0,
"step": 600
},
{
"epoch": 0.2230347349177331,
"grad_norm": 0.0,
"learning_rate": 2.8661791590493602e-05,
"loss": 0.0,
"step": 610
},
{
"epoch": 0.226691042047532,
"grad_norm": 0.0,
"learning_rate": 2.863985374771481e-05,
"loss": 0.0,
"step": 620
},
{
"epoch": 0.2303473491773309,
"grad_norm": 0.0,
"learning_rate": 2.8617915904936015e-05,
"loss": 0.0,
"step": 630
},
{
"epoch": 0.2340036563071298,
"grad_norm": 0.0,
"learning_rate": 2.8595978062157224e-05,
"loss": 0.0,
"step": 640
},
{
"epoch": 0.2376599634369287,
"grad_norm": 0.0,
"learning_rate": 2.857404021937843e-05,
"loss": 0.0002,
"step": 650
},
{
"epoch": 0.2413162705667276,
"grad_norm": 0.0,
"learning_rate": 2.8552102376599633e-05,
"loss": 0.0,
"step": 660
},
{
"epoch": 0.2449725776965265,
"grad_norm": 0.0,
"learning_rate": 2.853016453382084e-05,
"loss": 0.0,
"step": 670
},
{
"epoch": 0.24862888482632542,
"grad_norm": 0.0,
"learning_rate": 2.850822669104205e-05,
"loss": 0.0,
"step": 680
},
{
"epoch": 0.2522851919561243,
"grad_norm": 0.0,
"learning_rate": 2.8486288848263255e-05,
"loss": 0.0,
"step": 690
},
{
"epoch": 0.25594149908592323,
"grad_norm": 0.0,
"learning_rate": 2.846435100548446e-05,
"loss": 0.0,
"step": 700
},
{
"epoch": 0.2595978062157221,
"grad_norm": 0.0,
"learning_rate": 2.8442413162705667e-05,
"loss": 0.0005,
"step": 710
},
{
"epoch": 0.263254113345521,
"grad_norm": 0.0,
"learning_rate": 2.8420475319926873e-05,
"loss": 0.0,
"step": 720
},
{
"epoch": 0.26691042047531993,
"grad_norm": 0.0,
"learning_rate": 2.8398537477148083e-05,
"loss": 0.0,
"step": 730
},
{
"epoch": 0.27056672760511885,
"grad_norm": 0.0,
"learning_rate": 2.837659963436929e-05,
"loss": 0.0073,
"step": 740
},
{
"epoch": 0.2742230347349177,
"grad_norm": 0.0,
"learning_rate": 2.8354661791590495e-05,
"loss": 0.0,
"step": 750
},
{
"epoch": 0.27787934186471663,
"grad_norm": 0.0,
"learning_rate": 2.8332723948811698e-05,
"loss": 0.0,
"step": 760
},
{
"epoch": 0.28153564899451555,
"grad_norm": 0.0,
"learning_rate": 2.8310786106032907e-05,
"loss": 0.0,
"step": 770
},
{
"epoch": 0.2851919561243144,
"grad_norm": 0.01952667348086834,
"learning_rate": 2.8288848263254114e-05,
"loss": 0.0094,
"step": 780
},
{
"epoch": 0.28884826325411334,
"grad_norm": 1.364105224609375,
"learning_rate": 2.826691042047532e-05,
"loss": 0.0086,
"step": 790
},
{
"epoch": 0.29250457038391225,
"grad_norm": 0.0,
"learning_rate": 2.8244972577696526e-05,
"loss": 0.0,
"step": 800
},
{
"epoch": 0.2961608775137112,
"grad_norm": 0.0,
"learning_rate": 2.8223034734917735e-05,
"loss": 0.0,
"step": 810
},
{
"epoch": 0.29981718464351004,
"grad_norm": 0.0,
"learning_rate": 2.820109689213894e-05,
"loss": 0.0,
"step": 820
},
{
"epoch": 0.30347349177330896,
"grad_norm": 0.0,
"learning_rate": 2.8179159049360148e-05,
"loss": 0.0,
"step": 830
},
{
"epoch": 0.3071297989031079,
"grad_norm": 0.0,
"learning_rate": 2.8157221206581354e-05,
"loss": 0.0002,
"step": 840
},
{
"epoch": 0.31078610603290674,
"grad_norm": 0.0,
"learning_rate": 2.813528336380256e-05,
"loss": 0.0,
"step": 850
},
{
"epoch": 0.31444241316270566,
"grad_norm": 0.0,
"learning_rate": 2.8113345521023766e-05,
"loss": 0.0,
"step": 860
},
{
"epoch": 0.3180987202925046,
"grad_norm": 0.0,
"learning_rate": 2.8091407678244972e-05,
"loss": 0.0,
"step": 870
},
{
"epoch": 0.3217550274223035,
"grad_norm": 0.0,
"learning_rate": 2.806946983546618e-05,
"loss": 0.0,
"step": 880
},
{
"epoch": 0.32541133455210236,
"grad_norm": 0.0,
"learning_rate": 2.8047531992687385e-05,
"loss": 0.0,
"step": 890
},
{
"epoch": 0.3290676416819013,
"grad_norm": 0.0,
"learning_rate": 2.8025594149908594e-05,
"loss": 0.0,
"step": 900
},
{
"epoch": 0.3327239488117002,
"grad_norm": 0.0,
"learning_rate": 2.80036563071298e-05,
"loss": 0.0,
"step": 910
},
{
"epoch": 0.33638025594149906,
"grad_norm": 0.0,
"learning_rate": 2.7981718464351006e-05,
"loss": 0.0,
"step": 920
},
{
"epoch": 0.340036563071298,
"grad_norm": 0.0,
"learning_rate": 2.7959780621572213e-05,
"loss": 0.0,
"step": 930
},
{
"epoch": 0.3436928702010969,
"grad_norm": 0.0,
"learning_rate": 2.7937842778793422e-05,
"loss": 0.0,
"step": 940
},
{
"epoch": 0.3473491773308958,
"grad_norm": 0.0,
"learning_rate": 2.7915904936014628e-05,
"loss": 0.0,
"step": 950
},
{
"epoch": 0.3510054844606947,
"grad_norm": 0.0,
"learning_rate": 2.789396709323583e-05,
"loss": 0.0,
"step": 960
},
{
"epoch": 0.3546617915904936,
"grad_norm": 0.0,
"learning_rate": 2.7872029250457037e-05,
"loss": 0.0,
"step": 970
},
{
"epoch": 0.3583180987202925,
"grad_norm": 0.0,
"learning_rate": 2.7850091407678243e-05,
"loss": 0.0,
"step": 980
},
{
"epoch": 0.3619744058500914,
"grad_norm": 0.0,
"learning_rate": 2.7828153564899453e-05,
"loss": 0.0,
"step": 990
},
{
"epoch": 0.3656307129798903,
"grad_norm": 0.0,
"learning_rate": 2.780621572212066e-05,
"loss": 0.0,
"step": 1000
},
{
"epoch": 0.3692870201096892,
"grad_norm": 0.0,
"learning_rate": 2.7784277879341865e-05,
"loss": 0.0,
"step": 1010
},
{
"epoch": 0.37294332723948814,
"grad_norm": 0.0,
"learning_rate": 2.776234003656307e-05,
"loss": 0.0,
"step": 1020
},
{
"epoch": 0.376599634369287,
"grad_norm": 0.0,
"learning_rate": 2.774040219378428e-05,
"loss": 0.0,
"step": 1030
},
{
"epoch": 0.3802559414990859,
"grad_norm": 0.0,
"learning_rate": 2.7718464351005487e-05,
"loss": 0.0,
"step": 1040
},
{
"epoch": 0.38391224862888484,
"grad_norm": 0.0,
"learning_rate": 2.7696526508226693e-05,
"loss": 0.0,
"step": 1050
},
{
"epoch": 0.3875685557586837,
"grad_norm": 0.0,
"learning_rate": 2.76745886654479e-05,
"loss": 0.0,
"step": 1060
},
{
"epoch": 0.3912248628884826,
"grad_norm": 0.0,
"learning_rate": 2.7652650822669102e-05,
"loss": 0.0,
"step": 1070
},
{
"epoch": 0.39488117001828155,
"grad_norm": 0.0,
"learning_rate": 2.763071297989031e-05,
"loss": 0.0,
"step": 1080
},
{
"epoch": 0.39853747714808047,
"grad_norm": 0.0,
"learning_rate": 2.7608775137111518e-05,
"loss": 0.0,
"step": 1090
},
{
"epoch": 0.40219378427787933,
"grad_norm": 0.0,
"learning_rate": 2.7586837294332724e-05,
"loss": 0.0,
"step": 1100
},
{
"epoch": 0.40585009140767825,
"grad_norm": 0.0,
"learning_rate": 2.756489945155393e-05,
"loss": 0.0,
"step": 1110
},
{
"epoch": 0.40950639853747717,
"grad_norm": 0.0,
"learning_rate": 2.754296160877514e-05,
"loss": 0.0038,
"step": 1120
},
{
"epoch": 0.41316270566727603,
"grad_norm": 2.969595432281494,
"learning_rate": 2.7521023765996346e-05,
"loss": 0.1412,
"step": 1130
},
{
"epoch": 0.41681901279707495,
"grad_norm": 0.8748220205307007,
"learning_rate": 2.749908592321755e-05,
"loss": 0.0621,
"step": 1140
},
{
"epoch": 0.42047531992687387,
"grad_norm": 0.1589801162481308,
"learning_rate": 2.7477148080438758e-05,
"loss": 0.0083,
"step": 1150
},
{
"epoch": 0.42413162705667273,
"grad_norm": 0.0,
"learning_rate": 2.7455210237659964e-05,
"loss": 0.0007,
"step": 1160
},
{
"epoch": 0.42778793418647165,
"grad_norm": 0.0,
"learning_rate": 2.743327239488117e-05,
"loss": 0.0,
"step": 1170
},
{
"epoch": 0.43144424131627057,
"grad_norm": 0.0,
"learning_rate": 2.7411334552102376e-05,
"loss": 0.0001,
"step": 1180
},
{
"epoch": 0.4351005484460695,
"grad_norm": 0.0,
"learning_rate": 2.7389396709323582e-05,
"loss": 0.0001,
"step": 1190
},
{
"epoch": 0.43875685557586835,
"grad_norm": 0.0,
"learning_rate": 2.736745886654479e-05,
"loss": 0.0,
"step": 1200
},
{
"epoch": 0.4424131627056673,
"grad_norm": 0.0,
"learning_rate": 2.7345521023765998e-05,
"loss": 0.0,
"step": 1210
},
{
"epoch": 0.4460694698354662,
"grad_norm": 0.0,
"learning_rate": 2.7323583180987204e-05,
"loss": 0.0,
"step": 1220
},
{
"epoch": 0.44972577696526506,
"grad_norm": 0.0,
"learning_rate": 2.730164533820841e-05,
"loss": 0.0,
"step": 1230
},
{
"epoch": 0.453382084095064,
"grad_norm": 0.0,
"learning_rate": 2.7279707495429616e-05,
"loss": 0.0,
"step": 1240
},
{
"epoch": 0.4570383912248629,
"grad_norm": 0.0,
"learning_rate": 2.7257769652650826e-05,
"loss": 0.0,
"step": 1250
},
{
"epoch": 0.4606946983546618,
"grad_norm": 0.0,
"learning_rate": 2.7235831809872032e-05,
"loss": 0.0,
"step": 1260
},
{
"epoch": 0.4643510054844607,
"grad_norm": 0.0,
"learning_rate": 2.7213893967093235e-05,
"loss": 0.0,
"step": 1270
},
{
"epoch": 0.4680073126142596,
"grad_norm": 0.0,
"learning_rate": 2.719195612431444e-05,
"loss": 0.0,
"step": 1280
},
{
"epoch": 0.4716636197440585,
"grad_norm": 0.0,
"learning_rate": 2.717001828153565e-05,
"loss": 0.0,
"step": 1290
},
{
"epoch": 0.4753199268738574,
"grad_norm": 0.0,
"learning_rate": 2.7148080438756857e-05,
"loss": 0.0,
"step": 1300
},
{
"epoch": 0.4789762340036563,
"grad_norm": 0.0,
"learning_rate": 2.7126142595978063e-05,
"loss": 0.0,
"step": 1310
},
{
"epoch": 0.4826325411334552,
"grad_norm": 0.0,
"learning_rate": 2.710420475319927e-05,
"loss": 0.0,
"step": 1320
},
{
"epoch": 0.48628884826325414,
"grad_norm": 0.0,
"learning_rate": 2.7082266910420475e-05,
"loss": 0.0,
"step": 1330
},
{
"epoch": 0.489945155393053,
"grad_norm": 0.0,
"learning_rate": 2.7060329067641685e-05,
"loss": 0.0,
"step": 1340
},
{
"epoch": 0.4936014625228519,
"grad_norm": 0.0,
"learning_rate": 2.703839122486289e-05,
"loss": 0.0,
"step": 1350
},
{
"epoch": 0.49725776965265084,
"grad_norm": 0.0,
"learning_rate": 2.7016453382084097e-05,
"loss": 0.0,
"step": 1360
},
{
"epoch": 0.5009140767824497,
"grad_norm": 0.0,
"learning_rate": 2.69945155393053e-05,
"loss": 0.0,
"step": 1370
},
{
"epoch": 0.5045703839122486,
"grad_norm": 0.0,
"learning_rate": 2.697257769652651e-05,
"loss": 0.0,
"step": 1380
},
{
"epoch": 0.5082266910420475,
"grad_norm": 0.0,
"learning_rate": 2.6950639853747715e-05,
"loss": 0.0,
"step": 1390
},
{
"epoch": 0.5118829981718465,
"grad_norm": 0.0,
"learning_rate": 2.692870201096892e-05,
"loss": 0.0,
"step": 1400
},
{
"epoch": 0.5155393053016454,
"grad_norm": 0.0,
"learning_rate": 2.6906764168190128e-05,
"loss": 0.0,
"step": 1410
},
{
"epoch": 0.5191956124314442,
"grad_norm": 0.0,
"learning_rate": 2.6884826325411337e-05,
"loss": 0.0,
"step": 1420
},
{
"epoch": 0.5228519195612431,
"grad_norm": 0.0,
"learning_rate": 2.6862888482632543e-05,
"loss": 0.0,
"step": 1430
},
{
"epoch": 0.526508226691042,
"grad_norm": 0.0,
"learning_rate": 2.684095063985375e-05,
"loss": 0.0,
"step": 1440
},
{
"epoch": 0.5301645338208409,
"grad_norm": 0.0,
"learning_rate": 2.6819012797074956e-05,
"loss": 0.0,
"step": 1450
},
{
"epoch": 0.5338208409506399,
"grad_norm": 0.0,
"learning_rate": 2.6797074954296162e-05,
"loss": 0.0,
"step": 1460
},
{
"epoch": 0.5374771480804388,
"grad_norm": 0.0,
"learning_rate": 2.6775137111517368e-05,
"loss": 0.0,
"step": 1470
},
{
"epoch": 0.5411334552102377,
"grad_norm": 0.0,
"learning_rate": 2.6753199268738574e-05,
"loss": 0.0,
"step": 1480
},
{
"epoch": 0.5447897623400365,
"grad_norm": 0.0,
"learning_rate": 2.673126142595978e-05,
"loss": 0.0,
"step": 1490
},
{
"epoch": 0.5484460694698354,
"grad_norm": 0.0,
"learning_rate": 2.6709323583180986e-05,
"loss": 0.0,
"step": 1500
},
{
"epoch": 0.5521023765996343,
"grad_norm": 0.0,
"learning_rate": 2.6687385740402196e-05,
"loss": 0.0,
"step": 1510
},
{
"epoch": 0.5557586837294333,
"grad_norm": 0.0,
"learning_rate": 2.6665447897623402e-05,
"loss": 0.0,
"step": 1520
},
{
"epoch": 0.5594149908592322,
"grad_norm": 0.0,
"learning_rate": 2.6643510054844608e-05,
"loss": 0.0,
"step": 1530
},
{
"epoch": 0.5630712979890311,
"grad_norm": 0.0,
"learning_rate": 2.6621572212065814e-05,
"loss": 0.0,
"step": 1540
},
{
"epoch": 0.56672760511883,
"grad_norm": 0.0,
"learning_rate": 2.659963436928702e-05,
"loss": 0.0,
"step": 1550
},
{
"epoch": 0.5703839122486288,
"grad_norm": 0.0,
"learning_rate": 2.657769652650823e-05,
"loss": 0.0,
"step": 1560
},
{
"epoch": 0.5740402193784278,
"grad_norm": 0.0,
"learning_rate": 2.6555758683729433e-05,
"loss": 0.0,
"step": 1570
},
{
"epoch": 0.5776965265082267,
"grad_norm": 0.0,
"learning_rate": 2.653382084095064e-05,
"loss": 0.0,
"step": 1580
},
{
"epoch": 0.5813528336380256,
"grad_norm": 0.0,
"learning_rate": 2.6511882998171845e-05,
"loss": 0.0,
"step": 1590
},
{
"epoch": 0.5850091407678245,
"grad_norm": 0.0,
"learning_rate": 2.6489945155393054e-05,
"loss": 0.0,
"step": 1600
},
{
"epoch": 0.5886654478976234,
"grad_norm": 0.0,
"learning_rate": 2.646800731261426e-05,
"loss": 0.0,
"step": 1610
},
{
"epoch": 0.5923217550274223,
"grad_norm": 0.0,
"learning_rate": 2.6446069469835467e-05,
"loss": 0.0,
"step": 1620
},
{
"epoch": 0.5959780621572212,
"grad_norm": 0.0,
"learning_rate": 2.6424131627056673e-05,
"loss": 0.0,
"step": 1630
},
{
"epoch": 0.5996343692870201,
"grad_norm": 0.0,
"learning_rate": 2.6402193784277882e-05,
"loss": 0.0,
"step": 1640
},
{
"epoch": 0.603290676416819,
"grad_norm": 0.0,
"learning_rate": 2.638025594149909e-05,
"loss": 0.0,
"step": 1650
},
{
"epoch": 0.6069469835466179,
"grad_norm": 0.0,
"learning_rate": 2.6358318098720295e-05,
"loss": 0.0,
"step": 1660
},
{
"epoch": 0.6106032906764168,
"grad_norm": 0.0,
"learning_rate": 2.6336380255941497e-05,
"loss": 0.0,
"step": 1670
},
{
"epoch": 0.6142595978062158,
"grad_norm": 0.0,
"learning_rate": 2.6314442413162704e-05,
"loss": 0.0,
"step": 1680
},
{
"epoch": 0.6179159049360147,
"grad_norm": 0.0,
"learning_rate": 2.6292504570383913e-05,
"loss": 0.0,
"step": 1690
},
{
"epoch": 0.6215722120658135,
"grad_norm": 0.0,
"learning_rate": 2.627056672760512e-05,
"loss": 0.0,
"step": 1700
},
{
"epoch": 0.6252285191956124,
"grad_norm": 0.0,
"learning_rate": 2.6248628884826325e-05,
"loss": 0.0,
"step": 1710
},
{
"epoch": 0.6288848263254113,
"grad_norm": 0.0,
"learning_rate": 2.622669104204753e-05,
"loss": 0.0,
"step": 1720
},
{
"epoch": 0.6325411334552102,
"grad_norm": 0.0,
"learning_rate": 2.620475319926874e-05,
"loss": 0.0,
"step": 1730
},
{
"epoch": 0.6361974405850092,
"grad_norm": 0.0,
"learning_rate": 2.6182815356489947e-05,
"loss": 0.0,
"step": 1740
},
{
"epoch": 0.6398537477148081,
"grad_norm": 0.0,
"learning_rate": 2.6160877513711153e-05,
"loss": 0.0,
"step": 1750
},
{
"epoch": 0.643510054844607,
"grad_norm": 0.0,
"learning_rate": 2.613893967093236e-05,
"loss": 0.0,
"step": 1760
},
{
"epoch": 0.6471663619744058,
"grad_norm": 0.0,
"learning_rate": 2.6117001828153566e-05,
"loss": 0.0,
"step": 1770
},
{
"epoch": 0.6508226691042047,
"grad_norm": 0.0,
"learning_rate": 2.6095063985374772e-05,
"loss": 0.0,
"step": 1780
},
{
"epoch": 0.6544789762340036,
"grad_norm": 0.0,
"learning_rate": 2.6073126142595978e-05,
"loss": 0.0,
"step": 1790
},
{
"epoch": 0.6581352833638026,
"grad_norm": 0.0,
"learning_rate": 2.6051188299817184e-05,
"loss": 0.0,
"step": 1800
},
{
"epoch": 0.6617915904936015,
"grad_norm": 0.0,
"learning_rate": 2.602925045703839e-05,
"loss": 0.0,
"step": 1810
},
{
"epoch": 0.6654478976234004,
"grad_norm": 0.0,
"learning_rate": 2.60073126142596e-05,
"loss": 0.0,
"step": 1820
},
{
"epoch": 0.6691042047531993,
"grad_norm": 0.0,
"learning_rate": 2.5985374771480806e-05,
"loss": 0.0,
"step": 1830
},
{
"epoch": 0.6727605118829981,
"grad_norm": 0.0,
"learning_rate": 2.5963436928702012e-05,
"loss": 0.0,
"step": 1840
},
{
"epoch": 0.676416819012797,
"grad_norm": 0.0,
"learning_rate": 2.5941499085923218e-05,
"loss": 0.0,
"step": 1850
},
{
"epoch": 0.680073126142596,
"grad_norm": 0.0,
"learning_rate": 2.5919561243144428e-05,
"loss": 0.0,
"step": 1860
},
{
"epoch": 0.6837294332723949,
"grad_norm": 0.0,
"learning_rate": 2.589762340036563e-05,
"loss": 0.0,
"step": 1870
},
{
"epoch": 0.6873857404021938,
"grad_norm": 0.0,
"learning_rate": 2.5875685557586837e-05,
"loss": 0.0,
"step": 1880
},
{
"epoch": 0.6910420475319927,
"grad_norm": 0.0,
"learning_rate": 2.5853747714808043e-05,
"loss": 0.0,
"step": 1890
},
{
"epoch": 0.6946983546617916,
"grad_norm": 0.0,
"learning_rate": 2.583180987202925e-05,
"loss": 0.0,
"step": 1900
},
{
"epoch": 0.6983546617915904,
"grad_norm": 0.0,
"learning_rate": 2.580987202925046e-05,
"loss": 0.0,
"step": 1910
},
{
"epoch": 0.7020109689213894,
"grad_norm": 0.0,
"learning_rate": 2.5787934186471665e-05,
"loss": 0.0,
"step": 1920
},
{
"epoch": 0.7056672760511883,
"grad_norm": 0.0,
"learning_rate": 2.576599634369287e-05,
"loss": 0.0,
"step": 1930
},
{
"epoch": 0.7093235831809872,
"grad_norm": 0.0,
"learning_rate": 2.5744058500914077e-05,
"loss": 0.0,
"step": 1940
},
{
"epoch": 0.7129798903107861,
"grad_norm": 0.0,
"learning_rate": 2.5722120658135286e-05,
"loss": 0.0,
"step": 1950
},
{
"epoch": 0.716636197440585,
"grad_norm": 0.0,
"learning_rate": 2.5700182815356492e-05,
"loss": 0.0,
"step": 1960
},
{
"epoch": 0.720292504570384,
"grad_norm": 0.0,
"learning_rate": 2.5678244972577695e-05,
"loss": 0.0,
"step": 1970
},
{
"epoch": 0.7239488117001828,
"grad_norm": 0.0,
"learning_rate": 2.56563071297989e-05,
"loss": 0.0,
"step": 1980
},
{
"epoch": 0.7276051188299817,
"grad_norm": 0.0,
"learning_rate": 2.563436928702011e-05,
"loss": 0.0,
"step": 1990
},
{
"epoch": 0.7312614259597806,
"grad_norm": 0.0,
"learning_rate": 2.5612431444241317e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 0.7349177330895795,
"grad_norm": 0.0,
"learning_rate": 2.5590493601462523e-05,
"loss": 0.0,
"step": 2010
},
{
"epoch": 0.7385740402193784,
"grad_norm": 0.0,
"learning_rate": 2.556855575868373e-05,
"loss": 0.0,
"step": 2020
},
{
"epoch": 0.7422303473491774,
"grad_norm": 0.0,
"learning_rate": 2.5546617915904935e-05,
"loss": 0.0,
"step": 2030
},
{
"epoch": 0.7458866544789763,
"grad_norm": 0.0,
"learning_rate": 2.5524680073126145e-05,
"loss": 0.0,
"step": 2040
},
{
"epoch": 0.7495429616087751,
"grad_norm": 0.0,
"learning_rate": 2.550274223034735e-05,
"loss": 0.0,
"step": 2050
},
{
"epoch": 0.753199268738574,
"grad_norm": 0.0,
"learning_rate": 2.5480804387568557e-05,
"loss": 0.0,
"step": 2060
},
{
"epoch": 0.7568555758683729,
"grad_norm": 0.0,
"learning_rate": 2.545886654478976e-05,
"loss": 0.0,
"step": 2070
},
{
"epoch": 0.7605118829981719,
"grad_norm": 0.0,
"learning_rate": 2.543692870201097e-05,
"loss": 0.0,
"step": 2080
},
{
"epoch": 0.7641681901279708,
"grad_norm": 0.0,
"learning_rate": 2.5414990859232176e-05,
"loss": 0.0,
"step": 2090
},
{
"epoch": 0.7678244972577697,
"grad_norm": 0.0,
"learning_rate": 2.5393053016453382e-05,
"loss": 0.0,
"step": 2100
},
{
"epoch": 0.7714808043875686,
"grad_norm": 0.0,
"learning_rate": 2.5371115173674588e-05,
"loss": 0.0,
"step": 2110
},
{
"epoch": 0.7751371115173674,
"grad_norm": 0.0,
"learning_rate": 2.5349177330895798e-05,
"loss": 0.0,
"step": 2120
},
{
"epoch": 0.7787934186471663,
"grad_norm": 0.0,
"learning_rate": 2.5327239488117004e-05,
"loss": 0.0,
"step": 2130
},
{
"epoch": 0.7824497257769653,
"grad_norm": 0.0,
"learning_rate": 2.530530164533821e-05,
"loss": 0.0,
"step": 2140
},
{
"epoch": 0.7861060329067642,
"grad_norm": 0.0,
"learning_rate": 2.5283363802559416e-05,
"loss": 0.0,
"step": 2150
},
{
"epoch": 0.7897623400365631,
"grad_norm": 0.0,
"learning_rate": 2.5261425959780622e-05,
"loss": 0.0,
"step": 2160
},
{
"epoch": 0.793418647166362,
"grad_norm": 0.0,
"learning_rate": 2.5239488117001828e-05,
"loss": 0.0,
"step": 2170
},
{
"epoch": 0.7970749542961609,
"grad_norm": 0.0,
"learning_rate": 2.5217550274223034e-05,
"loss": 0.0,
"step": 2180
},
{
"epoch": 0.8007312614259597,
"grad_norm": 0.0,
"learning_rate": 2.519561243144424e-05,
"loss": 0.0,
"step": 2190
},
{
"epoch": 0.8043875685557587,
"grad_norm": 0.0,
"learning_rate": 2.5173674588665447e-05,
"loss": 0.0,
"step": 2200
},
{
"epoch": 0.8080438756855576,
"grad_norm": 0.0,
"learning_rate": 2.5151736745886656e-05,
"loss": 0.0,
"step": 2210
},
{
"epoch": 0.8117001828153565,
"grad_norm": 0.0,
"learning_rate": 2.5129798903107862e-05,
"loss": 0.0,
"step": 2220
},
{
"epoch": 0.8153564899451554,
"grad_norm": 0.0,
"learning_rate": 2.510786106032907e-05,
"loss": 0.0,
"step": 2230
},
{
"epoch": 0.8190127970749543,
"grad_norm": 0.0,
"learning_rate": 2.5085923217550275e-05,
"loss": 0.0,
"step": 2240
},
{
"epoch": 0.8226691042047533,
"grad_norm": 0.0,
"learning_rate": 2.506398537477148e-05,
"loss": 0.0,
"step": 2250
},
{
"epoch": 0.8263254113345521,
"grad_norm": 0.0,
"learning_rate": 2.504204753199269e-05,
"loss": 0.0,
"step": 2260
},
{
"epoch": 0.829981718464351,
"grad_norm": 0.0,
"learning_rate": 2.5020109689213896e-05,
"loss": 0.0,
"step": 2270
},
{
"epoch": 0.8336380255941499,
"grad_norm": 0.0,
"learning_rate": 2.49981718464351e-05,
"loss": 0.0,
"step": 2280
},
{
"epoch": 0.8372943327239488,
"grad_norm": 0.0,
"learning_rate": 2.4976234003656305e-05,
"loss": 0.0,
"step": 2290
},
{
"epoch": 0.8409506398537477,
"grad_norm": 0.0,
"learning_rate": 2.4954296160877515e-05,
"loss": 0.0,
"step": 2300
},
{
"epoch": 0.8446069469835467,
"grad_norm": 0.0,
"learning_rate": 2.493235831809872e-05,
"loss": 0.0,
"step": 2310
},
{
"epoch": 0.8482632541133455,
"grad_norm": 0.0,
"learning_rate": 2.4910420475319927e-05,
"loss": 0.0,
"step": 2320
},
{
"epoch": 0.8519195612431444,
"grad_norm": 0.0,
"learning_rate": 2.4888482632541133e-05,
"loss": 0.0,
"step": 2330
},
{
"epoch": 0.8555758683729433,
"grad_norm": 0.0,
"learning_rate": 2.4866544789762343e-05,
"loss": 0.0,
"step": 2340
},
{
"epoch": 0.8592321755027422,
"grad_norm": 0.0,
"learning_rate": 2.484460694698355e-05,
"loss": 0.0,
"step": 2350
},
{
"epoch": 0.8628884826325411,
"grad_norm": 0.0,
"learning_rate": 2.4822669104204755e-05,
"loss": 0.0,
"step": 2360
},
{
"epoch": 0.8665447897623401,
"grad_norm": 0.0,
"learning_rate": 2.480073126142596e-05,
"loss": 0.0,
"step": 2370
},
{
"epoch": 0.870201096892139,
"grad_norm": 0.0,
"learning_rate": 2.4778793418647164e-05,
"loss": 0.0,
"step": 2380
},
{
"epoch": 0.8738574040219378,
"grad_norm": 0.0,
"learning_rate": 2.4756855575868373e-05,
"loss": 0.0,
"step": 2390
},
{
"epoch": 0.8775137111517367,
"grad_norm": 0.0,
"learning_rate": 2.473491773308958e-05,
"loss": 0.0,
"step": 2400
},
{
"epoch": 0.8811700182815356,
"grad_norm": 0.0,
"learning_rate": 2.4712979890310786e-05,
"loss": 0.0,
"step": 2410
},
{
"epoch": 0.8848263254113345,
"grad_norm": 0.0,
"learning_rate": 2.4691042047531992e-05,
"loss": 0.0,
"step": 2420
},
{
"epoch": 0.8884826325411335,
"grad_norm": 0.0,
"learning_rate": 2.46691042047532e-05,
"loss": 0.0,
"step": 2430
},
{
"epoch": 0.8921389396709324,
"grad_norm": 0.0,
"learning_rate": 2.4647166361974408e-05,
"loss": 0.0,
"step": 2440
},
{
"epoch": 0.8957952468007313,
"grad_norm": 0.0,
"learning_rate": 2.4625228519195614e-05,
"loss": 0.0,
"step": 2450
},
{
"epoch": 0.8994515539305301,
"grad_norm": 0.0,
"learning_rate": 2.460329067641682e-05,
"loss": 0.0,
"step": 2460
},
{
"epoch": 0.903107861060329,
"grad_norm": 0.0,
"learning_rate": 2.458135283363803e-05,
"loss": 0.0,
"step": 2470
},
{
"epoch": 0.906764168190128,
"grad_norm": 0.0,
"learning_rate": 2.4559414990859232e-05,
"loss": 0.0,
"step": 2480
},
{
"epoch": 0.9104204753199269,
"grad_norm": 0.0,
"learning_rate": 2.4537477148080438e-05,
"loss": 0.0,
"step": 2490
},
{
"epoch": 0.9140767824497258,
"grad_norm": 0.0,
"learning_rate": 2.4515539305301644e-05,
"loss": 0.0,
"step": 2500
},
{
"epoch": 0.9177330895795247,
"grad_norm": 0.0,
"learning_rate": 2.449360146252285e-05,
"loss": 0.0,
"step": 2510
},
{
"epoch": 0.9213893967093236,
"grad_norm": 0.0,
"learning_rate": 2.447166361974406e-05,
"loss": 0.0,
"step": 2520
},
{
"epoch": 0.9250457038391224,
"grad_norm": 0.0,
"learning_rate": 2.4449725776965266e-05,
"loss": 0.0,
"step": 2530
},
{
"epoch": 0.9287020109689214,
"grad_norm": 0.0,
"learning_rate": 2.4427787934186472e-05,
"loss": 0.0,
"step": 2540
},
{
"epoch": 0.9323583180987203,
"grad_norm": 0.0,
"learning_rate": 2.440585009140768e-05,
"loss": 0.0,
"step": 2550
},
{
"epoch": 0.9360146252285192,
"grad_norm": 0.0,
"learning_rate": 2.4383912248628888e-05,
"loss": 0.0,
"step": 2560
},
{
"epoch": 0.9396709323583181,
"grad_norm": 0.0,
"learning_rate": 2.4361974405850094e-05,
"loss": 0.0,
"step": 2570
},
{
"epoch": 0.943327239488117,
"grad_norm": 0.0,
"learning_rate": 2.4340036563071297e-05,
"loss": 0.0,
"step": 2580
},
{
"epoch": 0.946983546617916,
"grad_norm": 0.0,
"learning_rate": 2.4318098720292503e-05,
"loss": 0.0,
"step": 2590
},
{
"epoch": 0.9506398537477148,
"grad_norm": 0.0,
"learning_rate": 2.4296160877513713e-05,
"loss": 0.0,
"step": 2600
},
{
"epoch": 0.9542961608775137,
"grad_norm": 0.0,
"learning_rate": 2.427422303473492e-05,
"loss": 0.0,
"step": 2610
},
{
"epoch": 0.9579524680073126,
"grad_norm": 0.0,
"learning_rate": 2.4252285191956125e-05,
"loss": 0.0,
"step": 2620
},
{
"epoch": 0.9616087751371115,
"grad_norm": 0.0,
"learning_rate": 2.423034734917733e-05,
"loss": 0.0,
"step": 2630
},
{
"epoch": 0.9652650822669104,
"grad_norm": 0.0,
"learning_rate": 2.4208409506398537e-05,
"loss": 0.0,
"step": 2640
},
{
"epoch": 0.9689213893967094,
"grad_norm": 0.0,
"learning_rate": 2.4186471663619747e-05,
"loss": 0.0,
"step": 2650
},
{
"epoch": 0.9725776965265083,
"grad_norm": 0.0,
"learning_rate": 2.4164533820840953e-05,
"loss": 0.0,
"step": 2660
},
{
"epoch": 0.9762340036563071,
"grad_norm": 0.0,
"learning_rate": 2.414259597806216e-05,
"loss": 0.0,
"step": 2670
},
{
"epoch": 0.979890310786106,
"grad_norm": 0.0,
"learning_rate": 2.4120658135283362e-05,
"loss": 0.0,
"step": 2680
},
{
"epoch": 0.9835466179159049,
"grad_norm": 0.0,
"learning_rate": 2.409872029250457e-05,
"loss": 0.0,
"step": 2690
},
{
"epoch": 0.9872029250457038,
"grad_norm": 0.0,
"learning_rate": 2.4076782449725777e-05,
"loss": 0.0,
"step": 2700
},
{
"epoch": 0.9908592321755028,
"grad_norm": 0.0,
"learning_rate": 2.4054844606946984e-05,
"loss": 0.0,
"step": 2710
},
{
"epoch": 0.9945155393053017,
"grad_norm": 0.0,
"learning_rate": 2.403290676416819e-05,
"loss": 0.0,
"step": 2720
},
{
"epoch": 0.9981718464351006,
"grad_norm": 0.0,
"learning_rate": 2.4010968921389396e-05,
"loss": 0.0,
"step": 2730
},
{
"epoch": 1.0018281535648994,
"grad_norm": 0.0,
"learning_rate": 2.3989031078610605e-05,
"loss": 0.0,
"step": 2740
},
{
"epoch": 1.0054844606946984,
"grad_norm": 0.0,
"learning_rate": 2.396709323583181e-05,
"loss": 0.0,
"step": 2750
},
{
"epoch": 1.0091407678244972,
"grad_norm": 0.0,
"learning_rate": 2.3945155393053018e-05,
"loss": 0.0,
"step": 2760
},
{
"epoch": 1.012797074954296,
"grad_norm": 0.0,
"learning_rate": 2.3923217550274224e-05,
"loss": 0.0,
"step": 2770
},
{
"epoch": 1.016453382084095,
"grad_norm": 0.0,
"learning_rate": 2.390127970749543e-05,
"loss": 0.0,
"step": 2780
},
{
"epoch": 1.0201096892138939,
"grad_norm": 0.0,
"learning_rate": 2.3879341864716636e-05,
"loss": 0.0,
"step": 2790
},
{
"epoch": 1.023765996343693,
"grad_norm": 0.0,
"learning_rate": 2.3857404021937842e-05,
"loss": 0.0,
"step": 2800
},
{
"epoch": 1.0274223034734917,
"grad_norm": 0.0,
"learning_rate": 2.383546617915905e-05,
"loss": 0.0,
"step": 2810
},
{
"epoch": 1.0310786106032908,
"grad_norm": 0.0,
"learning_rate": 2.3813528336380258e-05,
"loss": 0.0,
"step": 2820
},
{
"epoch": 1.0347349177330896,
"grad_norm": 0.0,
"learning_rate": 2.3791590493601464e-05,
"loss": 0.0,
"step": 2830
},
{
"epoch": 1.0383912248628886,
"grad_norm": 0.0,
"learning_rate": 2.376965265082267e-05,
"loss": 0.0,
"step": 2840
},
{
"epoch": 1.0420475319926874,
"grad_norm": 0.0,
"learning_rate": 2.3747714808043876e-05,
"loss": 0.0,
"step": 2850
},
{
"epoch": 1.0457038391224862,
"grad_norm": 0.0,
"learning_rate": 2.3725776965265082e-05,
"loss": 0.0,
"step": 2860
},
{
"epoch": 1.0493601462522852,
"grad_norm": 0.0,
"learning_rate": 2.3703839122486292e-05,
"loss": 0.0,
"step": 2870
},
{
"epoch": 1.053016453382084,
"grad_norm": 0.0,
"learning_rate": 2.3681901279707495e-05,
"loss": 0.0,
"step": 2880
},
{
"epoch": 1.056672760511883,
"grad_norm": 0.0,
"learning_rate": 2.36599634369287e-05,
"loss": 0.0,
"step": 2890
},
{
"epoch": 1.0603290676416819,
"grad_norm": 0.0,
"learning_rate": 2.3638025594149907e-05,
"loss": 0.0,
"step": 2900
},
{
"epoch": 1.0639853747714807,
"grad_norm": 0.0,
"learning_rate": 2.3616087751371117e-05,
"loss": 0.0,
"step": 2910
},
{
"epoch": 1.0676416819012797,
"grad_norm": 0.0,
"learning_rate": 2.3594149908592323e-05,
"loss": 0.0,
"step": 2920
},
{
"epoch": 1.0712979890310785,
"grad_norm": 0.0,
"learning_rate": 2.357221206581353e-05,
"loss": 0.0,
"step": 2930
},
{
"epoch": 1.0749542961608776,
"grad_norm": 0.0,
"learning_rate": 2.3550274223034735e-05,
"loss": 0.0,
"step": 2940
},
{
"epoch": 1.0786106032906764,
"grad_norm": 0.0,
"learning_rate": 2.3528336380255944e-05,
"loss": 0.0,
"step": 2950
},
{
"epoch": 1.0822669104204754,
"grad_norm": 0.0,
"learning_rate": 2.350639853747715e-05,
"loss": 0.0,
"step": 2960
},
{
"epoch": 1.0859232175502742,
"grad_norm": 0.0,
"learning_rate": 2.3484460694698357e-05,
"loss": 0.0,
"step": 2970
},
{
"epoch": 1.0895795246800732,
"grad_norm": 0.0,
"learning_rate": 2.346252285191956e-05,
"loss": 0.0,
"step": 2980
},
{
"epoch": 1.093235831809872,
"grad_norm": 0.0,
"learning_rate": 2.3440585009140766e-05,
"loss": 0.0,
"step": 2990
},
{
"epoch": 1.0968921389396709,
"grad_norm": 0.0,
"learning_rate": 2.3418647166361975e-05,
"loss": 0.0,
"step": 3000
},
{
"epoch": 1.1005484460694699,
"grad_norm": 0.0,
"learning_rate": 2.339670932358318e-05,
"loss": 0.0,
"step": 3010
},
{
"epoch": 1.1042047531992687,
"grad_norm": 0.0,
"learning_rate": 2.3374771480804387e-05,
"loss": 0.0,
"step": 3020
},
{
"epoch": 1.1078610603290677,
"grad_norm": 0.0,
"learning_rate": 2.3352833638025594e-05,
"loss": 0.0,
"step": 3030
},
{
"epoch": 1.1115173674588665,
"grad_norm": 0.0,
"learning_rate": 2.3330895795246803e-05,
"loss": 0.0,
"step": 3040
},
{
"epoch": 1.1151736745886653,
"grad_norm": 0.0,
"learning_rate": 2.330895795246801e-05,
"loss": 0.0,
"step": 3050
},
{
"epoch": 1.1188299817184644,
"grad_norm": 0.0,
"learning_rate": 2.3287020109689215e-05,
"loss": 0.0,
"step": 3060
},
{
"epoch": 1.1224862888482632,
"grad_norm": 0.0,
"learning_rate": 2.326508226691042e-05,
"loss": 0.0,
"step": 3070
},
{
"epoch": 1.1261425959780622,
"grad_norm": 0.0,
"learning_rate": 2.3243144424131624e-05,
"loss": 0.0,
"step": 3080
},
{
"epoch": 1.129798903107861,
"grad_norm": 0.0,
"learning_rate": 2.3221206581352834e-05,
"loss": 0.0,
"step": 3090
},
{
"epoch": 1.13345521023766,
"grad_norm": 0.0,
"learning_rate": 2.319926873857404e-05,
"loss": 0.0,
"step": 3100
},
{
"epoch": 1.1371115173674589,
"grad_norm": 0.0,
"learning_rate": 2.3177330895795246e-05,
"loss": 0.0,
"step": 3110
},
{
"epoch": 1.1407678244972579,
"grad_norm": 0.0,
"learning_rate": 2.3155393053016452e-05,
"loss": 0.0,
"step": 3120
},
{
"epoch": 1.1444241316270567,
"grad_norm": 0.0,
"learning_rate": 2.3133455210237662e-05,
"loss": 0.0,
"step": 3130
},
{
"epoch": 1.1480804387568555,
"grad_norm": 0.0,
"learning_rate": 2.3111517367458868e-05,
"loss": 0.0,
"step": 3140
},
{
"epoch": 1.1517367458866545,
"grad_norm": 0.0,
"learning_rate": 2.3089579524680074e-05,
"loss": 0.0,
"step": 3150
},
{
"epoch": 1.1553930530164533,
"grad_norm": 0.0,
"learning_rate": 2.306764168190128e-05,
"loss": 0.0,
"step": 3160
},
{
"epoch": 1.1590493601462524,
"grad_norm": 0.0,
"learning_rate": 2.304570383912249e-05,
"loss": 0.0,
"step": 3170
},
{
"epoch": 1.1627056672760512,
"grad_norm": 0.0,
"learning_rate": 2.3023765996343693e-05,
"loss": 0.0,
"step": 3180
},
{
"epoch": 1.16636197440585,
"grad_norm": 0.0,
"learning_rate": 2.30018281535649e-05,
"loss": 0.0,
"step": 3190
},
{
"epoch": 1.170018281535649,
"grad_norm": 0.0,
"learning_rate": 2.2979890310786105e-05,
"loss": 0.0,
"step": 3200
},
{
"epoch": 1.1736745886654478,
"grad_norm": 0.0,
"learning_rate": 2.295795246800731e-05,
"loss": 0.0,
"step": 3210
},
{
"epoch": 1.1773308957952469,
"grad_norm": 0.0,
"learning_rate": 2.293601462522852e-05,
"loss": 0.0,
"step": 3220
},
{
"epoch": 1.1809872029250457,
"grad_norm": 0.0,
"learning_rate": 2.2914076782449727e-05,
"loss": 0.0,
"step": 3230
},
{
"epoch": 1.1846435100548447,
"grad_norm": 0.0,
"learning_rate": 2.2892138939670933e-05,
"loss": 0.0,
"step": 3240
},
{
"epoch": 1.1882998171846435,
"grad_norm": 0.0,
"learning_rate": 2.287020109689214e-05,
"loss": 0.0,
"step": 3250
},
{
"epoch": 1.1919561243144425,
"grad_norm": 0.0,
"learning_rate": 2.284826325411335e-05,
"loss": 0.0,
"step": 3260
},
{
"epoch": 1.1956124314442413,
"grad_norm": 0.0,
"learning_rate": 2.2826325411334555e-05,
"loss": 0.0,
"step": 3270
},
{
"epoch": 1.1992687385740401,
"grad_norm": 0.0,
"learning_rate": 2.280438756855576e-05,
"loss": 0.0,
"step": 3280
},
{
"epoch": 1.2029250457038392,
"grad_norm": 0.0,
"learning_rate": 2.2782449725776963e-05,
"loss": 0.0,
"step": 3290
},
{
"epoch": 1.206581352833638,
"grad_norm": 0.0,
"learning_rate": 2.2760511882998173e-05,
"loss": 0.0,
"step": 3300
},
{
"epoch": 1.210237659963437,
"grad_norm": 0.0,
"learning_rate": 2.273857404021938e-05,
"loss": 0.0,
"step": 3310
},
{
"epoch": 1.2138939670932358,
"grad_norm": 0.0,
"learning_rate": 2.2716636197440585e-05,
"loss": 0.0,
"step": 3320
},
{
"epoch": 1.2175502742230346,
"grad_norm": 0.0,
"learning_rate": 2.269469835466179e-05,
"loss": 0.0,
"step": 3330
},
{
"epoch": 1.2212065813528337,
"grad_norm": 0.0,
"learning_rate": 2.2672760511882998e-05,
"loss": 0.0,
"step": 3340
},
{
"epoch": 1.2248628884826325,
"grad_norm": 0.0,
"learning_rate": 2.2650822669104207e-05,
"loss": 0.0,
"step": 3350
},
{
"epoch": 1.2285191956124315,
"grad_norm": 0.0,
"learning_rate": 2.2628884826325413e-05,
"loss": 0.0,
"step": 3360
},
{
"epoch": 1.2321755027422303,
"grad_norm": 0.0,
"learning_rate": 2.260694698354662e-05,
"loss": 0.0,
"step": 3370
},
{
"epoch": 1.2358318098720293,
"grad_norm": 0.0,
"learning_rate": 2.2585009140767826e-05,
"loss": 0.0,
"step": 3380
},
{
"epoch": 1.2394881170018281,
"grad_norm": 0.0,
"learning_rate": 2.256307129798903e-05,
"loss": 0.0,
"step": 3390
},
{
"epoch": 1.2431444241316272,
"grad_norm": 0.0,
"learning_rate": 2.2541133455210238e-05,
"loss": 0.0,
"step": 3400
},
{
"epoch": 1.246800731261426,
"grad_norm": 0.0,
"learning_rate": 2.2519195612431444e-05,
"loss": 0.0,
"step": 3410
},
{
"epoch": 1.2504570383912248,
"grad_norm": 0.0,
"learning_rate": 2.249725776965265e-05,
"loss": 0.0,
"step": 3420
},
{
"epoch": 1.2541133455210238,
"grad_norm": 0.0,
"learning_rate": 2.247531992687386e-05,
"loss": 0.0,
"step": 3430
},
{
"epoch": 1.2577696526508226,
"grad_norm": 0.0,
"learning_rate": 2.2453382084095066e-05,
"loss": 0.0,
"step": 3440
},
{
"epoch": 1.2614259597806217,
"grad_norm": 0.0,
"learning_rate": 2.2431444241316272e-05,
"loss": 0.0,
"step": 3450
},
{
"epoch": 1.2650822669104205,
"grad_norm": 0.0,
"learning_rate": 2.2409506398537478e-05,
"loss": 0.0,
"step": 3460
},
{
"epoch": 1.2687385740402193,
"grad_norm": 0.0,
"learning_rate": 2.2387568555758684e-05,
"loss": 0.0,
"step": 3470
},
{
"epoch": 1.2723948811700183,
"grad_norm": 0.0,
"learning_rate": 2.2365630712979894e-05,
"loss": 0.0,
"step": 3480
},
{
"epoch": 1.2760511882998171,
"grad_norm": 0.0,
"learning_rate": 2.2343692870201096e-05,
"loss": 0.0,
"step": 3490
},
{
"epoch": 1.2797074954296161,
"grad_norm": 0.0,
"learning_rate": 2.2321755027422303e-05,
"loss": 0.0,
"step": 3500
},
{
"epoch": 1.283363802559415,
"grad_norm": 0.0,
"learning_rate": 2.229981718464351e-05,
"loss": 0.0,
"step": 3510
},
{
"epoch": 1.2870201096892138,
"grad_norm": 0.0,
"learning_rate": 2.2277879341864718e-05,
"loss": 0.0,
"step": 3520
},
{
"epoch": 1.2906764168190128,
"grad_norm": 0.0,
"learning_rate": 2.2255941499085924e-05,
"loss": 0.0,
"step": 3530
},
{
"epoch": 1.2943327239488118,
"grad_norm": 0.0,
"learning_rate": 2.223400365630713e-05,
"loss": 0.0,
"step": 3540
},
{
"epoch": 1.2979890310786106,
"grad_norm": 0.0,
"learning_rate": 2.2212065813528337e-05,
"loss": 0.0,
"step": 3550
},
{
"epoch": 1.3016453382084094,
"grad_norm": 0.0,
"learning_rate": 2.2190127970749543e-05,
"loss": 0.0,
"step": 3560
},
{
"epoch": 1.3053016453382085,
"grad_norm": 0.0,
"learning_rate": 2.2168190127970752e-05,
"loss": 0.0,
"step": 3570
},
{
"epoch": 1.3089579524680073,
"grad_norm": 0.0,
"learning_rate": 2.214625228519196e-05,
"loss": 0.0,
"step": 3580
},
{
"epoch": 1.3126142595978063,
"grad_norm": 0.0,
"learning_rate": 2.212431444241316e-05,
"loss": 0.0,
"step": 3590
},
{
"epoch": 1.3162705667276051,
"grad_norm": 0.0,
"learning_rate": 2.2102376599634367e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 1.319926873857404,
"grad_norm": 0.0,
"learning_rate": 2.2080438756855577e-05,
"loss": 0.0,
"step": 3610
},
{
"epoch": 1.323583180987203,
"grad_norm": 0.0,
"learning_rate": 2.2058500914076783e-05,
"loss": 0.0,
"step": 3620
},
{
"epoch": 1.3272394881170018,
"grad_norm": 0.0,
"learning_rate": 2.203656307129799e-05,
"loss": 0.0,
"step": 3630
},
{
"epoch": 1.3308957952468008,
"grad_norm": 0.0,
"learning_rate": 2.2014625228519195e-05,
"loss": 0.0,
"step": 3640
},
{
"epoch": 1.3345521023765996,
"grad_norm": 0.0,
"learning_rate": 2.1992687385740405e-05,
"loss": 0.0,
"step": 3650
},
{
"epoch": 1.3382084095063984,
"grad_norm": 0.0,
"learning_rate": 2.197074954296161e-05,
"loss": 0.0,
"step": 3660
},
{
"epoch": 1.3418647166361974,
"grad_norm": 0.0,
"learning_rate": 2.1948811700182817e-05,
"loss": 0.0,
"step": 3670
},
{
"epoch": 1.3455210237659965,
"grad_norm": 0.0,
"learning_rate": 2.1926873857404023e-05,
"loss": 0.0,
"step": 3680
},
{
"epoch": 1.3491773308957953,
"grad_norm": 0.0,
"learning_rate": 2.1904936014625226e-05,
"loss": 0.0,
"step": 3690
},
{
"epoch": 1.352833638025594,
"grad_norm": 0.0,
"learning_rate": 2.1882998171846436e-05,
"loss": 0.0,
"step": 3700
},
{
"epoch": 1.3564899451553931,
"grad_norm": 0.0,
"learning_rate": 2.1861060329067642e-05,
"loss": 0.0,
"step": 3710
},
{
"epoch": 1.360146252285192,
"grad_norm": 0.0,
"learning_rate": 2.1839122486288848e-05,
"loss": 0.0,
"step": 3720
},
{
"epoch": 1.363802559414991,
"grad_norm": 0.0,
"learning_rate": 2.1817184643510054e-05,
"loss": 0.0,
"step": 3730
},
{
"epoch": 1.3674588665447898,
"grad_norm": 0.0,
"learning_rate": 2.1795246800731264e-05,
"loss": 0.0,
"step": 3740
},
{
"epoch": 1.3711151736745886,
"grad_norm": 0.0,
"learning_rate": 2.177330895795247e-05,
"loss": 0.0,
"step": 3750
},
{
"epoch": 1.3747714808043876,
"grad_norm": 0.0,
"learning_rate": 2.1751371115173676e-05,
"loss": 0.0,
"step": 3760
},
{
"epoch": 1.3784277879341864,
"grad_norm": 0.0,
"learning_rate": 2.1729433272394882e-05,
"loss": 0.0,
"step": 3770
},
{
"epoch": 1.3820840950639854,
"grad_norm": 0.0,
"learning_rate": 2.170749542961609e-05,
"loss": 0.0,
"step": 3780
},
{
"epoch": 1.3857404021937842,
"grad_norm": 0.0,
"learning_rate": 2.1685557586837294e-05,
"loss": 0.0,
"step": 3790
},
{
"epoch": 1.389396709323583,
"grad_norm": 0.0,
"learning_rate": 2.16636197440585e-05,
"loss": 0.0,
"step": 3800
},
{
"epoch": 1.393053016453382,
"grad_norm": 0.0,
"learning_rate": 2.1641681901279707e-05,
"loss": 0.0,
"step": 3810
},
{
"epoch": 1.3967093235831811,
"grad_norm": 0.0,
"learning_rate": 2.1619744058500913e-05,
"loss": 0.0,
"step": 3820
},
{
"epoch": 1.40036563071298,
"grad_norm": 0.0,
"learning_rate": 2.1597806215722122e-05,
"loss": 0.0,
"step": 3830
},
{
"epoch": 1.4040219378427787,
"grad_norm": 0.0,
"learning_rate": 2.157586837294333e-05,
"loss": 0.0,
"step": 3840
},
{
"epoch": 1.4076782449725778,
"grad_norm": 0.0,
"learning_rate": 2.1553930530164534e-05,
"loss": 0.0,
"step": 3850
},
{
"epoch": 1.4113345521023766,
"grad_norm": 0.0,
"learning_rate": 2.153199268738574e-05,
"loss": 0.0,
"step": 3860
},
{
"epoch": 1.4149908592321756,
"grad_norm": 0.0,
"learning_rate": 2.151005484460695e-05,
"loss": 0.0,
"step": 3870
},
{
"epoch": 1.4186471663619744,
"grad_norm": 0.0,
"learning_rate": 2.1488117001828156e-05,
"loss": 0.0,
"step": 3880
},
{
"epoch": 1.4223034734917732,
"grad_norm": 0.0,
"learning_rate": 2.146617915904936e-05,
"loss": 0.0,
"step": 3890
},
{
"epoch": 1.4259597806215722,
"grad_norm": 0.0,
"learning_rate": 2.1444241316270565e-05,
"loss": 0.0,
"step": 3900
},
{
"epoch": 1.429616087751371,
"grad_norm": 0.0,
"learning_rate": 2.142230347349177e-05,
"loss": 0.0,
"step": 3910
},
{
"epoch": 1.43327239488117,
"grad_norm": 0.0,
"learning_rate": 2.140036563071298e-05,
"loss": 0.0,
"step": 3920
},
{
"epoch": 1.436928702010969,
"grad_norm": 0.0,
"learning_rate": 2.1378427787934187e-05,
"loss": 0.0,
"step": 3930
},
{
"epoch": 1.4405850091407677,
"grad_norm": 0.0,
"learning_rate": 2.1356489945155393e-05,
"loss": 0.0,
"step": 3940
},
{
"epoch": 1.4442413162705667,
"grad_norm": 0.0,
"learning_rate": 2.13345521023766e-05,
"loss": 0.0,
"step": 3950
},
{
"epoch": 1.4478976234003658,
"grad_norm": 0.0,
"learning_rate": 2.131261425959781e-05,
"loss": 0.0,
"step": 3960
},
{
"epoch": 1.4515539305301646,
"grad_norm": 0.0,
"learning_rate": 2.1290676416819015e-05,
"loss": 0.0,
"step": 3970
},
{
"epoch": 1.4552102376599634,
"grad_norm": 0.0,
"learning_rate": 2.126873857404022e-05,
"loss": 0.0,
"step": 3980
},
{
"epoch": 1.4588665447897624,
"grad_norm": 0.0,
"learning_rate": 2.1246800731261424e-05,
"loss": 0.0,
"step": 3990
},
{
"epoch": 1.4625228519195612,
"grad_norm": 0.0,
"learning_rate": 2.1224862888482633e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 1.4661791590493602,
"grad_norm": 0.0,
"learning_rate": 2.120292504570384e-05,
"loss": 0.0,
"step": 4010
},
{
"epoch": 1.469835466179159,
"grad_norm": 0.0,
"learning_rate": 2.1180987202925046e-05,
"loss": 0.0,
"step": 4020
},
{
"epoch": 1.4734917733089579,
"grad_norm": 0.0,
"learning_rate": 2.1159049360146252e-05,
"loss": 0.0,
"step": 4030
},
{
"epoch": 1.477148080438757,
"grad_norm": 0.0,
"learning_rate": 2.1137111517367458e-05,
"loss": 0.0,
"step": 4040
},
{
"epoch": 1.4808043875685557,
"grad_norm": 0.0,
"learning_rate": 2.1115173674588667e-05,
"loss": 0.0,
"step": 4050
},
{
"epoch": 1.4844606946983547,
"grad_norm": 0.0,
"learning_rate": 2.1093235831809874e-05,
"loss": 0.0,
"step": 4060
},
{
"epoch": 1.4881170018281535,
"grad_norm": 0.0,
"learning_rate": 2.107129798903108e-05,
"loss": 0.0,
"step": 4070
},
{
"epoch": 1.4917733089579523,
"grad_norm": 0.0,
"learning_rate": 2.1049360146252286e-05,
"loss": 0.0,
"step": 4080
},
{
"epoch": 1.4954296160877514,
"grad_norm": 0.0,
"learning_rate": 2.1027422303473492e-05,
"loss": 0.0,
"step": 4090
},
{
"epoch": 1.4990859232175504,
"grad_norm": 0.0,
"learning_rate": 2.1005484460694698e-05,
"loss": 0.0,
"step": 4100
},
{
"epoch": 1.5027422303473492,
"grad_norm": 0.0,
"learning_rate": 2.0983546617915904e-05,
"loss": 0.0,
"step": 4110
},
{
"epoch": 1.506398537477148,
"grad_norm": 0.0,
"learning_rate": 2.096160877513711e-05,
"loss": 0.0,
"step": 4120
},
{
"epoch": 1.5100548446069468,
"grad_norm": 0.0,
"learning_rate": 2.093967093235832e-05,
"loss": 0.0,
"step": 4130
},
{
"epoch": 1.5137111517367459,
"grad_norm": 0.0,
"learning_rate": 2.0917733089579526e-05,
"loss": 0.0,
"step": 4140
},
{
"epoch": 1.517367458866545,
"grad_norm": 0.0,
"learning_rate": 2.0895795246800732e-05,
"loss": 0.0,
"step": 4150
},
{
"epoch": 1.5210237659963437,
"grad_norm": 0.0,
"learning_rate": 2.087385740402194e-05,
"loss": 0.0,
"step": 4160
},
{
"epoch": 1.5246800731261425,
"grad_norm": 0.0,
"learning_rate": 2.0851919561243145e-05,
"loss": 0.0,
"step": 4170
},
{
"epoch": 1.5283363802559415,
"grad_norm": 0.0,
"learning_rate": 2.0829981718464354e-05,
"loss": 0.0,
"step": 4180
},
{
"epoch": 1.5319926873857403,
"grad_norm": 0.0,
"learning_rate": 2.0808043875685557e-05,
"loss": 0.0,
"step": 4190
},
{
"epoch": 1.5356489945155394,
"grad_norm": 0.0,
"learning_rate": 2.0786106032906763e-05,
"loss": 0.0,
"step": 4200
},
{
"epoch": 1.5393053016453382,
"grad_norm": 0.0,
"learning_rate": 2.076416819012797e-05,
"loss": 0.0,
"step": 4210
},
{
"epoch": 1.542961608775137,
"grad_norm": 0.0,
"learning_rate": 2.074223034734918e-05,
"loss": 0.0,
"step": 4220
},
{
"epoch": 1.546617915904936,
"grad_norm": 0.0,
"learning_rate": 2.0720292504570385e-05,
"loss": 0.0,
"step": 4230
},
{
"epoch": 1.550274223034735,
"grad_norm": 0.0,
"learning_rate": 2.069835466179159e-05,
"loss": 0.0,
"step": 4240
},
{
"epoch": 1.5539305301645339,
"grad_norm": 0.0,
"learning_rate": 2.0676416819012797e-05,
"loss": 0.0,
"step": 4250
},
{
"epoch": 1.5575868372943327,
"grad_norm": 0.0,
"learning_rate": 2.0654478976234007e-05,
"loss": 0.0,
"step": 4260
},
{
"epoch": 1.5612431444241315,
"grad_norm": 0.0,
"learning_rate": 2.0632541133455213e-05,
"loss": 0.0,
"step": 4270
},
{
"epoch": 1.5648994515539305,
"grad_norm": 0.0,
"learning_rate": 2.061060329067642e-05,
"loss": 0.0,
"step": 4280
},
{
"epoch": 1.5685557586837295,
"grad_norm": 0.0,
"learning_rate": 2.058866544789762e-05,
"loss": 0.0,
"step": 4290
},
{
"epoch": 1.5722120658135283,
"grad_norm": 0.0,
"learning_rate": 2.0566727605118828e-05,
"loss": 0.0,
"step": 4300
},
{
"epoch": 1.5758683729433272,
"grad_norm": 0.0,
"learning_rate": 2.0544789762340037e-05,
"loss": 0.0,
"step": 4310
},
{
"epoch": 1.5795246800731262,
"grad_norm": 0.0,
"learning_rate": 2.0522851919561243e-05,
"loss": 0.0,
"step": 4320
},
{
"epoch": 1.583180987202925,
"grad_norm": 0.0,
"learning_rate": 2.050091407678245e-05,
"loss": 0.0,
"step": 4330
},
{
"epoch": 1.586837294332724,
"grad_norm": 0.0,
"learning_rate": 2.0478976234003656e-05,
"loss": 0.0,
"step": 4340
},
{
"epoch": 1.5904936014625228,
"grad_norm": 0.0,
"learning_rate": 2.0457038391224865e-05,
"loss": 0.0,
"step": 4350
},
{
"epoch": 1.5941499085923216,
"grad_norm": 0.0,
"learning_rate": 2.043510054844607e-05,
"loss": 0.0,
"step": 4360
},
{
"epoch": 1.5978062157221207,
"grad_norm": 0.0,
"learning_rate": 2.0413162705667278e-05,
"loss": 0.0,
"step": 4370
},
{
"epoch": 1.6014625228519197,
"grad_norm": 0.0,
"learning_rate": 2.0391224862888484e-05,
"loss": 0.0,
"step": 4380
},
{
"epoch": 1.6051188299817185,
"grad_norm": 0.0,
"learning_rate": 2.036928702010969e-05,
"loss": 0.0,
"step": 4390
},
{
"epoch": 1.6087751371115173,
"grad_norm": 0.0,
"learning_rate": 2.0347349177330896e-05,
"loss": 0.0,
"step": 4400
},
{
"epoch": 1.6124314442413161,
"grad_norm": 0.0,
"learning_rate": 2.0325411334552102e-05,
"loss": 0.0,
"step": 4410
},
{
"epoch": 1.6160877513711152,
"grad_norm": 0.0,
"learning_rate": 2.0303473491773308e-05,
"loss": 0.0,
"step": 4420
},
{
"epoch": 1.6197440585009142,
"grad_norm": 0.0,
"learning_rate": 2.0281535648994514e-05,
"loss": 0.0,
"step": 4430
},
{
"epoch": 1.623400365630713,
"grad_norm": 0.0,
"learning_rate": 2.0259597806215724e-05,
"loss": 0.0,
"step": 4440
},
{
"epoch": 1.6270566727605118,
"grad_norm": 0.0,
"learning_rate": 2.023765996343693e-05,
"loss": 0.0,
"step": 4450
},
{
"epoch": 1.6307129798903108,
"grad_norm": 0.0,
"learning_rate": 2.0215722120658136e-05,
"loss": 0.0,
"step": 4460
},
{
"epoch": 1.6343692870201096,
"grad_norm": 0.0,
"learning_rate": 2.0193784277879342e-05,
"loss": 0.0,
"step": 4470
},
{
"epoch": 1.6380255941499087,
"grad_norm": 0.0,
"learning_rate": 2.0171846435100552e-05,
"loss": 0.0,
"step": 4480
},
{
"epoch": 1.6416819012797075,
"grad_norm": 0.0,
"learning_rate": 2.0149908592321758e-05,
"loss": 0.0,
"step": 4490
},
{
"epoch": 1.6453382084095063,
"grad_norm": 0.0,
"learning_rate": 2.012797074954296e-05,
"loss": 0.0,
"step": 4500
},
{
"epoch": 1.6489945155393053,
"grad_norm": 0.0,
"learning_rate": 2.0106032906764167e-05,
"loss": 0.0,
"step": 4510
},
{
"epoch": 1.6526508226691043,
"grad_norm": 0.0,
"learning_rate": 2.0084095063985373e-05,
"loss": 0.0,
"step": 4520
},
{
"epoch": 1.6563071297989032,
"grad_norm": 0.0,
"learning_rate": 2.0062157221206583e-05,
"loss": 0.0,
"step": 4530
},
{
"epoch": 1.659963436928702,
"grad_norm": 0.0,
"learning_rate": 2.004021937842779e-05,
"loss": 0.0,
"step": 4540
},
{
"epoch": 1.6636197440585008,
"grad_norm": 0.0,
"learning_rate": 2.0018281535648995e-05,
"loss": 0.0,
"step": 4550
},
{
"epoch": 1.6672760511882998,
"grad_norm": 0.0,
"learning_rate": 1.99963436928702e-05,
"loss": 0.0,
"step": 4560
},
{
"epoch": 1.6709323583180988,
"grad_norm": 0.0,
"learning_rate": 1.997440585009141e-05,
"loss": 0.0,
"step": 4570
},
{
"epoch": 1.6745886654478976,
"grad_norm": 0.0,
"learning_rate": 1.9952468007312617e-05,
"loss": 0.0,
"step": 4580
},
{
"epoch": 1.6782449725776964,
"grad_norm": 0.0,
"learning_rate": 1.9930530164533823e-05,
"loss": 0.0,
"step": 4590
},
{
"epoch": 1.6819012797074955,
"grad_norm": 0.0,
"learning_rate": 1.9908592321755026e-05,
"loss": 0.0,
"step": 4600
},
{
"epoch": 1.6855575868372943,
"grad_norm": 0.0,
"learning_rate": 1.9886654478976235e-05,
"loss": 0.0,
"step": 4610
},
{
"epoch": 1.6892138939670933,
"grad_norm": 0.0,
"learning_rate": 1.986471663619744e-05,
"loss": 0.0,
"step": 4620
},
{
"epoch": 1.6928702010968921,
"grad_norm": 0.0,
"learning_rate": 1.9842778793418647e-05,
"loss": 0.0,
"step": 4630
},
{
"epoch": 1.696526508226691,
"grad_norm": 0.0,
"learning_rate": 1.9820840950639853e-05,
"loss": 0.0,
"step": 4640
},
{
"epoch": 1.70018281535649,
"grad_norm": 0.0,
"learning_rate": 1.979890310786106e-05,
"loss": 0.0,
"step": 4650
},
{
"epoch": 1.703839122486289,
"grad_norm": 0.0,
"learning_rate": 1.977696526508227e-05,
"loss": 0.0,
"step": 4660
},
{
"epoch": 1.7074954296160878,
"grad_norm": 0.0,
"learning_rate": 1.9755027422303475e-05,
"loss": 0.0,
"step": 4670
},
{
"epoch": 1.7111517367458866,
"grad_norm": 0.0,
"learning_rate": 1.973308957952468e-05,
"loss": 0.0,
"step": 4680
},
{
"epoch": 1.7148080438756854,
"grad_norm": 0.0,
"learning_rate": 1.9711151736745888e-05,
"loss": 0.0,
"step": 4690
},
{
"epoch": 1.7184643510054844,
"grad_norm": 0.0,
"learning_rate": 1.9689213893967094e-05,
"loss": 0.0,
"step": 4700
},
{
"epoch": 1.7221206581352835,
"grad_norm": 0.0,
"learning_rate": 1.96672760511883e-05,
"loss": 0.0,
"step": 4710
},
{
"epoch": 1.7257769652650823,
"grad_norm": 0.0,
"learning_rate": 1.9645338208409506e-05,
"loss": 0.0,
"step": 4720
},
{
"epoch": 1.729433272394881,
"grad_norm": 0.0,
"learning_rate": 1.9623400365630712e-05,
"loss": 0.0,
"step": 4730
},
{
"epoch": 1.7330895795246801,
"grad_norm": 0.0,
"learning_rate": 1.9601462522851918e-05,
"loss": 0.0,
"step": 4740
},
{
"epoch": 1.736745886654479,
"grad_norm": 0.0,
"learning_rate": 1.9579524680073128e-05,
"loss": 0.0,
"step": 4750
},
{
"epoch": 1.740402193784278,
"grad_norm": 0.0,
"learning_rate": 1.9557586837294334e-05,
"loss": 0.0,
"step": 4760
},
{
"epoch": 1.7440585009140768,
"grad_norm": 0.0,
"learning_rate": 1.953564899451554e-05,
"loss": 0.0,
"step": 4770
},
{
"epoch": 1.7477148080438756,
"grad_norm": 0.0,
"learning_rate": 1.9513711151736746e-05,
"loss": 0.0,
"step": 4780
},
{
"epoch": 1.7513711151736746,
"grad_norm": 0.0,
"learning_rate": 1.9491773308957956e-05,
"loss": 0.0,
"step": 4790
},
{
"epoch": 1.7550274223034736,
"grad_norm": 0.0,
"learning_rate": 1.946983546617916e-05,
"loss": 0.0,
"step": 4800
},
{
"epoch": 1.7586837294332724,
"grad_norm": 0.0,
"learning_rate": 1.9447897623400365e-05,
"loss": 0.0,
"step": 4810
},
{
"epoch": 1.7623400365630713,
"grad_norm": 0.0,
"learning_rate": 1.942595978062157e-05,
"loss": 0.0,
"step": 4820
},
{
"epoch": 1.76599634369287,
"grad_norm": 0.0,
"learning_rate": 1.940402193784278e-05,
"loss": 0.0,
"step": 4830
},
{
"epoch": 1.769652650822669,
"grad_norm": 0.0,
"learning_rate": 1.9382084095063986e-05,
"loss": 0.0,
"step": 4840
},
{
"epoch": 1.7733089579524681,
"grad_norm": 0.0,
"learning_rate": 1.9360146252285193e-05,
"loss": 0.0,
"step": 4850
},
{
"epoch": 1.776965265082267,
"grad_norm": 0.0,
"learning_rate": 1.93382084095064e-05,
"loss": 0.0,
"step": 4860
},
{
"epoch": 1.7806215722120657,
"grad_norm": 0.0,
"learning_rate": 1.9316270566727605e-05,
"loss": 0.0,
"step": 4870
},
{
"epoch": 1.7842778793418648,
"grad_norm": 0.0,
"learning_rate": 1.9294332723948814e-05,
"loss": 0.0,
"step": 4880
},
{
"epoch": 1.7879341864716636,
"grad_norm": 0.0,
"learning_rate": 1.927239488117002e-05,
"loss": 0.0,
"step": 4890
},
{
"epoch": 1.7915904936014626,
"grad_norm": 0.0,
"learning_rate": 1.9250457038391223e-05,
"loss": 0.0,
"step": 4900
},
{
"epoch": 1.7952468007312614,
"grad_norm": 0.0,
"learning_rate": 1.922851919561243e-05,
"loss": 0.0,
"step": 4910
},
{
"epoch": 1.7989031078610602,
"grad_norm": 0.0,
"learning_rate": 1.920658135283364e-05,
"loss": 0.0,
"step": 4920
},
{
"epoch": 1.8025594149908593,
"grad_norm": 0.0,
"learning_rate": 1.9184643510054845e-05,
"loss": 0.0,
"step": 4930
},
{
"epoch": 1.8062157221206583,
"grad_norm": 0.0,
"learning_rate": 1.916270566727605e-05,
"loss": 0.0,
"step": 4940
},
{
"epoch": 1.809872029250457,
"grad_norm": 0.0,
"learning_rate": 1.9140767824497257e-05,
"loss": 0.0,
"step": 4950
},
{
"epoch": 1.813528336380256,
"grad_norm": 0.0,
"learning_rate": 1.9118829981718467e-05,
"loss": 0.0,
"step": 4960
},
{
"epoch": 1.8171846435100547,
"grad_norm": 0.0,
"learning_rate": 1.9096892138939673e-05,
"loss": 0.0,
"step": 4970
},
{
"epoch": 1.8208409506398537,
"grad_norm": 0.0,
"learning_rate": 1.907495429616088e-05,
"loss": 0.0,
"step": 4980
},
{
"epoch": 1.8244972577696528,
"grad_norm": 0.0,
"learning_rate": 1.9053016453382085e-05,
"loss": 0.0,
"step": 4990
},
{
"epoch": 1.8281535648994516,
"grad_norm": 0.0,
"learning_rate": 1.9031078610603288e-05,
"loss": 0.0,
"step": 5000
},
{
"epoch": 1.8318098720292504,
"grad_norm": 0.0,
"learning_rate": 1.9009140767824498e-05,
"loss": 0.0,
"step": 5010
},
{
"epoch": 1.8354661791590492,
"grad_norm": 0.0,
"learning_rate": 1.8987202925045704e-05,
"loss": 0.0,
"step": 5020
},
{
"epoch": 1.8391224862888482,
"grad_norm": 0.0,
"learning_rate": 1.896526508226691e-05,
"loss": 0.0,
"step": 5030
},
{
"epoch": 1.8427787934186473,
"grad_norm": 0.0,
"learning_rate": 1.8943327239488116e-05,
"loss": 0.0,
"step": 5040
},
{
"epoch": 1.846435100548446,
"grad_norm": 0.0,
"learning_rate": 1.8921389396709326e-05,
"loss": 0.0,
"step": 5050
},
{
"epoch": 1.8500914076782449,
"grad_norm": 0.0,
"learning_rate": 1.8899451553930532e-05,
"loss": 0.0,
"step": 5060
},
{
"epoch": 1.853747714808044,
"grad_norm": 0.0,
"learning_rate": 1.8877513711151738e-05,
"loss": 0.0,
"step": 5070
},
{
"epoch": 1.857404021937843,
"grad_norm": 0.0,
"learning_rate": 1.8855575868372944e-05,
"loss": 0.0,
"step": 5080
},
{
"epoch": 1.8610603290676417,
"grad_norm": 0.0,
"learning_rate": 1.8833638025594154e-05,
"loss": 0.0,
"step": 5090
},
{
"epoch": 1.8647166361974405,
"grad_norm": 0.0,
"learning_rate": 1.8811700182815356e-05,
"loss": 0.0,
"step": 5100
},
{
"epoch": 1.8683729433272394,
"grad_norm": 0.0,
"learning_rate": 1.8789762340036562e-05,
"loss": 0.0,
"step": 5110
},
{
"epoch": 1.8720292504570384,
"grad_norm": 0.0,
"learning_rate": 1.876782449725777e-05,
"loss": 0.0,
"step": 5120
},
{
"epoch": 1.8756855575868374,
"grad_norm": 0.0,
"learning_rate": 1.8745886654478975e-05,
"loss": 0.0,
"step": 5130
},
{
"epoch": 1.8793418647166362,
"grad_norm": 0.0,
"learning_rate": 1.8723948811700184e-05,
"loss": 0.0,
"step": 5140
},
{
"epoch": 1.882998171846435,
"grad_norm": 0.0,
"learning_rate": 1.870201096892139e-05,
"loss": 0.0,
"step": 5150
},
{
"epoch": 1.8866544789762338,
"grad_norm": 0.0,
"learning_rate": 1.8680073126142597e-05,
"loss": 0.0,
"step": 5160
},
{
"epoch": 1.8903107861060329,
"grad_norm": 0.0,
"learning_rate": 1.8658135283363803e-05,
"loss": 0.0,
"step": 5170
},
{
"epoch": 1.893967093235832,
"grad_norm": 0.0,
"learning_rate": 1.8636197440585012e-05,
"loss": 0.0,
"step": 5180
},
{
"epoch": 1.8976234003656307,
"grad_norm": 0.0,
"learning_rate": 1.861425959780622e-05,
"loss": 0.0,
"step": 5190
},
{
"epoch": 1.9012797074954295,
"grad_norm": 0.0,
"learning_rate": 1.859232175502742e-05,
"loss": 0.0,
"step": 5200
},
{
"epoch": 1.9049360146252285,
"grad_norm": 0.0,
"learning_rate": 1.8570383912248627e-05,
"loss": 0.0,
"step": 5210
},
{
"epoch": 1.9085923217550276,
"grad_norm": 0.0,
"learning_rate": 1.8548446069469833e-05,
"loss": 0.0,
"step": 5220
},
{
"epoch": 1.9122486288848264,
"grad_norm": 0.0,
"learning_rate": 1.8526508226691043e-05,
"loss": 0.0,
"step": 5230
},
{
"epoch": 1.9159049360146252,
"grad_norm": 0.0,
"learning_rate": 1.850457038391225e-05,
"loss": 0.0,
"step": 5240
},
{
"epoch": 1.919561243144424,
"grad_norm": 0.0,
"learning_rate": 1.8482632541133455e-05,
"loss": 0.0,
"step": 5250
},
{
"epoch": 1.923217550274223,
"grad_norm": 0.0,
"learning_rate": 1.846069469835466e-05,
"loss": 0.0,
"step": 5260
},
{
"epoch": 1.926873857404022,
"grad_norm": 0.0,
"learning_rate": 1.843875685557587e-05,
"loss": 0.0,
"step": 5270
},
{
"epoch": 1.9305301645338209,
"grad_norm": 0.0,
"learning_rate": 1.8416819012797077e-05,
"loss": 0.0,
"step": 5280
},
{
"epoch": 1.9341864716636197,
"grad_norm": 0.0,
"learning_rate": 1.8394881170018283e-05,
"loss": 0.0,
"step": 5290
},
{
"epoch": 1.9378427787934185,
"grad_norm": 0.0,
"learning_rate": 1.8372943327239486e-05,
"loss": 0.0,
"step": 5300
},
{
"epoch": 1.9414990859232175,
"grad_norm": 0.0,
"learning_rate": 1.8351005484460695e-05,
"loss": 0.0,
"step": 5310
},
{
"epoch": 1.9451553930530165,
"grad_norm": 0.0,
"learning_rate": 1.83290676416819e-05,
"loss": 0.0,
"step": 5320
},
{
"epoch": 1.9488117001828154,
"grad_norm": 0.0,
"learning_rate": 1.8307129798903108e-05,
"loss": 0.0,
"step": 5330
},
{
"epoch": 1.9524680073126142,
"grad_norm": 0.0,
"learning_rate": 1.8285191956124314e-05,
"loss": 0.0,
"step": 5340
},
{
"epoch": 1.9561243144424132,
"grad_norm": 0.0,
"learning_rate": 1.826325411334552e-05,
"loss": 0.0,
"step": 5350
},
{
"epoch": 1.9597806215722122,
"grad_norm": 0.0,
"learning_rate": 1.824131627056673e-05,
"loss": 0.0,
"step": 5360
},
{
"epoch": 1.963436928702011,
"grad_norm": 0.0,
"learning_rate": 1.8219378427787936e-05,
"loss": 0.0,
"step": 5370
},
{
"epoch": 1.9670932358318098,
"grad_norm": 0.0,
"learning_rate": 1.8197440585009142e-05,
"loss": 0.0,
"step": 5380
},
{
"epoch": 1.9707495429616086,
"grad_norm": 0.0,
"learning_rate": 1.8175502742230348e-05,
"loss": 0.0,
"step": 5390
},
{
"epoch": 1.9744058500914077,
"grad_norm": 0.0,
"learning_rate": 1.8153564899451554e-05,
"loss": 0.0,
"step": 5400
},
{
"epoch": 1.9780621572212067,
"grad_norm": 0.0,
"learning_rate": 1.813162705667276e-05,
"loss": 0.0,
"step": 5410
},
{
"epoch": 1.9817184643510055,
"grad_norm": 0.0,
"learning_rate": 1.8109689213893966e-05,
"loss": 0.0,
"step": 5420
},
{
"epoch": 1.9853747714808043,
"grad_norm": 0.0,
"learning_rate": 1.8087751371115173e-05,
"loss": 0.0,
"step": 5430
},
{
"epoch": 1.9890310786106031,
"grad_norm": 0.0,
"learning_rate": 1.8065813528336382e-05,
"loss": 0.0,
"step": 5440
},
{
"epoch": 1.9926873857404022,
"grad_norm": 0.0,
"learning_rate": 1.8043875685557588e-05,
"loss": 0.0,
"step": 5450
},
{
"epoch": 1.9963436928702012,
"grad_norm": 0.0,
"learning_rate": 1.8021937842778794e-05,
"loss": 0.0,
"step": 5460
},
{
"epoch": 2.0,
"grad_norm": 0.0,
"learning_rate": 1.8e-05,
"loss": 0.0,
"step": 5470
},
{
"epoch": 2.003656307129799,
"grad_norm": 0.0,
"learning_rate": 1.7978062157221207e-05,
"loss": 0.0,
"step": 5480
},
{
"epoch": 2.0073126142595976,
"grad_norm": 0.0,
"learning_rate": 1.7956124314442416e-05,
"loss": 0.0,
"step": 5490
},
{
"epoch": 2.010968921389397,
"grad_norm": 0.0,
"learning_rate": 1.7934186471663622e-05,
"loss": 0.0,
"step": 5500
},
{
"epoch": 2.0146252285191957,
"grad_norm": 0.0,
"learning_rate": 1.7912248628884825e-05,
"loss": 0.0,
"step": 5510
},
{
"epoch": 2.0182815356489945,
"grad_norm": 0.0,
"learning_rate": 1.789031078610603e-05,
"loss": 0.0,
"step": 5520
},
{
"epoch": 2.0219378427787933,
"grad_norm": 0.0,
"learning_rate": 1.786837294332724e-05,
"loss": 0.0,
"step": 5530
},
{
"epoch": 2.025594149908592,
"grad_norm": 0.0,
"learning_rate": 1.7846435100548447e-05,
"loss": 0.0,
"step": 5540
},
{
"epoch": 2.0292504570383914,
"grad_norm": 0.0,
"learning_rate": 1.7824497257769653e-05,
"loss": 0.0,
"step": 5550
},
{
"epoch": 2.03290676416819,
"grad_norm": 0.0,
"learning_rate": 1.780255941499086e-05,
"loss": 0.0,
"step": 5560
},
{
"epoch": 2.036563071297989,
"grad_norm": 0.0,
"learning_rate": 1.7780621572212065e-05,
"loss": 0.0,
"step": 5570
},
{
"epoch": 2.0402193784277878,
"grad_norm": 0.0,
"learning_rate": 1.7758683729433275e-05,
"loss": 0.0,
"step": 5580
},
{
"epoch": 2.043875685557587,
"grad_norm": 0.0,
"learning_rate": 1.773674588665448e-05,
"loss": 0.0,
"step": 5590
},
{
"epoch": 2.047531992687386,
"grad_norm": 0.0,
"learning_rate": 1.7714808043875687e-05,
"loss": 0.0,
"step": 5600
},
{
"epoch": 2.0511882998171846,
"grad_norm": 0.0,
"learning_rate": 1.769287020109689e-05,
"loss": 0.0,
"step": 5610
},
{
"epoch": 2.0548446069469835,
"grad_norm": 0.0,
"learning_rate": 1.76709323583181e-05,
"loss": 0.0,
"step": 5620
},
{
"epoch": 2.0585009140767823,
"grad_norm": 0.0,
"learning_rate": 1.7648994515539305e-05,
"loss": 0.0,
"step": 5630
},
{
"epoch": 2.0621572212065815,
"grad_norm": 0.0,
"learning_rate": 1.762705667276051e-05,
"loss": 0.0,
"step": 5640
},
{
"epoch": 2.0658135283363803,
"grad_norm": 0.0,
"learning_rate": 1.7605118829981718e-05,
"loss": 0.0,
"step": 5650
},
{
"epoch": 2.069469835466179,
"grad_norm": 0.0,
"learning_rate": 1.7583180987202927e-05,
"loss": 0.0,
"step": 5660
},
{
"epoch": 2.073126142595978,
"grad_norm": 0.0,
"learning_rate": 1.7561243144424133e-05,
"loss": 0.0,
"step": 5670
},
{
"epoch": 2.076782449725777,
"grad_norm": 0.0,
"learning_rate": 1.753930530164534e-05,
"loss": 0.0,
"step": 5680
},
{
"epoch": 2.080438756855576,
"grad_norm": 0.0,
"learning_rate": 1.7517367458866546e-05,
"loss": 0.0,
"step": 5690
},
{
"epoch": 2.084095063985375,
"grad_norm": 0.0,
"learning_rate": 1.7495429616087752e-05,
"loss": 0.0,
"step": 5700
},
{
"epoch": 2.0877513711151736,
"grad_norm": 0.0,
"learning_rate": 1.7473491773308958e-05,
"loss": 0.0,
"step": 5710
},
{
"epoch": 2.0914076782449724,
"grad_norm": 0.0,
"learning_rate": 1.7451553930530164e-05,
"loss": 0.0,
"step": 5720
},
{
"epoch": 2.0950639853747717,
"grad_norm": 0.0,
"learning_rate": 1.742961608775137e-05,
"loss": 0.0001,
"step": 5730
},
{
"epoch": 2.0987202925045705,
"grad_norm": 0.24371479451656342,
"learning_rate": 1.7407678244972576e-05,
"loss": 0.0011,
"step": 5740
},
{
"epoch": 2.1023765996343693,
"grad_norm": 0.0,
"learning_rate": 1.7385740402193786e-05,
"loss": 0.0006,
"step": 5750
},
{
"epoch": 2.106032906764168,
"grad_norm": 0.0,
"learning_rate": 1.7363802559414992e-05,
"loss": 0.0003,
"step": 5760
},
{
"epoch": 2.109689213893967,
"grad_norm": 0.0,
"learning_rate": 1.7341864716636198e-05,
"loss": 0.0005,
"step": 5770
},
{
"epoch": 2.113345521023766,
"grad_norm": 0.0,
"learning_rate": 1.7319926873857404e-05,
"loss": 0.0,
"step": 5780
},
{
"epoch": 2.117001828153565,
"grad_norm": 0.0,
"learning_rate": 1.7297989031078614e-05,
"loss": 0.0002,
"step": 5790
},
{
"epoch": 2.1206581352833638,
"grad_norm": 0.0,
"learning_rate": 1.727605118829982e-05,
"loss": 0.0,
"step": 5800
},
{
"epoch": 2.1243144424131626,
"grad_norm": 0.0,
"learning_rate": 1.7254113345521023e-05,
"loss": 0.0,
"step": 5810
},
{
"epoch": 2.1279707495429614,
"grad_norm": 0.0,
"learning_rate": 1.723217550274223e-05,
"loss": 0.0,
"step": 5820
},
{
"epoch": 2.1316270566727606,
"grad_norm": 0.0,
"learning_rate": 1.7210237659963435e-05,
"loss": 0.0,
"step": 5830
},
{
"epoch": 2.1352833638025595,
"grad_norm": 0.0,
"learning_rate": 1.7188299817184645e-05,
"loss": 0.0,
"step": 5840
},
{
"epoch": 2.1389396709323583,
"grad_norm": 0.0,
"learning_rate": 1.716636197440585e-05,
"loss": 0.0,
"step": 5850
},
{
"epoch": 2.142595978062157,
"grad_norm": 0.0,
"learning_rate": 1.7144424131627057e-05,
"loss": 0.0,
"step": 5860
},
{
"epoch": 2.1462522851919563,
"grad_norm": 0.0,
"learning_rate": 1.7122486288848263e-05,
"loss": 0.0,
"step": 5870
},
{
"epoch": 2.149908592321755,
"grad_norm": 0.0,
"learning_rate": 1.7100548446069473e-05,
"loss": 0.0,
"step": 5880
},
{
"epoch": 2.153564899451554,
"grad_norm": 0.0,
"learning_rate": 1.707861060329068e-05,
"loss": 0.0,
"step": 5890
},
{
"epoch": 2.1572212065813527,
"grad_norm": 0.0,
"learning_rate": 1.7056672760511885e-05,
"loss": 0.0,
"step": 5900
},
{
"epoch": 2.1608775137111516,
"grad_norm": 0.0,
"learning_rate": 1.7034734917733088e-05,
"loss": 0.0,
"step": 5910
},
{
"epoch": 2.164533820840951,
"grad_norm": 0.0,
"learning_rate": 1.7012797074954294e-05,
"loss": 0.0,
"step": 5920
},
{
"epoch": 2.1681901279707496,
"grad_norm": 0.0,
"learning_rate": 1.6990859232175503e-05,
"loss": 0.0,
"step": 5930
},
{
"epoch": 2.1718464351005484,
"grad_norm": 0.0,
"learning_rate": 1.696892138939671e-05,
"loss": 0.0,
"step": 5940
},
{
"epoch": 2.1755027422303472,
"grad_norm": 0.0,
"learning_rate": 1.6946983546617916e-05,
"loss": 0.0,
"step": 5950
},
{
"epoch": 2.1791590493601465,
"grad_norm": 0.0,
"learning_rate": 1.6925045703839122e-05,
"loss": 0.0,
"step": 5960
},
{
"epoch": 2.1828153564899453,
"grad_norm": 0.0,
"learning_rate": 1.690310786106033e-05,
"loss": 0.0,
"step": 5970
},
{
"epoch": 2.186471663619744,
"grad_norm": 0.0,
"learning_rate": 1.6881170018281537e-05,
"loss": 0.0,
"step": 5980
},
{
"epoch": 2.190127970749543,
"grad_norm": 0.0,
"learning_rate": 1.6859232175502744e-05,
"loss": 0.0,
"step": 5990
},
{
"epoch": 2.1937842778793417,
"grad_norm": 0.0,
"learning_rate": 1.683729433272395e-05,
"loss": 0.0,
"step": 6000
},
{
"epoch": 2.197440585009141,
"grad_norm": 0.0,
"learning_rate": 1.6815356489945156e-05,
"loss": 0.0,
"step": 6010
},
{
"epoch": 2.2010968921389398,
"grad_norm": 0.0,
"learning_rate": 1.6793418647166362e-05,
"loss": 0.0,
"step": 6020
},
{
"epoch": 2.2047531992687386,
"grad_norm": 0.0,
"learning_rate": 1.6771480804387568e-05,
"loss": 0.0,
"step": 6030
},
{
"epoch": 2.2084095063985374,
"grad_norm": 0.0,
"learning_rate": 1.6749542961608774e-05,
"loss": 0.0,
"step": 6040
},
{
"epoch": 2.212065813528336,
"grad_norm": 0.0,
"learning_rate": 1.672760511882998e-05,
"loss": 0.0,
"step": 6050
},
{
"epoch": 2.2157221206581355,
"grad_norm": 0.0,
"learning_rate": 1.670566727605119e-05,
"loss": 0.0,
"step": 6060
},
{
"epoch": 2.2193784277879343,
"grad_norm": 0.0,
"learning_rate": 1.6683729433272396e-05,
"loss": 0.0,
"step": 6070
},
{
"epoch": 2.223034734917733,
"grad_norm": 0.0,
"learning_rate": 1.6661791590493602e-05,
"loss": 0.0,
"step": 6080
},
{
"epoch": 2.226691042047532,
"grad_norm": 0.0,
"learning_rate": 1.6639853747714808e-05,
"loss": 0.0,
"step": 6090
},
{
"epoch": 2.2303473491773307,
"grad_norm": 0.0,
"learning_rate": 1.6617915904936018e-05,
"loss": 0.0,
"step": 6100
},
{
"epoch": 2.23400365630713,
"grad_norm": 0.0,
"learning_rate": 1.659597806215722e-05,
"loss": 0.0,
"step": 6110
},
{
"epoch": 2.2376599634369287,
"grad_norm": 0.0,
"learning_rate": 1.6574040219378427e-05,
"loss": 0.0,
"step": 6120
},
{
"epoch": 2.2413162705667276,
"grad_norm": 0.0,
"learning_rate": 1.6552102376599633e-05,
"loss": 0.0,
"step": 6130
},
{
"epoch": 2.2449725776965264,
"grad_norm": 0.0,
"learning_rate": 1.6530164533820842e-05,
"loss": 0.0,
"step": 6140
},
{
"epoch": 2.2486288848263256,
"grad_norm": 0.0,
"learning_rate": 1.650822669104205e-05,
"loss": 0.0,
"step": 6150
},
{
"epoch": 2.2522851919561244,
"grad_norm": 0.0,
"learning_rate": 1.6486288848263255e-05,
"loss": 0.0,
"step": 6160
},
{
"epoch": 2.2559414990859232,
"grad_norm": 0.0,
"learning_rate": 1.646435100548446e-05,
"loss": 0.0,
"step": 6170
},
{
"epoch": 2.259597806215722,
"grad_norm": 0.0,
"learning_rate": 1.6442413162705667e-05,
"loss": 0.0,
"step": 6180
},
{
"epoch": 2.263254113345521,
"grad_norm": 0.0,
"learning_rate": 1.6420475319926876e-05,
"loss": 0.0,
"step": 6190
},
{
"epoch": 2.26691042047532,
"grad_norm": 0.0,
"learning_rate": 1.6398537477148083e-05,
"loss": 0.0,
"step": 6200
},
{
"epoch": 2.270566727605119,
"grad_norm": 0.0,
"learning_rate": 1.6376599634369285e-05,
"loss": 0.0,
"step": 6210
},
{
"epoch": 2.2742230347349177,
"grad_norm": 0.0,
"learning_rate": 1.635466179159049e-05,
"loss": 0.0,
"step": 6220
},
{
"epoch": 2.2778793418647165,
"grad_norm": 0.0,
"learning_rate": 1.63327239488117e-05,
"loss": 0.0,
"step": 6230
},
{
"epoch": 2.2815356489945158,
"grad_norm": 0.0,
"learning_rate": 1.6310786106032907e-05,
"loss": 0.0,
"step": 6240
},
{
"epoch": 2.2851919561243146,
"grad_norm": 0.0,
"learning_rate": 1.6288848263254113e-05,
"loss": 0.0,
"step": 6250
},
{
"epoch": 2.2888482632541134,
"grad_norm": 0.0,
"learning_rate": 1.626691042047532e-05,
"loss": 0.0,
"step": 6260
},
{
"epoch": 2.292504570383912,
"grad_norm": 0.0,
"learning_rate": 1.624497257769653e-05,
"loss": 0.0,
"step": 6270
},
{
"epoch": 2.296160877513711,
"grad_norm": 0.0,
"learning_rate": 1.6223034734917735e-05,
"loss": 0.0,
"step": 6280
},
{
"epoch": 2.2998171846435103,
"grad_norm": 0.0,
"learning_rate": 1.620109689213894e-05,
"loss": 0.0,
"step": 6290
},
{
"epoch": 2.303473491773309,
"grad_norm": 0.0,
"learning_rate": 1.6179159049360147e-05,
"loss": 0.0,
"step": 6300
},
{
"epoch": 2.307129798903108,
"grad_norm": 0.0,
"learning_rate": 1.615722120658135e-05,
"loss": 0.0,
"step": 6310
},
{
"epoch": 2.3107861060329067,
"grad_norm": 0.0,
"learning_rate": 1.613528336380256e-05,
"loss": 0.0,
"step": 6320
},
{
"epoch": 2.3144424131627055,
"grad_norm": 0.0,
"learning_rate": 1.6113345521023766e-05,
"loss": 0.0,
"step": 6330
},
{
"epoch": 2.3180987202925047,
"grad_norm": 0.0,
"learning_rate": 1.6091407678244972e-05,
"loss": 0.0,
"step": 6340
},
{
"epoch": 2.3217550274223036,
"grad_norm": 0.0,
"learning_rate": 1.6069469835466178e-05,
"loss": 0.0,
"step": 6350
},
{
"epoch": 2.3254113345521024,
"grad_norm": 0.0,
"learning_rate": 1.6047531992687388e-05,
"loss": 0.0,
"step": 6360
},
{
"epoch": 2.329067641681901,
"grad_norm": 0.0,
"learning_rate": 1.6025594149908594e-05,
"loss": 0.0,
"step": 6370
},
{
"epoch": 2.3327239488117,
"grad_norm": 0.0,
"learning_rate": 1.60036563071298e-05,
"loss": 0.0,
"step": 6380
},
{
"epoch": 2.3363802559414992,
"grad_norm": 0.0,
"learning_rate": 1.5981718464351006e-05,
"loss": 0.0,
"step": 6390
},
{
"epoch": 2.340036563071298,
"grad_norm": 0.0,
"learning_rate": 1.5959780621572212e-05,
"loss": 0.0,
"step": 6400
},
{
"epoch": 2.343692870201097,
"grad_norm": 0.0,
"learning_rate": 1.593784277879342e-05,
"loss": 0.0,
"step": 6410
},
{
"epoch": 2.3473491773308957,
"grad_norm": 0.0,
"learning_rate": 1.5915904936014625e-05,
"loss": 0.0,
"step": 6420
},
{
"epoch": 2.3510054844606945,
"grad_norm": 0.0,
"learning_rate": 1.589396709323583e-05,
"loss": 0.0,
"step": 6430
},
{
"epoch": 2.3546617915904937,
"grad_norm": 0.0,
"learning_rate": 1.5872029250457037e-05,
"loss": 0.0,
"step": 6440
},
{
"epoch": 2.3583180987202925,
"grad_norm": 0.0,
"learning_rate": 1.5850091407678246e-05,
"loss": 0.0,
"step": 6450
},
{
"epoch": 2.3619744058500913,
"grad_norm": 0.0,
"learning_rate": 1.5828153564899452e-05,
"loss": 0.0,
"step": 6460
},
{
"epoch": 2.36563071297989,
"grad_norm": 0.0,
"learning_rate": 1.580621572212066e-05,
"loss": 0.0,
"step": 6470
},
{
"epoch": 2.3692870201096894,
"grad_norm": 0.0,
"learning_rate": 1.5784277879341865e-05,
"loss": 0.0,
"step": 6480
},
{
"epoch": 2.372943327239488,
"grad_norm": 0.0,
"learning_rate": 1.5762340036563074e-05,
"loss": 0.0,
"step": 6490
},
{
"epoch": 2.376599634369287,
"grad_norm": 0.0,
"learning_rate": 1.574040219378428e-05,
"loss": 0.0,
"step": 6500
},
{
"epoch": 2.380255941499086,
"grad_norm": 0.0,
"learning_rate": 1.5718464351005483e-05,
"loss": 0.0,
"step": 6510
},
{
"epoch": 2.383912248628885,
"grad_norm": 0.0,
"learning_rate": 1.569652650822669e-05,
"loss": 0.0,
"step": 6520
},
{
"epoch": 2.387568555758684,
"grad_norm": 0.0,
"learning_rate": 1.5674588665447895e-05,
"loss": 0.0,
"step": 6530
},
{
"epoch": 2.3912248628884827,
"grad_norm": 0.0,
"learning_rate": 1.5652650822669105e-05,
"loss": 0.0,
"step": 6540
},
{
"epoch": 2.3948811700182815,
"grad_norm": 0.0,
"learning_rate": 1.563071297989031e-05,
"loss": 0.0,
"step": 6550
},
{
"epoch": 2.3985374771480803,
"grad_norm": 0.0,
"learning_rate": 1.5608775137111517e-05,
"loss": 0.0,
"step": 6560
},
{
"epoch": 2.4021937842778796,
"grad_norm": 0.0,
"learning_rate": 1.5586837294332723e-05,
"loss": 0.0,
"step": 6570
},
{
"epoch": 2.4058500914076784,
"grad_norm": 0.0,
"learning_rate": 1.5564899451553933e-05,
"loss": 0.0,
"step": 6580
},
{
"epoch": 2.409506398537477,
"grad_norm": 0.0,
"learning_rate": 1.554296160877514e-05,
"loss": 0.0,
"step": 6590
},
{
"epoch": 2.413162705667276,
"grad_norm": 0.0,
"learning_rate": 1.5521023765996345e-05,
"loss": 0.0,
"step": 6600
},
{
"epoch": 2.416819012797075,
"grad_norm": 0.0,
"learning_rate": 1.549908592321755e-05,
"loss": 0.0,
"step": 6610
},
{
"epoch": 2.420475319926874,
"grad_norm": 0.0,
"learning_rate": 1.5477148080438757e-05,
"loss": 0.0,
"step": 6620
},
{
"epoch": 2.424131627056673,
"grad_norm": 0.0,
"learning_rate": 1.5455210237659964e-05,
"loss": 0.0,
"step": 6630
},
{
"epoch": 2.4277879341864717,
"grad_norm": 0.0,
"learning_rate": 1.543327239488117e-05,
"loss": 0.0,
"step": 6640
},
{
"epoch": 2.4314442413162705,
"grad_norm": 0.0,
"learning_rate": 1.5411334552102376e-05,
"loss": 0.0,
"step": 6650
},
{
"epoch": 2.4351005484460693,
"grad_norm": 0.0,
"learning_rate": 1.5389396709323582e-05,
"loss": 0.0,
"step": 6660
},
{
"epoch": 2.4387568555758685,
"grad_norm": 0.0,
"learning_rate": 1.536745886654479e-05,
"loss": 0.0,
"step": 6670
},
{
"epoch": 2.4424131627056673,
"grad_norm": 0.0,
"learning_rate": 1.5345521023765998e-05,
"loss": 0.0,
"step": 6680
},
{
"epoch": 2.446069469835466,
"grad_norm": 0.0,
"learning_rate": 1.5323583180987204e-05,
"loss": 0.0,
"step": 6690
},
{
"epoch": 2.449725776965265,
"grad_norm": 0.0,
"learning_rate": 1.530164533820841e-05,
"loss": 0.0,
"step": 6700
},
{
"epoch": 2.4533820840950638,
"grad_norm": 0.0,
"learning_rate": 1.527970749542962e-05,
"loss": 0.0,
"step": 6710
},
{
"epoch": 2.457038391224863,
"grad_norm": 0.0,
"learning_rate": 1.5257769652650824e-05,
"loss": 0.0,
"step": 6720
},
{
"epoch": 2.460694698354662,
"grad_norm": 0.0,
"learning_rate": 1.523583180987203e-05,
"loss": 0.0,
"step": 6730
},
{
"epoch": 2.4643510054844606,
"grad_norm": 0.0,
"learning_rate": 1.5213893967093235e-05,
"loss": 0.0,
"step": 6740
},
{
"epoch": 2.4680073126142594,
"grad_norm": 0.0,
"learning_rate": 1.519195612431444e-05,
"loss": 0.0,
"step": 6750
},
{
"epoch": 2.4716636197440587,
"grad_norm": 0.0,
"learning_rate": 1.517001828153565e-05,
"loss": 0.0,
"step": 6760
},
{
"epoch": 2.4753199268738575,
"grad_norm": 0.0,
"learning_rate": 1.5148080438756856e-05,
"loss": 0.0,
"step": 6770
},
{
"epoch": 2.4789762340036563,
"grad_norm": 0.0,
"learning_rate": 1.5126142595978063e-05,
"loss": 0.0,
"step": 6780
},
{
"epoch": 2.482632541133455,
"grad_norm": 0.0,
"learning_rate": 1.5104204753199267e-05,
"loss": 0.0,
"step": 6790
},
{
"epoch": 2.4862888482632544,
"grad_norm": 0.0,
"learning_rate": 1.5082266910420477e-05,
"loss": 0.0,
"step": 6800
},
{
"epoch": 2.489945155393053,
"grad_norm": 0.0,
"learning_rate": 1.5060329067641683e-05,
"loss": 0.0,
"step": 6810
},
{
"epoch": 2.493601462522852,
"grad_norm": 0.0,
"learning_rate": 1.5038391224862889e-05,
"loss": 0.0,
"step": 6820
},
{
"epoch": 2.497257769652651,
"grad_norm": 0.0,
"learning_rate": 1.5016453382084095e-05,
"loss": 0.0,
"step": 6830
},
{
"epoch": 2.5009140767824496,
"grad_norm": 0.0,
"learning_rate": 1.4994515539305301e-05,
"loss": 0.0,
"step": 6840
},
{
"epoch": 2.504570383912249,
"grad_norm": 0.0,
"learning_rate": 1.4972577696526509e-05,
"loss": 0.0,
"step": 6850
},
{
"epoch": 2.5082266910420477,
"grad_norm": 0.0,
"learning_rate": 1.4950639853747715e-05,
"loss": 0.0,
"step": 6860
},
{
"epoch": 2.5118829981718465,
"grad_norm": 0.0,
"learning_rate": 1.4928702010968921e-05,
"loss": 0.0,
"step": 6870
},
{
"epoch": 2.5155393053016453,
"grad_norm": 0.0,
"learning_rate": 1.4906764168190129e-05,
"loss": 0.0,
"step": 6880
},
{
"epoch": 2.519195612431444,
"grad_norm": 0.0,
"learning_rate": 1.4884826325411333e-05,
"loss": 0.0,
"step": 6890
},
{
"epoch": 2.5228519195612433,
"grad_norm": 0.0,
"learning_rate": 1.4862888482632541e-05,
"loss": 0.0,
"step": 6900
},
{
"epoch": 2.526508226691042,
"grad_norm": 0.0,
"learning_rate": 1.4840950639853747e-05,
"loss": 0.0,
"step": 6910
},
{
"epoch": 2.530164533820841,
"grad_norm": 0.0,
"learning_rate": 1.4819012797074955e-05,
"loss": 0.0,
"step": 6920
},
{
"epoch": 2.5338208409506398,
"grad_norm": 0.0,
"learning_rate": 1.4797074954296161e-05,
"loss": 0.0,
"step": 6930
},
{
"epoch": 2.5374771480804386,
"grad_norm": 0.0,
"learning_rate": 1.4775137111517368e-05,
"loss": 0.0,
"step": 6940
},
{
"epoch": 2.541133455210238,
"grad_norm": 0.0,
"learning_rate": 1.4753199268738574e-05,
"loss": 0.0,
"step": 6950
},
{
"epoch": 2.5447897623400366,
"grad_norm": 0.0,
"learning_rate": 1.4731261425959782e-05,
"loss": 0.0,
"step": 6960
},
{
"epoch": 2.5484460694698354,
"grad_norm": 0.0,
"learning_rate": 1.4709323583180988e-05,
"loss": 0.0,
"step": 6970
},
{
"epoch": 2.5521023765996342,
"grad_norm": 0.0,
"learning_rate": 1.4687385740402196e-05,
"loss": 0.0,
"step": 6980
},
{
"epoch": 2.555758683729433,
"grad_norm": 0.0,
"learning_rate": 1.46654478976234e-05,
"loss": 0.0,
"step": 6990
},
{
"epoch": 2.5594149908592323,
"grad_norm": 0.0,
"learning_rate": 1.4643510054844606e-05,
"loss": 0.0,
"step": 7000
},
{
"epoch": 2.563071297989031,
"grad_norm": 0.0,
"learning_rate": 1.4621572212065814e-05,
"loss": 0.0,
"step": 7010
},
{
"epoch": 2.56672760511883,
"grad_norm": 0.0,
"learning_rate": 1.459963436928702e-05,
"loss": 0.0,
"step": 7020
},
{
"epoch": 2.5703839122486287,
"grad_norm": 0.0,
"learning_rate": 1.4577696526508228e-05,
"loss": 0.0,
"step": 7030
},
{
"epoch": 2.5740402193784275,
"grad_norm": 0.0,
"learning_rate": 1.4555758683729432e-05,
"loss": 0.0,
"step": 7040
},
{
"epoch": 2.577696526508227,
"grad_norm": 0.0,
"learning_rate": 1.453382084095064e-05,
"loss": 0.0,
"step": 7050
},
{
"epoch": 2.5813528336380256,
"grad_norm": 0.0,
"learning_rate": 1.4511882998171846e-05,
"loss": 0.0,
"step": 7060
},
{
"epoch": 2.5850091407678244,
"grad_norm": 0.0,
"learning_rate": 1.4489945155393054e-05,
"loss": 0.0,
"step": 7070
},
{
"epoch": 2.5886654478976237,
"grad_norm": 0.0,
"learning_rate": 1.446800731261426e-05,
"loss": 0.0,
"step": 7080
},
{
"epoch": 2.5923217550274225,
"grad_norm": 0.0,
"learning_rate": 1.4446069469835466e-05,
"loss": 0.0,
"step": 7090
},
{
"epoch": 2.5959780621572213,
"grad_norm": 0.0,
"learning_rate": 1.4424131627056673e-05,
"loss": 0.0,
"step": 7100
},
{
"epoch": 2.59963436928702,
"grad_norm": 0.0,
"learning_rate": 1.4402193784277879e-05,
"loss": 0.0,
"step": 7110
},
{
"epoch": 2.603290676416819,
"grad_norm": 0.0,
"learning_rate": 1.4380255941499087e-05,
"loss": 0.0,
"step": 7120
},
{
"epoch": 2.606946983546618,
"grad_norm": 0.0,
"learning_rate": 1.4358318098720293e-05,
"loss": 0.0,
"step": 7130
},
{
"epoch": 2.610603290676417,
"grad_norm": 0.0,
"learning_rate": 1.43363802559415e-05,
"loss": 0.0,
"step": 7140
},
{
"epoch": 2.6142595978062158,
"grad_norm": 0.0,
"learning_rate": 1.4314442413162705e-05,
"loss": 0.0,
"step": 7150
},
{
"epoch": 2.6179159049360146,
"grad_norm": 0.0,
"learning_rate": 1.4292504570383913e-05,
"loss": 0.0,
"step": 7160
},
{
"epoch": 2.6215722120658134,
"grad_norm": 0.0,
"learning_rate": 1.4270566727605119e-05,
"loss": 0.0,
"step": 7170
},
{
"epoch": 2.6252285191956126,
"grad_norm": 0.0,
"learning_rate": 1.4248628884826327e-05,
"loss": 0.0,
"step": 7180
},
{
"epoch": 2.6288848263254114,
"grad_norm": 0.0,
"learning_rate": 1.4226691042047533e-05,
"loss": 0.0,
"step": 7190
},
{
"epoch": 2.6325411334552102,
"grad_norm": 0.0,
"learning_rate": 1.4204753199268739e-05,
"loss": 0.0,
"step": 7200
},
{
"epoch": 2.636197440585009,
"grad_norm": 0.0,
"learning_rate": 1.4182815356489945e-05,
"loss": 0.0,
"step": 7210
},
{
"epoch": 2.639853747714808,
"grad_norm": 0.0,
"learning_rate": 1.4160877513711153e-05,
"loss": 0.0,
"step": 7220
},
{
"epoch": 2.643510054844607,
"grad_norm": 0.0,
"learning_rate": 1.413893967093236e-05,
"loss": 0.0,
"step": 7230
},
{
"epoch": 2.647166361974406,
"grad_norm": 0.0,
"learning_rate": 1.4117001828153565e-05,
"loss": 0.0,
"step": 7240
},
{
"epoch": 2.6508226691042047,
"grad_norm": 0.0,
"learning_rate": 1.4095063985374771e-05,
"loss": 0.0,
"step": 7250
},
{
"epoch": 2.6544789762340035,
"grad_norm": 0.0,
"learning_rate": 1.4073126142595978e-05,
"loss": 0.0,
"step": 7260
},
{
"epoch": 2.6581352833638023,
"grad_norm": 0.0,
"learning_rate": 1.4051188299817185e-05,
"loss": 0.0,
"step": 7270
},
{
"epoch": 2.6617915904936016,
"grad_norm": 0.0,
"learning_rate": 1.4029250457038392e-05,
"loss": 0.0,
"step": 7280
},
{
"epoch": 2.6654478976234004,
"grad_norm": 0.0,
"learning_rate": 1.40073126142596e-05,
"loss": 0.0,
"step": 7290
},
{
"epoch": 2.669104204753199,
"grad_norm": 0.0,
"learning_rate": 1.3985374771480804e-05,
"loss": 0.0,
"step": 7300
},
{
"epoch": 2.672760511882998,
"grad_norm": 0.0,
"learning_rate": 1.3963436928702012e-05,
"loss": 0.0,
"step": 7310
},
{
"epoch": 2.676416819012797,
"grad_norm": 0.0,
"learning_rate": 1.3941499085923218e-05,
"loss": 0.0,
"step": 7320
},
{
"epoch": 2.680073126142596,
"grad_norm": 0.0,
"learning_rate": 1.3919561243144426e-05,
"loss": 0.0,
"step": 7330
},
{
"epoch": 2.683729433272395,
"grad_norm": 0.0,
"learning_rate": 1.3897623400365632e-05,
"loss": 0.0,
"step": 7340
},
{
"epoch": 2.6873857404021937,
"grad_norm": 0.0,
"learning_rate": 1.3875685557586836e-05,
"loss": 0.0,
"step": 7350
},
{
"epoch": 2.691042047531993,
"grad_norm": 0.0,
"learning_rate": 1.3853747714808044e-05,
"loss": 0.0,
"step": 7360
},
{
"epoch": 2.6946983546617918,
"grad_norm": 0.0,
"learning_rate": 1.383180987202925e-05,
"loss": 0.0,
"step": 7370
},
{
"epoch": 2.6983546617915906,
"grad_norm": 0.0,
"learning_rate": 1.3809872029250458e-05,
"loss": 0.0,
"step": 7380
},
{
"epoch": 2.7020109689213894,
"grad_norm": 0.0,
"learning_rate": 1.3787934186471664e-05,
"loss": 0.0,
"step": 7390
},
{
"epoch": 2.705667276051188,
"grad_norm": 0.0,
"learning_rate": 1.376599634369287e-05,
"loss": 0.0,
"step": 7400
},
{
"epoch": 2.7093235831809874,
"grad_norm": 0.0,
"learning_rate": 1.3744058500914077e-05,
"loss": 0.0,
"step": 7410
},
{
"epoch": 2.7129798903107862,
"grad_norm": 0.0,
"learning_rate": 1.3722120658135284e-05,
"loss": 0.0,
"step": 7420
},
{
"epoch": 2.716636197440585,
"grad_norm": 0.0,
"learning_rate": 1.370018281535649e-05,
"loss": 0.0,
"step": 7430
},
{
"epoch": 2.720292504570384,
"grad_norm": 0.0,
"learning_rate": 1.3678244972577698e-05,
"loss": 0.0,
"step": 7440
},
{
"epoch": 2.7239488117001827,
"grad_norm": 0.0,
"learning_rate": 1.3656307129798903e-05,
"loss": 0.0,
"step": 7450
},
{
"epoch": 2.727605118829982,
"grad_norm": 0.0,
"learning_rate": 1.3634369287020109e-05,
"loss": 0.0,
"step": 7460
},
{
"epoch": 2.7312614259597807,
"grad_norm": 0.0,
"learning_rate": 1.3612431444241317e-05,
"loss": 0.0,
"step": 7470
},
{
"epoch": 2.7349177330895795,
"grad_norm": 0.0,
"learning_rate": 1.3590493601462523e-05,
"loss": 0.0,
"step": 7480
},
{
"epoch": 2.7385740402193783,
"grad_norm": 0.0,
"learning_rate": 1.356855575868373e-05,
"loss": 0.0,
"step": 7490
},
{
"epoch": 2.742230347349177,
"grad_norm": 0.0,
"learning_rate": 1.3546617915904935e-05,
"loss": 0.0,
"step": 7500
},
{
"epoch": 2.7458866544789764,
"grad_norm": 0.0,
"learning_rate": 1.3524680073126143e-05,
"loss": 0.0,
"step": 7510
},
{
"epoch": 2.749542961608775,
"grad_norm": 0.0,
"learning_rate": 1.3502742230347349e-05,
"loss": 0.0,
"step": 7520
},
{
"epoch": 2.753199268738574,
"grad_norm": 0.0,
"learning_rate": 1.3480804387568557e-05,
"loss": 0.0,
"step": 7530
},
{
"epoch": 2.756855575868373,
"grad_norm": 0.0,
"learning_rate": 1.3458866544789763e-05,
"loss": 0.0,
"step": 7540
},
{
"epoch": 2.7605118829981716,
"grad_norm": 0.0,
"learning_rate": 1.343692870201097e-05,
"loss": 0.0,
"step": 7550
},
{
"epoch": 2.764168190127971,
"grad_norm": 0.0,
"learning_rate": 1.3414990859232175e-05,
"loss": 0.0,
"step": 7560
},
{
"epoch": 2.7678244972577697,
"grad_norm": 0.0,
"learning_rate": 1.3393053016453383e-05,
"loss": 0.0,
"step": 7570
},
{
"epoch": 2.7714808043875685,
"grad_norm": 0.0,
"learning_rate": 1.337111517367459e-05,
"loss": 0.0,
"step": 7580
},
{
"epoch": 2.7751371115173673,
"grad_norm": 0.0,
"learning_rate": 1.3349177330895796e-05,
"loss": 0.0,
"step": 7590
},
{
"epoch": 2.778793418647166,
"grad_norm": 0.0,
"learning_rate": 1.3327239488117002e-05,
"loss": 0.0,
"step": 7600
},
{
"epoch": 2.7824497257769654,
"grad_norm": 0.0,
"learning_rate": 1.3305301645338208e-05,
"loss": 0.0,
"step": 7610
},
{
"epoch": 2.786106032906764,
"grad_norm": 0.0,
"learning_rate": 1.3283363802559416e-05,
"loss": 0.0,
"step": 7620
},
{
"epoch": 2.789762340036563,
"grad_norm": 0.0,
"learning_rate": 1.3261425959780622e-05,
"loss": 0.0,
"step": 7630
},
{
"epoch": 2.7934186471663622,
"grad_norm": 0.0,
"learning_rate": 1.323948811700183e-05,
"loss": 0.0,
"step": 7640
},
{
"epoch": 2.797074954296161,
"grad_norm": 0.0,
"learning_rate": 1.3217550274223034e-05,
"loss": 0.0,
"step": 7650
},
{
"epoch": 2.80073126142596,
"grad_norm": 0.0,
"learning_rate": 1.3195612431444242e-05,
"loss": 0.0,
"step": 7660
},
{
"epoch": 2.8043875685557587,
"grad_norm": 0.0,
"learning_rate": 1.3173674588665448e-05,
"loss": 0.0,
"step": 7670
},
{
"epoch": 2.8080438756855575,
"grad_norm": 0.0,
"learning_rate": 1.3151736745886656e-05,
"loss": 0.0,
"step": 7680
},
{
"epoch": 2.8117001828153567,
"grad_norm": 0.0,
"learning_rate": 1.3129798903107862e-05,
"loss": 0.0,
"step": 7690
},
{
"epoch": 2.8153564899451555,
"grad_norm": 0.0,
"learning_rate": 1.3107861060329066e-05,
"loss": 0.0,
"step": 7700
},
{
"epoch": 2.8190127970749543,
"grad_norm": 0.0,
"learning_rate": 1.3085923217550274e-05,
"loss": 0.0,
"step": 7710
},
{
"epoch": 2.822669104204753,
"grad_norm": 0.0,
"learning_rate": 1.306398537477148e-05,
"loss": 0.0,
"step": 7720
},
{
"epoch": 2.826325411334552,
"grad_norm": 0.0,
"learning_rate": 1.3042047531992688e-05,
"loss": 0.0,
"step": 7730
},
{
"epoch": 2.829981718464351,
"grad_norm": 0.0,
"learning_rate": 1.3020109689213894e-05,
"loss": 0.0,
"step": 7740
},
{
"epoch": 2.83363802559415,
"grad_norm": 0.0,
"learning_rate": 1.29981718464351e-05,
"loss": 0.0,
"step": 7750
},
{
"epoch": 2.837294332723949,
"grad_norm": 0.0,
"learning_rate": 1.2976234003656307e-05,
"loss": 0.0,
"step": 7760
},
{
"epoch": 2.8409506398537476,
"grad_norm": 0.0,
"learning_rate": 1.2954296160877515e-05,
"loss": 0.0,
"step": 7770
},
{
"epoch": 2.8446069469835464,
"grad_norm": 0.0,
"learning_rate": 1.293235831809872e-05,
"loss": 0.0,
"step": 7780
},
{
"epoch": 2.8482632541133457,
"grad_norm": 0.0,
"learning_rate": 1.2910420475319929e-05,
"loss": 0.0,
"step": 7790
},
{
"epoch": 2.8519195612431445,
"grad_norm": 0.0,
"learning_rate": 1.2888482632541133e-05,
"loss": 0.0,
"step": 7800
},
{
"epoch": 2.8555758683729433,
"grad_norm": 0.0,
"learning_rate": 1.286654478976234e-05,
"loss": 0.0,
"step": 7810
},
{
"epoch": 2.859232175502742,
"grad_norm": 0.0,
"learning_rate": 1.2844606946983547e-05,
"loss": 0.0,
"step": 7820
},
{
"epoch": 2.862888482632541,
"grad_norm": 0.0,
"learning_rate": 1.2822669104204753e-05,
"loss": 0.0,
"step": 7830
},
{
"epoch": 2.86654478976234,
"grad_norm": 0.0,
"learning_rate": 1.2800731261425961e-05,
"loss": 0.0,
"step": 7840
},
{
"epoch": 2.870201096892139,
"grad_norm": 0.0,
"learning_rate": 1.2778793418647165e-05,
"loss": 0.0,
"step": 7850
},
{
"epoch": 2.873857404021938,
"grad_norm": 0.0,
"learning_rate": 1.2756855575868373e-05,
"loss": 0.0,
"step": 7860
},
{
"epoch": 2.8775137111517366,
"grad_norm": 0.0,
"learning_rate": 1.273491773308958e-05,
"loss": 0.0,
"step": 7870
},
{
"epoch": 2.8811700182815354,
"grad_norm": 0.0,
"learning_rate": 1.2712979890310787e-05,
"loss": 0.0,
"step": 7880
},
{
"epoch": 2.8848263254113347,
"grad_norm": 0.0,
"learning_rate": 1.2691042047531993e-05,
"loss": 0.0,
"step": 7890
},
{
"epoch": 2.8884826325411335,
"grad_norm": 0.0,
"learning_rate": 1.26691042047532e-05,
"loss": 0.0,
"step": 7900
},
{
"epoch": 2.8921389396709323,
"grad_norm": 0.0,
"learning_rate": 1.2647166361974406e-05,
"loss": 0.0,
"step": 7910
},
{
"epoch": 2.8957952468007315,
"grad_norm": 0.0,
"learning_rate": 1.2625228519195613e-05,
"loss": 0.0,
"step": 7920
},
{
"epoch": 2.89945155393053,
"grad_norm": 0.0,
"learning_rate": 1.260329067641682e-05,
"loss": 0.0,
"step": 7930
},
{
"epoch": 2.903107861060329,
"grad_norm": 0.0,
"learning_rate": 1.2581352833638026e-05,
"loss": 0.0,
"step": 7940
},
{
"epoch": 2.906764168190128,
"grad_norm": 0.0,
"learning_rate": 1.2559414990859232e-05,
"loss": 0.0,
"step": 7950
},
{
"epoch": 2.9104204753199268,
"grad_norm": 0.0,
"learning_rate": 1.2537477148080438e-05,
"loss": 0.0,
"step": 7960
},
{
"epoch": 2.914076782449726,
"grad_norm": 0.0,
"learning_rate": 1.2515539305301646e-05,
"loss": 0.0,
"step": 7970
},
{
"epoch": 2.917733089579525,
"grad_norm": 0.0,
"learning_rate": 1.2493601462522852e-05,
"loss": 0.0,
"step": 7980
},
{
"epoch": 2.9213893967093236,
"grad_norm": 0.0,
"learning_rate": 1.247166361974406e-05,
"loss": 0.0,
"step": 7990
},
{
"epoch": 2.9250457038391224,
"grad_norm": 0.0,
"learning_rate": 1.2449725776965264e-05,
"loss": 0.0,
"step": 8000
},
{
"epoch": 2.9287020109689212,
"grad_norm": 0.0,
"learning_rate": 1.2427787934186472e-05,
"loss": 0.0,
"step": 8010
},
{
"epoch": 2.9323583180987205,
"grad_norm": 0.0,
"learning_rate": 1.2405850091407678e-05,
"loss": 0.0,
"step": 8020
},
{
"epoch": 2.9360146252285193,
"grad_norm": 0.0,
"learning_rate": 1.2383912248628886e-05,
"loss": 0.0,
"step": 8030
},
{
"epoch": 2.939670932358318,
"grad_norm": 0.0,
"learning_rate": 1.2361974405850092e-05,
"loss": 0.0,
"step": 8040
},
{
"epoch": 2.943327239488117,
"grad_norm": 0.0,
"learning_rate": 1.2340036563071298e-05,
"loss": 0.0,
"step": 8050
},
{
"epoch": 2.9469835466179157,
"grad_norm": 0.0,
"learning_rate": 1.2318098720292504e-05,
"loss": 0.0,
"step": 8060
},
{
"epoch": 2.950639853747715,
"grad_norm": 0.0,
"learning_rate": 1.229616087751371e-05,
"loss": 0.0,
"step": 8070
},
{
"epoch": 2.954296160877514,
"grad_norm": 0.0,
"learning_rate": 1.2274223034734918e-05,
"loss": 0.0,
"step": 8080
},
{
"epoch": 2.9579524680073126,
"grad_norm": 0.0,
"learning_rate": 1.2252285191956125e-05,
"loss": 0.0,
"step": 8090
},
{
"epoch": 2.9616087751371114,
"grad_norm": 0.0,
"learning_rate": 1.223034734917733e-05,
"loss": 0.0,
"step": 8100
},
{
"epoch": 2.96526508226691,
"grad_norm": 0.0,
"learning_rate": 1.2208409506398537e-05,
"loss": 0.0,
"step": 8110
},
{
"epoch": 2.9689213893967095,
"grad_norm": 0.0,
"learning_rate": 1.2186471663619745e-05,
"loss": 0.0,
"step": 8120
},
{
"epoch": 2.9725776965265083,
"grad_norm": 0.0,
"learning_rate": 1.2164533820840951e-05,
"loss": 0.0,
"step": 8130
},
{
"epoch": 2.976234003656307,
"grad_norm": 0.0,
"learning_rate": 1.2142595978062159e-05,
"loss": 0.0,
"step": 8140
},
{
"epoch": 2.979890310786106,
"grad_norm": 0.0,
"learning_rate": 1.2120658135283363e-05,
"loss": 0.0,
"step": 8150
},
{
"epoch": 2.9835466179159047,
"grad_norm": 0.0,
"learning_rate": 1.2098720292504571e-05,
"loss": 0.0,
"step": 8160
},
{
"epoch": 2.987202925045704,
"grad_norm": 0.0,
"learning_rate": 1.2076782449725777e-05,
"loss": 0.0,
"step": 8170
},
{
"epoch": 2.9908592321755028,
"grad_norm": 0.0,
"learning_rate": 1.2054844606946983e-05,
"loss": 0.0,
"step": 8180
},
{
"epoch": 2.9945155393053016,
"grad_norm": 0.0,
"learning_rate": 1.2032906764168191e-05,
"loss": 0.0,
"step": 8190
},
{
"epoch": 2.998171846435101,
"grad_norm": 0.0,
"learning_rate": 1.2010968921389397e-05,
"loss": 0.0,
"step": 8200
},
{
"epoch": 3.0018281535648996,
"grad_norm": 0.0,
"learning_rate": 1.1989031078610603e-05,
"loss": 0.0,
"step": 8210
},
{
"epoch": 3.0054844606946984,
"grad_norm": 0.0,
"learning_rate": 1.196709323583181e-05,
"loss": 0.0,
"step": 8220
},
{
"epoch": 3.0091407678244972,
"grad_norm": 0.0,
"learning_rate": 1.1945155393053017e-05,
"loss": 0.0,
"step": 8230
},
{
"epoch": 3.012797074954296,
"grad_norm": 0.0,
"learning_rate": 1.1923217550274223e-05,
"loss": 0.0,
"step": 8240
},
{
"epoch": 3.016453382084095,
"grad_norm": 0.0,
"learning_rate": 1.1901279707495431e-05,
"loss": 0.0,
"step": 8250
},
{
"epoch": 3.020109689213894,
"grad_norm": 0.0,
"learning_rate": 1.1879341864716636e-05,
"loss": 0.0,
"step": 8260
},
{
"epoch": 3.023765996343693,
"grad_norm": 0.0,
"learning_rate": 1.1857404021937844e-05,
"loss": 0.0,
"step": 8270
},
{
"epoch": 3.0274223034734917,
"grad_norm": 0.0,
"learning_rate": 1.183546617915905e-05,
"loss": 0.0,
"step": 8280
},
{
"epoch": 3.0310786106032905,
"grad_norm": 0.0,
"learning_rate": 1.1813528336380256e-05,
"loss": 0.0,
"step": 8290
},
{
"epoch": 3.03473491773309,
"grad_norm": 0.0,
"learning_rate": 1.1791590493601464e-05,
"loss": 0.0,
"step": 8300
},
{
"epoch": 3.0383912248628886,
"grad_norm": 0.0,
"learning_rate": 1.1769652650822668e-05,
"loss": 0.0,
"step": 8310
},
{
"epoch": 3.0420475319926874,
"grad_norm": 0.0,
"learning_rate": 1.1747714808043876e-05,
"loss": 0.0,
"step": 8320
},
{
"epoch": 3.045703839122486,
"grad_norm": 0.0,
"learning_rate": 1.1725776965265082e-05,
"loss": 0.0,
"step": 8330
},
{
"epoch": 3.049360146252285,
"grad_norm": 0.0,
"learning_rate": 1.170383912248629e-05,
"loss": 0.0,
"step": 8340
},
{
"epoch": 3.0530164533820843,
"grad_norm": 0.0,
"learning_rate": 1.1681901279707496e-05,
"loss": 0.0,
"step": 8350
},
{
"epoch": 3.056672760511883,
"grad_norm": 0.0,
"learning_rate": 1.1659963436928702e-05,
"loss": 0.0,
"step": 8360
},
{
"epoch": 3.060329067641682,
"grad_norm": 0.0,
"learning_rate": 1.1638025594149908e-05,
"loss": 0.0,
"step": 8370
},
{
"epoch": 3.0639853747714807,
"grad_norm": 0.0,
"learning_rate": 1.1616087751371116e-05,
"loss": 0.0,
"step": 8380
},
{
"epoch": 3.0676416819012795,
"grad_norm": 0.0,
"learning_rate": 1.1594149908592322e-05,
"loss": 0.0,
"step": 8390
},
{
"epoch": 3.0712979890310788,
"grad_norm": 0.0,
"learning_rate": 1.157221206581353e-05,
"loss": 0.0,
"step": 8400
},
{
"epoch": 3.0749542961608776,
"grad_norm": 0.0,
"learning_rate": 1.1550274223034735e-05,
"loss": 0.0,
"step": 8410
},
{
"epoch": 3.0786106032906764,
"grad_norm": 0.0,
"learning_rate": 1.152833638025594e-05,
"loss": 0.0,
"step": 8420
},
{
"epoch": 3.082266910420475,
"grad_norm": 0.0,
"learning_rate": 1.1506398537477149e-05,
"loss": 0.0,
"step": 8430
},
{
"epoch": 3.0859232175502744,
"grad_norm": 0.0,
"learning_rate": 1.1484460694698355e-05,
"loss": 0.0,
"step": 8440
},
{
"epoch": 3.0895795246800732,
"grad_norm": 0.0,
"learning_rate": 1.1462522851919563e-05,
"loss": 0.0,
"step": 8450
},
{
"epoch": 3.093235831809872,
"grad_norm": 0.0,
"learning_rate": 1.1440585009140767e-05,
"loss": 0.0,
"step": 8460
},
{
"epoch": 3.096892138939671,
"grad_norm": 0.0,
"learning_rate": 1.1418647166361975e-05,
"loss": 0.0,
"step": 8470
},
{
"epoch": 3.1005484460694697,
"grad_norm": 0.0,
"learning_rate": 1.1396709323583181e-05,
"loss": 0.0,
"step": 8480
},
{
"epoch": 3.104204753199269,
"grad_norm": 0.0,
"learning_rate": 1.1374771480804389e-05,
"loss": 0.0,
"step": 8490
},
{
"epoch": 3.1078610603290677,
"grad_norm": 0.0,
"learning_rate": 1.1352833638025595e-05,
"loss": 0.0,
"step": 8500
},
{
"epoch": 3.1115173674588665,
"grad_norm": 0.0,
"learning_rate": 1.1330895795246801e-05,
"loss": 0.0,
"step": 8510
},
{
"epoch": 3.1151736745886653,
"grad_norm": 0.0,
"learning_rate": 1.1308957952468007e-05,
"loss": 0.0,
"step": 8520
},
{
"epoch": 3.118829981718464,
"grad_norm": 0.0,
"learning_rate": 1.1287020109689213e-05,
"loss": 0.0,
"step": 8530
},
{
"epoch": 3.1224862888482634,
"grad_norm": 0.0,
"learning_rate": 1.1265082266910421e-05,
"loss": 0.0,
"step": 8540
},
{
"epoch": 3.126142595978062,
"grad_norm": 0.0,
"learning_rate": 1.1243144424131627e-05,
"loss": 0.0,
"step": 8550
},
{
"epoch": 3.129798903107861,
"grad_norm": 0.0,
"learning_rate": 1.1221206581352834e-05,
"loss": 0.0,
"step": 8560
},
{
"epoch": 3.13345521023766,
"grad_norm": 0.0,
"learning_rate": 1.119926873857404e-05,
"loss": 0.0,
"step": 8570
},
{
"epoch": 3.137111517367459,
"grad_norm": 0.0,
"learning_rate": 1.1177330895795248e-05,
"loss": 0.0,
"step": 8580
},
{
"epoch": 3.140767824497258,
"grad_norm": 0.0,
"learning_rate": 1.1155393053016454e-05,
"loss": 0.0,
"step": 8590
},
{
"epoch": 3.1444241316270567,
"grad_norm": 0.0,
"learning_rate": 1.1133455210237662e-05,
"loss": 0.0,
"step": 8600
},
{
"epoch": 3.1480804387568555,
"grad_norm": 0.0,
"learning_rate": 1.1111517367458866e-05,
"loss": 0.0,
"step": 8610
},
{
"epoch": 3.1517367458866543,
"grad_norm": 0.0,
"learning_rate": 1.1089579524680074e-05,
"loss": 0.0,
"step": 8620
},
{
"epoch": 3.1553930530164536,
"grad_norm": 0.0,
"learning_rate": 1.106764168190128e-05,
"loss": 0.0,
"step": 8630
},
{
"epoch": 3.1590493601462524,
"grad_norm": 0.0,
"learning_rate": 1.1045703839122488e-05,
"loss": 0.0,
"step": 8640
},
{
"epoch": 3.162705667276051,
"grad_norm": 0.0,
"learning_rate": 1.1023765996343694e-05,
"loss": 0.0,
"step": 8650
},
{
"epoch": 3.16636197440585,
"grad_norm": 0.0,
"learning_rate": 1.1001828153564898e-05,
"loss": 0.0,
"step": 8660
},
{
"epoch": 3.170018281535649,
"grad_norm": 0.0,
"learning_rate": 1.0979890310786106e-05,
"loss": 0.0,
"step": 8670
},
{
"epoch": 3.173674588665448,
"grad_norm": 0.0,
"learning_rate": 1.0957952468007312e-05,
"loss": 0.0,
"step": 8680
},
{
"epoch": 3.177330895795247,
"grad_norm": 0.0,
"learning_rate": 1.093601462522852e-05,
"loss": 0.0,
"step": 8690
},
{
"epoch": 3.1809872029250457,
"grad_norm": 0.0,
"learning_rate": 1.0914076782449726e-05,
"loss": 0.0,
"step": 8700
},
{
"epoch": 3.1846435100548445,
"grad_norm": 0.0,
"learning_rate": 1.0892138939670932e-05,
"loss": 0.0,
"step": 8710
},
{
"epoch": 3.1882998171846433,
"grad_norm": 0.0,
"learning_rate": 1.0870201096892139e-05,
"loss": 0.0,
"step": 8720
},
{
"epoch": 3.1919561243144425,
"grad_norm": 0.0,
"learning_rate": 1.0848263254113346e-05,
"loss": 0.0,
"step": 8730
},
{
"epoch": 3.1956124314442413,
"grad_norm": 0.0,
"learning_rate": 1.0826325411334553e-05,
"loss": 0.0,
"step": 8740
},
{
"epoch": 3.19926873857404,
"grad_norm": 0.0,
"learning_rate": 1.080438756855576e-05,
"loss": 0.0,
"step": 8750
},
{
"epoch": 3.202925045703839,
"grad_norm": 0.0,
"learning_rate": 1.0782449725776965e-05,
"loss": 0.0,
"step": 8760
},
{
"epoch": 3.206581352833638,
"grad_norm": 0.0,
"learning_rate": 1.0760511882998171e-05,
"loss": 0.0,
"step": 8770
},
{
"epoch": 3.210237659963437,
"grad_norm": 0.0,
"learning_rate": 1.0738574040219379e-05,
"loss": 0.0,
"step": 8780
},
{
"epoch": 3.213893967093236,
"grad_norm": 0.0,
"learning_rate": 1.0716636197440585e-05,
"loss": 0.0,
"step": 8790
},
{
"epoch": 3.2175502742230346,
"grad_norm": 0.0,
"learning_rate": 1.0694698354661793e-05,
"loss": 0.0,
"step": 8800
},
{
"epoch": 3.2212065813528334,
"grad_norm": 0.0,
"learning_rate": 1.0672760511882997e-05,
"loss": 0.0,
"step": 8810
},
{
"epoch": 3.2248628884826327,
"grad_norm": 0.0,
"learning_rate": 1.0650822669104205e-05,
"loss": 0.0,
"step": 8820
},
{
"epoch": 3.2285191956124315,
"grad_norm": 0.0,
"learning_rate": 1.0628884826325411e-05,
"loss": 0.0,
"step": 8830
},
{
"epoch": 3.2321755027422303,
"grad_norm": 0.0,
"learning_rate": 1.0606946983546619e-05,
"loss": 0.0,
"step": 8840
},
{
"epoch": 3.235831809872029,
"grad_norm": 0.0,
"learning_rate": 1.0585009140767825e-05,
"loss": 0.0,
"step": 8850
},
{
"epoch": 3.2394881170018284,
"grad_norm": 0.0,
"learning_rate": 1.0563071297989031e-05,
"loss": 0.0,
"step": 8860
},
{
"epoch": 3.243144424131627,
"grad_norm": 0.0,
"learning_rate": 1.0541133455210237e-05,
"loss": 0.0,
"step": 8870
},
{
"epoch": 3.246800731261426,
"grad_norm": 0.0,
"learning_rate": 1.0519195612431444e-05,
"loss": 0.0,
"step": 8880
},
{
"epoch": 3.250457038391225,
"grad_norm": 0.0,
"learning_rate": 1.0497257769652651e-05,
"loss": 0.0,
"step": 8890
},
{
"epoch": 3.2541133455210236,
"grad_norm": 0.0,
"learning_rate": 1.0475319926873858e-05,
"loss": 0.0,
"step": 8900
},
{
"epoch": 3.257769652650823,
"grad_norm": 0.0,
"learning_rate": 1.0453382084095064e-05,
"loss": 0.0,
"step": 8910
},
{
"epoch": 3.2614259597806217,
"grad_norm": 0.0,
"learning_rate": 1.043144424131627e-05,
"loss": 0.0,
"step": 8920
},
{
"epoch": 3.2650822669104205,
"grad_norm": 0.0,
"learning_rate": 1.0409506398537478e-05,
"loss": 0.0,
"step": 8930
},
{
"epoch": 3.2687385740402193,
"grad_norm": 0.0,
"learning_rate": 1.0387568555758684e-05,
"loss": 0.0,
"step": 8940
},
{
"epoch": 3.272394881170018,
"grad_norm": 0.0,
"learning_rate": 1.0365630712979892e-05,
"loss": 0.0,
"step": 8950
},
{
"epoch": 3.2760511882998173,
"grad_norm": 0.0,
"learning_rate": 1.0343692870201096e-05,
"loss": 0.0,
"step": 8960
},
{
"epoch": 3.279707495429616,
"grad_norm": 0.0,
"learning_rate": 1.0321755027422304e-05,
"loss": 0.0,
"step": 8970
},
{
"epoch": 3.283363802559415,
"grad_norm": 0.0,
"learning_rate": 1.029981718464351e-05,
"loss": 0.0,
"step": 8980
},
{
"epoch": 3.2870201096892138,
"grad_norm": 0.0,
"learning_rate": 1.0277879341864718e-05,
"loss": 0.0,
"step": 8990
},
{
"epoch": 3.2906764168190126,
"grad_norm": 0.0,
"learning_rate": 1.0255941499085924e-05,
"loss": 0.0,
"step": 9000
},
{
"epoch": 3.294332723948812,
"grad_norm": 0.0,
"learning_rate": 1.0234003656307129e-05,
"loss": 0.0,
"step": 9010
},
{
"epoch": 3.2979890310786106,
"grad_norm": 0.0,
"learning_rate": 1.0212065813528336e-05,
"loss": 0.0,
"step": 9020
},
{
"epoch": 3.3016453382084094,
"grad_norm": 0.0,
"learning_rate": 1.0190127970749543e-05,
"loss": 0.0,
"step": 9030
},
{
"epoch": 3.3053016453382082,
"grad_norm": 0.0,
"learning_rate": 1.016819012797075e-05,
"loss": 0.0,
"step": 9040
},
{
"epoch": 3.3089579524680075,
"grad_norm": 0.0,
"learning_rate": 1.0146252285191956e-05,
"loss": 0.0,
"step": 9050
},
{
"epoch": 3.3126142595978063,
"grad_norm": 0.0,
"learning_rate": 1.0124314442413163e-05,
"loss": 0.0,
"step": 9060
},
{
"epoch": 3.316270566727605,
"grad_norm": 0.0,
"learning_rate": 1.0102376599634369e-05,
"loss": 0.0,
"step": 9070
},
{
"epoch": 3.319926873857404,
"grad_norm": 0.0,
"learning_rate": 1.0080438756855577e-05,
"loss": 0.0,
"step": 9080
},
{
"epoch": 3.3235831809872027,
"grad_norm": 0.0,
"learning_rate": 1.0058500914076783e-05,
"loss": 0.0,
"step": 9090
},
{
"epoch": 3.327239488117002,
"grad_norm": 0.0,
"learning_rate": 1.003656307129799e-05,
"loss": 0.0,
"step": 9100
},
{
"epoch": 3.330895795246801,
"grad_norm": 0.0,
"learning_rate": 1.0014625228519195e-05,
"loss": 0.0,
"step": 9110
},
{
"epoch": 3.3345521023765996,
"grad_norm": 0.0,
"learning_rate": 9.992687385740401e-06,
"loss": 0.0,
"step": 9120
},
{
"epoch": 3.3382084095063984,
"grad_norm": 0.0,
"learning_rate": 9.970749542961609e-06,
"loss": 0.0,
"step": 9130
},
{
"epoch": 3.3418647166361977,
"grad_norm": 0.0,
"learning_rate": 9.948811700182815e-06,
"loss": 0.0,
"step": 9140
},
{
"epoch": 3.3455210237659965,
"grad_norm": 0.0,
"learning_rate": 9.926873857404023e-06,
"loss": 0.0,
"step": 9150
},
{
"epoch": 3.3491773308957953,
"grad_norm": 0.0,
"learning_rate": 9.904936014625227e-06,
"loss": 0.0,
"step": 9160
},
{
"epoch": 3.352833638025594,
"grad_norm": 0.0,
"learning_rate": 9.882998171846435e-06,
"loss": 0.0,
"step": 9170
},
{
"epoch": 3.356489945155393,
"grad_norm": 0.0,
"learning_rate": 9.861060329067641e-06,
"loss": 0.0,
"step": 9180
},
{
"epoch": 3.360146252285192,
"grad_norm": 0.0,
"learning_rate": 9.83912248628885e-06,
"loss": 0.0,
"step": 9190
},
{
"epoch": 3.363802559414991,
"grad_norm": 0.0,
"learning_rate": 9.817184643510055e-06,
"loss": 0.0,
"step": 9200
},
{
"epoch": 3.3674588665447898,
"grad_norm": 0.0,
"learning_rate": 9.795246800731262e-06,
"loss": 0.0,
"step": 9210
},
{
"epoch": 3.3711151736745886,
"grad_norm": 0.0,
"learning_rate": 9.773308957952468e-06,
"loss": 0.0,
"step": 9220
},
{
"epoch": 3.3747714808043874,
"grad_norm": 0.0,
"learning_rate": 9.751371115173675e-06,
"loss": 0.0,
"step": 9230
},
{
"epoch": 3.3784277879341866,
"grad_norm": 0.0,
"learning_rate": 9.729433272394882e-06,
"loss": 0.0,
"step": 9240
},
{
"epoch": 3.3820840950639854,
"grad_norm": 0.0,
"learning_rate": 9.707495429616088e-06,
"loss": 0.0,
"step": 9250
},
{
"epoch": 3.3857404021937842,
"grad_norm": 0.0,
"learning_rate": 9.685557586837294e-06,
"loss": 0.0,
"step": 9260
},
{
"epoch": 3.389396709323583,
"grad_norm": 0.0,
"learning_rate": 9.6636197440585e-06,
"loss": 0.0,
"step": 9270
},
{
"epoch": 3.393053016453382,
"grad_norm": 0.0,
"learning_rate": 9.641681901279708e-06,
"loss": 0.0,
"step": 9280
},
{
"epoch": 3.396709323583181,
"grad_norm": 0.0,
"learning_rate": 9.619744058500914e-06,
"loss": 0.0,
"step": 9290
},
{
"epoch": 3.40036563071298,
"grad_norm": 0.0,
"learning_rate": 9.597806215722122e-06,
"loss": 0.0,
"step": 9300
},
{
"epoch": 3.4040219378427787,
"grad_norm": 0.0,
"learning_rate": 9.575868372943328e-06,
"loss": 0.0,
"step": 9310
},
{
"epoch": 3.4076782449725775,
"grad_norm": 0.0,
"learning_rate": 9.553930530164534e-06,
"loss": 0.0,
"step": 9320
},
{
"epoch": 3.411334552102377,
"grad_norm": 0.0,
"learning_rate": 9.53199268738574e-06,
"loss": 0.0,
"step": 9330
},
{
"epoch": 3.4149908592321756,
"grad_norm": 0.0,
"learning_rate": 9.510054844606948e-06,
"loss": 0.0,
"step": 9340
},
{
"epoch": 3.4186471663619744,
"grad_norm": 0.0,
"learning_rate": 9.488117001828154e-06,
"loss": 0.0,
"step": 9350
},
{
"epoch": 3.422303473491773,
"grad_norm": 0.0,
"learning_rate": 9.46617915904936e-06,
"loss": 0.0,
"step": 9360
},
{
"epoch": 3.425959780621572,
"grad_norm": 0.0,
"learning_rate": 9.444241316270567e-06,
"loss": 0.0,
"step": 9370
},
{
"epoch": 3.4296160877513713,
"grad_norm": 0.0,
"learning_rate": 9.422303473491773e-06,
"loss": 0.0,
"step": 9380
},
{
"epoch": 3.43327239488117,
"grad_norm": 0.0,
"learning_rate": 9.40036563071298e-06,
"loss": 0.0,
"step": 9390
},
{
"epoch": 3.436928702010969,
"grad_norm": 0.0,
"learning_rate": 9.378427787934187e-06,
"loss": 0.0,
"step": 9400
},
{
"epoch": 3.4405850091407677,
"grad_norm": 0.0,
"learning_rate": 9.356489945155395e-06,
"loss": 0.0,
"step": 9410
},
{
"epoch": 3.444241316270567,
"grad_norm": 0.0,
"learning_rate": 9.334552102376599e-06,
"loss": 0.0,
"step": 9420
},
{
"epoch": 3.4478976234003658,
"grad_norm": 0.0,
"learning_rate": 9.312614259597807e-06,
"loss": 0.0,
"step": 9430
},
{
"epoch": 3.4515539305301646,
"grad_norm": 0.0,
"learning_rate": 9.290676416819013e-06,
"loss": 0.0,
"step": 9440
},
{
"epoch": 3.4552102376599634,
"grad_norm": 0.0,
"learning_rate": 9.26873857404022e-06,
"loss": 0.0,
"step": 9450
},
{
"epoch": 3.458866544789762,
"grad_norm": 0.0,
"learning_rate": 9.246800731261427e-06,
"loss": 0.0,
"step": 9460
},
{
"epoch": 3.4625228519195614,
"grad_norm": 0.0,
"learning_rate": 9.224862888482633e-06,
"loss": 0.0,
"step": 9470
},
{
"epoch": 3.4661791590493602,
"grad_norm": 0.0,
"learning_rate": 9.20292504570384e-06,
"loss": 0.0,
"step": 9480
},
{
"epoch": 3.469835466179159,
"grad_norm": 0.0,
"learning_rate": 9.180987202925045e-06,
"loss": 0.0,
"step": 9490
},
{
"epoch": 3.473491773308958,
"grad_norm": 0.0,
"learning_rate": 9.159049360146253e-06,
"loss": 0.0,
"step": 9500
},
{
"epoch": 3.4771480804387567,
"grad_norm": 0.0,
"learning_rate": 9.13711151736746e-06,
"loss": 0.0,
"step": 9510
},
{
"epoch": 3.480804387568556,
"grad_norm": 0.0,
"learning_rate": 9.115173674588665e-06,
"loss": 0.0,
"step": 9520
},
{
"epoch": 3.4844606946983547,
"grad_norm": 0.0,
"learning_rate": 9.093235831809872e-06,
"loss": 0.0,
"step": 9530
},
{
"epoch": 3.4881170018281535,
"grad_norm": 0.0,
"learning_rate": 9.07129798903108e-06,
"loss": 0.0,
"step": 9540
},
{
"epoch": 3.4917733089579523,
"grad_norm": 0.0,
"learning_rate": 9.049360146252286e-06,
"loss": 0.0,
"step": 9550
},
{
"epoch": 3.495429616087751,
"grad_norm": 0.0,
"learning_rate": 9.027422303473493e-06,
"loss": 0.0,
"step": 9560
},
{
"epoch": 3.4990859232175504,
"grad_norm": 0.0,
"learning_rate": 9.005484460694698e-06,
"loss": 0.0,
"step": 9570
},
{
"epoch": 3.502742230347349,
"grad_norm": 0.0,
"learning_rate": 8.983546617915906e-06,
"loss": 0.0,
"step": 9580
},
{
"epoch": 3.506398537477148,
"grad_norm": 0.0,
"learning_rate": 8.961608775137112e-06,
"loss": 0.0,
"step": 9590
},
{
"epoch": 3.510054844606947,
"grad_norm": 0.0,
"learning_rate": 8.939670932358318e-06,
"loss": 0.0,
"step": 9600
},
{
"epoch": 3.5137111517367456,
"grad_norm": 0.0,
"learning_rate": 8.917733089579526e-06,
"loss": 0.0,
"step": 9610
},
{
"epoch": 3.517367458866545,
"grad_norm": 0.0,
"learning_rate": 8.89579524680073e-06,
"loss": 0.0,
"step": 9620
},
{
"epoch": 3.5210237659963437,
"grad_norm": 0.0,
"learning_rate": 8.873857404021938e-06,
"loss": 0.0,
"step": 9630
},
{
"epoch": 3.5246800731261425,
"grad_norm": 0.0,
"learning_rate": 8.851919561243144e-06,
"loss": 0.0,
"step": 9640
},
{
"epoch": 3.5283363802559418,
"grad_norm": 0.0,
"learning_rate": 8.829981718464352e-06,
"loss": 0.0,
"step": 9650
},
{
"epoch": 3.53199268738574,
"grad_norm": 0.0,
"learning_rate": 8.808043875685558e-06,
"loss": 0.0,
"step": 9660
},
{
"epoch": 3.5356489945155394,
"grad_norm": 0.0,
"learning_rate": 8.786106032906764e-06,
"loss": 0.0,
"step": 9670
},
{
"epoch": 3.539305301645338,
"grad_norm": 0.0,
"learning_rate": 8.76416819012797e-06,
"loss": 0.0,
"step": 9680
},
{
"epoch": 3.542961608775137,
"grad_norm": 0.0,
"learning_rate": 8.742230347349178e-06,
"loss": 0.0,
"step": 9690
},
{
"epoch": 3.5466179159049362,
"grad_norm": 0.0,
"learning_rate": 8.720292504570384e-06,
"loss": 0.0,
"step": 9700
},
{
"epoch": 3.550274223034735,
"grad_norm": 0.0,
"learning_rate": 8.69835466179159e-06,
"loss": 0.0,
"step": 9710
},
{
"epoch": 3.553930530164534,
"grad_norm": 0.0,
"learning_rate": 8.676416819012797e-06,
"loss": 0.0,
"step": 9720
},
{
"epoch": 3.5575868372943327,
"grad_norm": 0.0,
"learning_rate": 8.654478976234003e-06,
"loss": 0.0,
"step": 9730
},
{
"epoch": 3.5612431444241315,
"grad_norm": 0.0,
"learning_rate": 8.63254113345521e-06,
"loss": 0.0,
"step": 9740
},
{
"epoch": 3.5648994515539307,
"grad_norm": 0.0,
"learning_rate": 8.610603290676417e-06,
"loss": 0.0,
"step": 9750
},
{
"epoch": 3.5685557586837295,
"grad_norm": 0.0,
"learning_rate": 8.588665447897625e-06,
"loss": 0.0,
"step": 9760
},
{
"epoch": 3.5722120658135283,
"grad_norm": 0.0,
"learning_rate": 8.566727605118829e-06,
"loss": 0.0,
"step": 9770
},
{
"epoch": 3.575868372943327,
"grad_norm": 0.0,
"learning_rate": 8.544789762340037e-06,
"loss": 0.0,
"step": 9780
},
{
"epoch": 3.579524680073126,
"grad_norm": 0.0,
"learning_rate": 8.522851919561243e-06,
"loss": 0.0,
"step": 9790
},
{
"epoch": 3.583180987202925,
"grad_norm": 0.0,
"learning_rate": 8.500914076782451e-06,
"loss": 0.0,
"step": 9800
},
{
"epoch": 3.586837294332724,
"grad_norm": 0.0,
"learning_rate": 8.478976234003657e-06,
"loss": 0.0,
"step": 9810
},
{
"epoch": 3.590493601462523,
"grad_norm": 0.0,
"learning_rate": 8.457038391224863e-06,
"loss": 0.0,
"step": 9820
},
{
"epoch": 3.5941499085923216,
"grad_norm": 0.0,
"learning_rate": 8.43510054844607e-06,
"loss": 0.0,
"step": 9830
},
{
"epoch": 3.5978062157221204,
"grad_norm": 0.0,
"learning_rate": 8.413162705667276e-06,
"loss": 0.0,
"step": 9840
},
{
"epoch": 3.6014625228519197,
"grad_norm": 0.0,
"learning_rate": 8.391224862888483e-06,
"loss": 0.0,
"step": 9850
},
{
"epoch": 3.6051188299817185,
"grad_norm": 0.0,
"learning_rate": 8.36928702010969e-06,
"loss": 0.0,
"step": 9860
},
{
"epoch": 3.6087751371115173,
"grad_norm": 0.0,
"learning_rate": 8.347349177330896e-06,
"loss": 0.0,
"step": 9870
},
{
"epoch": 3.612431444241316,
"grad_norm": 0.0,
"learning_rate": 8.325411334552102e-06,
"loss": 0.0,
"step": 9880
},
{
"epoch": 3.616087751371115,
"grad_norm": 0.0,
"learning_rate": 8.30347349177331e-06,
"loss": 0.0,
"step": 9890
},
{
"epoch": 3.619744058500914,
"grad_norm": 0.0,
"learning_rate": 8.281535648994516e-06,
"loss": 0.0,
"step": 9900
},
{
"epoch": 3.623400365630713,
"grad_norm": 0.0,
"learning_rate": 8.259597806215724e-06,
"loss": 0.0,
"step": 9910
},
{
"epoch": 3.627056672760512,
"grad_norm": 0.0,
"learning_rate": 8.237659963436928e-06,
"loss": 0.0,
"step": 9920
},
{
"epoch": 3.630712979890311,
"grad_norm": 0.0,
"learning_rate": 8.215722120658136e-06,
"loss": 0.0,
"step": 9930
},
{
"epoch": 3.6343692870201094,
"grad_norm": 0.0,
"learning_rate": 8.193784277879342e-06,
"loss": 0.0,
"step": 9940
},
{
"epoch": 3.6380255941499087,
"grad_norm": 0.0,
"learning_rate": 8.171846435100548e-06,
"loss": 0.0,
"step": 9950
},
{
"epoch": 3.6416819012797075,
"grad_norm": 0.0,
"learning_rate": 8.149908592321756e-06,
"loss": 0.0,
"step": 9960
},
{
"epoch": 3.6453382084095063,
"grad_norm": 0.0,
"learning_rate": 8.12797074954296e-06,
"loss": 0.0,
"step": 9970
},
{
"epoch": 3.6489945155393055,
"grad_norm": 0.0,
"learning_rate": 8.106032906764168e-06,
"loss": 0.0,
"step": 9980
},
{
"epoch": 3.6526508226691043,
"grad_norm": 0.0,
"learning_rate": 8.084095063985374e-06,
"loss": 0.0,
"step": 9990
},
{
"epoch": 3.656307129798903,
"grad_norm": 0.0,
"learning_rate": 8.062157221206582e-06,
"loss": 0.0,
"step": 10000
},
{
"epoch": 3.659963436928702,
"grad_norm": 0.0,
"learning_rate": 8.040219378427788e-06,
"loss": 0.0,
"step": 10010
},
{
"epoch": 3.6636197440585008,
"grad_norm": 0.0,
"learning_rate": 8.018281535648995e-06,
"loss": 0.0,
"step": 10020
},
{
"epoch": 3.6672760511883,
"grad_norm": 0.0,
"learning_rate": 7.9963436928702e-06,
"loss": 0.0,
"step": 10030
},
{
"epoch": 3.670932358318099,
"grad_norm": 0.0,
"learning_rate": 7.974405850091408e-06,
"loss": 0.0,
"step": 10040
},
{
"epoch": 3.6745886654478976,
"grad_norm": 0.0,
"learning_rate": 7.952468007312615e-06,
"loss": 0.0,
"step": 10050
},
{
"epoch": 3.6782449725776964,
"grad_norm": 0.0,
"learning_rate": 7.930530164533822e-06,
"loss": 0.0,
"step": 10060
},
{
"epoch": 3.6819012797074953,
"grad_norm": 0.0,
"learning_rate": 7.908592321755027e-06,
"loss": 0.0,
"step": 10070
},
{
"epoch": 3.6855575868372945,
"grad_norm": 0.0,
"learning_rate": 7.886654478976233e-06,
"loss": 0.0,
"step": 10080
},
{
"epoch": 3.6892138939670933,
"grad_norm": 0.0,
"learning_rate": 7.864716636197441e-06,
"loss": 0.0,
"step": 10090
},
{
"epoch": 3.692870201096892,
"grad_norm": 0.0,
"learning_rate": 7.842778793418647e-06,
"loss": 0.0,
"step": 10100
},
{
"epoch": 3.696526508226691,
"grad_norm": 0.0,
"learning_rate": 7.820840950639855e-06,
"loss": 0.0,
"step": 10110
},
{
"epoch": 3.7001828153564897,
"grad_norm": 0.0,
"learning_rate": 7.79890310786106e-06,
"loss": 0.0,
"step": 10120
},
{
"epoch": 3.703839122486289,
"grad_norm": 0.0,
"learning_rate": 7.776965265082267e-06,
"loss": 0.0,
"step": 10130
},
{
"epoch": 3.707495429616088,
"grad_norm": 0.0,
"learning_rate": 7.755027422303473e-06,
"loss": 0.0,
"step": 10140
},
{
"epoch": 3.7111517367458866,
"grad_norm": 0.0,
"learning_rate": 7.733089579524681e-06,
"loss": 0.0,
"step": 10150
},
{
"epoch": 3.7148080438756854,
"grad_norm": 0.0,
"learning_rate": 7.711151736745887e-06,
"loss": 0.0,
"step": 10160
},
{
"epoch": 3.7184643510054842,
"grad_norm": 0.0,
"learning_rate": 7.689213893967093e-06,
"loss": 0.0,
"step": 10170
},
{
"epoch": 3.7221206581352835,
"grad_norm": 0.0,
"learning_rate": 7.6672760511883e-06,
"loss": 0.0,
"step": 10180
},
{
"epoch": 3.7257769652650823,
"grad_norm": 0.0,
"learning_rate": 7.645338208409506e-06,
"loss": 0.0,
"step": 10190
},
{
"epoch": 3.729433272394881,
"grad_norm": 0.0,
"learning_rate": 7.6234003656307135e-06,
"loss": 0.0,
"step": 10200
},
{
"epoch": 3.7330895795246803,
"grad_norm": 0.0,
"learning_rate": 7.601462522851919e-06,
"loss": 0.0,
"step": 10210
},
{
"epoch": 3.7367458866544787,
"grad_norm": 0.0,
"learning_rate": 7.579524680073127e-06,
"loss": 0.0,
"step": 10220
},
{
"epoch": 3.740402193784278,
"grad_norm": 0.0,
"learning_rate": 7.557586837294333e-06,
"loss": 0.0,
"step": 10230
},
{
"epoch": 3.7440585009140768,
"grad_norm": 0.0,
"learning_rate": 7.53564899451554e-06,
"loss": 0.0,
"step": 10240
},
{
"epoch": 3.7477148080438756,
"grad_norm": 0.0,
"learning_rate": 7.513711151736746e-06,
"loss": 0.0,
"step": 10250
},
{
"epoch": 3.751371115173675,
"grad_norm": 0.0,
"learning_rate": 7.491773308957952e-06,
"loss": 0.0,
"step": 10260
},
{
"epoch": 3.7550274223034736,
"grad_norm": 0.0,
"learning_rate": 7.469835466179159e-06,
"loss": 0.0,
"step": 10270
},
{
"epoch": 3.7586837294332724,
"grad_norm": 0.0,
"learning_rate": 7.447897623400366e-06,
"loss": 0.0,
"step": 10280
},
{
"epoch": 3.7623400365630713,
"grad_norm": 0.0,
"learning_rate": 7.425959780621572e-06,
"loss": 0.0,
"step": 10290
},
{
"epoch": 3.76599634369287,
"grad_norm": 0.0,
"learning_rate": 7.404021937842779e-06,
"loss": 0.0,
"step": 10300
},
{
"epoch": 3.7696526508226693,
"grad_norm": 0.0,
"learning_rate": 7.382084095063985e-06,
"loss": 0.0,
"step": 10310
},
{
"epoch": 3.773308957952468,
"grad_norm": 0.0,
"learning_rate": 7.360146252285192e-06,
"loss": 0.0,
"step": 10320
},
{
"epoch": 3.776965265082267,
"grad_norm": 0.0,
"learning_rate": 7.338208409506399e-06,
"loss": 0.0,
"step": 10330
},
{
"epoch": 3.7806215722120657,
"grad_norm": 0.0,
"learning_rate": 7.3162705667276054e-06,
"loss": 0.0,
"step": 10340
},
{
"epoch": 3.7842778793418645,
"grad_norm": 0.0,
"learning_rate": 7.294332723948812e-06,
"loss": 0.0,
"step": 10350
},
{
"epoch": 3.787934186471664,
"grad_norm": 0.0,
"learning_rate": 7.272394881170018e-06,
"loss": 0.0,
"step": 10360
},
{
"epoch": 3.7915904936014626,
"grad_norm": 0.0,
"learning_rate": 7.250457038391225e-06,
"loss": 0.0,
"step": 10370
},
{
"epoch": 3.7952468007312614,
"grad_norm": 0.0,
"learning_rate": 7.228519195612432e-06,
"loss": 0.0,
"step": 10380
},
{
"epoch": 3.7989031078610602,
"grad_norm": 0.0,
"learning_rate": 7.206581352833638e-06,
"loss": 0.0,
"step": 10390
},
{
"epoch": 3.802559414990859,
"grad_norm": 0.0,
"learning_rate": 7.184643510054845e-06,
"loss": 0.0,
"step": 10400
},
{
"epoch": 3.8062157221206583,
"grad_norm": 0.0,
"learning_rate": 7.162705667276051e-06,
"loss": 0.0,
"step": 10410
},
{
"epoch": 3.809872029250457,
"grad_norm": 0.0,
"learning_rate": 7.140767824497258e-06,
"loss": 0.0,
"step": 10420
},
{
"epoch": 3.813528336380256,
"grad_norm": 0.0,
"learning_rate": 7.118829981718465e-06,
"loss": 0.0,
"step": 10430
},
{
"epoch": 3.8171846435100547,
"grad_norm": 0.0,
"learning_rate": 7.096892138939671e-06,
"loss": 0.0,
"step": 10440
},
{
"epoch": 3.8208409506398535,
"grad_norm": 0.0,
"learning_rate": 7.074954296160878e-06,
"loss": 0.0,
"step": 10450
},
{
"epoch": 3.8244972577696528,
"grad_norm": 0.0,
"learning_rate": 7.053016453382084e-06,
"loss": 0.0,
"step": 10460
},
{
"epoch": 3.8281535648994516,
"grad_norm": 0.0,
"learning_rate": 7.031078610603291e-06,
"loss": 0.0,
"step": 10470
},
{
"epoch": 3.8318098720292504,
"grad_norm": 0.0,
"learning_rate": 7.009140767824497e-06,
"loss": 0.0,
"step": 10480
},
{
"epoch": 3.835466179159049,
"grad_norm": 0.0,
"learning_rate": 6.9872029250457035e-06,
"loss": 0.0,
"step": 10490
},
{
"epoch": 3.839122486288848,
"grad_norm": 0.0,
"learning_rate": 6.9652650822669105e-06,
"loss": 0.0,
"step": 10500
},
{
"epoch": 3.8427787934186473,
"grad_norm": 0.0,
"learning_rate": 6.943327239488117e-06,
"loss": 0.0,
"step": 10510
},
{
"epoch": 3.846435100548446,
"grad_norm": 0.0,
"learning_rate": 6.921389396709324e-06,
"loss": 0.0,
"step": 10520
},
{
"epoch": 3.850091407678245,
"grad_norm": 0.0,
"learning_rate": 6.899451553930531e-06,
"loss": 0.0,
"step": 10530
},
{
"epoch": 3.853747714808044,
"grad_norm": 0.0,
"learning_rate": 6.877513711151737e-06,
"loss": 0.0,
"step": 10540
},
{
"epoch": 3.857404021937843,
"grad_norm": 0.0,
"learning_rate": 6.855575868372944e-06,
"loss": 0.0,
"step": 10550
},
{
"epoch": 3.8610603290676417,
"grad_norm": 0.0,
"learning_rate": 6.83363802559415e-06,
"loss": 0.0,
"step": 10560
},
{
"epoch": 3.8647166361974405,
"grad_norm": 0.0,
"learning_rate": 6.811700182815357e-06,
"loss": 0.0,
"step": 10570
},
{
"epoch": 3.8683729433272394,
"grad_norm": 0.0,
"learning_rate": 6.789762340036564e-06,
"loss": 0.0,
"step": 10580
},
{
"epoch": 3.8720292504570386,
"grad_norm": 0.0,
"learning_rate": 6.76782449725777e-06,
"loss": 0.0,
"step": 10590
},
{
"epoch": 3.8756855575868374,
"grad_norm": 0.0,
"learning_rate": 6.745886654478976e-06,
"loss": 0.0,
"step": 10600
},
{
"epoch": 3.8793418647166362,
"grad_norm": 0.0,
"learning_rate": 6.723948811700182e-06,
"loss": 0.0,
"step": 10610
},
{
"epoch": 3.882998171846435,
"grad_norm": 0.0,
"learning_rate": 6.702010968921389e-06,
"loss": 0.0,
"step": 10620
},
{
"epoch": 3.886654478976234,
"grad_norm": 0.0,
"learning_rate": 6.680073126142596e-06,
"loss": 0.0,
"step": 10630
},
{
"epoch": 3.890310786106033,
"grad_norm": 0.0,
"learning_rate": 6.658135283363802e-06,
"loss": 0.0,
"step": 10640
},
{
"epoch": 3.893967093235832,
"grad_norm": 0.0,
"learning_rate": 6.636197440585009e-06,
"loss": 0.0,
"step": 10650
},
{
"epoch": 3.8976234003656307,
"grad_norm": 0.0,
"learning_rate": 6.6142595978062155e-06,
"loss": 0.0,
"step": 10660
},
{
"epoch": 3.9012797074954295,
"grad_norm": 0.0,
"learning_rate": 6.5923217550274225e-06,
"loss": 0.0,
"step": 10670
},
{
"epoch": 3.9049360146252283,
"grad_norm": 0.0,
"learning_rate": 6.5703839122486295e-06,
"loss": 0.0,
"step": 10680
},
{
"epoch": 3.9085923217550276,
"grad_norm": 0.0,
"learning_rate": 6.548446069469836e-06,
"loss": 0.0,
"step": 10690
},
{
"epoch": 3.9122486288848264,
"grad_norm": 0.0,
"learning_rate": 6.526508226691043e-06,
"loss": 0.0,
"step": 10700
},
{
"epoch": 3.915904936014625,
"grad_norm": 0.0,
"learning_rate": 6.50457038391225e-06,
"loss": 0.0,
"step": 10710
},
{
"epoch": 3.919561243144424,
"grad_norm": 0.0,
"learning_rate": 6.482632541133455e-06,
"loss": 0.0,
"step": 10720
},
{
"epoch": 3.923217550274223,
"grad_norm": 0.0,
"learning_rate": 6.460694698354662e-06,
"loss": 0.0,
"step": 10730
},
{
"epoch": 3.926873857404022,
"grad_norm": 0.0,
"learning_rate": 6.438756855575868e-06,
"loss": 0.0,
"step": 10740
},
{
"epoch": 3.930530164533821,
"grad_norm": 0.0,
"learning_rate": 6.416819012797075e-06,
"loss": 0.0,
"step": 10750
},
{
"epoch": 3.9341864716636197,
"grad_norm": 0.0,
"learning_rate": 6.394881170018282e-06,
"loss": 0.0,
"step": 10760
},
{
"epoch": 3.9378427787934185,
"grad_norm": 0.0,
"learning_rate": 6.372943327239488e-06,
"loss": 0.0,
"step": 10770
},
{
"epoch": 3.9414990859232173,
"grad_norm": 0.0,
"learning_rate": 6.351005484460695e-06,
"loss": 0.0,
"step": 10780
},
{
"epoch": 3.9451553930530165,
"grad_norm": 0.0,
"learning_rate": 6.329067641681901e-06,
"loss": 0.0,
"step": 10790
},
{
"epoch": 3.9488117001828154,
"grad_norm": 0.0,
"learning_rate": 6.307129798903108e-06,
"loss": 0.0,
"step": 10800
},
{
"epoch": 3.952468007312614,
"grad_norm": 0.0,
"learning_rate": 6.285191956124315e-06,
"loss": 0.0,
"step": 10810
},
{
"epoch": 3.9561243144424134,
"grad_norm": 0.0,
"learning_rate": 6.263254113345521e-06,
"loss": 0.0,
"step": 10820
},
{
"epoch": 3.9597806215722122,
"grad_norm": 0.0,
"learning_rate": 6.241316270566728e-06,
"loss": 0.0,
"step": 10830
},
{
"epoch": 3.963436928702011,
"grad_norm": 0.0,
"learning_rate": 6.219378427787934e-06,
"loss": 0.0,
"step": 10840
},
{
"epoch": 3.96709323583181,
"grad_norm": 0.0,
"learning_rate": 6.197440585009141e-06,
"loss": 0.0,
"step": 10850
},
{
"epoch": 3.9707495429616086,
"grad_norm": 0.0,
"learning_rate": 6.175502742230348e-06,
"loss": 0.0,
"step": 10860
},
{
"epoch": 3.974405850091408,
"grad_norm": 0.0,
"learning_rate": 6.153564899451554e-06,
"loss": 0.0,
"step": 10870
},
{
"epoch": 3.9780621572212067,
"grad_norm": 0.0,
"learning_rate": 6.131627056672761e-06,
"loss": 0.0,
"step": 10880
},
{
"epoch": 3.9817184643510055,
"grad_norm": 0.0,
"learning_rate": 6.109689213893967e-06,
"loss": 0.0,
"step": 10890
},
{
"epoch": 3.9853747714808043,
"grad_norm": 0.0,
"learning_rate": 6.087751371115174e-06,
"loss": 0.0,
"step": 10900
},
{
"epoch": 3.989031078610603,
"grad_norm": 0.0,
"learning_rate": 6.065813528336381e-06,
"loss": 0.0,
"step": 10910
},
{
"epoch": 3.9926873857404024,
"grad_norm": 0.0,
"learning_rate": 6.043875685557587e-06,
"loss": 0.0,
"step": 10920
},
{
"epoch": 3.996343692870201,
"grad_norm": 0.0,
"learning_rate": 6.021937842778794e-06,
"loss": 0.0,
"step": 10930
},
{
"epoch": 4.0,
"grad_norm": 0.0,
"learning_rate": 6e-06,
"loss": 0.0,
"step": 10940
},
{
"epoch": 4.003656307129799,
"grad_norm": 0.0,
"learning_rate": 5.978062157221207e-06,
"loss": 0.0,
"step": 10950
},
{
"epoch": 4.007312614259598,
"grad_norm": 0.0,
"learning_rate": 5.956124314442413e-06,
"loss": 0.0,
"step": 10960
},
{
"epoch": 4.010968921389397,
"grad_norm": 0.0,
"learning_rate": 5.934186471663619e-06,
"loss": 0.0,
"step": 10970
},
{
"epoch": 4.014625228519195,
"grad_norm": 0.0,
"learning_rate": 5.912248628884826e-06,
"loss": 0.0,
"step": 10980
},
{
"epoch": 4.0182815356489945,
"grad_norm": 0.0,
"learning_rate": 5.8903107861060326e-06,
"loss": 0.0,
"step": 10990
},
{
"epoch": 4.021937842778794,
"grad_norm": 0.0,
"learning_rate": 5.8683729433272395e-06,
"loss": 0.0,
"step": 11000
},
{
"epoch": 4.025594149908592,
"grad_norm": 0.0,
"learning_rate": 5.8464351005484465e-06,
"loss": 0.0,
"step": 11010
},
{
"epoch": 4.029250457038391,
"grad_norm": 0.0,
"learning_rate": 5.824497257769653e-06,
"loss": 0.0,
"step": 11020
},
{
"epoch": 4.03290676416819,
"grad_norm": 0.0,
"learning_rate": 5.80255941499086e-06,
"loss": 0.0,
"step": 11030
},
{
"epoch": 4.036563071297989,
"grad_norm": 0.0,
"learning_rate": 5.780621572212066e-06,
"loss": 0.0,
"step": 11040
},
{
"epoch": 4.040219378427788,
"grad_norm": 0.0,
"learning_rate": 5.758683729433273e-06,
"loss": 0.0,
"step": 11050
},
{
"epoch": 4.043875685557587,
"grad_norm": 0.0,
"learning_rate": 5.73674588665448e-06,
"loss": 0.0,
"step": 11060
},
{
"epoch": 4.047531992687386,
"grad_norm": 0.0,
"learning_rate": 5.714808043875686e-06,
"loss": 0.0,
"step": 11070
},
{
"epoch": 4.051188299817184,
"grad_norm": 0.0,
"learning_rate": 5.692870201096892e-06,
"loss": 0.0,
"step": 11080
},
{
"epoch": 4.0548446069469835,
"grad_norm": 0.0,
"learning_rate": 5.670932358318098e-06,
"loss": 0.0,
"step": 11090
},
{
"epoch": 4.058500914076783,
"grad_norm": 0.0,
"learning_rate": 5.648994515539305e-06,
"loss": 0.0,
"step": 11100
},
{
"epoch": 4.062157221206581,
"grad_norm": 0.0,
"learning_rate": 5.627056672760512e-06,
"loss": 0.0,
"step": 11110
},
{
"epoch": 4.06581352833638,
"grad_norm": 0.0,
"learning_rate": 5.605118829981718e-06,
"loss": 0.0,
"step": 11120
},
{
"epoch": 4.06946983546618,
"grad_norm": 0.0,
"learning_rate": 5.583180987202925e-06,
"loss": 0.0,
"step": 11130
},
{
"epoch": 4.073126142595978,
"grad_norm": 0.0,
"learning_rate": 5.5612431444241314e-06,
"loss": 0.0,
"step": 11140
},
{
"epoch": 4.076782449725777,
"grad_norm": 0.0,
"learning_rate": 5.5393053016453384e-06,
"loss": 0.0,
"step": 11150
},
{
"epoch": 4.0804387568555756,
"grad_norm": 0.0,
"learning_rate": 5.517367458866545e-06,
"loss": 0.0,
"step": 11160
},
{
"epoch": 4.084095063985375,
"grad_norm": 0.0,
"learning_rate": 5.4954296160877516e-06,
"loss": 0.0,
"step": 11170
},
{
"epoch": 4.087751371115174,
"grad_norm": 0.0,
"learning_rate": 5.4734917733089585e-06,
"loss": 0.0,
"step": 11180
},
{
"epoch": 4.091407678244972,
"grad_norm": 0.0,
"learning_rate": 5.451553930530164e-06,
"loss": 0.0,
"step": 11190
},
{
"epoch": 4.095063985374772,
"grad_norm": 0.0,
"learning_rate": 5.429616087751371e-06,
"loss": 0.0,
"step": 11200
},
{
"epoch": 4.09872029250457,
"grad_norm": 0.0,
"learning_rate": 5.407678244972578e-06,
"loss": 0.0,
"step": 11210
},
{
"epoch": 4.102376599634369,
"grad_norm": 0.0,
"learning_rate": 5.385740402193784e-06,
"loss": 0.0,
"step": 11220
},
{
"epoch": 4.1060329067641685,
"grad_norm": 0.0,
"learning_rate": 5.363802559414991e-06,
"loss": 0.0,
"step": 11230
},
{
"epoch": 4.109689213893967,
"grad_norm": 0.0,
"learning_rate": 5.341864716636198e-06,
"loss": 0.0,
"step": 11240
},
{
"epoch": 4.113345521023766,
"grad_norm": 0.0,
"learning_rate": 5.319926873857404e-06,
"loss": 0.0,
"step": 11250
},
{
"epoch": 4.1170018281535645,
"grad_norm": 0.0,
"learning_rate": 5.297989031078611e-06,
"loss": 0.0,
"step": 11260
},
{
"epoch": 4.120658135283364,
"grad_norm": 0.0,
"learning_rate": 5.276051188299817e-06,
"loss": 0.0,
"step": 11270
},
{
"epoch": 4.124314442413163,
"grad_norm": 0.0,
"learning_rate": 5.254113345521024e-06,
"loss": 0.0,
"step": 11280
},
{
"epoch": 4.127970749542961,
"grad_norm": 0.0,
"learning_rate": 5.232175502742231e-06,
"loss": 0.0,
"step": 11290
},
{
"epoch": 4.131627056672761,
"grad_norm": 0.0,
"learning_rate": 5.210237659963437e-06,
"loss": 0.0,
"step": 11300
},
{
"epoch": 4.135283363802559,
"grad_norm": 0.0,
"learning_rate": 5.1882998171846435e-06,
"loss": 0.0,
"step": 11310
},
{
"epoch": 4.138939670932358,
"grad_norm": 0.0,
"learning_rate": 5.16636197440585e-06,
"loss": 0.0,
"step": 11320
},
{
"epoch": 4.1425959780621575,
"grad_norm": 0.0,
"learning_rate": 5.144424131627057e-06,
"loss": 0.0,
"step": 11330
},
{
"epoch": 4.146252285191956,
"grad_norm": 0.0,
"learning_rate": 5.122486288848264e-06,
"loss": 0.0,
"step": 11340
},
{
"epoch": 4.149908592321755,
"grad_norm": 0.0,
"learning_rate": 5.10054844606947e-06,
"loss": 0.0,
"step": 11350
},
{
"epoch": 4.153564899451554,
"grad_norm": 0.0,
"learning_rate": 5.078610603290677e-06,
"loss": 0.0,
"step": 11360
},
{
"epoch": 4.157221206581353,
"grad_norm": 0.0,
"learning_rate": 5.056672760511883e-06,
"loss": 0.0,
"step": 11370
},
{
"epoch": 4.160877513711152,
"grad_norm": 0.0,
"learning_rate": 5.03473491773309e-06,
"loss": 0.0,
"step": 11380
},
{
"epoch": 4.16453382084095,
"grad_norm": 0.0,
"learning_rate": 5.012797074954297e-06,
"loss": 0.0,
"step": 11390
},
{
"epoch": 4.16819012797075,
"grad_norm": 0.0,
"learning_rate": 4.990859232175503e-06,
"loss": 0.0,
"step": 11400
},
{
"epoch": 4.171846435100549,
"grad_norm": 0.0,
"learning_rate": 4.96892138939671e-06,
"loss": 0.0,
"step": 11410
},
{
"epoch": 4.175502742230347,
"grad_norm": 0.0,
"learning_rate": 4.946983546617916e-06,
"loss": 0.0,
"step": 11420
},
{
"epoch": 4.1791590493601465,
"grad_norm": 0.0,
"learning_rate": 4.925045703839122e-06,
"loss": 0.0,
"step": 11430
},
{
"epoch": 4.182815356489945,
"grad_norm": 0.0,
"learning_rate": 4.903107861060329e-06,
"loss": 0.0,
"step": 11440
},
{
"epoch": 4.186471663619744,
"grad_norm": 0.0,
"learning_rate": 4.881170018281535e-06,
"loss": 0.0,
"step": 11450
},
{
"epoch": 4.190127970749543,
"grad_norm": 0.0,
"learning_rate": 4.859232175502742e-06,
"loss": 0.0,
"step": 11460
},
{
"epoch": 4.193784277879342,
"grad_norm": 0.0,
"learning_rate": 4.8372943327239485e-06,
"loss": 0.0,
"step": 11470
},
{
"epoch": 4.197440585009141,
"grad_norm": 0.0,
"learning_rate": 4.8153564899451555e-06,
"loss": 0.0,
"step": 11480
},
{
"epoch": 4.201096892138939,
"grad_norm": 0.0,
"learning_rate": 4.7934186471663625e-06,
"loss": 0.0,
"step": 11490
},
{
"epoch": 4.204753199268739,
"grad_norm": 0.0,
"learning_rate": 4.771480804387569e-06,
"loss": 0.0,
"step": 11500
},
{
"epoch": 4.208409506398538,
"grad_norm": 0.0,
"learning_rate": 4.749542961608776e-06,
"loss": 0.0,
"step": 11510
},
{
"epoch": 4.212065813528336,
"grad_norm": 0.0,
"learning_rate": 4.727605118829982e-06,
"loss": 0.0,
"step": 11520
},
{
"epoch": 4.2157221206581355,
"grad_norm": 0.0,
"learning_rate": 4.705667276051189e-06,
"loss": 0.0,
"step": 11530
},
{
"epoch": 4.219378427787934,
"grad_norm": 0.0,
"learning_rate": 4.683729433272396e-06,
"loss": 0.0,
"step": 11540
},
{
"epoch": 4.223034734917733,
"grad_norm": 0.0,
"learning_rate": 4.661791590493601e-06,
"loss": 0.0,
"step": 11550
},
{
"epoch": 4.226691042047532,
"grad_norm": 0.0,
"learning_rate": 4.639853747714808e-06,
"loss": 0.0,
"step": 11560
},
{
"epoch": 4.230347349177331,
"grad_norm": 0.0,
"learning_rate": 4.617915904936014e-06,
"loss": 0.0,
"step": 11570
},
{
"epoch": 4.23400365630713,
"grad_norm": 0.0,
"learning_rate": 4.595978062157221e-06,
"loss": 0.0,
"step": 11580
},
{
"epoch": 4.237659963436928,
"grad_norm": 0.0,
"learning_rate": 4.574040219378428e-06,
"loss": 0.0,
"step": 11590
},
{
"epoch": 4.2413162705667276,
"grad_norm": 0.0,
"learning_rate": 4.552102376599634e-06,
"loss": 0.0,
"step": 11600
},
{
"epoch": 4.244972577696527,
"grad_norm": 0.0,
"learning_rate": 4.530164533820841e-06,
"loss": 0.0,
"step": 11610
},
{
"epoch": 4.248628884826325,
"grad_norm": 0.0,
"learning_rate": 4.508226691042047e-06,
"loss": 0.0,
"step": 11620
},
{
"epoch": 4.252285191956124,
"grad_norm": 0.0,
"learning_rate": 4.486288848263254e-06,
"loss": 0.0,
"step": 11630
},
{
"epoch": 4.255941499085923,
"grad_norm": 0.0,
"learning_rate": 4.464351005484461e-06,
"loss": 0.0,
"step": 11640
},
{
"epoch": 4.259597806215722,
"grad_norm": 0.0,
"learning_rate": 4.4424131627056675e-06,
"loss": 0.0,
"step": 11650
},
{
"epoch": 4.263254113345521,
"grad_norm": 0.0,
"learning_rate": 4.4204753199268745e-06,
"loss": 0.0,
"step": 11660
},
{
"epoch": 4.26691042047532,
"grad_norm": 0.0,
"learning_rate": 4.39853747714808e-06,
"loss": 0.0,
"step": 11670
},
{
"epoch": 4.270566727605119,
"grad_norm": 0.0,
"learning_rate": 4.376599634369287e-06,
"loss": 0.0,
"step": 11680
},
{
"epoch": 4.274223034734918,
"grad_norm": 0.0,
"learning_rate": 4.354661791590494e-06,
"loss": 0.0,
"step": 11690
},
{
"epoch": 4.2778793418647165,
"grad_norm": 0.0,
"learning_rate": 4.3327239488117e-06,
"loss": 0.0,
"step": 11700
},
{
"epoch": 4.281535648994516,
"grad_norm": 0.0,
"learning_rate": 4.310786106032907e-06,
"loss": 0.0,
"step": 11710
},
{
"epoch": 4.285191956124314,
"grad_norm": 0.0,
"learning_rate": 4.288848263254113e-06,
"loss": 0.0,
"step": 11720
},
{
"epoch": 4.288848263254113,
"grad_norm": 0.0,
"learning_rate": 4.26691042047532e-06,
"loss": 0.0,
"step": 11730
},
{
"epoch": 4.292504570383913,
"grad_norm": 0.0,
"learning_rate": 4.244972577696527e-06,
"loss": 0.0,
"step": 11740
},
{
"epoch": 4.296160877513711,
"grad_norm": 0.0,
"learning_rate": 4.223034734917733e-06,
"loss": 0.0,
"step": 11750
},
{
"epoch": 4.29981718464351,
"grad_norm": 0.0,
"learning_rate": 4.20109689213894e-06,
"loss": 0.0,
"step": 11760
},
{
"epoch": 4.303473491773309,
"grad_norm": 0.0,
"learning_rate": 4.179159049360146e-06,
"loss": 0.0,
"step": 11770
},
{
"epoch": 4.307129798903108,
"grad_norm": 0.0,
"learning_rate": 4.157221206581353e-06,
"loss": 0.0,
"step": 11780
},
{
"epoch": 4.310786106032907,
"grad_norm": 0.0,
"learning_rate": 4.135283363802559e-06,
"loss": 0.0,
"step": 11790
},
{
"epoch": 4.3144424131627055,
"grad_norm": 0.0,
"learning_rate": 4.1133455210237655e-06,
"loss": 0.0,
"step": 11800
},
{
"epoch": 4.318098720292505,
"grad_norm": 0.0,
"learning_rate": 4.0914076782449725e-06,
"loss": 0.0,
"step": 11810
},
{
"epoch": 4.321755027422303,
"grad_norm": 0.0,
"learning_rate": 4.0694698354661795e-06,
"loss": 0.0,
"step": 11820
},
{
"epoch": 4.325411334552102,
"grad_norm": 0.0,
"learning_rate": 4.047531992687386e-06,
"loss": 0.0,
"step": 11830
},
{
"epoch": 4.329067641681902,
"grad_norm": 0.0,
"learning_rate": 4.025594149908593e-06,
"loss": 0.0,
"step": 11840
},
{
"epoch": 4.3327239488117,
"grad_norm": 0.0,
"learning_rate": 4.003656307129799e-06,
"loss": 0.0,
"step": 11850
},
{
"epoch": 4.336380255941499,
"grad_norm": 0.0,
"learning_rate": 3.981718464351006e-06,
"loss": 0.0,
"step": 11860
},
{
"epoch": 4.340036563071298,
"grad_norm": 0.0,
"learning_rate": 3.959780621572213e-06,
"loss": 0.0,
"step": 11870
},
{
"epoch": 4.343692870201097,
"grad_norm": 0.0,
"learning_rate": 3.937842778793419e-06,
"loss": 0.0,
"step": 11880
},
{
"epoch": 4.347349177330896,
"grad_norm": 0.0,
"learning_rate": 3.915904936014626e-06,
"loss": 0.0,
"step": 11890
},
{
"epoch": 4.3510054844606945,
"grad_norm": 0.0,
"learning_rate": 3.893967093235831e-06,
"loss": 0.0,
"step": 11900
},
{
"epoch": 4.354661791590494,
"grad_norm": 0.0,
"learning_rate": 3.872029250457038e-06,
"loss": 0.0,
"step": 11910
},
{
"epoch": 4.358318098720293,
"grad_norm": 0.0,
"learning_rate": 3.850091407678245e-06,
"loss": 0.0,
"step": 11920
},
{
"epoch": 4.361974405850091,
"grad_norm": 0.0,
"learning_rate": 3.828153564899451e-06,
"loss": 0.0,
"step": 11930
},
{
"epoch": 4.365630712979891,
"grad_norm": 0.0,
"learning_rate": 3.8062157221206583e-06,
"loss": 0.0,
"step": 11940
},
{
"epoch": 4.369287020109689,
"grad_norm": 0.0,
"learning_rate": 3.784277879341865e-06,
"loss": 0.0,
"step": 11950
},
{
"epoch": 4.372943327239488,
"grad_norm": 0.0,
"learning_rate": 3.7623400365630714e-06,
"loss": 0.0,
"step": 11960
},
{
"epoch": 4.376599634369287,
"grad_norm": 0.0,
"learning_rate": 3.740402193784278e-06,
"loss": 0.0,
"step": 11970
},
{
"epoch": 4.380255941499086,
"grad_norm": 0.0,
"learning_rate": 3.7184643510054846e-06,
"loss": 0.0,
"step": 11980
},
{
"epoch": 4.383912248628885,
"grad_norm": 0.0,
"learning_rate": 3.696526508226691e-06,
"loss": 0.0,
"step": 11990
},
{
"epoch": 4.387568555758683,
"grad_norm": 0.0,
"learning_rate": 3.6745886654478977e-06,
"loss": 0.0,
"step": 12000
}
],
"logging_steps": 10,
"max_steps": 13675,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}