{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.387568555758683, "eval_steps": 500, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003656307129798903, "grad_norm": 0.0, "learning_rate": 2.9978062157221207e-05, "loss": 0.0015, "step": 10 }, { "epoch": 0.007312614259597806, "grad_norm": 0.0, "learning_rate": 2.9956124314442413e-05, "loss": 0.0, "step": 20 }, { "epoch": 0.010968921389396709, "grad_norm": 0.0, "learning_rate": 2.9934186471663623e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.014625228519195612, "grad_norm": 0.0, "learning_rate": 2.991224862888483e-05, "loss": 0.0001, "step": 40 }, { "epoch": 0.018281535648994516, "grad_norm": 0.0, "learning_rate": 2.9890310786106035e-05, "loss": 0.0, "step": 50 }, { "epoch": 0.021937842778793418, "grad_norm": 0.0, "learning_rate": 2.9868372943327238e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.025594149908592323, "grad_norm": 0.0, "learning_rate": 2.9846435100548447e-05, "loss": 0.0004, "step": 70 }, { "epoch": 0.029250457038391225, "grad_norm": 0.0, "learning_rate": 2.9824497257769653e-05, "loss": 0.0, "step": 80 }, { "epoch": 0.03290676416819013, "grad_norm": 0.0, "learning_rate": 2.980255941499086e-05, "loss": 0.0007, "step": 90 }, { "epoch": 0.03656307129798903, "grad_norm": 0.0, "learning_rate": 2.9780621572212066e-05, "loss": 0.0042, "step": 100 }, { "epoch": 0.04021937842778794, "grad_norm": 0.0, "learning_rate": 2.9758683729433275e-05, "loss": 0.0, "step": 110 }, { "epoch": 0.043875685557586835, "grad_norm": 1.104537010192871, "learning_rate": 2.973674588665448e-05, "loss": 0.0002, "step": 120 }, { "epoch": 0.04753199268738574, "grad_norm": 0.0, "learning_rate": 2.9714808043875687e-05, "loss": 0.0, "step": 130 }, { "epoch": 0.051188299817184646, "grad_norm": 0.0, "learning_rate": 2.9692870201096894e-05, "loss": 0.0, "step": 140 }, { "epoch": 0.054844606946983544, "grad_norm": 0.0, "learning_rate": 2.96709323583181e-05, "loss": 0.0, "step": 150 }, { "epoch": 0.05850091407678245, "grad_norm": 0.0, "learning_rate": 2.9648994515539306e-05, "loss": 0.0, "step": 160 }, { "epoch": 0.062157221206581355, "grad_norm": 0.0, "learning_rate": 2.9627056672760512e-05, "loss": 0.0, "step": 170 }, { "epoch": 0.06581352833638025, "grad_norm": 0.0, "learning_rate": 2.9605118829981718e-05, "loss": 0.0, "step": 180 }, { "epoch": 0.06946983546617916, "grad_norm": 0.0, "learning_rate": 2.9583180987202924e-05, "loss": 0.0, "step": 190 }, { "epoch": 0.07312614259597806, "grad_norm": 0.0, "learning_rate": 2.9561243144424134e-05, "loss": 0.0, "step": 200 }, { "epoch": 0.07678244972577697, "grad_norm": 0.0, "learning_rate": 2.953930530164534e-05, "loss": 0.0, "step": 210 }, { "epoch": 0.08043875685557587, "grad_norm": 0.0, "learning_rate": 2.9517367458866546e-05, "loss": 0.0, "step": 220 }, { "epoch": 0.08409506398537477, "grad_norm": 0.0, "learning_rate": 2.9495429616087752e-05, "loss": 0.0, "step": 230 }, { "epoch": 0.08775137111517367, "grad_norm": 0.0, "learning_rate": 2.947349177330896e-05, "loss": 0.0, "step": 240 }, { "epoch": 0.09140767824497258, "grad_norm": 0.0, "learning_rate": 2.9451553930530168e-05, "loss": 0.0099, "step": 250 }, { "epoch": 0.09506398537477148, "grad_norm": 0.0, "learning_rate": 2.942961608775137e-05, "loss": 0.0, "step": 260 }, { "epoch": 0.09872029250457039, "grad_norm": 0.0, "learning_rate": 2.9407678244972577e-05, "loss": 0.0, "step": 270 }, { "epoch": 0.10237659963436929, "grad_norm": 0.0, "learning_rate": 2.9385740402193783e-05, "loss": 0.0, "step": 280 }, { "epoch": 0.10603290676416818, "grad_norm": 0.0, "learning_rate": 2.9363802559414992e-05, "loss": 0.0, "step": 290 }, { "epoch": 0.10968921389396709, "grad_norm": 0.0, "learning_rate": 2.93418647166362e-05, "loss": 0.0, "step": 300 }, { "epoch": 0.113345521023766, "grad_norm": 0.0, "learning_rate": 2.9319926873857405e-05, "loss": 0.0, "step": 310 }, { "epoch": 0.1170018281535649, "grad_norm": 0.0, "learning_rate": 2.929798903107861e-05, "loss": 0.0001, "step": 320 }, { "epoch": 0.1206581352833638, "grad_norm": 0.0, "learning_rate": 2.927605118829982e-05, "loss": 0.0, "step": 330 }, { "epoch": 0.12431444241316271, "grad_norm": 0.0, "learning_rate": 2.9254113345521026e-05, "loss": 0.0, "step": 340 }, { "epoch": 0.12797074954296161, "grad_norm": 0.0, "learning_rate": 2.9232175502742233e-05, "loss": 0.0, "step": 350 }, { "epoch": 0.1316270566727605, "grad_norm": 0.0, "learning_rate": 2.9210237659963435e-05, "loss": 0.0, "step": 360 }, { "epoch": 0.13528336380255943, "grad_norm": 0.0, "learning_rate": 2.918829981718464e-05, "loss": 0.0, "step": 370 }, { "epoch": 0.13893967093235832, "grad_norm": 0.0, "learning_rate": 2.916636197440585e-05, "loss": 0.0, "step": 380 }, { "epoch": 0.1425959780621572, "grad_norm": 0.0, "learning_rate": 2.9144424131627057e-05, "loss": 0.0, "step": 390 }, { "epoch": 0.14625228519195613, "grad_norm": 0.0, "learning_rate": 2.9122486288848263e-05, "loss": 0.0, "step": 400 }, { "epoch": 0.14990859232175502, "grad_norm": 0.0, "learning_rate": 2.910054844606947e-05, "loss": 0.0001, "step": 410 }, { "epoch": 0.15356489945155394, "grad_norm": 0.0, "learning_rate": 2.907861060329068e-05, "loss": 0.0, "step": 420 }, { "epoch": 0.15722120658135283, "grad_norm": 0.0, "learning_rate": 2.9056672760511885e-05, "loss": 0.0, "step": 430 }, { "epoch": 0.16087751371115175, "grad_norm": 0.0, "learning_rate": 2.903473491773309e-05, "loss": 0.0, "step": 440 }, { "epoch": 0.16453382084095064, "grad_norm": 0.0, "learning_rate": 2.9012797074954297e-05, "loss": 0.0, "step": 450 }, { "epoch": 0.16819012797074953, "grad_norm": 0.0, "learning_rate": 2.8990859232175504e-05, "loss": 0.0, "step": 460 }, { "epoch": 0.17184643510054845, "grad_norm": 0.0, "learning_rate": 2.896892138939671e-05, "loss": 0.0, "step": 470 }, { "epoch": 0.17550274223034734, "grad_norm": 0.0, "learning_rate": 2.8946983546617916e-05, "loss": 0.0, "step": 480 }, { "epoch": 0.17915904936014626, "grad_norm": 0.0, "learning_rate": 2.8925045703839122e-05, "loss": 0.0, "step": 490 }, { "epoch": 0.18281535648994515, "grad_norm": 0.0, "learning_rate": 2.8903107861060328e-05, "loss": 0.0, "step": 500 }, { "epoch": 0.18647166361974407, "grad_norm": 0.0, "learning_rate": 2.8881170018281538e-05, "loss": 0.0, "step": 510 }, { "epoch": 0.19012797074954296, "grad_norm": 0.0, "learning_rate": 2.8859232175502744e-05, "loss": 0.0, "step": 520 }, { "epoch": 0.19378427787934185, "grad_norm": 0.0, "learning_rate": 2.883729433272395e-05, "loss": 0.0, "step": 530 }, { "epoch": 0.19744058500914077, "grad_norm": 0.0, "learning_rate": 2.8815356489945156e-05, "loss": 0.0, "step": 540 }, { "epoch": 0.20109689213893966, "grad_norm": 0.0, "learning_rate": 2.8793418647166366e-05, "loss": 0.0, "step": 550 }, { "epoch": 0.20475319926873858, "grad_norm": 0.0, "learning_rate": 2.877148080438757e-05, "loss": 0.0, "step": 560 }, { "epoch": 0.20840950639853748, "grad_norm": 0.0, "learning_rate": 2.8749542961608775e-05, "loss": 0.0, "step": 570 }, { "epoch": 0.21206581352833637, "grad_norm": 0.0, "learning_rate": 2.872760511882998e-05, "loss": 0.0, "step": 580 }, { "epoch": 0.21572212065813529, "grad_norm": 0.0, "learning_rate": 2.870566727605119e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.21937842778793418, "grad_norm": 0.0, "learning_rate": 2.8683729433272396e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.2230347349177331, "grad_norm": 0.0, "learning_rate": 2.8661791590493602e-05, "loss": 0.0, "step": 610 }, { "epoch": 0.226691042047532, "grad_norm": 0.0, "learning_rate": 2.863985374771481e-05, "loss": 0.0, "step": 620 }, { "epoch": 0.2303473491773309, "grad_norm": 0.0, "learning_rate": 2.8617915904936015e-05, "loss": 0.0, "step": 630 }, { "epoch": 0.2340036563071298, "grad_norm": 0.0, "learning_rate": 2.8595978062157224e-05, "loss": 0.0, "step": 640 }, { "epoch": 0.2376599634369287, "grad_norm": 0.0, "learning_rate": 2.857404021937843e-05, "loss": 0.0002, "step": 650 }, { "epoch": 0.2413162705667276, "grad_norm": 0.0, "learning_rate": 2.8552102376599633e-05, "loss": 0.0, "step": 660 }, { "epoch": 0.2449725776965265, "grad_norm": 0.0, "learning_rate": 2.853016453382084e-05, "loss": 0.0, "step": 670 }, { "epoch": 0.24862888482632542, "grad_norm": 0.0, "learning_rate": 2.850822669104205e-05, "loss": 0.0, "step": 680 }, { "epoch": 0.2522851919561243, "grad_norm": 0.0, "learning_rate": 2.8486288848263255e-05, "loss": 0.0, "step": 690 }, { "epoch": 0.25594149908592323, "grad_norm": 0.0, "learning_rate": 2.846435100548446e-05, "loss": 0.0, "step": 700 }, { "epoch": 0.2595978062157221, "grad_norm": 0.0, "learning_rate": 2.8442413162705667e-05, "loss": 0.0005, "step": 710 }, { "epoch": 0.263254113345521, "grad_norm": 0.0, "learning_rate": 2.8420475319926873e-05, "loss": 0.0, "step": 720 }, { "epoch": 0.26691042047531993, "grad_norm": 0.0, "learning_rate": 2.8398537477148083e-05, "loss": 0.0, "step": 730 }, { "epoch": 0.27056672760511885, "grad_norm": 0.0, "learning_rate": 2.837659963436929e-05, "loss": 0.0073, "step": 740 }, { "epoch": 0.2742230347349177, "grad_norm": 0.0, "learning_rate": 2.8354661791590495e-05, "loss": 0.0, "step": 750 }, { "epoch": 0.27787934186471663, "grad_norm": 0.0, "learning_rate": 2.8332723948811698e-05, "loss": 0.0, "step": 760 }, { "epoch": 0.28153564899451555, "grad_norm": 0.0, "learning_rate": 2.8310786106032907e-05, "loss": 0.0, "step": 770 }, { "epoch": 0.2851919561243144, "grad_norm": 0.01952667348086834, "learning_rate": 2.8288848263254114e-05, "loss": 0.0094, "step": 780 }, { "epoch": 0.28884826325411334, "grad_norm": 1.364105224609375, "learning_rate": 2.826691042047532e-05, "loss": 0.0086, "step": 790 }, { "epoch": 0.29250457038391225, "grad_norm": 0.0, "learning_rate": 2.8244972577696526e-05, "loss": 0.0, "step": 800 }, { "epoch": 0.2961608775137112, "grad_norm": 0.0, "learning_rate": 2.8223034734917735e-05, "loss": 0.0, "step": 810 }, { "epoch": 0.29981718464351004, "grad_norm": 0.0, "learning_rate": 2.820109689213894e-05, "loss": 0.0, "step": 820 }, { "epoch": 0.30347349177330896, "grad_norm": 0.0, "learning_rate": 2.8179159049360148e-05, "loss": 0.0, "step": 830 }, { "epoch": 0.3071297989031079, "grad_norm": 0.0, "learning_rate": 2.8157221206581354e-05, "loss": 0.0002, "step": 840 }, { "epoch": 0.31078610603290674, "grad_norm": 0.0, "learning_rate": 2.813528336380256e-05, "loss": 0.0, "step": 850 }, { "epoch": 0.31444241316270566, "grad_norm": 0.0, "learning_rate": 2.8113345521023766e-05, "loss": 0.0, "step": 860 }, { "epoch": 0.3180987202925046, "grad_norm": 0.0, "learning_rate": 2.8091407678244972e-05, "loss": 0.0, "step": 870 }, { "epoch": 0.3217550274223035, "grad_norm": 0.0, "learning_rate": 2.806946983546618e-05, "loss": 0.0, "step": 880 }, { "epoch": 0.32541133455210236, "grad_norm": 0.0, "learning_rate": 2.8047531992687385e-05, "loss": 0.0, "step": 890 }, { "epoch": 0.3290676416819013, "grad_norm": 0.0, "learning_rate": 2.8025594149908594e-05, "loss": 0.0, "step": 900 }, { "epoch": 0.3327239488117002, "grad_norm": 0.0, "learning_rate": 2.80036563071298e-05, "loss": 0.0, "step": 910 }, { "epoch": 0.33638025594149906, "grad_norm": 0.0, "learning_rate": 2.7981718464351006e-05, "loss": 0.0, "step": 920 }, { "epoch": 0.340036563071298, "grad_norm": 0.0, "learning_rate": 2.7959780621572213e-05, "loss": 0.0, "step": 930 }, { "epoch": 0.3436928702010969, "grad_norm": 0.0, "learning_rate": 2.7937842778793422e-05, "loss": 0.0, "step": 940 }, { "epoch": 0.3473491773308958, "grad_norm": 0.0, "learning_rate": 2.7915904936014628e-05, "loss": 0.0, "step": 950 }, { "epoch": 0.3510054844606947, "grad_norm": 0.0, "learning_rate": 2.789396709323583e-05, "loss": 0.0, "step": 960 }, { "epoch": 0.3546617915904936, "grad_norm": 0.0, "learning_rate": 2.7872029250457037e-05, "loss": 0.0, "step": 970 }, { "epoch": 0.3583180987202925, "grad_norm": 0.0, "learning_rate": 2.7850091407678243e-05, "loss": 0.0, "step": 980 }, { "epoch": 0.3619744058500914, "grad_norm": 0.0, "learning_rate": 2.7828153564899453e-05, "loss": 0.0, "step": 990 }, { "epoch": 0.3656307129798903, "grad_norm": 0.0, "learning_rate": 2.780621572212066e-05, "loss": 0.0, "step": 1000 }, { "epoch": 0.3692870201096892, "grad_norm": 0.0, "learning_rate": 2.7784277879341865e-05, "loss": 0.0, "step": 1010 }, { "epoch": 0.37294332723948814, "grad_norm": 0.0, "learning_rate": 2.776234003656307e-05, "loss": 0.0, "step": 1020 }, { "epoch": 0.376599634369287, "grad_norm": 0.0, "learning_rate": 2.774040219378428e-05, "loss": 0.0, "step": 1030 }, { "epoch": 0.3802559414990859, "grad_norm": 0.0, "learning_rate": 2.7718464351005487e-05, "loss": 0.0, "step": 1040 }, { "epoch": 0.38391224862888484, "grad_norm": 0.0, "learning_rate": 2.7696526508226693e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.3875685557586837, "grad_norm": 0.0, "learning_rate": 2.76745886654479e-05, "loss": 0.0, "step": 1060 }, { "epoch": 0.3912248628884826, "grad_norm": 0.0, "learning_rate": 2.7652650822669102e-05, "loss": 0.0, "step": 1070 }, { "epoch": 0.39488117001828155, "grad_norm": 0.0, "learning_rate": 2.763071297989031e-05, "loss": 0.0, "step": 1080 }, { "epoch": 0.39853747714808047, "grad_norm": 0.0, "learning_rate": 2.7608775137111518e-05, "loss": 0.0, "step": 1090 }, { "epoch": 0.40219378427787933, "grad_norm": 0.0, "learning_rate": 2.7586837294332724e-05, "loss": 0.0, "step": 1100 }, { "epoch": 0.40585009140767825, "grad_norm": 0.0, "learning_rate": 2.756489945155393e-05, "loss": 0.0, "step": 1110 }, { "epoch": 0.40950639853747717, "grad_norm": 0.0, "learning_rate": 2.754296160877514e-05, "loss": 0.0038, "step": 1120 }, { "epoch": 0.41316270566727603, "grad_norm": 2.969595432281494, "learning_rate": 2.7521023765996346e-05, "loss": 0.1412, "step": 1130 }, { "epoch": 0.41681901279707495, "grad_norm": 0.8748220205307007, "learning_rate": 2.749908592321755e-05, "loss": 0.0621, "step": 1140 }, { "epoch": 0.42047531992687387, "grad_norm": 0.1589801162481308, "learning_rate": 2.7477148080438758e-05, "loss": 0.0083, "step": 1150 }, { "epoch": 0.42413162705667273, "grad_norm": 0.0, "learning_rate": 2.7455210237659964e-05, "loss": 0.0007, "step": 1160 }, { "epoch": 0.42778793418647165, "grad_norm": 0.0, "learning_rate": 2.743327239488117e-05, "loss": 0.0, "step": 1170 }, { "epoch": 0.43144424131627057, "grad_norm": 0.0, "learning_rate": 2.7411334552102376e-05, "loss": 0.0001, "step": 1180 }, { "epoch": 0.4351005484460695, "grad_norm": 0.0, "learning_rate": 2.7389396709323582e-05, "loss": 0.0001, "step": 1190 }, { "epoch": 0.43875685557586835, "grad_norm": 0.0, "learning_rate": 2.736745886654479e-05, "loss": 0.0, "step": 1200 }, { "epoch": 0.4424131627056673, "grad_norm": 0.0, "learning_rate": 2.7345521023765998e-05, "loss": 0.0, "step": 1210 }, { "epoch": 0.4460694698354662, "grad_norm": 0.0, "learning_rate": 2.7323583180987204e-05, "loss": 0.0, "step": 1220 }, { "epoch": 0.44972577696526506, "grad_norm": 0.0, "learning_rate": 2.730164533820841e-05, "loss": 0.0, "step": 1230 }, { "epoch": 0.453382084095064, "grad_norm": 0.0, "learning_rate": 2.7279707495429616e-05, "loss": 0.0, "step": 1240 }, { "epoch": 0.4570383912248629, "grad_norm": 0.0, "learning_rate": 2.7257769652650826e-05, "loss": 0.0, "step": 1250 }, { "epoch": 0.4606946983546618, "grad_norm": 0.0, "learning_rate": 2.7235831809872032e-05, "loss": 0.0, "step": 1260 }, { "epoch": 0.4643510054844607, "grad_norm": 0.0, "learning_rate": 2.7213893967093235e-05, "loss": 0.0, "step": 1270 }, { "epoch": 0.4680073126142596, "grad_norm": 0.0, "learning_rate": 2.719195612431444e-05, "loss": 0.0, "step": 1280 }, { "epoch": 0.4716636197440585, "grad_norm": 0.0, "learning_rate": 2.717001828153565e-05, "loss": 0.0, "step": 1290 }, { "epoch": 0.4753199268738574, "grad_norm": 0.0, "learning_rate": 2.7148080438756857e-05, "loss": 0.0, "step": 1300 }, { "epoch": 0.4789762340036563, "grad_norm": 0.0, "learning_rate": 2.7126142595978063e-05, "loss": 0.0, "step": 1310 }, { "epoch": 0.4826325411334552, "grad_norm": 0.0, "learning_rate": 2.710420475319927e-05, "loss": 0.0, "step": 1320 }, { "epoch": 0.48628884826325414, "grad_norm": 0.0, "learning_rate": 2.7082266910420475e-05, "loss": 0.0, "step": 1330 }, { "epoch": 0.489945155393053, "grad_norm": 0.0, "learning_rate": 2.7060329067641685e-05, "loss": 0.0, "step": 1340 }, { "epoch": 0.4936014625228519, "grad_norm": 0.0, "learning_rate": 2.703839122486289e-05, "loss": 0.0, "step": 1350 }, { "epoch": 0.49725776965265084, "grad_norm": 0.0, "learning_rate": 2.7016453382084097e-05, "loss": 0.0, "step": 1360 }, { "epoch": 0.5009140767824497, "grad_norm": 0.0, "learning_rate": 2.69945155393053e-05, "loss": 0.0, "step": 1370 }, { "epoch": 0.5045703839122486, "grad_norm": 0.0, "learning_rate": 2.697257769652651e-05, "loss": 0.0, "step": 1380 }, { "epoch": 0.5082266910420475, "grad_norm": 0.0, "learning_rate": 2.6950639853747715e-05, "loss": 0.0, "step": 1390 }, { "epoch": 0.5118829981718465, "grad_norm": 0.0, "learning_rate": 2.692870201096892e-05, "loss": 0.0, "step": 1400 }, { "epoch": 0.5155393053016454, "grad_norm": 0.0, "learning_rate": 2.6906764168190128e-05, "loss": 0.0, "step": 1410 }, { "epoch": 0.5191956124314442, "grad_norm": 0.0, "learning_rate": 2.6884826325411337e-05, "loss": 0.0, "step": 1420 }, { "epoch": 0.5228519195612431, "grad_norm": 0.0, "learning_rate": 2.6862888482632543e-05, "loss": 0.0, "step": 1430 }, { "epoch": 0.526508226691042, "grad_norm": 0.0, "learning_rate": 2.684095063985375e-05, "loss": 0.0, "step": 1440 }, { "epoch": 0.5301645338208409, "grad_norm": 0.0, "learning_rate": 2.6819012797074956e-05, "loss": 0.0, "step": 1450 }, { "epoch": 0.5338208409506399, "grad_norm": 0.0, "learning_rate": 2.6797074954296162e-05, "loss": 0.0, "step": 1460 }, { "epoch": 0.5374771480804388, "grad_norm": 0.0, "learning_rate": 2.6775137111517368e-05, "loss": 0.0, "step": 1470 }, { "epoch": 0.5411334552102377, "grad_norm": 0.0, "learning_rate": 2.6753199268738574e-05, "loss": 0.0, "step": 1480 }, { "epoch": 0.5447897623400365, "grad_norm": 0.0, "learning_rate": 2.673126142595978e-05, "loss": 0.0, "step": 1490 }, { "epoch": 0.5484460694698354, "grad_norm": 0.0, "learning_rate": 2.6709323583180986e-05, "loss": 0.0, "step": 1500 }, { "epoch": 0.5521023765996343, "grad_norm": 0.0, "learning_rate": 2.6687385740402196e-05, "loss": 0.0, "step": 1510 }, { "epoch": 0.5557586837294333, "grad_norm": 0.0, "learning_rate": 2.6665447897623402e-05, "loss": 0.0, "step": 1520 }, { "epoch": 0.5594149908592322, "grad_norm": 0.0, "learning_rate": 2.6643510054844608e-05, "loss": 0.0, "step": 1530 }, { "epoch": 0.5630712979890311, "grad_norm": 0.0, "learning_rate": 2.6621572212065814e-05, "loss": 0.0, "step": 1540 }, { "epoch": 0.56672760511883, "grad_norm": 0.0, "learning_rate": 2.659963436928702e-05, "loss": 0.0, "step": 1550 }, { "epoch": 0.5703839122486288, "grad_norm": 0.0, "learning_rate": 2.657769652650823e-05, "loss": 0.0, "step": 1560 }, { "epoch": 0.5740402193784278, "grad_norm": 0.0, "learning_rate": 2.6555758683729433e-05, "loss": 0.0, "step": 1570 }, { "epoch": 0.5776965265082267, "grad_norm": 0.0, "learning_rate": 2.653382084095064e-05, "loss": 0.0, "step": 1580 }, { "epoch": 0.5813528336380256, "grad_norm": 0.0, "learning_rate": 2.6511882998171845e-05, "loss": 0.0, "step": 1590 }, { "epoch": 0.5850091407678245, "grad_norm": 0.0, "learning_rate": 2.6489945155393054e-05, "loss": 0.0, "step": 1600 }, { "epoch": 0.5886654478976234, "grad_norm": 0.0, "learning_rate": 2.646800731261426e-05, "loss": 0.0, "step": 1610 }, { "epoch": 0.5923217550274223, "grad_norm": 0.0, "learning_rate": 2.6446069469835467e-05, "loss": 0.0, "step": 1620 }, { "epoch": 0.5959780621572212, "grad_norm": 0.0, "learning_rate": 2.6424131627056673e-05, "loss": 0.0, "step": 1630 }, { "epoch": 0.5996343692870201, "grad_norm": 0.0, "learning_rate": 2.6402193784277882e-05, "loss": 0.0, "step": 1640 }, { "epoch": 0.603290676416819, "grad_norm": 0.0, "learning_rate": 2.638025594149909e-05, "loss": 0.0, "step": 1650 }, { "epoch": 0.6069469835466179, "grad_norm": 0.0, "learning_rate": 2.6358318098720295e-05, "loss": 0.0, "step": 1660 }, { "epoch": 0.6106032906764168, "grad_norm": 0.0, "learning_rate": 2.6336380255941497e-05, "loss": 0.0, "step": 1670 }, { "epoch": 0.6142595978062158, "grad_norm": 0.0, "learning_rate": 2.6314442413162704e-05, "loss": 0.0, "step": 1680 }, { "epoch": 0.6179159049360147, "grad_norm": 0.0, "learning_rate": 2.6292504570383913e-05, "loss": 0.0, "step": 1690 }, { "epoch": 0.6215722120658135, "grad_norm": 0.0, "learning_rate": 2.627056672760512e-05, "loss": 0.0, "step": 1700 }, { "epoch": 0.6252285191956124, "grad_norm": 0.0, "learning_rate": 2.6248628884826325e-05, "loss": 0.0, "step": 1710 }, { "epoch": 0.6288848263254113, "grad_norm": 0.0, "learning_rate": 2.622669104204753e-05, "loss": 0.0, "step": 1720 }, { "epoch": 0.6325411334552102, "grad_norm": 0.0, "learning_rate": 2.620475319926874e-05, "loss": 0.0, "step": 1730 }, { "epoch": 0.6361974405850092, "grad_norm": 0.0, "learning_rate": 2.6182815356489947e-05, "loss": 0.0, "step": 1740 }, { "epoch": 0.6398537477148081, "grad_norm": 0.0, "learning_rate": 2.6160877513711153e-05, "loss": 0.0, "step": 1750 }, { "epoch": 0.643510054844607, "grad_norm": 0.0, "learning_rate": 2.613893967093236e-05, "loss": 0.0, "step": 1760 }, { "epoch": 0.6471663619744058, "grad_norm": 0.0, "learning_rate": 2.6117001828153566e-05, "loss": 0.0, "step": 1770 }, { "epoch": 0.6508226691042047, "grad_norm": 0.0, "learning_rate": 2.6095063985374772e-05, "loss": 0.0, "step": 1780 }, { "epoch": 0.6544789762340036, "grad_norm": 0.0, "learning_rate": 2.6073126142595978e-05, "loss": 0.0, "step": 1790 }, { "epoch": 0.6581352833638026, "grad_norm": 0.0, "learning_rate": 2.6051188299817184e-05, "loss": 0.0, "step": 1800 }, { "epoch": 0.6617915904936015, "grad_norm": 0.0, "learning_rate": 2.602925045703839e-05, "loss": 0.0, "step": 1810 }, { "epoch": 0.6654478976234004, "grad_norm": 0.0, "learning_rate": 2.60073126142596e-05, "loss": 0.0, "step": 1820 }, { "epoch": 0.6691042047531993, "grad_norm": 0.0, "learning_rate": 2.5985374771480806e-05, "loss": 0.0, "step": 1830 }, { "epoch": 0.6727605118829981, "grad_norm": 0.0, "learning_rate": 2.5963436928702012e-05, "loss": 0.0, "step": 1840 }, { "epoch": 0.676416819012797, "grad_norm": 0.0, "learning_rate": 2.5941499085923218e-05, "loss": 0.0, "step": 1850 }, { "epoch": 0.680073126142596, "grad_norm": 0.0, "learning_rate": 2.5919561243144428e-05, "loss": 0.0, "step": 1860 }, { "epoch": 0.6837294332723949, "grad_norm": 0.0, "learning_rate": 2.589762340036563e-05, "loss": 0.0, "step": 1870 }, { "epoch": 0.6873857404021938, "grad_norm": 0.0, "learning_rate": 2.5875685557586837e-05, "loss": 0.0, "step": 1880 }, { "epoch": 0.6910420475319927, "grad_norm": 0.0, "learning_rate": 2.5853747714808043e-05, "loss": 0.0, "step": 1890 }, { "epoch": 0.6946983546617916, "grad_norm": 0.0, "learning_rate": 2.583180987202925e-05, "loss": 0.0, "step": 1900 }, { "epoch": 0.6983546617915904, "grad_norm": 0.0, "learning_rate": 2.580987202925046e-05, "loss": 0.0, "step": 1910 }, { "epoch": 0.7020109689213894, "grad_norm": 0.0, "learning_rate": 2.5787934186471665e-05, "loss": 0.0, "step": 1920 }, { "epoch": 0.7056672760511883, "grad_norm": 0.0, "learning_rate": 2.576599634369287e-05, "loss": 0.0, "step": 1930 }, { "epoch": 0.7093235831809872, "grad_norm": 0.0, "learning_rate": 2.5744058500914077e-05, "loss": 0.0, "step": 1940 }, { "epoch": 0.7129798903107861, "grad_norm": 0.0, "learning_rate": 2.5722120658135286e-05, "loss": 0.0, "step": 1950 }, { "epoch": 0.716636197440585, "grad_norm": 0.0, "learning_rate": 2.5700182815356492e-05, "loss": 0.0, "step": 1960 }, { "epoch": 0.720292504570384, "grad_norm": 0.0, "learning_rate": 2.5678244972577695e-05, "loss": 0.0, "step": 1970 }, { "epoch": 0.7239488117001828, "grad_norm": 0.0, "learning_rate": 2.56563071297989e-05, "loss": 0.0, "step": 1980 }, { "epoch": 0.7276051188299817, "grad_norm": 0.0, "learning_rate": 2.563436928702011e-05, "loss": 0.0, "step": 1990 }, { "epoch": 0.7312614259597806, "grad_norm": 0.0, "learning_rate": 2.5612431444241317e-05, "loss": 0.0, "step": 2000 }, { "epoch": 0.7349177330895795, "grad_norm": 0.0, "learning_rate": 2.5590493601462523e-05, "loss": 0.0, "step": 2010 }, { "epoch": 0.7385740402193784, "grad_norm": 0.0, "learning_rate": 2.556855575868373e-05, "loss": 0.0, "step": 2020 }, { "epoch": 0.7422303473491774, "grad_norm": 0.0, "learning_rate": 2.5546617915904935e-05, "loss": 0.0, "step": 2030 }, { "epoch": 0.7458866544789763, "grad_norm": 0.0, "learning_rate": 2.5524680073126145e-05, "loss": 0.0, "step": 2040 }, { "epoch": 0.7495429616087751, "grad_norm": 0.0, "learning_rate": 2.550274223034735e-05, "loss": 0.0, "step": 2050 }, { "epoch": 0.753199268738574, "grad_norm": 0.0, "learning_rate": 2.5480804387568557e-05, "loss": 0.0, "step": 2060 }, { "epoch": 0.7568555758683729, "grad_norm": 0.0, "learning_rate": 2.545886654478976e-05, "loss": 0.0, "step": 2070 }, { "epoch": 0.7605118829981719, "grad_norm": 0.0, "learning_rate": 2.543692870201097e-05, "loss": 0.0, "step": 2080 }, { "epoch": 0.7641681901279708, "grad_norm": 0.0, "learning_rate": 2.5414990859232176e-05, "loss": 0.0, "step": 2090 }, { "epoch": 0.7678244972577697, "grad_norm": 0.0, "learning_rate": 2.5393053016453382e-05, "loss": 0.0, "step": 2100 }, { "epoch": 0.7714808043875686, "grad_norm": 0.0, "learning_rate": 2.5371115173674588e-05, "loss": 0.0, "step": 2110 }, { "epoch": 0.7751371115173674, "grad_norm": 0.0, "learning_rate": 2.5349177330895798e-05, "loss": 0.0, "step": 2120 }, { "epoch": 0.7787934186471663, "grad_norm": 0.0, "learning_rate": 2.5327239488117004e-05, "loss": 0.0, "step": 2130 }, { "epoch": 0.7824497257769653, "grad_norm": 0.0, "learning_rate": 2.530530164533821e-05, "loss": 0.0, "step": 2140 }, { "epoch": 0.7861060329067642, "grad_norm": 0.0, "learning_rate": 2.5283363802559416e-05, "loss": 0.0, "step": 2150 }, { "epoch": 0.7897623400365631, "grad_norm": 0.0, "learning_rate": 2.5261425959780622e-05, "loss": 0.0, "step": 2160 }, { "epoch": 0.793418647166362, "grad_norm": 0.0, "learning_rate": 2.5239488117001828e-05, "loss": 0.0, "step": 2170 }, { "epoch": 0.7970749542961609, "grad_norm": 0.0, "learning_rate": 2.5217550274223034e-05, "loss": 0.0, "step": 2180 }, { "epoch": 0.8007312614259597, "grad_norm": 0.0, "learning_rate": 2.519561243144424e-05, "loss": 0.0, "step": 2190 }, { "epoch": 0.8043875685557587, "grad_norm": 0.0, "learning_rate": 2.5173674588665447e-05, "loss": 0.0, "step": 2200 }, { "epoch": 0.8080438756855576, "grad_norm": 0.0, "learning_rate": 2.5151736745886656e-05, "loss": 0.0, "step": 2210 }, { "epoch": 0.8117001828153565, "grad_norm": 0.0, "learning_rate": 2.5129798903107862e-05, "loss": 0.0, "step": 2220 }, { "epoch": 0.8153564899451554, "grad_norm": 0.0, "learning_rate": 2.510786106032907e-05, "loss": 0.0, "step": 2230 }, { "epoch": 0.8190127970749543, "grad_norm": 0.0, "learning_rate": 2.5085923217550275e-05, "loss": 0.0, "step": 2240 }, { "epoch": 0.8226691042047533, "grad_norm": 0.0, "learning_rate": 2.506398537477148e-05, "loss": 0.0, "step": 2250 }, { "epoch": 0.8263254113345521, "grad_norm": 0.0, "learning_rate": 2.504204753199269e-05, "loss": 0.0, "step": 2260 }, { "epoch": 0.829981718464351, "grad_norm": 0.0, "learning_rate": 2.5020109689213896e-05, "loss": 0.0, "step": 2270 }, { "epoch": 0.8336380255941499, "grad_norm": 0.0, "learning_rate": 2.49981718464351e-05, "loss": 0.0, "step": 2280 }, { "epoch": 0.8372943327239488, "grad_norm": 0.0, "learning_rate": 2.4976234003656305e-05, "loss": 0.0, "step": 2290 }, { "epoch": 0.8409506398537477, "grad_norm": 0.0, "learning_rate": 2.4954296160877515e-05, "loss": 0.0, "step": 2300 }, { "epoch": 0.8446069469835467, "grad_norm": 0.0, "learning_rate": 2.493235831809872e-05, "loss": 0.0, "step": 2310 }, { "epoch": 0.8482632541133455, "grad_norm": 0.0, "learning_rate": 2.4910420475319927e-05, "loss": 0.0, "step": 2320 }, { "epoch": 0.8519195612431444, "grad_norm": 0.0, "learning_rate": 2.4888482632541133e-05, "loss": 0.0, "step": 2330 }, { "epoch": 0.8555758683729433, "grad_norm": 0.0, "learning_rate": 2.4866544789762343e-05, "loss": 0.0, "step": 2340 }, { "epoch": 0.8592321755027422, "grad_norm": 0.0, "learning_rate": 2.484460694698355e-05, "loss": 0.0, "step": 2350 }, { "epoch": 0.8628884826325411, "grad_norm": 0.0, "learning_rate": 2.4822669104204755e-05, "loss": 0.0, "step": 2360 }, { "epoch": 0.8665447897623401, "grad_norm": 0.0, "learning_rate": 2.480073126142596e-05, "loss": 0.0, "step": 2370 }, { "epoch": 0.870201096892139, "grad_norm": 0.0, "learning_rate": 2.4778793418647164e-05, "loss": 0.0, "step": 2380 }, { "epoch": 0.8738574040219378, "grad_norm": 0.0, "learning_rate": 2.4756855575868373e-05, "loss": 0.0, "step": 2390 }, { "epoch": 0.8775137111517367, "grad_norm": 0.0, "learning_rate": 2.473491773308958e-05, "loss": 0.0, "step": 2400 }, { "epoch": 0.8811700182815356, "grad_norm": 0.0, "learning_rate": 2.4712979890310786e-05, "loss": 0.0, "step": 2410 }, { "epoch": 0.8848263254113345, "grad_norm": 0.0, "learning_rate": 2.4691042047531992e-05, "loss": 0.0, "step": 2420 }, { "epoch": 0.8884826325411335, "grad_norm": 0.0, "learning_rate": 2.46691042047532e-05, "loss": 0.0, "step": 2430 }, { "epoch": 0.8921389396709324, "grad_norm": 0.0, "learning_rate": 2.4647166361974408e-05, "loss": 0.0, "step": 2440 }, { "epoch": 0.8957952468007313, "grad_norm": 0.0, "learning_rate": 2.4625228519195614e-05, "loss": 0.0, "step": 2450 }, { "epoch": 0.8994515539305301, "grad_norm": 0.0, "learning_rate": 2.460329067641682e-05, "loss": 0.0, "step": 2460 }, { "epoch": 0.903107861060329, "grad_norm": 0.0, "learning_rate": 2.458135283363803e-05, "loss": 0.0, "step": 2470 }, { "epoch": 0.906764168190128, "grad_norm": 0.0, "learning_rate": 2.4559414990859232e-05, "loss": 0.0, "step": 2480 }, { "epoch": 0.9104204753199269, "grad_norm": 0.0, "learning_rate": 2.4537477148080438e-05, "loss": 0.0, "step": 2490 }, { "epoch": 0.9140767824497258, "grad_norm": 0.0, "learning_rate": 2.4515539305301644e-05, "loss": 0.0, "step": 2500 }, { "epoch": 0.9177330895795247, "grad_norm": 0.0, "learning_rate": 2.449360146252285e-05, "loss": 0.0, "step": 2510 }, { "epoch": 0.9213893967093236, "grad_norm": 0.0, "learning_rate": 2.447166361974406e-05, "loss": 0.0, "step": 2520 }, { "epoch": 0.9250457038391224, "grad_norm": 0.0, "learning_rate": 2.4449725776965266e-05, "loss": 0.0, "step": 2530 }, { "epoch": 0.9287020109689214, "grad_norm": 0.0, "learning_rate": 2.4427787934186472e-05, "loss": 0.0, "step": 2540 }, { "epoch": 0.9323583180987203, "grad_norm": 0.0, "learning_rate": 2.440585009140768e-05, "loss": 0.0, "step": 2550 }, { "epoch": 0.9360146252285192, "grad_norm": 0.0, "learning_rate": 2.4383912248628888e-05, "loss": 0.0, "step": 2560 }, { "epoch": 0.9396709323583181, "grad_norm": 0.0, "learning_rate": 2.4361974405850094e-05, "loss": 0.0, "step": 2570 }, { "epoch": 0.943327239488117, "grad_norm": 0.0, "learning_rate": 2.4340036563071297e-05, "loss": 0.0, "step": 2580 }, { "epoch": 0.946983546617916, "grad_norm": 0.0, "learning_rate": 2.4318098720292503e-05, "loss": 0.0, "step": 2590 }, { "epoch": 0.9506398537477148, "grad_norm": 0.0, "learning_rate": 2.4296160877513713e-05, "loss": 0.0, "step": 2600 }, { "epoch": 0.9542961608775137, "grad_norm": 0.0, "learning_rate": 2.427422303473492e-05, "loss": 0.0, "step": 2610 }, { "epoch": 0.9579524680073126, "grad_norm": 0.0, "learning_rate": 2.4252285191956125e-05, "loss": 0.0, "step": 2620 }, { "epoch": 0.9616087751371115, "grad_norm": 0.0, "learning_rate": 2.423034734917733e-05, "loss": 0.0, "step": 2630 }, { "epoch": 0.9652650822669104, "grad_norm": 0.0, "learning_rate": 2.4208409506398537e-05, "loss": 0.0, "step": 2640 }, { "epoch": 0.9689213893967094, "grad_norm": 0.0, "learning_rate": 2.4186471663619747e-05, "loss": 0.0, "step": 2650 }, { "epoch": 0.9725776965265083, "grad_norm": 0.0, "learning_rate": 2.4164533820840953e-05, "loss": 0.0, "step": 2660 }, { "epoch": 0.9762340036563071, "grad_norm": 0.0, "learning_rate": 2.414259597806216e-05, "loss": 0.0, "step": 2670 }, { "epoch": 0.979890310786106, "grad_norm": 0.0, "learning_rate": 2.4120658135283362e-05, "loss": 0.0, "step": 2680 }, { "epoch": 0.9835466179159049, "grad_norm": 0.0, "learning_rate": 2.409872029250457e-05, "loss": 0.0, "step": 2690 }, { "epoch": 0.9872029250457038, "grad_norm": 0.0, "learning_rate": 2.4076782449725777e-05, "loss": 0.0, "step": 2700 }, { "epoch": 0.9908592321755028, "grad_norm": 0.0, "learning_rate": 2.4054844606946984e-05, "loss": 0.0, "step": 2710 }, { "epoch": 0.9945155393053017, "grad_norm": 0.0, "learning_rate": 2.403290676416819e-05, "loss": 0.0, "step": 2720 }, { "epoch": 0.9981718464351006, "grad_norm": 0.0, "learning_rate": 2.4010968921389396e-05, "loss": 0.0, "step": 2730 }, { "epoch": 1.0018281535648994, "grad_norm": 0.0, "learning_rate": 2.3989031078610605e-05, "loss": 0.0, "step": 2740 }, { "epoch": 1.0054844606946984, "grad_norm": 0.0, "learning_rate": 2.396709323583181e-05, "loss": 0.0, "step": 2750 }, { "epoch": 1.0091407678244972, "grad_norm": 0.0, "learning_rate": 2.3945155393053018e-05, "loss": 0.0, "step": 2760 }, { "epoch": 1.012797074954296, "grad_norm": 0.0, "learning_rate": 2.3923217550274224e-05, "loss": 0.0, "step": 2770 }, { "epoch": 1.016453382084095, "grad_norm": 0.0, "learning_rate": 2.390127970749543e-05, "loss": 0.0, "step": 2780 }, { "epoch": 1.0201096892138939, "grad_norm": 0.0, "learning_rate": 2.3879341864716636e-05, "loss": 0.0, "step": 2790 }, { "epoch": 1.023765996343693, "grad_norm": 0.0, "learning_rate": 2.3857404021937842e-05, "loss": 0.0, "step": 2800 }, { "epoch": 1.0274223034734917, "grad_norm": 0.0, "learning_rate": 2.383546617915905e-05, "loss": 0.0, "step": 2810 }, { "epoch": 1.0310786106032908, "grad_norm": 0.0, "learning_rate": 2.3813528336380258e-05, "loss": 0.0, "step": 2820 }, { "epoch": 1.0347349177330896, "grad_norm": 0.0, "learning_rate": 2.3791590493601464e-05, "loss": 0.0, "step": 2830 }, { "epoch": 1.0383912248628886, "grad_norm": 0.0, "learning_rate": 2.376965265082267e-05, "loss": 0.0, "step": 2840 }, { "epoch": 1.0420475319926874, "grad_norm": 0.0, "learning_rate": 2.3747714808043876e-05, "loss": 0.0, "step": 2850 }, { "epoch": 1.0457038391224862, "grad_norm": 0.0, "learning_rate": 2.3725776965265082e-05, "loss": 0.0, "step": 2860 }, { "epoch": 1.0493601462522852, "grad_norm": 0.0, "learning_rate": 2.3703839122486292e-05, "loss": 0.0, "step": 2870 }, { "epoch": 1.053016453382084, "grad_norm": 0.0, "learning_rate": 2.3681901279707495e-05, "loss": 0.0, "step": 2880 }, { "epoch": 1.056672760511883, "grad_norm": 0.0, "learning_rate": 2.36599634369287e-05, "loss": 0.0, "step": 2890 }, { "epoch": 1.0603290676416819, "grad_norm": 0.0, "learning_rate": 2.3638025594149907e-05, "loss": 0.0, "step": 2900 }, { "epoch": 1.0639853747714807, "grad_norm": 0.0, "learning_rate": 2.3616087751371117e-05, "loss": 0.0, "step": 2910 }, { "epoch": 1.0676416819012797, "grad_norm": 0.0, "learning_rate": 2.3594149908592323e-05, "loss": 0.0, "step": 2920 }, { "epoch": 1.0712979890310785, "grad_norm": 0.0, "learning_rate": 2.357221206581353e-05, "loss": 0.0, "step": 2930 }, { "epoch": 1.0749542961608776, "grad_norm": 0.0, "learning_rate": 2.3550274223034735e-05, "loss": 0.0, "step": 2940 }, { "epoch": 1.0786106032906764, "grad_norm": 0.0, "learning_rate": 2.3528336380255944e-05, "loss": 0.0, "step": 2950 }, { "epoch": 1.0822669104204754, "grad_norm": 0.0, "learning_rate": 2.350639853747715e-05, "loss": 0.0, "step": 2960 }, { "epoch": 1.0859232175502742, "grad_norm": 0.0, "learning_rate": 2.3484460694698357e-05, "loss": 0.0, "step": 2970 }, { "epoch": 1.0895795246800732, "grad_norm": 0.0, "learning_rate": 2.346252285191956e-05, "loss": 0.0, "step": 2980 }, { "epoch": 1.093235831809872, "grad_norm": 0.0, "learning_rate": 2.3440585009140766e-05, "loss": 0.0, "step": 2990 }, { "epoch": 1.0968921389396709, "grad_norm": 0.0, "learning_rate": 2.3418647166361975e-05, "loss": 0.0, "step": 3000 }, { "epoch": 1.1005484460694699, "grad_norm": 0.0, "learning_rate": 2.339670932358318e-05, "loss": 0.0, "step": 3010 }, { "epoch": 1.1042047531992687, "grad_norm": 0.0, "learning_rate": 2.3374771480804387e-05, "loss": 0.0, "step": 3020 }, { "epoch": 1.1078610603290677, "grad_norm": 0.0, "learning_rate": 2.3352833638025594e-05, "loss": 0.0, "step": 3030 }, { "epoch": 1.1115173674588665, "grad_norm": 0.0, "learning_rate": 2.3330895795246803e-05, "loss": 0.0, "step": 3040 }, { "epoch": 1.1151736745886653, "grad_norm": 0.0, "learning_rate": 2.330895795246801e-05, "loss": 0.0, "step": 3050 }, { "epoch": 1.1188299817184644, "grad_norm": 0.0, "learning_rate": 2.3287020109689215e-05, "loss": 0.0, "step": 3060 }, { "epoch": 1.1224862888482632, "grad_norm": 0.0, "learning_rate": 2.326508226691042e-05, "loss": 0.0, "step": 3070 }, { "epoch": 1.1261425959780622, "grad_norm": 0.0, "learning_rate": 2.3243144424131624e-05, "loss": 0.0, "step": 3080 }, { "epoch": 1.129798903107861, "grad_norm": 0.0, "learning_rate": 2.3221206581352834e-05, "loss": 0.0, "step": 3090 }, { "epoch": 1.13345521023766, "grad_norm": 0.0, "learning_rate": 2.319926873857404e-05, "loss": 0.0, "step": 3100 }, { "epoch": 1.1371115173674589, "grad_norm": 0.0, "learning_rate": 2.3177330895795246e-05, "loss": 0.0, "step": 3110 }, { "epoch": 1.1407678244972579, "grad_norm": 0.0, "learning_rate": 2.3155393053016452e-05, "loss": 0.0, "step": 3120 }, { "epoch": 1.1444241316270567, "grad_norm": 0.0, "learning_rate": 2.3133455210237662e-05, "loss": 0.0, "step": 3130 }, { "epoch": 1.1480804387568555, "grad_norm": 0.0, "learning_rate": 2.3111517367458868e-05, "loss": 0.0, "step": 3140 }, { "epoch": 1.1517367458866545, "grad_norm": 0.0, "learning_rate": 2.3089579524680074e-05, "loss": 0.0, "step": 3150 }, { "epoch": 1.1553930530164533, "grad_norm": 0.0, "learning_rate": 2.306764168190128e-05, "loss": 0.0, "step": 3160 }, { "epoch": 1.1590493601462524, "grad_norm": 0.0, "learning_rate": 2.304570383912249e-05, "loss": 0.0, "step": 3170 }, { "epoch": 1.1627056672760512, "grad_norm": 0.0, "learning_rate": 2.3023765996343693e-05, "loss": 0.0, "step": 3180 }, { "epoch": 1.16636197440585, "grad_norm": 0.0, "learning_rate": 2.30018281535649e-05, "loss": 0.0, "step": 3190 }, { "epoch": 1.170018281535649, "grad_norm": 0.0, "learning_rate": 2.2979890310786105e-05, "loss": 0.0, "step": 3200 }, { "epoch": 1.1736745886654478, "grad_norm": 0.0, "learning_rate": 2.295795246800731e-05, "loss": 0.0, "step": 3210 }, { "epoch": 1.1773308957952469, "grad_norm": 0.0, "learning_rate": 2.293601462522852e-05, "loss": 0.0, "step": 3220 }, { "epoch": 1.1809872029250457, "grad_norm": 0.0, "learning_rate": 2.2914076782449727e-05, "loss": 0.0, "step": 3230 }, { "epoch": 1.1846435100548447, "grad_norm": 0.0, "learning_rate": 2.2892138939670933e-05, "loss": 0.0, "step": 3240 }, { "epoch": 1.1882998171846435, "grad_norm": 0.0, "learning_rate": 2.287020109689214e-05, "loss": 0.0, "step": 3250 }, { "epoch": 1.1919561243144425, "grad_norm": 0.0, "learning_rate": 2.284826325411335e-05, "loss": 0.0, "step": 3260 }, { "epoch": 1.1956124314442413, "grad_norm": 0.0, "learning_rate": 2.2826325411334555e-05, "loss": 0.0, "step": 3270 }, { "epoch": 1.1992687385740401, "grad_norm": 0.0, "learning_rate": 2.280438756855576e-05, "loss": 0.0, "step": 3280 }, { "epoch": 1.2029250457038392, "grad_norm": 0.0, "learning_rate": 2.2782449725776963e-05, "loss": 0.0, "step": 3290 }, { "epoch": 1.206581352833638, "grad_norm": 0.0, "learning_rate": 2.2760511882998173e-05, "loss": 0.0, "step": 3300 }, { "epoch": 1.210237659963437, "grad_norm": 0.0, "learning_rate": 2.273857404021938e-05, "loss": 0.0, "step": 3310 }, { "epoch": 1.2138939670932358, "grad_norm": 0.0, "learning_rate": 2.2716636197440585e-05, "loss": 0.0, "step": 3320 }, { "epoch": 1.2175502742230346, "grad_norm": 0.0, "learning_rate": 2.269469835466179e-05, "loss": 0.0, "step": 3330 }, { "epoch": 1.2212065813528337, "grad_norm": 0.0, "learning_rate": 2.2672760511882998e-05, "loss": 0.0, "step": 3340 }, { "epoch": 1.2248628884826325, "grad_norm": 0.0, "learning_rate": 2.2650822669104207e-05, "loss": 0.0, "step": 3350 }, { "epoch": 1.2285191956124315, "grad_norm": 0.0, "learning_rate": 2.2628884826325413e-05, "loss": 0.0, "step": 3360 }, { "epoch": 1.2321755027422303, "grad_norm": 0.0, "learning_rate": 2.260694698354662e-05, "loss": 0.0, "step": 3370 }, { "epoch": 1.2358318098720293, "grad_norm": 0.0, "learning_rate": 2.2585009140767826e-05, "loss": 0.0, "step": 3380 }, { "epoch": 1.2394881170018281, "grad_norm": 0.0, "learning_rate": 2.256307129798903e-05, "loss": 0.0, "step": 3390 }, { "epoch": 1.2431444241316272, "grad_norm": 0.0, "learning_rate": 2.2541133455210238e-05, "loss": 0.0, "step": 3400 }, { "epoch": 1.246800731261426, "grad_norm": 0.0, "learning_rate": 2.2519195612431444e-05, "loss": 0.0, "step": 3410 }, { "epoch": 1.2504570383912248, "grad_norm": 0.0, "learning_rate": 2.249725776965265e-05, "loss": 0.0, "step": 3420 }, { "epoch": 1.2541133455210238, "grad_norm": 0.0, "learning_rate": 2.247531992687386e-05, "loss": 0.0, "step": 3430 }, { "epoch": 1.2577696526508226, "grad_norm": 0.0, "learning_rate": 2.2453382084095066e-05, "loss": 0.0, "step": 3440 }, { "epoch": 1.2614259597806217, "grad_norm": 0.0, "learning_rate": 2.2431444241316272e-05, "loss": 0.0, "step": 3450 }, { "epoch": 1.2650822669104205, "grad_norm": 0.0, "learning_rate": 2.2409506398537478e-05, "loss": 0.0, "step": 3460 }, { "epoch": 1.2687385740402193, "grad_norm": 0.0, "learning_rate": 2.2387568555758684e-05, "loss": 0.0, "step": 3470 }, { "epoch": 1.2723948811700183, "grad_norm": 0.0, "learning_rate": 2.2365630712979894e-05, "loss": 0.0, "step": 3480 }, { "epoch": 1.2760511882998171, "grad_norm": 0.0, "learning_rate": 2.2343692870201096e-05, "loss": 0.0, "step": 3490 }, { "epoch": 1.2797074954296161, "grad_norm": 0.0, "learning_rate": 2.2321755027422303e-05, "loss": 0.0, "step": 3500 }, { "epoch": 1.283363802559415, "grad_norm": 0.0, "learning_rate": 2.229981718464351e-05, "loss": 0.0, "step": 3510 }, { "epoch": 1.2870201096892138, "grad_norm": 0.0, "learning_rate": 2.2277879341864718e-05, "loss": 0.0, "step": 3520 }, { "epoch": 1.2906764168190128, "grad_norm": 0.0, "learning_rate": 2.2255941499085924e-05, "loss": 0.0, "step": 3530 }, { "epoch": 1.2943327239488118, "grad_norm": 0.0, "learning_rate": 2.223400365630713e-05, "loss": 0.0, "step": 3540 }, { "epoch": 1.2979890310786106, "grad_norm": 0.0, "learning_rate": 2.2212065813528337e-05, "loss": 0.0, "step": 3550 }, { "epoch": 1.3016453382084094, "grad_norm": 0.0, "learning_rate": 2.2190127970749543e-05, "loss": 0.0, "step": 3560 }, { "epoch": 1.3053016453382085, "grad_norm": 0.0, "learning_rate": 2.2168190127970752e-05, "loss": 0.0, "step": 3570 }, { "epoch": 1.3089579524680073, "grad_norm": 0.0, "learning_rate": 2.214625228519196e-05, "loss": 0.0, "step": 3580 }, { "epoch": 1.3126142595978063, "grad_norm": 0.0, "learning_rate": 2.212431444241316e-05, "loss": 0.0, "step": 3590 }, { "epoch": 1.3162705667276051, "grad_norm": 0.0, "learning_rate": 2.2102376599634367e-05, "loss": 0.0, "step": 3600 }, { "epoch": 1.319926873857404, "grad_norm": 0.0, "learning_rate": 2.2080438756855577e-05, "loss": 0.0, "step": 3610 }, { "epoch": 1.323583180987203, "grad_norm": 0.0, "learning_rate": 2.2058500914076783e-05, "loss": 0.0, "step": 3620 }, { "epoch": 1.3272394881170018, "grad_norm": 0.0, "learning_rate": 2.203656307129799e-05, "loss": 0.0, "step": 3630 }, { "epoch": 1.3308957952468008, "grad_norm": 0.0, "learning_rate": 2.2014625228519195e-05, "loss": 0.0, "step": 3640 }, { "epoch": 1.3345521023765996, "grad_norm": 0.0, "learning_rate": 2.1992687385740405e-05, "loss": 0.0, "step": 3650 }, { "epoch": 1.3382084095063984, "grad_norm": 0.0, "learning_rate": 2.197074954296161e-05, "loss": 0.0, "step": 3660 }, { "epoch": 1.3418647166361974, "grad_norm": 0.0, "learning_rate": 2.1948811700182817e-05, "loss": 0.0, "step": 3670 }, { "epoch": 1.3455210237659965, "grad_norm": 0.0, "learning_rate": 2.1926873857404023e-05, "loss": 0.0, "step": 3680 }, { "epoch": 1.3491773308957953, "grad_norm": 0.0, "learning_rate": 2.1904936014625226e-05, "loss": 0.0, "step": 3690 }, { "epoch": 1.352833638025594, "grad_norm": 0.0, "learning_rate": 2.1882998171846436e-05, "loss": 0.0, "step": 3700 }, { "epoch": 1.3564899451553931, "grad_norm": 0.0, "learning_rate": 2.1861060329067642e-05, "loss": 0.0, "step": 3710 }, { "epoch": 1.360146252285192, "grad_norm": 0.0, "learning_rate": 2.1839122486288848e-05, "loss": 0.0, "step": 3720 }, { "epoch": 1.363802559414991, "grad_norm": 0.0, "learning_rate": 2.1817184643510054e-05, "loss": 0.0, "step": 3730 }, { "epoch": 1.3674588665447898, "grad_norm": 0.0, "learning_rate": 2.1795246800731264e-05, "loss": 0.0, "step": 3740 }, { "epoch": 1.3711151736745886, "grad_norm": 0.0, "learning_rate": 2.177330895795247e-05, "loss": 0.0, "step": 3750 }, { "epoch": 1.3747714808043876, "grad_norm": 0.0, "learning_rate": 2.1751371115173676e-05, "loss": 0.0, "step": 3760 }, { "epoch": 1.3784277879341864, "grad_norm": 0.0, "learning_rate": 2.1729433272394882e-05, "loss": 0.0, "step": 3770 }, { "epoch": 1.3820840950639854, "grad_norm": 0.0, "learning_rate": 2.170749542961609e-05, "loss": 0.0, "step": 3780 }, { "epoch": 1.3857404021937842, "grad_norm": 0.0, "learning_rate": 2.1685557586837294e-05, "loss": 0.0, "step": 3790 }, { "epoch": 1.389396709323583, "grad_norm": 0.0, "learning_rate": 2.16636197440585e-05, "loss": 0.0, "step": 3800 }, { "epoch": 1.393053016453382, "grad_norm": 0.0, "learning_rate": 2.1641681901279707e-05, "loss": 0.0, "step": 3810 }, { "epoch": 1.3967093235831811, "grad_norm": 0.0, "learning_rate": 2.1619744058500913e-05, "loss": 0.0, "step": 3820 }, { "epoch": 1.40036563071298, "grad_norm": 0.0, "learning_rate": 2.1597806215722122e-05, "loss": 0.0, "step": 3830 }, { "epoch": 1.4040219378427787, "grad_norm": 0.0, "learning_rate": 2.157586837294333e-05, "loss": 0.0, "step": 3840 }, { "epoch": 1.4076782449725778, "grad_norm": 0.0, "learning_rate": 2.1553930530164534e-05, "loss": 0.0, "step": 3850 }, { "epoch": 1.4113345521023766, "grad_norm": 0.0, "learning_rate": 2.153199268738574e-05, "loss": 0.0, "step": 3860 }, { "epoch": 1.4149908592321756, "grad_norm": 0.0, "learning_rate": 2.151005484460695e-05, "loss": 0.0, "step": 3870 }, { "epoch": 1.4186471663619744, "grad_norm": 0.0, "learning_rate": 2.1488117001828156e-05, "loss": 0.0, "step": 3880 }, { "epoch": 1.4223034734917732, "grad_norm": 0.0, "learning_rate": 2.146617915904936e-05, "loss": 0.0, "step": 3890 }, { "epoch": 1.4259597806215722, "grad_norm": 0.0, "learning_rate": 2.1444241316270565e-05, "loss": 0.0, "step": 3900 }, { "epoch": 1.429616087751371, "grad_norm": 0.0, "learning_rate": 2.142230347349177e-05, "loss": 0.0, "step": 3910 }, { "epoch": 1.43327239488117, "grad_norm": 0.0, "learning_rate": 2.140036563071298e-05, "loss": 0.0, "step": 3920 }, { "epoch": 1.436928702010969, "grad_norm": 0.0, "learning_rate": 2.1378427787934187e-05, "loss": 0.0, "step": 3930 }, { "epoch": 1.4405850091407677, "grad_norm": 0.0, "learning_rate": 2.1356489945155393e-05, "loss": 0.0, "step": 3940 }, { "epoch": 1.4442413162705667, "grad_norm": 0.0, "learning_rate": 2.13345521023766e-05, "loss": 0.0, "step": 3950 }, { "epoch": 1.4478976234003658, "grad_norm": 0.0, "learning_rate": 2.131261425959781e-05, "loss": 0.0, "step": 3960 }, { "epoch": 1.4515539305301646, "grad_norm": 0.0, "learning_rate": 2.1290676416819015e-05, "loss": 0.0, "step": 3970 }, { "epoch": 1.4552102376599634, "grad_norm": 0.0, "learning_rate": 2.126873857404022e-05, "loss": 0.0, "step": 3980 }, { "epoch": 1.4588665447897624, "grad_norm": 0.0, "learning_rate": 2.1246800731261424e-05, "loss": 0.0, "step": 3990 }, { "epoch": 1.4625228519195612, "grad_norm": 0.0, "learning_rate": 2.1224862888482633e-05, "loss": 0.0, "step": 4000 }, { "epoch": 1.4661791590493602, "grad_norm": 0.0, "learning_rate": 2.120292504570384e-05, "loss": 0.0, "step": 4010 }, { "epoch": 1.469835466179159, "grad_norm": 0.0, "learning_rate": 2.1180987202925046e-05, "loss": 0.0, "step": 4020 }, { "epoch": 1.4734917733089579, "grad_norm": 0.0, "learning_rate": 2.1159049360146252e-05, "loss": 0.0, "step": 4030 }, { "epoch": 1.477148080438757, "grad_norm": 0.0, "learning_rate": 2.1137111517367458e-05, "loss": 0.0, "step": 4040 }, { "epoch": 1.4808043875685557, "grad_norm": 0.0, "learning_rate": 2.1115173674588667e-05, "loss": 0.0, "step": 4050 }, { "epoch": 1.4844606946983547, "grad_norm": 0.0, "learning_rate": 2.1093235831809874e-05, "loss": 0.0, "step": 4060 }, { "epoch": 1.4881170018281535, "grad_norm": 0.0, "learning_rate": 2.107129798903108e-05, "loss": 0.0, "step": 4070 }, { "epoch": 1.4917733089579523, "grad_norm": 0.0, "learning_rate": 2.1049360146252286e-05, "loss": 0.0, "step": 4080 }, { "epoch": 1.4954296160877514, "grad_norm": 0.0, "learning_rate": 2.1027422303473492e-05, "loss": 0.0, "step": 4090 }, { "epoch": 1.4990859232175504, "grad_norm": 0.0, "learning_rate": 2.1005484460694698e-05, "loss": 0.0, "step": 4100 }, { "epoch": 1.5027422303473492, "grad_norm": 0.0, "learning_rate": 2.0983546617915904e-05, "loss": 0.0, "step": 4110 }, { "epoch": 1.506398537477148, "grad_norm": 0.0, "learning_rate": 2.096160877513711e-05, "loss": 0.0, "step": 4120 }, { "epoch": 1.5100548446069468, "grad_norm": 0.0, "learning_rate": 2.093967093235832e-05, "loss": 0.0, "step": 4130 }, { "epoch": 1.5137111517367459, "grad_norm": 0.0, "learning_rate": 2.0917733089579526e-05, "loss": 0.0, "step": 4140 }, { "epoch": 1.517367458866545, "grad_norm": 0.0, "learning_rate": 2.0895795246800732e-05, "loss": 0.0, "step": 4150 }, { "epoch": 1.5210237659963437, "grad_norm": 0.0, "learning_rate": 2.087385740402194e-05, "loss": 0.0, "step": 4160 }, { "epoch": 1.5246800731261425, "grad_norm": 0.0, "learning_rate": 2.0851919561243145e-05, "loss": 0.0, "step": 4170 }, { "epoch": 1.5283363802559415, "grad_norm": 0.0, "learning_rate": 2.0829981718464354e-05, "loss": 0.0, "step": 4180 }, { "epoch": 1.5319926873857403, "grad_norm": 0.0, "learning_rate": 2.0808043875685557e-05, "loss": 0.0, "step": 4190 }, { "epoch": 1.5356489945155394, "grad_norm": 0.0, "learning_rate": 2.0786106032906763e-05, "loss": 0.0, "step": 4200 }, { "epoch": 1.5393053016453382, "grad_norm": 0.0, "learning_rate": 2.076416819012797e-05, "loss": 0.0, "step": 4210 }, { "epoch": 1.542961608775137, "grad_norm": 0.0, "learning_rate": 2.074223034734918e-05, "loss": 0.0, "step": 4220 }, { "epoch": 1.546617915904936, "grad_norm": 0.0, "learning_rate": 2.0720292504570385e-05, "loss": 0.0, "step": 4230 }, { "epoch": 1.550274223034735, "grad_norm": 0.0, "learning_rate": 2.069835466179159e-05, "loss": 0.0, "step": 4240 }, { "epoch": 1.5539305301645339, "grad_norm": 0.0, "learning_rate": 2.0676416819012797e-05, "loss": 0.0, "step": 4250 }, { "epoch": 1.5575868372943327, "grad_norm": 0.0, "learning_rate": 2.0654478976234007e-05, "loss": 0.0, "step": 4260 }, { "epoch": 1.5612431444241315, "grad_norm": 0.0, "learning_rate": 2.0632541133455213e-05, "loss": 0.0, "step": 4270 }, { "epoch": 1.5648994515539305, "grad_norm": 0.0, "learning_rate": 2.061060329067642e-05, "loss": 0.0, "step": 4280 }, { "epoch": 1.5685557586837295, "grad_norm": 0.0, "learning_rate": 2.058866544789762e-05, "loss": 0.0, "step": 4290 }, { "epoch": 1.5722120658135283, "grad_norm": 0.0, "learning_rate": 2.0566727605118828e-05, "loss": 0.0, "step": 4300 }, { "epoch": 1.5758683729433272, "grad_norm": 0.0, "learning_rate": 2.0544789762340037e-05, "loss": 0.0, "step": 4310 }, { "epoch": 1.5795246800731262, "grad_norm": 0.0, "learning_rate": 2.0522851919561243e-05, "loss": 0.0, "step": 4320 }, { "epoch": 1.583180987202925, "grad_norm": 0.0, "learning_rate": 2.050091407678245e-05, "loss": 0.0, "step": 4330 }, { "epoch": 1.586837294332724, "grad_norm": 0.0, "learning_rate": 2.0478976234003656e-05, "loss": 0.0, "step": 4340 }, { "epoch": 1.5904936014625228, "grad_norm": 0.0, "learning_rate": 2.0457038391224865e-05, "loss": 0.0, "step": 4350 }, { "epoch": 1.5941499085923216, "grad_norm": 0.0, "learning_rate": 2.043510054844607e-05, "loss": 0.0, "step": 4360 }, { "epoch": 1.5978062157221207, "grad_norm": 0.0, "learning_rate": 2.0413162705667278e-05, "loss": 0.0, "step": 4370 }, { "epoch": 1.6014625228519197, "grad_norm": 0.0, "learning_rate": 2.0391224862888484e-05, "loss": 0.0, "step": 4380 }, { "epoch": 1.6051188299817185, "grad_norm": 0.0, "learning_rate": 2.036928702010969e-05, "loss": 0.0, "step": 4390 }, { "epoch": 1.6087751371115173, "grad_norm": 0.0, "learning_rate": 2.0347349177330896e-05, "loss": 0.0, "step": 4400 }, { "epoch": 1.6124314442413161, "grad_norm": 0.0, "learning_rate": 2.0325411334552102e-05, "loss": 0.0, "step": 4410 }, { "epoch": 1.6160877513711152, "grad_norm": 0.0, "learning_rate": 2.0303473491773308e-05, "loss": 0.0, "step": 4420 }, { "epoch": 1.6197440585009142, "grad_norm": 0.0, "learning_rate": 2.0281535648994514e-05, "loss": 0.0, "step": 4430 }, { "epoch": 1.623400365630713, "grad_norm": 0.0, "learning_rate": 2.0259597806215724e-05, "loss": 0.0, "step": 4440 }, { "epoch": 1.6270566727605118, "grad_norm": 0.0, "learning_rate": 2.023765996343693e-05, "loss": 0.0, "step": 4450 }, { "epoch": 1.6307129798903108, "grad_norm": 0.0, "learning_rate": 2.0215722120658136e-05, "loss": 0.0, "step": 4460 }, { "epoch": 1.6343692870201096, "grad_norm": 0.0, "learning_rate": 2.0193784277879342e-05, "loss": 0.0, "step": 4470 }, { "epoch": 1.6380255941499087, "grad_norm": 0.0, "learning_rate": 2.0171846435100552e-05, "loss": 0.0, "step": 4480 }, { "epoch": 1.6416819012797075, "grad_norm": 0.0, "learning_rate": 2.0149908592321758e-05, "loss": 0.0, "step": 4490 }, { "epoch": 1.6453382084095063, "grad_norm": 0.0, "learning_rate": 2.012797074954296e-05, "loss": 0.0, "step": 4500 }, { "epoch": 1.6489945155393053, "grad_norm": 0.0, "learning_rate": 2.0106032906764167e-05, "loss": 0.0, "step": 4510 }, { "epoch": 1.6526508226691043, "grad_norm": 0.0, "learning_rate": 2.0084095063985373e-05, "loss": 0.0, "step": 4520 }, { "epoch": 1.6563071297989032, "grad_norm": 0.0, "learning_rate": 2.0062157221206583e-05, "loss": 0.0, "step": 4530 }, { "epoch": 1.659963436928702, "grad_norm": 0.0, "learning_rate": 2.004021937842779e-05, "loss": 0.0, "step": 4540 }, { "epoch": 1.6636197440585008, "grad_norm": 0.0, "learning_rate": 2.0018281535648995e-05, "loss": 0.0, "step": 4550 }, { "epoch": 1.6672760511882998, "grad_norm": 0.0, "learning_rate": 1.99963436928702e-05, "loss": 0.0, "step": 4560 }, { "epoch": 1.6709323583180988, "grad_norm": 0.0, "learning_rate": 1.997440585009141e-05, "loss": 0.0, "step": 4570 }, { "epoch": 1.6745886654478976, "grad_norm": 0.0, "learning_rate": 1.9952468007312617e-05, "loss": 0.0, "step": 4580 }, { "epoch": 1.6782449725776964, "grad_norm": 0.0, "learning_rate": 1.9930530164533823e-05, "loss": 0.0, "step": 4590 }, { "epoch": 1.6819012797074955, "grad_norm": 0.0, "learning_rate": 1.9908592321755026e-05, "loss": 0.0, "step": 4600 }, { "epoch": 1.6855575868372943, "grad_norm": 0.0, "learning_rate": 1.9886654478976235e-05, "loss": 0.0, "step": 4610 }, { "epoch": 1.6892138939670933, "grad_norm": 0.0, "learning_rate": 1.986471663619744e-05, "loss": 0.0, "step": 4620 }, { "epoch": 1.6928702010968921, "grad_norm": 0.0, "learning_rate": 1.9842778793418647e-05, "loss": 0.0, "step": 4630 }, { "epoch": 1.696526508226691, "grad_norm": 0.0, "learning_rate": 1.9820840950639853e-05, "loss": 0.0, "step": 4640 }, { "epoch": 1.70018281535649, "grad_norm": 0.0, "learning_rate": 1.979890310786106e-05, "loss": 0.0, "step": 4650 }, { "epoch": 1.703839122486289, "grad_norm": 0.0, "learning_rate": 1.977696526508227e-05, "loss": 0.0, "step": 4660 }, { "epoch": 1.7074954296160878, "grad_norm": 0.0, "learning_rate": 1.9755027422303475e-05, "loss": 0.0, "step": 4670 }, { "epoch": 1.7111517367458866, "grad_norm": 0.0, "learning_rate": 1.973308957952468e-05, "loss": 0.0, "step": 4680 }, { "epoch": 1.7148080438756854, "grad_norm": 0.0, "learning_rate": 1.9711151736745888e-05, "loss": 0.0, "step": 4690 }, { "epoch": 1.7184643510054844, "grad_norm": 0.0, "learning_rate": 1.9689213893967094e-05, "loss": 0.0, "step": 4700 }, { "epoch": 1.7221206581352835, "grad_norm": 0.0, "learning_rate": 1.96672760511883e-05, "loss": 0.0, "step": 4710 }, { "epoch": 1.7257769652650823, "grad_norm": 0.0, "learning_rate": 1.9645338208409506e-05, "loss": 0.0, "step": 4720 }, { "epoch": 1.729433272394881, "grad_norm": 0.0, "learning_rate": 1.9623400365630712e-05, "loss": 0.0, "step": 4730 }, { "epoch": 1.7330895795246801, "grad_norm": 0.0, "learning_rate": 1.9601462522851918e-05, "loss": 0.0, "step": 4740 }, { "epoch": 1.736745886654479, "grad_norm": 0.0, "learning_rate": 1.9579524680073128e-05, "loss": 0.0, "step": 4750 }, { "epoch": 1.740402193784278, "grad_norm": 0.0, "learning_rate": 1.9557586837294334e-05, "loss": 0.0, "step": 4760 }, { "epoch": 1.7440585009140768, "grad_norm": 0.0, "learning_rate": 1.953564899451554e-05, "loss": 0.0, "step": 4770 }, { "epoch": 1.7477148080438756, "grad_norm": 0.0, "learning_rate": 1.9513711151736746e-05, "loss": 0.0, "step": 4780 }, { "epoch": 1.7513711151736746, "grad_norm": 0.0, "learning_rate": 1.9491773308957956e-05, "loss": 0.0, "step": 4790 }, { "epoch": 1.7550274223034736, "grad_norm": 0.0, "learning_rate": 1.946983546617916e-05, "loss": 0.0, "step": 4800 }, { "epoch": 1.7586837294332724, "grad_norm": 0.0, "learning_rate": 1.9447897623400365e-05, "loss": 0.0, "step": 4810 }, { "epoch": 1.7623400365630713, "grad_norm": 0.0, "learning_rate": 1.942595978062157e-05, "loss": 0.0, "step": 4820 }, { "epoch": 1.76599634369287, "grad_norm": 0.0, "learning_rate": 1.940402193784278e-05, "loss": 0.0, "step": 4830 }, { "epoch": 1.769652650822669, "grad_norm": 0.0, "learning_rate": 1.9382084095063986e-05, "loss": 0.0, "step": 4840 }, { "epoch": 1.7733089579524681, "grad_norm": 0.0, "learning_rate": 1.9360146252285193e-05, "loss": 0.0, "step": 4850 }, { "epoch": 1.776965265082267, "grad_norm": 0.0, "learning_rate": 1.93382084095064e-05, "loss": 0.0, "step": 4860 }, { "epoch": 1.7806215722120657, "grad_norm": 0.0, "learning_rate": 1.9316270566727605e-05, "loss": 0.0, "step": 4870 }, { "epoch": 1.7842778793418648, "grad_norm": 0.0, "learning_rate": 1.9294332723948814e-05, "loss": 0.0, "step": 4880 }, { "epoch": 1.7879341864716636, "grad_norm": 0.0, "learning_rate": 1.927239488117002e-05, "loss": 0.0, "step": 4890 }, { "epoch": 1.7915904936014626, "grad_norm": 0.0, "learning_rate": 1.9250457038391223e-05, "loss": 0.0, "step": 4900 }, { "epoch": 1.7952468007312614, "grad_norm": 0.0, "learning_rate": 1.922851919561243e-05, "loss": 0.0, "step": 4910 }, { "epoch": 1.7989031078610602, "grad_norm": 0.0, "learning_rate": 1.920658135283364e-05, "loss": 0.0, "step": 4920 }, { "epoch": 1.8025594149908593, "grad_norm": 0.0, "learning_rate": 1.9184643510054845e-05, "loss": 0.0, "step": 4930 }, { "epoch": 1.8062157221206583, "grad_norm": 0.0, "learning_rate": 1.916270566727605e-05, "loss": 0.0, "step": 4940 }, { "epoch": 1.809872029250457, "grad_norm": 0.0, "learning_rate": 1.9140767824497257e-05, "loss": 0.0, "step": 4950 }, { "epoch": 1.813528336380256, "grad_norm": 0.0, "learning_rate": 1.9118829981718467e-05, "loss": 0.0, "step": 4960 }, { "epoch": 1.8171846435100547, "grad_norm": 0.0, "learning_rate": 1.9096892138939673e-05, "loss": 0.0, "step": 4970 }, { "epoch": 1.8208409506398537, "grad_norm": 0.0, "learning_rate": 1.907495429616088e-05, "loss": 0.0, "step": 4980 }, { "epoch": 1.8244972577696528, "grad_norm": 0.0, "learning_rate": 1.9053016453382085e-05, "loss": 0.0, "step": 4990 }, { "epoch": 1.8281535648994516, "grad_norm": 0.0, "learning_rate": 1.9031078610603288e-05, "loss": 0.0, "step": 5000 }, { "epoch": 1.8318098720292504, "grad_norm": 0.0, "learning_rate": 1.9009140767824498e-05, "loss": 0.0, "step": 5010 }, { "epoch": 1.8354661791590492, "grad_norm": 0.0, "learning_rate": 1.8987202925045704e-05, "loss": 0.0, "step": 5020 }, { "epoch": 1.8391224862888482, "grad_norm": 0.0, "learning_rate": 1.896526508226691e-05, "loss": 0.0, "step": 5030 }, { "epoch": 1.8427787934186473, "grad_norm": 0.0, "learning_rate": 1.8943327239488116e-05, "loss": 0.0, "step": 5040 }, { "epoch": 1.846435100548446, "grad_norm": 0.0, "learning_rate": 1.8921389396709326e-05, "loss": 0.0, "step": 5050 }, { "epoch": 1.8500914076782449, "grad_norm": 0.0, "learning_rate": 1.8899451553930532e-05, "loss": 0.0, "step": 5060 }, { "epoch": 1.853747714808044, "grad_norm": 0.0, "learning_rate": 1.8877513711151738e-05, "loss": 0.0, "step": 5070 }, { "epoch": 1.857404021937843, "grad_norm": 0.0, "learning_rate": 1.8855575868372944e-05, "loss": 0.0, "step": 5080 }, { "epoch": 1.8610603290676417, "grad_norm": 0.0, "learning_rate": 1.8833638025594154e-05, "loss": 0.0, "step": 5090 }, { "epoch": 1.8647166361974405, "grad_norm": 0.0, "learning_rate": 1.8811700182815356e-05, "loss": 0.0, "step": 5100 }, { "epoch": 1.8683729433272394, "grad_norm": 0.0, "learning_rate": 1.8789762340036562e-05, "loss": 0.0, "step": 5110 }, { "epoch": 1.8720292504570384, "grad_norm": 0.0, "learning_rate": 1.876782449725777e-05, "loss": 0.0, "step": 5120 }, { "epoch": 1.8756855575868374, "grad_norm": 0.0, "learning_rate": 1.8745886654478975e-05, "loss": 0.0, "step": 5130 }, { "epoch": 1.8793418647166362, "grad_norm": 0.0, "learning_rate": 1.8723948811700184e-05, "loss": 0.0, "step": 5140 }, { "epoch": 1.882998171846435, "grad_norm": 0.0, "learning_rate": 1.870201096892139e-05, "loss": 0.0, "step": 5150 }, { "epoch": 1.8866544789762338, "grad_norm": 0.0, "learning_rate": 1.8680073126142597e-05, "loss": 0.0, "step": 5160 }, { "epoch": 1.8903107861060329, "grad_norm": 0.0, "learning_rate": 1.8658135283363803e-05, "loss": 0.0, "step": 5170 }, { "epoch": 1.893967093235832, "grad_norm": 0.0, "learning_rate": 1.8636197440585012e-05, "loss": 0.0, "step": 5180 }, { "epoch": 1.8976234003656307, "grad_norm": 0.0, "learning_rate": 1.861425959780622e-05, "loss": 0.0, "step": 5190 }, { "epoch": 1.9012797074954295, "grad_norm": 0.0, "learning_rate": 1.859232175502742e-05, "loss": 0.0, "step": 5200 }, { "epoch": 1.9049360146252285, "grad_norm": 0.0, "learning_rate": 1.8570383912248627e-05, "loss": 0.0, "step": 5210 }, { "epoch": 1.9085923217550276, "grad_norm": 0.0, "learning_rate": 1.8548446069469833e-05, "loss": 0.0, "step": 5220 }, { "epoch": 1.9122486288848264, "grad_norm": 0.0, "learning_rate": 1.8526508226691043e-05, "loss": 0.0, "step": 5230 }, { "epoch": 1.9159049360146252, "grad_norm": 0.0, "learning_rate": 1.850457038391225e-05, "loss": 0.0, "step": 5240 }, { "epoch": 1.919561243144424, "grad_norm": 0.0, "learning_rate": 1.8482632541133455e-05, "loss": 0.0, "step": 5250 }, { "epoch": 1.923217550274223, "grad_norm": 0.0, "learning_rate": 1.846069469835466e-05, "loss": 0.0, "step": 5260 }, { "epoch": 1.926873857404022, "grad_norm": 0.0, "learning_rate": 1.843875685557587e-05, "loss": 0.0, "step": 5270 }, { "epoch": 1.9305301645338209, "grad_norm": 0.0, "learning_rate": 1.8416819012797077e-05, "loss": 0.0, "step": 5280 }, { "epoch": 1.9341864716636197, "grad_norm": 0.0, "learning_rate": 1.8394881170018283e-05, "loss": 0.0, "step": 5290 }, { "epoch": 1.9378427787934185, "grad_norm": 0.0, "learning_rate": 1.8372943327239486e-05, "loss": 0.0, "step": 5300 }, { "epoch": 1.9414990859232175, "grad_norm": 0.0, "learning_rate": 1.8351005484460695e-05, "loss": 0.0, "step": 5310 }, { "epoch": 1.9451553930530165, "grad_norm": 0.0, "learning_rate": 1.83290676416819e-05, "loss": 0.0, "step": 5320 }, { "epoch": 1.9488117001828154, "grad_norm": 0.0, "learning_rate": 1.8307129798903108e-05, "loss": 0.0, "step": 5330 }, { "epoch": 1.9524680073126142, "grad_norm": 0.0, "learning_rate": 1.8285191956124314e-05, "loss": 0.0, "step": 5340 }, { "epoch": 1.9561243144424132, "grad_norm": 0.0, "learning_rate": 1.826325411334552e-05, "loss": 0.0, "step": 5350 }, { "epoch": 1.9597806215722122, "grad_norm": 0.0, "learning_rate": 1.824131627056673e-05, "loss": 0.0, "step": 5360 }, { "epoch": 1.963436928702011, "grad_norm": 0.0, "learning_rate": 1.8219378427787936e-05, "loss": 0.0, "step": 5370 }, { "epoch": 1.9670932358318098, "grad_norm": 0.0, "learning_rate": 1.8197440585009142e-05, "loss": 0.0, "step": 5380 }, { "epoch": 1.9707495429616086, "grad_norm": 0.0, "learning_rate": 1.8175502742230348e-05, "loss": 0.0, "step": 5390 }, { "epoch": 1.9744058500914077, "grad_norm": 0.0, "learning_rate": 1.8153564899451554e-05, "loss": 0.0, "step": 5400 }, { "epoch": 1.9780621572212067, "grad_norm": 0.0, "learning_rate": 1.813162705667276e-05, "loss": 0.0, "step": 5410 }, { "epoch": 1.9817184643510055, "grad_norm": 0.0, "learning_rate": 1.8109689213893966e-05, "loss": 0.0, "step": 5420 }, { "epoch": 1.9853747714808043, "grad_norm": 0.0, "learning_rate": 1.8087751371115173e-05, "loss": 0.0, "step": 5430 }, { "epoch": 1.9890310786106031, "grad_norm": 0.0, "learning_rate": 1.8065813528336382e-05, "loss": 0.0, "step": 5440 }, { "epoch": 1.9926873857404022, "grad_norm": 0.0, "learning_rate": 1.8043875685557588e-05, "loss": 0.0, "step": 5450 }, { "epoch": 1.9963436928702012, "grad_norm": 0.0, "learning_rate": 1.8021937842778794e-05, "loss": 0.0, "step": 5460 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 1.8e-05, "loss": 0.0, "step": 5470 }, { "epoch": 2.003656307129799, "grad_norm": 0.0, "learning_rate": 1.7978062157221207e-05, "loss": 0.0, "step": 5480 }, { "epoch": 2.0073126142595976, "grad_norm": 0.0, "learning_rate": 1.7956124314442416e-05, "loss": 0.0, "step": 5490 }, { "epoch": 2.010968921389397, "grad_norm": 0.0, "learning_rate": 1.7934186471663622e-05, "loss": 0.0, "step": 5500 }, { "epoch": 2.0146252285191957, "grad_norm": 0.0, "learning_rate": 1.7912248628884825e-05, "loss": 0.0, "step": 5510 }, { "epoch": 2.0182815356489945, "grad_norm": 0.0, "learning_rate": 1.789031078610603e-05, "loss": 0.0, "step": 5520 }, { "epoch": 2.0219378427787933, "grad_norm": 0.0, "learning_rate": 1.786837294332724e-05, "loss": 0.0, "step": 5530 }, { "epoch": 2.025594149908592, "grad_norm": 0.0, "learning_rate": 1.7846435100548447e-05, "loss": 0.0, "step": 5540 }, { "epoch": 2.0292504570383914, "grad_norm": 0.0, "learning_rate": 1.7824497257769653e-05, "loss": 0.0, "step": 5550 }, { "epoch": 2.03290676416819, "grad_norm": 0.0, "learning_rate": 1.780255941499086e-05, "loss": 0.0, "step": 5560 }, { "epoch": 2.036563071297989, "grad_norm": 0.0, "learning_rate": 1.7780621572212065e-05, "loss": 0.0, "step": 5570 }, { "epoch": 2.0402193784277878, "grad_norm": 0.0, "learning_rate": 1.7758683729433275e-05, "loss": 0.0, "step": 5580 }, { "epoch": 2.043875685557587, "grad_norm": 0.0, "learning_rate": 1.773674588665448e-05, "loss": 0.0, "step": 5590 }, { "epoch": 2.047531992687386, "grad_norm": 0.0, "learning_rate": 1.7714808043875687e-05, "loss": 0.0, "step": 5600 }, { "epoch": 2.0511882998171846, "grad_norm": 0.0, "learning_rate": 1.769287020109689e-05, "loss": 0.0, "step": 5610 }, { "epoch": 2.0548446069469835, "grad_norm": 0.0, "learning_rate": 1.76709323583181e-05, "loss": 0.0, "step": 5620 }, { "epoch": 2.0585009140767823, "grad_norm": 0.0, "learning_rate": 1.7648994515539305e-05, "loss": 0.0, "step": 5630 }, { "epoch": 2.0621572212065815, "grad_norm": 0.0, "learning_rate": 1.762705667276051e-05, "loss": 0.0, "step": 5640 }, { "epoch": 2.0658135283363803, "grad_norm": 0.0, "learning_rate": 1.7605118829981718e-05, "loss": 0.0, "step": 5650 }, { "epoch": 2.069469835466179, "grad_norm": 0.0, "learning_rate": 1.7583180987202927e-05, "loss": 0.0, "step": 5660 }, { "epoch": 2.073126142595978, "grad_norm": 0.0, "learning_rate": 1.7561243144424133e-05, "loss": 0.0, "step": 5670 }, { "epoch": 2.076782449725777, "grad_norm": 0.0, "learning_rate": 1.753930530164534e-05, "loss": 0.0, "step": 5680 }, { "epoch": 2.080438756855576, "grad_norm": 0.0, "learning_rate": 1.7517367458866546e-05, "loss": 0.0, "step": 5690 }, { "epoch": 2.084095063985375, "grad_norm": 0.0, "learning_rate": 1.7495429616087752e-05, "loss": 0.0, "step": 5700 }, { "epoch": 2.0877513711151736, "grad_norm": 0.0, "learning_rate": 1.7473491773308958e-05, "loss": 0.0, "step": 5710 }, { "epoch": 2.0914076782449724, "grad_norm": 0.0, "learning_rate": 1.7451553930530164e-05, "loss": 0.0, "step": 5720 }, { "epoch": 2.0950639853747717, "grad_norm": 0.0, "learning_rate": 1.742961608775137e-05, "loss": 0.0001, "step": 5730 }, { "epoch": 2.0987202925045705, "grad_norm": 0.24371479451656342, "learning_rate": 1.7407678244972576e-05, "loss": 0.0011, "step": 5740 }, { "epoch": 2.1023765996343693, "grad_norm": 0.0, "learning_rate": 1.7385740402193786e-05, "loss": 0.0006, "step": 5750 }, { "epoch": 2.106032906764168, "grad_norm": 0.0, "learning_rate": 1.7363802559414992e-05, "loss": 0.0003, "step": 5760 }, { "epoch": 2.109689213893967, "grad_norm": 0.0, "learning_rate": 1.7341864716636198e-05, "loss": 0.0005, "step": 5770 }, { "epoch": 2.113345521023766, "grad_norm": 0.0, "learning_rate": 1.7319926873857404e-05, "loss": 0.0, "step": 5780 }, { "epoch": 2.117001828153565, "grad_norm": 0.0, "learning_rate": 1.7297989031078614e-05, "loss": 0.0002, "step": 5790 }, { "epoch": 2.1206581352833638, "grad_norm": 0.0, "learning_rate": 1.727605118829982e-05, "loss": 0.0, "step": 5800 }, { "epoch": 2.1243144424131626, "grad_norm": 0.0, "learning_rate": 1.7254113345521023e-05, "loss": 0.0, "step": 5810 }, { "epoch": 2.1279707495429614, "grad_norm": 0.0, "learning_rate": 1.723217550274223e-05, "loss": 0.0, "step": 5820 }, { "epoch": 2.1316270566727606, "grad_norm": 0.0, "learning_rate": 1.7210237659963435e-05, "loss": 0.0, "step": 5830 }, { "epoch": 2.1352833638025595, "grad_norm": 0.0, "learning_rate": 1.7188299817184645e-05, "loss": 0.0, "step": 5840 }, { "epoch": 2.1389396709323583, "grad_norm": 0.0, "learning_rate": 1.716636197440585e-05, "loss": 0.0, "step": 5850 }, { "epoch": 2.142595978062157, "grad_norm": 0.0, "learning_rate": 1.7144424131627057e-05, "loss": 0.0, "step": 5860 }, { "epoch": 2.1462522851919563, "grad_norm": 0.0, "learning_rate": 1.7122486288848263e-05, "loss": 0.0, "step": 5870 }, { "epoch": 2.149908592321755, "grad_norm": 0.0, "learning_rate": 1.7100548446069473e-05, "loss": 0.0, "step": 5880 }, { "epoch": 2.153564899451554, "grad_norm": 0.0, "learning_rate": 1.707861060329068e-05, "loss": 0.0, "step": 5890 }, { "epoch": 2.1572212065813527, "grad_norm": 0.0, "learning_rate": 1.7056672760511885e-05, "loss": 0.0, "step": 5900 }, { "epoch": 2.1608775137111516, "grad_norm": 0.0, "learning_rate": 1.7034734917733088e-05, "loss": 0.0, "step": 5910 }, { "epoch": 2.164533820840951, "grad_norm": 0.0, "learning_rate": 1.7012797074954294e-05, "loss": 0.0, "step": 5920 }, { "epoch": 2.1681901279707496, "grad_norm": 0.0, "learning_rate": 1.6990859232175503e-05, "loss": 0.0, "step": 5930 }, { "epoch": 2.1718464351005484, "grad_norm": 0.0, "learning_rate": 1.696892138939671e-05, "loss": 0.0, "step": 5940 }, { "epoch": 2.1755027422303472, "grad_norm": 0.0, "learning_rate": 1.6946983546617916e-05, "loss": 0.0, "step": 5950 }, { "epoch": 2.1791590493601465, "grad_norm": 0.0, "learning_rate": 1.6925045703839122e-05, "loss": 0.0, "step": 5960 }, { "epoch": 2.1828153564899453, "grad_norm": 0.0, "learning_rate": 1.690310786106033e-05, "loss": 0.0, "step": 5970 }, { "epoch": 2.186471663619744, "grad_norm": 0.0, "learning_rate": 1.6881170018281537e-05, "loss": 0.0, "step": 5980 }, { "epoch": 2.190127970749543, "grad_norm": 0.0, "learning_rate": 1.6859232175502744e-05, "loss": 0.0, "step": 5990 }, { "epoch": 2.1937842778793417, "grad_norm": 0.0, "learning_rate": 1.683729433272395e-05, "loss": 0.0, "step": 6000 }, { "epoch": 2.197440585009141, "grad_norm": 0.0, "learning_rate": 1.6815356489945156e-05, "loss": 0.0, "step": 6010 }, { "epoch": 2.2010968921389398, "grad_norm": 0.0, "learning_rate": 1.6793418647166362e-05, "loss": 0.0, "step": 6020 }, { "epoch": 2.2047531992687386, "grad_norm": 0.0, "learning_rate": 1.6771480804387568e-05, "loss": 0.0, "step": 6030 }, { "epoch": 2.2084095063985374, "grad_norm": 0.0, "learning_rate": 1.6749542961608774e-05, "loss": 0.0, "step": 6040 }, { "epoch": 2.212065813528336, "grad_norm": 0.0, "learning_rate": 1.672760511882998e-05, "loss": 0.0, "step": 6050 }, { "epoch": 2.2157221206581355, "grad_norm": 0.0, "learning_rate": 1.670566727605119e-05, "loss": 0.0, "step": 6060 }, { "epoch": 2.2193784277879343, "grad_norm": 0.0, "learning_rate": 1.6683729433272396e-05, "loss": 0.0, "step": 6070 }, { "epoch": 2.223034734917733, "grad_norm": 0.0, "learning_rate": 1.6661791590493602e-05, "loss": 0.0, "step": 6080 }, { "epoch": 2.226691042047532, "grad_norm": 0.0, "learning_rate": 1.6639853747714808e-05, "loss": 0.0, "step": 6090 }, { "epoch": 2.2303473491773307, "grad_norm": 0.0, "learning_rate": 1.6617915904936018e-05, "loss": 0.0, "step": 6100 }, { "epoch": 2.23400365630713, "grad_norm": 0.0, "learning_rate": 1.659597806215722e-05, "loss": 0.0, "step": 6110 }, { "epoch": 2.2376599634369287, "grad_norm": 0.0, "learning_rate": 1.6574040219378427e-05, "loss": 0.0, "step": 6120 }, { "epoch": 2.2413162705667276, "grad_norm": 0.0, "learning_rate": 1.6552102376599633e-05, "loss": 0.0, "step": 6130 }, { "epoch": 2.2449725776965264, "grad_norm": 0.0, "learning_rate": 1.6530164533820842e-05, "loss": 0.0, "step": 6140 }, { "epoch": 2.2486288848263256, "grad_norm": 0.0, "learning_rate": 1.650822669104205e-05, "loss": 0.0, "step": 6150 }, { "epoch": 2.2522851919561244, "grad_norm": 0.0, "learning_rate": 1.6486288848263255e-05, "loss": 0.0, "step": 6160 }, { "epoch": 2.2559414990859232, "grad_norm": 0.0, "learning_rate": 1.646435100548446e-05, "loss": 0.0, "step": 6170 }, { "epoch": 2.259597806215722, "grad_norm": 0.0, "learning_rate": 1.6442413162705667e-05, "loss": 0.0, "step": 6180 }, { "epoch": 2.263254113345521, "grad_norm": 0.0, "learning_rate": 1.6420475319926876e-05, "loss": 0.0, "step": 6190 }, { "epoch": 2.26691042047532, "grad_norm": 0.0, "learning_rate": 1.6398537477148083e-05, "loss": 0.0, "step": 6200 }, { "epoch": 2.270566727605119, "grad_norm": 0.0, "learning_rate": 1.6376599634369285e-05, "loss": 0.0, "step": 6210 }, { "epoch": 2.2742230347349177, "grad_norm": 0.0, "learning_rate": 1.635466179159049e-05, "loss": 0.0, "step": 6220 }, { "epoch": 2.2778793418647165, "grad_norm": 0.0, "learning_rate": 1.63327239488117e-05, "loss": 0.0, "step": 6230 }, { "epoch": 2.2815356489945158, "grad_norm": 0.0, "learning_rate": 1.6310786106032907e-05, "loss": 0.0, "step": 6240 }, { "epoch": 2.2851919561243146, "grad_norm": 0.0, "learning_rate": 1.6288848263254113e-05, "loss": 0.0, "step": 6250 }, { "epoch": 2.2888482632541134, "grad_norm": 0.0, "learning_rate": 1.626691042047532e-05, "loss": 0.0, "step": 6260 }, { "epoch": 2.292504570383912, "grad_norm": 0.0, "learning_rate": 1.624497257769653e-05, "loss": 0.0, "step": 6270 }, { "epoch": 2.296160877513711, "grad_norm": 0.0, "learning_rate": 1.6223034734917735e-05, "loss": 0.0, "step": 6280 }, { "epoch": 2.2998171846435103, "grad_norm": 0.0, "learning_rate": 1.620109689213894e-05, "loss": 0.0, "step": 6290 }, { "epoch": 2.303473491773309, "grad_norm": 0.0, "learning_rate": 1.6179159049360147e-05, "loss": 0.0, "step": 6300 }, { "epoch": 2.307129798903108, "grad_norm": 0.0, "learning_rate": 1.615722120658135e-05, "loss": 0.0, "step": 6310 }, { "epoch": 2.3107861060329067, "grad_norm": 0.0, "learning_rate": 1.613528336380256e-05, "loss": 0.0, "step": 6320 }, { "epoch": 2.3144424131627055, "grad_norm": 0.0, "learning_rate": 1.6113345521023766e-05, "loss": 0.0, "step": 6330 }, { "epoch": 2.3180987202925047, "grad_norm": 0.0, "learning_rate": 1.6091407678244972e-05, "loss": 0.0, "step": 6340 }, { "epoch": 2.3217550274223036, "grad_norm": 0.0, "learning_rate": 1.6069469835466178e-05, "loss": 0.0, "step": 6350 }, { "epoch": 2.3254113345521024, "grad_norm": 0.0, "learning_rate": 1.6047531992687388e-05, "loss": 0.0, "step": 6360 }, { "epoch": 2.329067641681901, "grad_norm": 0.0, "learning_rate": 1.6025594149908594e-05, "loss": 0.0, "step": 6370 }, { "epoch": 2.3327239488117, "grad_norm": 0.0, "learning_rate": 1.60036563071298e-05, "loss": 0.0, "step": 6380 }, { "epoch": 2.3363802559414992, "grad_norm": 0.0, "learning_rate": 1.5981718464351006e-05, "loss": 0.0, "step": 6390 }, { "epoch": 2.340036563071298, "grad_norm": 0.0, "learning_rate": 1.5959780621572212e-05, "loss": 0.0, "step": 6400 }, { "epoch": 2.343692870201097, "grad_norm": 0.0, "learning_rate": 1.593784277879342e-05, "loss": 0.0, "step": 6410 }, { "epoch": 2.3473491773308957, "grad_norm": 0.0, "learning_rate": 1.5915904936014625e-05, "loss": 0.0, "step": 6420 }, { "epoch": 2.3510054844606945, "grad_norm": 0.0, "learning_rate": 1.589396709323583e-05, "loss": 0.0, "step": 6430 }, { "epoch": 2.3546617915904937, "grad_norm": 0.0, "learning_rate": 1.5872029250457037e-05, "loss": 0.0, "step": 6440 }, { "epoch": 2.3583180987202925, "grad_norm": 0.0, "learning_rate": 1.5850091407678246e-05, "loss": 0.0, "step": 6450 }, { "epoch": 2.3619744058500913, "grad_norm": 0.0, "learning_rate": 1.5828153564899452e-05, "loss": 0.0, "step": 6460 }, { "epoch": 2.36563071297989, "grad_norm": 0.0, "learning_rate": 1.580621572212066e-05, "loss": 0.0, "step": 6470 }, { "epoch": 2.3692870201096894, "grad_norm": 0.0, "learning_rate": 1.5784277879341865e-05, "loss": 0.0, "step": 6480 }, { "epoch": 2.372943327239488, "grad_norm": 0.0, "learning_rate": 1.5762340036563074e-05, "loss": 0.0, "step": 6490 }, { "epoch": 2.376599634369287, "grad_norm": 0.0, "learning_rate": 1.574040219378428e-05, "loss": 0.0, "step": 6500 }, { "epoch": 2.380255941499086, "grad_norm": 0.0, "learning_rate": 1.5718464351005483e-05, "loss": 0.0, "step": 6510 }, { "epoch": 2.383912248628885, "grad_norm": 0.0, "learning_rate": 1.569652650822669e-05, "loss": 0.0, "step": 6520 }, { "epoch": 2.387568555758684, "grad_norm": 0.0, "learning_rate": 1.5674588665447895e-05, "loss": 0.0, "step": 6530 }, { "epoch": 2.3912248628884827, "grad_norm": 0.0, "learning_rate": 1.5652650822669105e-05, "loss": 0.0, "step": 6540 }, { "epoch": 2.3948811700182815, "grad_norm": 0.0, "learning_rate": 1.563071297989031e-05, "loss": 0.0, "step": 6550 }, { "epoch": 2.3985374771480803, "grad_norm": 0.0, "learning_rate": 1.5608775137111517e-05, "loss": 0.0, "step": 6560 }, { "epoch": 2.4021937842778796, "grad_norm": 0.0, "learning_rate": 1.5586837294332723e-05, "loss": 0.0, "step": 6570 }, { "epoch": 2.4058500914076784, "grad_norm": 0.0, "learning_rate": 1.5564899451553933e-05, "loss": 0.0, "step": 6580 }, { "epoch": 2.409506398537477, "grad_norm": 0.0, "learning_rate": 1.554296160877514e-05, "loss": 0.0, "step": 6590 }, { "epoch": 2.413162705667276, "grad_norm": 0.0, "learning_rate": 1.5521023765996345e-05, "loss": 0.0, "step": 6600 }, { "epoch": 2.416819012797075, "grad_norm": 0.0, "learning_rate": 1.549908592321755e-05, "loss": 0.0, "step": 6610 }, { "epoch": 2.420475319926874, "grad_norm": 0.0, "learning_rate": 1.5477148080438757e-05, "loss": 0.0, "step": 6620 }, { "epoch": 2.424131627056673, "grad_norm": 0.0, "learning_rate": 1.5455210237659964e-05, "loss": 0.0, "step": 6630 }, { "epoch": 2.4277879341864717, "grad_norm": 0.0, "learning_rate": 1.543327239488117e-05, "loss": 0.0, "step": 6640 }, { "epoch": 2.4314442413162705, "grad_norm": 0.0, "learning_rate": 1.5411334552102376e-05, "loss": 0.0, "step": 6650 }, { "epoch": 2.4351005484460693, "grad_norm": 0.0, "learning_rate": 1.5389396709323582e-05, "loss": 0.0, "step": 6660 }, { "epoch": 2.4387568555758685, "grad_norm": 0.0, "learning_rate": 1.536745886654479e-05, "loss": 0.0, "step": 6670 }, { "epoch": 2.4424131627056673, "grad_norm": 0.0, "learning_rate": 1.5345521023765998e-05, "loss": 0.0, "step": 6680 }, { "epoch": 2.446069469835466, "grad_norm": 0.0, "learning_rate": 1.5323583180987204e-05, "loss": 0.0, "step": 6690 }, { "epoch": 2.449725776965265, "grad_norm": 0.0, "learning_rate": 1.530164533820841e-05, "loss": 0.0, "step": 6700 }, { "epoch": 2.4533820840950638, "grad_norm": 0.0, "learning_rate": 1.527970749542962e-05, "loss": 0.0, "step": 6710 }, { "epoch": 2.457038391224863, "grad_norm": 0.0, "learning_rate": 1.5257769652650824e-05, "loss": 0.0, "step": 6720 }, { "epoch": 2.460694698354662, "grad_norm": 0.0, "learning_rate": 1.523583180987203e-05, "loss": 0.0, "step": 6730 }, { "epoch": 2.4643510054844606, "grad_norm": 0.0, "learning_rate": 1.5213893967093235e-05, "loss": 0.0, "step": 6740 }, { "epoch": 2.4680073126142594, "grad_norm": 0.0, "learning_rate": 1.519195612431444e-05, "loss": 0.0, "step": 6750 }, { "epoch": 2.4716636197440587, "grad_norm": 0.0, "learning_rate": 1.517001828153565e-05, "loss": 0.0, "step": 6760 }, { "epoch": 2.4753199268738575, "grad_norm": 0.0, "learning_rate": 1.5148080438756856e-05, "loss": 0.0, "step": 6770 }, { "epoch": 2.4789762340036563, "grad_norm": 0.0, "learning_rate": 1.5126142595978063e-05, "loss": 0.0, "step": 6780 }, { "epoch": 2.482632541133455, "grad_norm": 0.0, "learning_rate": 1.5104204753199267e-05, "loss": 0.0, "step": 6790 }, { "epoch": 2.4862888482632544, "grad_norm": 0.0, "learning_rate": 1.5082266910420477e-05, "loss": 0.0, "step": 6800 }, { "epoch": 2.489945155393053, "grad_norm": 0.0, "learning_rate": 1.5060329067641683e-05, "loss": 0.0, "step": 6810 }, { "epoch": 2.493601462522852, "grad_norm": 0.0, "learning_rate": 1.5038391224862889e-05, "loss": 0.0, "step": 6820 }, { "epoch": 2.497257769652651, "grad_norm": 0.0, "learning_rate": 1.5016453382084095e-05, "loss": 0.0, "step": 6830 }, { "epoch": 2.5009140767824496, "grad_norm": 0.0, "learning_rate": 1.4994515539305301e-05, "loss": 0.0, "step": 6840 }, { "epoch": 2.504570383912249, "grad_norm": 0.0, "learning_rate": 1.4972577696526509e-05, "loss": 0.0, "step": 6850 }, { "epoch": 2.5082266910420477, "grad_norm": 0.0, "learning_rate": 1.4950639853747715e-05, "loss": 0.0, "step": 6860 }, { "epoch": 2.5118829981718465, "grad_norm": 0.0, "learning_rate": 1.4928702010968921e-05, "loss": 0.0, "step": 6870 }, { "epoch": 2.5155393053016453, "grad_norm": 0.0, "learning_rate": 1.4906764168190129e-05, "loss": 0.0, "step": 6880 }, { "epoch": 2.519195612431444, "grad_norm": 0.0, "learning_rate": 1.4884826325411333e-05, "loss": 0.0, "step": 6890 }, { "epoch": 2.5228519195612433, "grad_norm": 0.0, "learning_rate": 1.4862888482632541e-05, "loss": 0.0, "step": 6900 }, { "epoch": 2.526508226691042, "grad_norm": 0.0, "learning_rate": 1.4840950639853747e-05, "loss": 0.0, "step": 6910 }, { "epoch": 2.530164533820841, "grad_norm": 0.0, "learning_rate": 1.4819012797074955e-05, "loss": 0.0, "step": 6920 }, { "epoch": 2.5338208409506398, "grad_norm": 0.0, "learning_rate": 1.4797074954296161e-05, "loss": 0.0, "step": 6930 }, { "epoch": 2.5374771480804386, "grad_norm": 0.0, "learning_rate": 1.4775137111517368e-05, "loss": 0.0, "step": 6940 }, { "epoch": 2.541133455210238, "grad_norm": 0.0, "learning_rate": 1.4753199268738574e-05, "loss": 0.0, "step": 6950 }, { "epoch": 2.5447897623400366, "grad_norm": 0.0, "learning_rate": 1.4731261425959782e-05, "loss": 0.0, "step": 6960 }, { "epoch": 2.5484460694698354, "grad_norm": 0.0, "learning_rate": 1.4709323583180988e-05, "loss": 0.0, "step": 6970 }, { "epoch": 2.5521023765996342, "grad_norm": 0.0, "learning_rate": 1.4687385740402196e-05, "loss": 0.0, "step": 6980 }, { "epoch": 2.555758683729433, "grad_norm": 0.0, "learning_rate": 1.46654478976234e-05, "loss": 0.0, "step": 6990 }, { "epoch": 2.5594149908592323, "grad_norm": 0.0, "learning_rate": 1.4643510054844606e-05, "loss": 0.0, "step": 7000 }, { "epoch": 2.563071297989031, "grad_norm": 0.0, "learning_rate": 1.4621572212065814e-05, "loss": 0.0, "step": 7010 }, { "epoch": 2.56672760511883, "grad_norm": 0.0, "learning_rate": 1.459963436928702e-05, "loss": 0.0, "step": 7020 }, { "epoch": 2.5703839122486287, "grad_norm": 0.0, "learning_rate": 1.4577696526508228e-05, "loss": 0.0, "step": 7030 }, { "epoch": 2.5740402193784275, "grad_norm": 0.0, "learning_rate": 1.4555758683729432e-05, "loss": 0.0, "step": 7040 }, { "epoch": 2.577696526508227, "grad_norm": 0.0, "learning_rate": 1.453382084095064e-05, "loss": 0.0, "step": 7050 }, { "epoch": 2.5813528336380256, "grad_norm": 0.0, "learning_rate": 1.4511882998171846e-05, "loss": 0.0, "step": 7060 }, { "epoch": 2.5850091407678244, "grad_norm": 0.0, "learning_rate": 1.4489945155393054e-05, "loss": 0.0, "step": 7070 }, { "epoch": 2.5886654478976237, "grad_norm": 0.0, "learning_rate": 1.446800731261426e-05, "loss": 0.0, "step": 7080 }, { "epoch": 2.5923217550274225, "grad_norm": 0.0, "learning_rate": 1.4446069469835466e-05, "loss": 0.0, "step": 7090 }, { "epoch": 2.5959780621572213, "grad_norm": 0.0, "learning_rate": 1.4424131627056673e-05, "loss": 0.0, "step": 7100 }, { "epoch": 2.59963436928702, "grad_norm": 0.0, "learning_rate": 1.4402193784277879e-05, "loss": 0.0, "step": 7110 }, { "epoch": 2.603290676416819, "grad_norm": 0.0, "learning_rate": 1.4380255941499087e-05, "loss": 0.0, "step": 7120 }, { "epoch": 2.606946983546618, "grad_norm": 0.0, "learning_rate": 1.4358318098720293e-05, "loss": 0.0, "step": 7130 }, { "epoch": 2.610603290676417, "grad_norm": 0.0, "learning_rate": 1.43363802559415e-05, "loss": 0.0, "step": 7140 }, { "epoch": 2.6142595978062158, "grad_norm": 0.0, "learning_rate": 1.4314442413162705e-05, "loss": 0.0, "step": 7150 }, { "epoch": 2.6179159049360146, "grad_norm": 0.0, "learning_rate": 1.4292504570383913e-05, "loss": 0.0, "step": 7160 }, { "epoch": 2.6215722120658134, "grad_norm": 0.0, "learning_rate": 1.4270566727605119e-05, "loss": 0.0, "step": 7170 }, { "epoch": 2.6252285191956126, "grad_norm": 0.0, "learning_rate": 1.4248628884826327e-05, "loss": 0.0, "step": 7180 }, { "epoch": 2.6288848263254114, "grad_norm": 0.0, "learning_rate": 1.4226691042047533e-05, "loss": 0.0, "step": 7190 }, { "epoch": 2.6325411334552102, "grad_norm": 0.0, "learning_rate": 1.4204753199268739e-05, "loss": 0.0, "step": 7200 }, { "epoch": 2.636197440585009, "grad_norm": 0.0, "learning_rate": 1.4182815356489945e-05, "loss": 0.0, "step": 7210 }, { "epoch": 2.639853747714808, "grad_norm": 0.0, "learning_rate": 1.4160877513711153e-05, "loss": 0.0, "step": 7220 }, { "epoch": 2.643510054844607, "grad_norm": 0.0, "learning_rate": 1.413893967093236e-05, "loss": 0.0, "step": 7230 }, { "epoch": 2.647166361974406, "grad_norm": 0.0, "learning_rate": 1.4117001828153565e-05, "loss": 0.0, "step": 7240 }, { "epoch": 2.6508226691042047, "grad_norm": 0.0, "learning_rate": 1.4095063985374771e-05, "loss": 0.0, "step": 7250 }, { "epoch": 2.6544789762340035, "grad_norm": 0.0, "learning_rate": 1.4073126142595978e-05, "loss": 0.0, "step": 7260 }, { "epoch": 2.6581352833638023, "grad_norm": 0.0, "learning_rate": 1.4051188299817185e-05, "loss": 0.0, "step": 7270 }, { "epoch": 2.6617915904936016, "grad_norm": 0.0, "learning_rate": 1.4029250457038392e-05, "loss": 0.0, "step": 7280 }, { "epoch": 2.6654478976234004, "grad_norm": 0.0, "learning_rate": 1.40073126142596e-05, "loss": 0.0, "step": 7290 }, { "epoch": 2.669104204753199, "grad_norm": 0.0, "learning_rate": 1.3985374771480804e-05, "loss": 0.0, "step": 7300 }, { "epoch": 2.672760511882998, "grad_norm": 0.0, "learning_rate": 1.3963436928702012e-05, "loss": 0.0, "step": 7310 }, { "epoch": 2.676416819012797, "grad_norm": 0.0, "learning_rate": 1.3941499085923218e-05, "loss": 0.0, "step": 7320 }, { "epoch": 2.680073126142596, "grad_norm": 0.0, "learning_rate": 1.3919561243144426e-05, "loss": 0.0, "step": 7330 }, { "epoch": 2.683729433272395, "grad_norm": 0.0, "learning_rate": 1.3897623400365632e-05, "loss": 0.0, "step": 7340 }, { "epoch": 2.6873857404021937, "grad_norm": 0.0, "learning_rate": 1.3875685557586836e-05, "loss": 0.0, "step": 7350 }, { "epoch": 2.691042047531993, "grad_norm": 0.0, "learning_rate": 1.3853747714808044e-05, "loss": 0.0, "step": 7360 }, { "epoch": 2.6946983546617918, "grad_norm": 0.0, "learning_rate": 1.383180987202925e-05, "loss": 0.0, "step": 7370 }, { "epoch": 2.6983546617915906, "grad_norm": 0.0, "learning_rate": 1.3809872029250458e-05, "loss": 0.0, "step": 7380 }, { "epoch": 2.7020109689213894, "grad_norm": 0.0, "learning_rate": 1.3787934186471664e-05, "loss": 0.0, "step": 7390 }, { "epoch": 2.705667276051188, "grad_norm": 0.0, "learning_rate": 1.376599634369287e-05, "loss": 0.0, "step": 7400 }, { "epoch": 2.7093235831809874, "grad_norm": 0.0, "learning_rate": 1.3744058500914077e-05, "loss": 0.0, "step": 7410 }, { "epoch": 2.7129798903107862, "grad_norm": 0.0, "learning_rate": 1.3722120658135284e-05, "loss": 0.0, "step": 7420 }, { "epoch": 2.716636197440585, "grad_norm": 0.0, "learning_rate": 1.370018281535649e-05, "loss": 0.0, "step": 7430 }, { "epoch": 2.720292504570384, "grad_norm": 0.0, "learning_rate": 1.3678244972577698e-05, "loss": 0.0, "step": 7440 }, { "epoch": 2.7239488117001827, "grad_norm": 0.0, "learning_rate": 1.3656307129798903e-05, "loss": 0.0, "step": 7450 }, { "epoch": 2.727605118829982, "grad_norm": 0.0, "learning_rate": 1.3634369287020109e-05, "loss": 0.0, "step": 7460 }, { "epoch": 2.7312614259597807, "grad_norm": 0.0, "learning_rate": 1.3612431444241317e-05, "loss": 0.0, "step": 7470 }, { "epoch": 2.7349177330895795, "grad_norm": 0.0, "learning_rate": 1.3590493601462523e-05, "loss": 0.0, "step": 7480 }, { "epoch": 2.7385740402193783, "grad_norm": 0.0, "learning_rate": 1.356855575868373e-05, "loss": 0.0, "step": 7490 }, { "epoch": 2.742230347349177, "grad_norm": 0.0, "learning_rate": 1.3546617915904935e-05, "loss": 0.0, "step": 7500 }, { "epoch": 2.7458866544789764, "grad_norm": 0.0, "learning_rate": 1.3524680073126143e-05, "loss": 0.0, "step": 7510 }, { "epoch": 2.749542961608775, "grad_norm": 0.0, "learning_rate": 1.3502742230347349e-05, "loss": 0.0, "step": 7520 }, { "epoch": 2.753199268738574, "grad_norm": 0.0, "learning_rate": 1.3480804387568557e-05, "loss": 0.0, "step": 7530 }, { "epoch": 2.756855575868373, "grad_norm": 0.0, "learning_rate": 1.3458866544789763e-05, "loss": 0.0, "step": 7540 }, { "epoch": 2.7605118829981716, "grad_norm": 0.0, "learning_rate": 1.343692870201097e-05, "loss": 0.0, "step": 7550 }, { "epoch": 2.764168190127971, "grad_norm": 0.0, "learning_rate": 1.3414990859232175e-05, "loss": 0.0, "step": 7560 }, { "epoch": 2.7678244972577697, "grad_norm": 0.0, "learning_rate": 1.3393053016453383e-05, "loss": 0.0, "step": 7570 }, { "epoch": 2.7714808043875685, "grad_norm": 0.0, "learning_rate": 1.337111517367459e-05, "loss": 0.0, "step": 7580 }, { "epoch": 2.7751371115173673, "grad_norm": 0.0, "learning_rate": 1.3349177330895796e-05, "loss": 0.0, "step": 7590 }, { "epoch": 2.778793418647166, "grad_norm": 0.0, "learning_rate": 1.3327239488117002e-05, "loss": 0.0, "step": 7600 }, { "epoch": 2.7824497257769654, "grad_norm": 0.0, "learning_rate": 1.3305301645338208e-05, "loss": 0.0, "step": 7610 }, { "epoch": 2.786106032906764, "grad_norm": 0.0, "learning_rate": 1.3283363802559416e-05, "loss": 0.0, "step": 7620 }, { "epoch": 2.789762340036563, "grad_norm": 0.0, "learning_rate": 1.3261425959780622e-05, "loss": 0.0, "step": 7630 }, { "epoch": 2.7934186471663622, "grad_norm": 0.0, "learning_rate": 1.323948811700183e-05, "loss": 0.0, "step": 7640 }, { "epoch": 2.797074954296161, "grad_norm": 0.0, "learning_rate": 1.3217550274223034e-05, "loss": 0.0, "step": 7650 }, { "epoch": 2.80073126142596, "grad_norm": 0.0, "learning_rate": 1.3195612431444242e-05, "loss": 0.0, "step": 7660 }, { "epoch": 2.8043875685557587, "grad_norm": 0.0, "learning_rate": 1.3173674588665448e-05, "loss": 0.0, "step": 7670 }, { "epoch": 2.8080438756855575, "grad_norm": 0.0, "learning_rate": 1.3151736745886656e-05, "loss": 0.0, "step": 7680 }, { "epoch": 2.8117001828153567, "grad_norm": 0.0, "learning_rate": 1.3129798903107862e-05, "loss": 0.0, "step": 7690 }, { "epoch": 2.8153564899451555, "grad_norm": 0.0, "learning_rate": 1.3107861060329066e-05, "loss": 0.0, "step": 7700 }, { "epoch": 2.8190127970749543, "grad_norm": 0.0, "learning_rate": 1.3085923217550274e-05, "loss": 0.0, "step": 7710 }, { "epoch": 2.822669104204753, "grad_norm": 0.0, "learning_rate": 1.306398537477148e-05, "loss": 0.0, "step": 7720 }, { "epoch": 2.826325411334552, "grad_norm": 0.0, "learning_rate": 1.3042047531992688e-05, "loss": 0.0, "step": 7730 }, { "epoch": 2.829981718464351, "grad_norm": 0.0, "learning_rate": 1.3020109689213894e-05, "loss": 0.0, "step": 7740 }, { "epoch": 2.83363802559415, "grad_norm": 0.0, "learning_rate": 1.29981718464351e-05, "loss": 0.0, "step": 7750 }, { "epoch": 2.837294332723949, "grad_norm": 0.0, "learning_rate": 1.2976234003656307e-05, "loss": 0.0, "step": 7760 }, { "epoch": 2.8409506398537476, "grad_norm": 0.0, "learning_rate": 1.2954296160877515e-05, "loss": 0.0, "step": 7770 }, { "epoch": 2.8446069469835464, "grad_norm": 0.0, "learning_rate": 1.293235831809872e-05, "loss": 0.0, "step": 7780 }, { "epoch": 2.8482632541133457, "grad_norm": 0.0, "learning_rate": 1.2910420475319929e-05, "loss": 0.0, "step": 7790 }, { "epoch": 2.8519195612431445, "grad_norm": 0.0, "learning_rate": 1.2888482632541133e-05, "loss": 0.0, "step": 7800 }, { "epoch": 2.8555758683729433, "grad_norm": 0.0, "learning_rate": 1.286654478976234e-05, "loss": 0.0, "step": 7810 }, { "epoch": 2.859232175502742, "grad_norm": 0.0, "learning_rate": 1.2844606946983547e-05, "loss": 0.0, "step": 7820 }, { "epoch": 2.862888482632541, "grad_norm": 0.0, "learning_rate": 1.2822669104204753e-05, "loss": 0.0, "step": 7830 }, { "epoch": 2.86654478976234, "grad_norm": 0.0, "learning_rate": 1.2800731261425961e-05, "loss": 0.0, "step": 7840 }, { "epoch": 2.870201096892139, "grad_norm": 0.0, "learning_rate": 1.2778793418647165e-05, "loss": 0.0, "step": 7850 }, { "epoch": 2.873857404021938, "grad_norm": 0.0, "learning_rate": 1.2756855575868373e-05, "loss": 0.0, "step": 7860 }, { "epoch": 2.8775137111517366, "grad_norm": 0.0, "learning_rate": 1.273491773308958e-05, "loss": 0.0, "step": 7870 }, { "epoch": 2.8811700182815354, "grad_norm": 0.0, "learning_rate": 1.2712979890310787e-05, "loss": 0.0, "step": 7880 }, { "epoch": 2.8848263254113347, "grad_norm": 0.0, "learning_rate": 1.2691042047531993e-05, "loss": 0.0, "step": 7890 }, { "epoch": 2.8884826325411335, "grad_norm": 0.0, "learning_rate": 1.26691042047532e-05, "loss": 0.0, "step": 7900 }, { "epoch": 2.8921389396709323, "grad_norm": 0.0, "learning_rate": 1.2647166361974406e-05, "loss": 0.0, "step": 7910 }, { "epoch": 2.8957952468007315, "grad_norm": 0.0, "learning_rate": 1.2625228519195613e-05, "loss": 0.0, "step": 7920 }, { "epoch": 2.89945155393053, "grad_norm": 0.0, "learning_rate": 1.260329067641682e-05, "loss": 0.0, "step": 7930 }, { "epoch": 2.903107861060329, "grad_norm": 0.0, "learning_rate": 1.2581352833638026e-05, "loss": 0.0, "step": 7940 }, { "epoch": 2.906764168190128, "grad_norm": 0.0, "learning_rate": 1.2559414990859232e-05, "loss": 0.0, "step": 7950 }, { "epoch": 2.9104204753199268, "grad_norm": 0.0, "learning_rate": 1.2537477148080438e-05, "loss": 0.0, "step": 7960 }, { "epoch": 2.914076782449726, "grad_norm": 0.0, "learning_rate": 1.2515539305301646e-05, "loss": 0.0, "step": 7970 }, { "epoch": 2.917733089579525, "grad_norm": 0.0, "learning_rate": 1.2493601462522852e-05, "loss": 0.0, "step": 7980 }, { "epoch": 2.9213893967093236, "grad_norm": 0.0, "learning_rate": 1.247166361974406e-05, "loss": 0.0, "step": 7990 }, { "epoch": 2.9250457038391224, "grad_norm": 0.0, "learning_rate": 1.2449725776965264e-05, "loss": 0.0, "step": 8000 }, { "epoch": 2.9287020109689212, "grad_norm": 0.0, "learning_rate": 1.2427787934186472e-05, "loss": 0.0, "step": 8010 }, { "epoch": 2.9323583180987205, "grad_norm": 0.0, "learning_rate": 1.2405850091407678e-05, "loss": 0.0, "step": 8020 }, { "epoch": 2.9360146252285193, "grad_norm": 0.0, "learning_rate": 1.2383912248628886e-05, "loss": 0.0, "step": 8030 }, { "epoch": 2.939670932358318, "grad_norm": 0.0, "learning_rate": 1.2361974405850092e-05, "loss": 0.0, "step": 8040 }, { "epoch": 2.943327239488117, "grad_norm": 0.0, "learning_rate": 1.2340036563071298e-05, "loss": 0.0, "step": 8050 }, { "epoch": 2.9469835466179157, "grad_norm": 0.0, "learning_rate": 1.2318098720292504e-05, "loss": 0.0, "step": 8060 }, { "epoch": 2.950639853747715, "grad_norm": 0.0, "learning_rate": 1.229616087751371e-05, "loss": 0.0, "step": 8070 }, { "epoch": 2.954296160877514, "grad_norm": 0.0, "learning_rate": 1.2274223034734918e-05, "loss": 0.0, "step": 8080 }, { "epoch": 2.9579524680073126, "grad_norm": 0.0, "learning_rate": 1.2252285191956125e-05, "loss": 0.0, "step": 8090 }, { "epoch": 2.9616087751371114, "grad_norm": 0.0, "learning_rate": 1.223034734917733e-05, "loss": 0.0, "step": 8100 }, { "epoch": 2.96526508226691, "grad_norm": 0.0, "learning_rate": 1.2208409506398537e-05, "loss": 0.0, "step": 8110 }, { "epoch": 2.9689213893967095, "grad_norm": 0.0, "learning_rate": 1.2186471663619745e-05, "loss": 0.0, "step": 8120 }, { "epoch": 2.9725776965265083, "grad_norm": 0.0, "learning_rate": 1.2164533820840951e-05, "loss": 0.0, "step": 8130 }, { "epoch": 2.976234003656307, "grad_norm": 0.0, "learning_rate": 1.2142595978062159e-05, "loss": 0.0, "step": 8140 }, { "epoch": 2.979890310786106, "grad_norm": 0.0, "learning_rate": 1.2120658135283363e-05, "loss": 0.0, "step": 8150 }, { "epoch": 2.9835466179159047, "grad_norm": 0.0, "learning_rate": 1.2098720292504571e-05, "loss": 0.0, "step": 8160 }, { "epoch": 2.987202925045704, "grad_norm": 0.0, "learning_rate": 1.2076782449725777e-05, "loss": 0.0, "step": 8170 }, { "epoch": 2.9908592321755028, "grad_norm": 0.0, "learning_rate": 1.2054844606946983e-05, "loss": 0.0, "step": 8180 }, { "epoch": 2.9945155393053016, "grad_norm": 0.0, "learning_rate": 1.2032906764168191e-05, "loss": 0.0, "step": 8190 }, { "epoch": 2.998171846435101, "grad_norm": 0.0, "learning_rate": 1.2010968921389397e-05, "loss": 0.0, "step": 8200 }, { "epoch": 3.0018281535648996, "grad_norm": 0.0, "learning_rate": 1.1989031078610603e-05, "loss": 0.0, "step": 8210 }, { "epoch": 3.0054844606946984, "grad_norm": 0.0, "learning_rate": 1.196709323583181e-05, "loss": 0.0, "step": 8220 }, { "epoch": 3.0091407678244972, "grad_norm": 0.0, "learning_rate": 1.1945155393053017e-05, "loss": 0.0, "step": 8230 }, { "epoch": 3.012797074954296, "grad_norm": 0.0, "learning_rate": 1.1923217550274223e-05, "loss": 0.0, "step": 8240 }, { "epoch": 3.016453382084095, "grad_norm": 0.0, "learning_rate": 1.1901279707495431e-05, "loss": 0.0, "step": 8250 }, { "epoch": 3.020109689213894, "grad_norm": 0.0, "learning_rate": 1.1879341864716636e-05, "loss": 0.0, "step": 8260 }, { "epoch": 3.023765996343693, "grad_norm": 0.0, "learning_rate": 1.1857404021937844e-05, "loss": 0.0, "step": 8270 }, { "epoch": 3.0274223034734917, "grad_norm": 0.0, "learning_rate": 1.183546617915905e-05, "loss": 0.0, "step": 8280 }, { "epoch": 3.0310786106032905, "grad_norm": 0.0, "learning_rate": 1.1813528336380256e-05, "loss": 0.0, "step": 8290 }, { "epoch": 3.03473491773309, "grad_norm": 0.0, "learning_rate": 1.1791590493601464e-05, "loss": 0.0, "step": 8300 }, { "epoch": 3.0383912248628886, "grad_norm": 0.0, "learning_rate": 1.1769652650822668e-05, "loss": 0.0, "step": 8310 }, { "epoch": 3.0420475319926874, "grad_norm": 0.0, "learning_rate": 1.1747714808043876e-05, "loss": 0.0, "step": 8320 }, { "epoch": 3.045703839122486, "grad_norm": 0.0, "learning_rate": 1.1725776965265082e-05, "loss": 0.0, "step": 8330 }, { "epoch": 3.049360146252285, "grad_norm": 0.0, "learning_rate": 1.170383912248629e-05, "loss": 0.0, "step": 8340 }, { "epoch": 3.0530164533820843, "grad_norm": 0.0, "learning_rate": 1.1681901279707496e-05, "loss": 0.0, "step": 8350 }, { "epoch": 3.056672760511883, "grad_norm": 0.0, "learning_rate": 1.1659963436928702e-05, "loss": 0.0, "step": 8360 }, { "epoch": 3.060329067641682, "grad_norm": 0.0, "learning_rate": 1.1638025594149908e-05, "loss": 0.0, "step": 8370 }, { "epoch": 3.0639853747714807, "grad_norm": 0.0, "learning_rate": 1.1616087751371116e-05, "loss": 0.0, "step": 8380 }, { "epoch": 3.0676416819012795, "grad_norm": 0.0, "learning_rate": 1.1594149908592322e-05, "loss": 0.0, "step": 8390 }, { "epoch": 3.0712979890310788, "grad_norm": 0.0, "learning_rate": 1.157221206581353e-05, "loss": 0.0, "step": 8400 }, { "epoch": 3.0749542961608776, "grad_norm": 0.0, "learning_rate": 1.1550274223034735e-05, "loss": 0.0, "step": 8410 }, { "epoch": 3.0786106032906764, "grad_norm": 0.0, "learning_rate": 1.152833638025594e-05, "loss": 0.0, "step": 8420 }, { "epoch": 3.082266910420475, "grad_norm": 0.0, "learning_rate": 1.1506398537477149e-05, "loss": 0.0, "step": 8430 }, { "epoch": 3.0859232175502744, "grad_norm": 0.0, "learning_rate": 1.1484460694698355e-05, "loss": 0.0, "step": 8440 }, { "epoch": 3.0895795246800732, "grad_norm": 0.0, "learning_rate": 1.1462522851919563e-05, "loss": 0.0, "step": 8450 }, { "epoch": 3.093235831809872, "grad_norm": 0.0, "learning_rate": 1.1440585009140767e-05, "loss": 0.0, "step": 8460 }, { "epoch": 3.096892138939671, "grad_norm": 0.0, "learning_rate": 1.1418647166361975e-05, "loss": 0.0, "step": 8470 }, { "epoch": 3.1005484460694697, "grad_norm": 0.0, "learning_rate": 1.1396709323583181e-05, "loss": 0.0, "step": 8480 }, { "epoch": 3.104204753199269, "grad_norm": 0.0, "learning_rate": 1.1374771480804389e-05, "loss": 0.0, "step": 8490 }, { "epoch": 3.1078610603290677, "grad_norm": 0.0, "learning_rate": 1.1352833638025595e-05, "loss": 0.0, "step": 8500 }, { "epoch": 3.1115173674588665, "grad_norm": 0.0, "learning_rate": 1.1330895795246801e-05, "loss": 0.0, "step": 8510 }, { "epoch": 3.1151736745886653, "grad_norm": 0.0, "learning_rate": 1.1308957952468007e-05, "loss": 0.0, "step": 8520 }, { "epoch": 3.118829981718464, "grad_norm": 0.0, "learning_rate": 1.1287020109689213e-05, "loss": 0.0, "step": 8530 }, { "epoch": 3.1224862888482634, "grad_norm": 0.0, "learning_rate": 1.1265082266910421e-05, "loss": 0.0, "step": 8540 }, { "epoch": 3.126142595978062, "grad_norm": 0.0, "learning_rate": 1.1243144424131627e-05, "loss": 0.0, "step": 8550 }, { "epoch": 3.129798903107861, "grad_norm": 0.0, "learning_rate": 1.1221206581352834e-05, "loss": 0.0, "step": 8560 }, { "epoch": 3.13345521023766, "grad_norm": 0.0, "learning_rate": 1.119926873857404e-05, "loss": 0.0, "step": 8570 }, { "epoch": 3.137111517367459, "grad_norm": 0.0, "learning_rate": 1.1177330895795248e-05, "loss": 0.0, "step": 8580 }, { "epoch": 3.140767824497258, "grad_norm": 0.0, "learning_rate": 1.1155393053016454e-05, "loss": 0.0, "step": 8590 }, { "epoch": 3.1444241316270567, "grad_norm": 0.0, "learning_rate": 1.1133455210237662e-05, "loss": 0.0, "step": 8600 }, { "epoch": 3.1480804387568555, "grad_norm": 0.0, "learning_rate": 1.1111517367458866e-05, "loss": 0.0, "step": 8610 }, { "epoch": 3.1517367458866543, "grad_norm": 0.0, "learning_rate": 1.1089579524680074e-05, "loss": 0.0, "step": 8620 }, { "epoch": 3.1553930530164536, "grad_norm": 0.0, "learning_rate": 1.106764168190128e-05, "loss": 0.0, "step": 8630 }, { "epoch": 3.1590493601462524, "grad_norm": 0.0, "learning_rate": 1.1045703839122488e-05, "loss": 0.0, "step": 8640 }, { "epoch": 3.162705667276051, "grad_norm": 0.0, "learning_rate": 1.1023765996343694e-05, "loss": 0.0, "step": 8650 }, { "epoch": 3.16636197440585, "grad_norm": 0.0, "learning_rate": 1.1001828153564898e-05, "loss": 0.0, "step": 8660 }, { "epoch": 3.170018281535649, "grad_norm": 0.0, "learning_rate": 1.0979890310786106e-05, "loss": 0.0, "step": 8670 }, { "epoch": 3.173674588665448, "grad_norm": 0.0, "learning_rate": 1.0957952468007312e-05, "loss": 0.0, "step": 8680 }, { "epoch": 3.177330895795247, "grad_norm": 0.0, "learning_rate": 1.093601462522852e-05, "loss": 0.0, "step": 8690 }, { "epoch": 3.1809872029250457, "grad_norm": 0.0, "learning_rate": 1.0914076782449726e-05, "loss": 0.0, "step": 8700 }, { "epoch": 3.1846435100548445, "grad_norm": 0.0, "learning_rate": 1.0892138939670932e-05, "loss": 0.0, "step": 8710 }, { "epoch": 3.1882998171846433, "grad_norm": 0.0, "learning_rate": 1.0870201096892139e-05, "loss": 0.0, "step": 8720 }, { "epoch": 3.1919561243144425, "grad_norm": 0.0, "learning_rate": 1.0848263254113346e-05, "loss": 0.0, "step": 8730 }, { "epoch": 3.1956124314442413, "grad_norm": 0.0, "learning_rate": 1.0826325411334553e-05, "loss": 0.0, "step": 8740 }, { "epoch": 3.19926873857404, "grad_norm": 0.0, "learning_rate": 1.080438756855576e-05, "loss": 0.0, "step": 8750 }, { "epoch": 3.202925045703839, "grad_norm": 0.0, "learning_rate": 1.0782449725776965e-05, "loss": 0.0, "step": 8760 }, { "epoch": 3.206581352833638, "grad_norm": 0.0, "learning_rate": 1.0760511882998171e-05, "loss": 0.0, "step": 8770 }, { "epoch": 3.210237659963437, "grad_norm": 0.0, "learning_rate": 1.0738574040219379e-05, "loss": 0.0, "step": 8780 }, { "epoch": 3.213893967093236, "grad_norm": 0.0, "learning_rate": 1.0716636197440585e-05, "loss": 0.0, "step": 8790 }, { "epoch": 3.2175502742230346, "grad_norm": 0.0, "learning_rate": 1.0694698354661793e-05, "loss": 0.0, "step": 8800 }, { "epoch": 3.2212065813528334, "grad_norm": 0.0, "learning_rate": 1.0672760511882997e-05, "loss": 0.0, "step": 8810 }, { "epoch": 3.2248628884826327, "grad_norm": 0.0, "learning_rate": 1.0650822669104205e-05, "loss": 0.0, "step": 8820 }, { "epoch": 3.2285191956124315, "grad_norm": 0.0, "learning_rate": 1.0628884826325411e-05, "loss": 0.0, "step": 8830 }, { "epoch": 3.2321755027422303, "grad_norm": 0.0, "learning_rate": 1.0606946983546619e-05, "loss": 0.0, "step": 8840 }, { "epoch": 3.235831809872029, "grad_norm": 0.0, "learning_rate": 1.0585009140767825e-05, "loss": 0.0, "step": 8850 }, { "epoch": 3.2394881170018284, "grad_norm": 0.0, "learning_rate": 1.0563071297989031e-05, "loss": 0.0, "step": 8860 }, { "epoch": 3.243144424131627, "grad_norm": 0.0, "learning_rate": 1.0541133455210237e-05, "loss": 0.0, "step": 8870 }, { "epoch": 3.246800731261426, "grad_norm": 0.0, "learning_rate": 1.0519195612431444e-05, "loss": 0.0, "step": 8880 }, { "epoch": 3.250457038391225, "grad_norm": 0.0, "learning_rate": 1.0497257769652651e-05, "loss": 0.0, "step": 8890 }, { "epoch": 3.2541133455210236, "grad_norm": 0.0, "learning_rate": 1.0475319926873858e-05, "loss": 0.0, "step": 8900 }, { "epoch": 3.257769652650823, "grad_norm": 0.0, "learning_rate": 1.0453382084095064e-05, "loss": 0.0, "step": 8910 }, { "epoch": 3.2614259597806217, "grad_norm": 0.0, "learning_rate": 1.043144424131627e-05, "loss": 0.0, "step": 8920 }, { "epoch": 3.2650822669104205, "grad_norm": 0.0, "learning_rate": 1.0409506398537478e-05, "loss": 0.0, "step": 8930 }, { "epoch": 3.2687385740402193, "grad_norm": 0.0, "learning_rate": 1.0387568555758684e-05, "loss": 0.0, "step": 8940 }, { "epoch": 3.272394881170018, "grad_norm": 0.0, "learning_rate": 1.0365630712979892e-05, "loss": 0.0, "step": 8950 }, { "epoch": 3.2760511882998173, "grad_norm": 0.0, "learning_rate": 1.0343692870201096e-05, "loss": 0.0, "step": 8960 }, { "epoch": 3.279707495429616, "grad_norm": 0.0, "learning_rate": 1.0321755027422304e-05, "loss": 0.0, "step": 8970 }, { "epoch": 3.283363802559415, "grad_norm": 0.0, "learning_rate": 1.029981718464351e-05, "loss": 0.0, "step": 8980 }, { "epoch": 3.2870201096892138, "grad_norm": 0.0, "learning_rate": 1.0277879341864718e-05, "loss": 0.0, "step": 8990 }, { "epoch": 3.2906764168190126, "grad_norm": 0.0, "learning_rate": 1.0255941499085924e-05, "loss": 0.0, "step": 9000 }, { "epoch": 3.294332723948812, "grad_norm": 0.0, "learning_rate": 1.0234003656307129e-05, "loss": 0.0, "step": 9010 }, { "epoch": 3.2979890310786106, "grad_norm": 0.0, "learning_rate": 1.0212065813528336e-05, "loss": 0.0, "step": 9020 }, { "epoch": 3.3016453382084094, "grad_norm": 0.0, "learning_rate": 1.0190127970749543e-05, "loss": 0.0, "step": 9030 }, { "epoch": 3.3053016453382082, "grad_norm": 0.0, "learning_rate": 1.016819012797075e-05, "loss": 0.0, "step": 9040 }, { "epoch": 3.3089579524680075, "grad_norm": 0.0, "learning_rate": 1.0146252285191956e-05, "loss": 0.0, "step": 9050 }, { "epoch": 3.3126142595978063, "grad_norm": 0.0, "learning_rate": 1.0124314442413163e-05, "loss": 0.0, "step": 9060 }, { "epoch": 3.316270566727605, "grad_norm": 0.0, "learning_rate": 1.0102376599634369e-05, "loss": 0.0, "step": 9070 }, { "epoch": 3.319926873857404, "grad_norm": 0.0, "learning_rate": 1.0080438756855577e-05, "loss": 0.0, "step": 9080 }, { "epoch": 3.3235831809872027, "grad_norm": 0.0, "learning_rate": 1.0058500914076783e-05, "loss": 0.0, "step": 9090 }, { "epoch": 3.327239488117002, "grad_norm": 0.0, "learning_rate": 1.003656307129799e-05, "loss": 0.0, "step": 9100 }, { "epoch": 3.330895795246801, "grad_norm": 0.0, "learning_rate": 1.0014625228519195e-05, "loss": 0.0, "step": 9110 }, { "epoch": 3.3345521023765996, "grad_norm": 0.0, "learning_rate": 9.992687385740401e-06, "loss": 0.0, "step": 9120 }, { "epoch": 3.3382084095063984, "grad_norm": 0.0, "learning_rate": 9.970749542961609e-06, "loss": 0.0, "step": 9130 }, { "epoch": 3.3418647166361977, "grad_norm": 0.0, "learning_rate": 9.948811700182815e-06, "loss": 0.0, "step": 9140 }, { "epoch": 3.3455210237659965, "grad_norm": 0.0, "learning_rate": 9.926873857404023e-06, "loss": 0.0, "step": 9150 }, { "epoch": 3.3491773308957953, "grad_norm": 0.0, "learning_rate": 9.904936014625227e-06, "loss": 0.0, "step": 9160 }, { "epoch": 3.352833638025594, "grad_norm": 0.0, "learning_rate": 9.882998171846435e-06, "loss": 0.0, "step": 9170 }, { "epoch": 3.356489945155393, "grad_norm": 0.0, "learning_rate": 9.861060329067641e-06, "loss": 0.0, "step": 9180 }, { "epoch": 3.360146252285192, "grad_norm": 0.0, "learning_rate": 9.83912248628885e-06, "loss": 0.0, "step": 9190 }, { "epoch": 3.363802559414991, "grad_norm": 0.0, "learning_rate": 9.817184643510055e-06, "loss": 0.0, "step": 9200 }, { "epoch": 3.3674588665447898, "grad_norm": 0.0, "learning_rate": 9.795246800731262e-06, "loss": 0.0, "step": 9210 }, { "epoch": 3.3711151736745886, "grad_norm": 0.0, "learning_rate": 9.773308957952468e-06, "loss": 0.0, "step": 9220 }, { "epoch": 3.3747714808043874, "grad_norm": 0.0, "learning_rate": 9.751371115173675e-06, "loss": 0.0, "step": 9230 }, { "epoch": 3.3784277879341866, "grad_norm": 0.0, "learning_rate": 9.729433272394882e-06, "loss": 0.0, "step": 9240 }, { "epoch": 3.3820840950639854, "grad_norm": 0.0, "learning_rate": 9.707495429616088e-06, "loss": 0.0, "step": 9250 }, { "epoch": 3.3857404021937842, "grad_norm": 0.0, "learning_rate": 9.685557586837294e-06, "loss": 0.0, "step": 9260 }, { "epoch": 3.389396709323583, "grad_norm": 0.0, "learning_rate": 9.6636197440585e-06, "loss": 0.0, "step": 9270 }, { "epoch": 3.393053016453382, "grad_norm": 0.0, "learning_rate": 9.641681901279708e-06, "loss": 0.0, "step": 9280 }, { "epoch": 3.396709323583181, "grad_norm": 0.0, "learning_rate": 9.619744058500914e-06, "loss": 0.0, "step": 9290 }, { "epoch": 3.40036563071298, "grad_norm": 0.0, "learning_rate": 9.597806215722122e-06, "loss": 0.0, "step": 9300 }, { "epoch": 3.4040219378427787, "grad_norm": 0.0, "learning_rate": 9.575868372943328e-06, "loss": 0.0, "step": 9310 }, { "epoch": 3.4076782449725775, "grad_norm": 0.0, "learning_rate": 9.553930530164534e-06, "loss": 0.0, "step": 9320 }, { "epoch": 3.411334552102377, "grad_norm": 0.0, "learning_rate": 9.53199268738574e-06, "loss": 0.0, "step": 9330 }, { "epoch": 3.4149908592321756, "grad_norm": 0.0, "learning_rate": 9.510054844606948e-06, "loss": 0.0, "step": 9340 }, { "epoch": 3.4186471663619744, "grad_norm": 0.0, "learning_rate": 9.488117001828154e-06, "loss": 0.0, "step": 9350 }, { "epoch": 3.422303473491773, "grad_norm": 0.0, "learning_rate": 9.46617915904936e-06, "loss": 0.0, "step": 9360 }, { "epoch": 3.425959780621572, "grad_norm": 0.0, "learning_rate": 9.444241316270567e-06, "loss": 0.0, "step": 9370 }, { "epoch": 3.4296160877513713, "grad_norm": 0.0, "learning_rate": 9.422303473491773e-06, "loss": 0.0, "step": 9380 }, { "epoch": 3.43327239488117, "grad_norm": 0.0, "learning_rate": 9.40036563071298e-06, "loss": 0.0, "step": 9390 }, { "epoch": 3.436928702010969, "grad_norm": 0.0, "learning_rate": 9.378427787934187e-06, "loss": 0.0, "step": 9400 }, { "epoch": 3.4405850091407677, "grad_norm": 0.0, "learning_rate": 9.356489945155395e-06, "loss": 0.0, "step": 9410 }, { "epoch": 3.444241316270567, "grad_norm": 0.0, "learning_rate": 9.334552102376599e-06, "loss": 0.0, "step": 9420 }, { "epoch": 3.4478976234003658, "grad_norm": 0.0, "learning_rate": 9.312614259597807e-06, "loss": 0.0, "step": 9430 }, { "epoch": 3.4515539305301646, "grad_norm": 0.0, "learning_rate": 9.290676416819013e-06, "loss": 0.0, "step": 9440 }, { "epoch": 3.4552102376599634, "grad_norm": 0.0, "learning_rate": 9.26873857404022e-06, "loss": 0.0, "step": 9450 }, { "epoch": 3.458866544789762, "grad_norm": 0.0, "learning_rate": 9.246800731261427e-06, "loss": 0.0, "step": 9460 }, { "epoch": 3.4625228519195614, "grad_norm": 0.0, "learning_rate": 9.224862888482633e-06, "loss": 0.0, "step": 9470 }, { "epoch": 3.4661791590493602, "grad_norm": 0.0, "learning_rate": 9.20292504570384e-06, "loss": 0.0, "step": 9480 }, { "epoch": 3.469835466179159, "grad_norm": 0.0, "learning_rate": 9.180987202925045e-06, "loss": 0.0, "step": 9490 }, { "epoch": 3.473491773308958, "grad_norm": 0.0, "learning_rate": 9.159049360146253e-06, "loss": 0.0, "step": 9500 }, { "epoch": 3.4771480804387567, "grad_norm": 0.0, "learning_rate": 9.13711151736746e-06, "loss": 0.0, "step": 9510 }, { "epoch": 3.480804387568556, "grad_norm": 0.0, "learning_rate": 9.115173674588665e-06, "loss": 0.0, "step": 9520 }, { "epoch": 3.4844606946983547, "grad_norm": 0.0, "learning_rate": 9.093235831809872e-06, "loss": 0.0, "step": 9530 }, { "epoch": 3.4881170018281535, "grad_norm": 0.0, "learning_rate": 9.07129798903108e-06, "loss": 0.0, "step": 9540 }, { "epoch": 3.4917733089579523, "grad_norm": 0.0, "learning_rate": 9.049360146252286e-06, "loss": 0.0, "step": 9550 }, { "epoch": 3.495429616087751, "grad_norm": 0.0, "learning_rate": 9.027422303473493e-06, "loss": 0.0, "step": 9560 }, { "epoch": 3.4990859232175504, "grad_norm": 0.0, "learning_rate": 9.005484460694698e-06, "loss": 0.0, "step": 9570 }, { "epoch": 3.502742230347349, "grad_norm": 0.0, "learning_rate": 8.983546617915906e-06, "loss": 0.0, "step": 9580 }, { "epoch": 3.506398537477148, "grad_norm": 0.0, "learning_rate": 8.961608775137112e-06, "loss": 0.0, "step": 9590 }, { "epoch": 3.510054844606947, "grad_norm": 0.0, "learning_rate": 8.939670932358318e-06, "loss": 0.0, "step": 9600 }, { "epoch": 3.5137111517367456, "grad_norm": 0.0, "learning_rate": 8.917733089579526e-06, "loss": 0.0, "step": 9610 }, { "epoch": 3.517367458866545, "grad_norm": 0.0, "learning_rate": 8.89579524680073e-06, "loss": 0.0, "step": 9620 }, { "epoch": 3.5210237659963437, "grad_norm": 0.0, "learning_rate": 8.873857404021938e-06, "loss": 0.0, "step": 9630 }, { "epoch": 3.5246800731261425, "grad_norm": 0.0, "learning_rate": 8.851919561243144e-06, "loss": 0.0, "step": 9640 }, { "epoch": 3.5283363802559418, "grad_norm": 0.0, "learning_rate": 8.829981718464352e-06, "loss": 0.0, "step": 9650 }, { "epoch": 3.53199268738574, "grad_norm": 0.0, "learning_rate": 8.808043875685558e-06, "loss": 0.0, "step": 9660 }, { "epoch": 3.5356489945155394, "grad_norm": 0.0, "learning_rate": 8.786106032906764e-06, "loss": 0.0, "step": 9670 }, { "epoch": 3.539305301645338, "grad_norm": 0.0, "learning_rate": 8.76416819012797e-06, "loss": 0.0, "step": 9680 }, { "epoch": 3.542961608775137, "grad_norm": 0.0, "learning_rate": 8.742230347349178e-06, "loss": 0.0, "step": 9690 }, { "epoch": 3.5466179159049362, "grad_norm": 0.0, "learning_rate": 8.720292504570384e-06, "loss": 0.0, "step": 9700 }, { "epoch": 3.550274223034735, "grad_norm": 0.0, "learning_rate": 8.69835466179159e-06, "loss": 0.0, "step": 9710 }, { "epoch": 3.553930530164534, "grad_norm": 0.0, "learning_rate": 8.676416819012797e-06, "loss": 0.0, "step": 9720 }, { "epoch": 3.5575868372943327, "grad_norm": 0.0, "learning_rate": 8.654478976234003e-06, "loss": 0.0, "step": 9730 }, { "epoch": 3.5612431444241315, "grad_norm": 0.0, "learning_rate": 8.63254113345521e-06, "loss": 0.0, "step": 9740 }, { "epoch": 3.5648994515539307, "grad_norm": 0.0, "learning_rate": 8.610603290676417e-06, "loss": 0.0, "step": 9750 }, { "epoch": 3.5685557586837295, "grad_norm": 0.0, "learning_rate": 8.588665447897625e-06, "loss": 0.0, "step": 9760 }, { "epoch": 3.5722120658135283, "grad_norm": 0.0, "learning_rate": 8.566727605118829e-06, "loss": 0.0, "step": 9770 }, { "epoch": 3.575868372943327, "grad_norm": 0.0, "learning_rate": 8.544789762340037e-06, "loss": 0.0, "step": 9780 }, { "epoch": 3.579524680073126, "grad_norm": 0.0, "learning_rate": 8.522851919561243e-06, "loss": 0.0, "step": 9790 }, { "epoch": 3.583180987202925, "grad_norm": 0.0, "learning_rate": 8.500914076782451e-06, "loss": 0.0, "step": 9800 }, { "epoch": 3.586837294332724, "grad_norm": 0.0, "learning_rate": 8.478976234003657e-06, "loss": 0.0, "step": 9810 }, { "epoch": 3.590493601462523, "grad_norm": 0.0, "learning_rate": 8.457038391224863e-06, "loss": 0.0, "step": 9820 }, { "epoch": 3.5941499085923216, "grad_norm": 0.0, "learning_rate": 8.43510054844607e-06, "loss": 0.0, "step": 9830 }, { "epoch": 3.5978062157221204, "grad_norm": 0.0, "learning_rate": 8.413162705667276e-06, "loss": 0.0, "step": 9840 }, { "epoch": 3.6014625228519197, "grad_norm": 0.0, "learning_rate": 8.391224862888483e-06, "loss": 0.0, "step": 9850 }, { "epoch": 3.6051188299817185, "grad_norm": 0.0, "learning_rate": 8.36928702010969e-06, "loss": 0.0, "step": 9860 }, { "epoch": 3.6087751371115173, "grad_norm": 0.0, "learning_rate": 8.347349177330896e-06, "loss": 0.0, "step": 9870 }, { "epoch": 3.612431444241316, "grad_norm": 0.0, "learning_rate": 8.325411334552102e-06, "loss": 0.0, "step": 9880 }, { "epoch": 3.616087751371115, "grad_norm": 0.0, "learning_rate": 8.30347349177331e-06, "loss": 0.0, "step": 9890 }, { "epoch": 3.619744058500914, "grad_norm": 0.0, "learning_rate": 8.281535648994516e-06, "loss": 0.0, "step": 9900 }, { "epoch": 3.623400365630713, "grad_norm": 0.0, "learning_rate": 8.259597806215724e-06, "loss": 0.0, "step": 9910 }, { "epoch": 3.627056672760512, "grad_norm": 0.0, "learning_rate": 8.237659963436928e-06, "loss": 0.0, "step": 9920 }, { "epoch": 3.630712979890311, "grad_norm": 0.0, "learning_rate": 8.215722120658136e-06, "loss": 0.0, "step": 9930 }, { "epoch": 3.6343692870201094, "grad_norm": 0.0, "learning_rate": 8.193784277879342e-06, "loss": 0.0, "step": 9940 }, { "epoch": 3.6380255941499087, "grad_norm": 0.0, "learning_rate": 8.171846435100548e-06, "loss": 0.0, "step": 9950 }, { "epoch": 3.6416819012797075, "grad_norm": 0.0, "learning_rate": 8.149908592321756e-06, "loss": 0.0, "step": 9960 }, { "epoch": 3.6453382084095063, "grad_norm": 0.0, "learning_rate": 8.12797074954296e-06, "loss": 0.0, "step": 9970 }, { "epoch": 3.6489945155393055, "grad_norm": 0.0, "learning_rate": 8.106032906764168e-06, "loss": 0.0, "step": 9980 }, { "epoch": 3.6526508226691043, "grad_norm": 0.0, "learning_rate": 8.084095063985374e-06, "loss": 0.0, "step": 9990 }, { "epoch": 3.656307129798903, "grad_norm": 0.0, "learning_rate": 8.062157221206582e-06, "loss": 0.0, "step": 10000 }, { "epoch": 3.659963436928702, "grad_norm": 0.0, "learning_rate": 8.040219378427788e-06, "loss": 0.0, "step": 10010 }, { "epoch": 3.6636197440585008, "grad_norm": 0.0, "learning_rate": 8.018281535648995e-06, "loss": 0.0, "step": 10020 }, { "epoch": 3.6672760511883, "grad_norm": 0.0, "learning_rate": 7.9963436928702e-06, "loss": 0.0, "step": 10030 }, { "epoch": 3.670932358318099, "grad_norm": 0.0, "learning_rate": 7.974405850091408e-06, "loss": 0.0, "step": 10040 }, { "epoch": 3.6745886654478976, "grad_norm": 0.0, "learning_rate": 7.952468007312615e-06, "loss": 0.0, "step": 10050 }, { "epoch": 3.6782449725776964, "grad_norm": 0.0, "learning_rate": 7.930530164533822e-06, "loss": 0.0, "step": 10060 }, { "epoch": 3.6819012797074953, "grad_norm": 0.0, "learning_rate": 7.908592321755027e-06, "loss": 0.0, "step": 10070 }, { "epoch": 3.6855575868372945, "grad_norm": 0.0, "learning_rate": 7.886654478976233e-06, "loss": 0.0, "step": 10080 }, { "epoch": 3.6892138939670933, "grad_norm": 0.0, "learning_rate": 7.864716636197441e-06, "loss": 0.0, "step": 10090 }, { "epoch": 3.692870201096892, "grad_norm": 0.0, "learning_rate": 7.842778793418647e-06, "loss": 0.0, "step": 10100 }, { "epoch": 3.696526508226691, "grad_norm": 0.0, "learning_rate": 7.820840950639855e-06, "loss": 0.0, "step": 10110 }, { "epoch": 3.7001828153564897, "grad_norm": 0.0, "learning_rate": 7.79890310786106e-06, "loss": 0.0, "step": 10120 }, { "epoch": 3.703839122486289, "grad_norm": 0.0, "learning_rate": 7.776965265082267e-06, "loss": 0.0, "step": 10130 }, { "epoch": 3.707495429616088, "grad_norm": 0.0, "learning_rate": 7.755027422303473e-06, "loss": 0.0, "step": 10140 }, { "epoch": 3.7111517367458866, "grad_norm": 0.0, "learning_rate": 7.733089579524681e-06, "loss": 0.0, "step": 10150 }, { "epoch": 3.7148080438756854, "grad_norm": 0.0, "learning_rate": 7.711151736745887e-06, "loss": 0.0, "step": 10160 }, { "epoch": 3.7184643510054842, "grad_norm": 0.0, "learning_rate": 7.689213893967093e-06, "loss": 0.0, "step": 10170 }, { "epoch": 3.7221206581352835, "grad_norm": 0.0, "learning_rate": 7.6672760511883e-06, "loss": 0.0, "step": 10180 }, { "epoch": 3.7257769652650823, "grad_norm": 0.0, "learning_rate": 7.645338208409506e-06, "loss": 0.0, "step": 10190 }, { "epoch": 3.729433272394881, "grad_norm": 0.0, "learning_rate": 7.6234003656307135e-06, "loss": 0.0, "step": 10200 }, { "epoch": 3.7330895795246803, "grad_norm": 0.0, "learning_rate": 7.601462522851919e-06, "loss": 0.0, "step": 10210 }, { "epoch": 3.7367458866544787, "grad_norm": 0.0, "learning_rate": 7.579524680073127e-06, "loss": 0.0, "step": 10220 }, { "epoch": 3.740402193784278, "grad_norm": 0.0, "learning_rate": 7.557586837294333e-06, "loss": 0.0, "step": 10230 }, { "epoch": 3.7440585009140768, "grad_norm": 0.0, "learning_rate": 7.53564899451554e-06, "loss": 0.0, "step": 10240 }, { "epoch": 3.7477148080438756, "grad_norm": 0.0, "learning_rate": 7.513711151736746e-06, "loss": 0.0, "step": 10250 }, { "epoch": 3.751371115173675, "grad_norm": 0.0, "learning_rate": 7.491773308957952e-06, "loss": 0.0, "step": 10260 }, { "epoch": 3.7550274223034736, "grad_norm": 0.0, "learning_rate": 7.469835466179159e-06, "loss": 0.0, "step": 10270 }, { "epoch": 3.7586837294332724, "grad_norm": 0.0, "learning_rate": 7.447897623400366e-06, "loss": 0.0, "step": 10280 }, { "epoch": 3.7623400365630713, "grad_norm": 0.0, "learning_rate": 7.425959780621572e-06, "loss": 0.0, "step": 10290 }, { "epoch": 3.76599634369287, "grad_norm": 0.0, "learning_rate": 7.404021937842779e-06, "loss": 0.0, "step": 10300 }, { "epoch": 3.7696526508226693, "grad_norm": 0.0, "learning_rate": 7.382084095063985e-06, "loss": 0.0, "step": 10310 }, { "epoch": 3.773308957952468, "grad_norm": 0.0, "learning_rate": 7.360146252285192e-06, "loss": 0.0, "step": 10320 }, { "epoch": 3.776965265082267, "grad_norm": 0.0, "learning_rate": 7.338208409506399e-06, "loss": 0.0, "step": 10330 }, { "epoch": 3.7806215722120657, "grad_norm": 0.0, "learning_rate": 7.3162705667276054e-06, "loss": 0.0, "step": 10340 }, { "epoch": 3.7842778793418645, "grad_norm": 0.0, "learning_rate": 7.294332723948812e-06, "loss": 0.0, "step": 10350 }, { "epoch": 3.787934186471664, "grad_norm": 0.0, "learning_rate": 7.272394881170018e-06, "loss": 0.0, "step": 10360 }, { "epoch": 3.7915904936014626, "grad_norm": 0.0, "learning_rate": 7.250457038391225e-06, "loss": 0.0, "step": 10370 }, { "epoch": 3.7952468007312614, "grad_norm": 0.0, "learning_rate": 7.228519195612432e-06, "loss": 0.0, "step": 10380 }, { "epoch": 3.7989031078610602, "grad_norm": 0.0, "learning_rate": 7.206581352833638e-06, "loss": 0.0, "step": 10390 }, { "epoch": 3.802559414990859, "grad_norm": 0.0, "learning_rate": 7.184643510054845e-06, "loss": 0.0, "step": 10400 }, { "epoch": 3.8062157221206583, "grad_norm": 0.0, "learning_rate": 7.162705667276051e-06, "loss": 0.0, "step": 10410 }, { "epoch": 3.809872029250457, "grad_norm": 0.0, "learning_rate": 7.140767824497258e-06, "loss": 0.0, "step": 10420 }, { "epoch": 3.813528336380256, "grad_norm": 0.0, "learning_rate": 7.118829981718465e-06, "loss": 0.0, "step": 10430 }, { "epoch": 3.8171846435100547, "grad_norm": 0.0, "learning_rate": 7.096892138939671e-06, "loss": 0.0, "step": 10440 }, { "epoch": 3.8208409506398535, "grad_norm": 0.0, "learning_rate": 7.074954296160878e-06, "loss": 0.0, "step": 10450 }, { "epoch": 3.8244972577696528, "grad_norm": 0.0, "learning_rate": 7.053016453382084e-06, "loss": 0.0, "step": 10460 }, { "epoch": 3.8281535648994516, "grad_norm": 0.0, "learning_rate": 7.031078610603291e-06, "loss": 0.0, "step": 10470 }, { "epoch": 3.8318098720292504, "grad_norm": 0.0, "learning_rate": 7.009140767824497e-06, "loss": 0.0, "step": 10480 }, { "epoch": 3.835466179159049, "grad_norm": 0.0, "learning_rate": 6.9872029250457035e-06, "loss": 0.0, "step": 10490 }, { "epoch": 3.839122486288848, "grad_norm": 0.0, "learning_rate": 6.9652650822669105e-06, "loss": 0.0, "step": 10500 }, { "epoch": 3.8427787934186473, "grad_norm": 0.0, "learning_rate": 6.943327239488117e-06, "loss": 0.0, "step": 10510 }, { "epoch": 3.846435100548446, "grad_norm": 0.0, "learning_rate": 6.921389396709324e-06, "loss": 0.0, "step": 10520 }, { "epoch": 3.850091407678245, "grad_norm": 0.0, "learning_rate": 6.899451553930531e-06, "loss": 0.0, "step": 10530 }, { "epoch": 3.853747714808044, "grad_norm": 0.0, "learning_rate": 6.877513711151737e-06, "loss": 0.0, "step": 10540 }, { "epoch": 3.857404021937843, "grad_norm": 0.0, "learning_rate": 6.855575868372944e-06, "loss": 0.0, "step": 10550 }, { "epoch": 3.8610603290676417, "grad_norm": 0.0, "learning_rate": 6.83363802559415e-06, "loss": 0.0, "step": 10560 }, { "epoch": 3.8647166361974405, "grad_norm": 0.0, "learning_rate": 6.811700182815357e-06, "loss": 0.0, "step": 10570 }, { "epoch": 3.8683729433272394, "grad_norm": 0.0, "learning_rate": 6.789762340036564e-06, "loss": 0.0, "step": 10580 }, { "epoch": 3.8720292504570386, "grad_norm": 0.0, "learning_rate": 6.76782449725777e-06, "loss": 0.0, "step": 10590 }, { "epoch": 3.8756855575868374, "grad_norm": 0.0, "learning_rate": 6.745886654478976e-06, "loss": 0.0, "step": 10600 }, { "epoch": 3.8793418647166362, "grad_norm": 0.0, "learning_rate": 6.723948811700182e-06, "loss": 0.0, "step": 10610 }, { "epoch": 3.882998171846435, "grad_norm": 0.0, "learning_rate": 6.702010968921389e-06, "loss": 0.0, "step": 10620 }, { "epoch": 3.886654478976234, "grad_norm": 0.0, "learning_rate": 6.680073126142596e-06, "loss": 0.0, "step": 10630 }, { "epoch": 3.890310786106033, "grad_norm": 0.0, "learning_rate": 6.658135283363802e-06, "loss": 0.0, "step": 10640 }, { "epoch": 3.893967093235832, "grad_norm": 0.0, "learning_rate": 6.636197440585009e-06, "loss": 0.0, "step": 10650 }, { "epoch": 3.8976234003656307, "grad_norm": 0.0, "learning_rate": 6.6142595978062155e-06, "loss": 0.0, "step": 10660 }, { "epoch": 3.9012797074954295, "grad_norm": 0.0, "learning_rate": 6.5923217550274225e-06, "loss": 0.0, "step": 10670 }, { "epoch": 3.9049360146252283, "grad_norm": 0.0, "learning_rate": 6.5703839122486295e-06, "loss": 0.0, "step": 10680 }, { "epoch": 3.9085923217550276, "grad_norm": 0.0, "learning_rate": 6.548446069469836e-06, "loss": 0.0, "step": 10690 }, { "epoch": 3.9122486288848264, "grad_norm": 0.0, "learning_rate": 6.526508226691043e-06, "loss": 0.0, "step": 10700 }, { "epoch": 3.915904936014625, "grad_norm": 0.0, "learning_rate": 6.50457038391225e-06, "loss": 0.0, "step": 10710 }, { "epoch": 3.919561243144424, "grad_norm": 0.0, "learning_rate": 6.482632541133455e-06, "loss": 0.0, "step": 10720 }, { "epoch": 3.923217550274223, "grad_norm": 0.0, "learning_rate": 6.460694698354662e-06, "loss": 0.0, "step": 10730 }, { "epoch": 3.926873857404022, "grad_norm": 0.0, "learning_rate": 6.438756855575868e-06, "loss": 0.0, "step": 10740 }, { "epoch": 3.930530164533821, "grad_norm": 0.0, "learning_rate": 6.416819012797075e-06, "loss": 0.0, "step": 10750 }, { "epoch": 3.9341864716636197, "grad_norm": 0.0, "learning_rate": 6.394881170018282e-06, "loss": 0.0, "step": 10760 }, { "epoch": 3.9378427787934185, "grad_norm": 0.0, "learning_rate": 6.372943327239488e-06, "loss": 0.0, "step": 10770 }, { "epoch": 3.9414990859232173, "grad_norm": 0.0, "learning_rate": 6.351005484460695e-06, "loss": 0.0, "step": 10780 }, { "epoch": 3.9451553930530165, "grad_norm": 0.0, "learning_rate": 6.329067641681901e-06, "loss": 0.0, "step": 10790 }, { "epoch": 3.9488117001828154, "grad_norm": 0.0, "learning_rate": 6.307129798903108e-06, "loss": 0.0, "step": 10800 }, { "epoch": 3.952468007312614, "grad_norm": 0.0, "learning_rate": 6.285191956124315e-06, "loss": 0.0, "step": 10810 }, { "epoch": 3.9561243144424134, "grad_norm": 0.0, "learning_rate": 6.263254113345521e-06, "loss": 0.0, "step": 10820 }, { "epoch": 3.9597806215722122, "grad_norm": 0.0, "learning_rate": 6.241316270566728e-06, "loss": 0.0, "step": 10830 }, { "epoch": 3.963436928702011, "grad_norm": 0.0, "learning_rate": 6.219378427787934e-06, "loss": 0.0, "step": 10840 }, { "epoch": 3.96709323583181, "grad_norm": 0.0, "learning_rate": 6.197440585009141e-06, "loss": 0.0, "step": 10850 }, { "epoch": 3.9707495429616086, "grad_norm": 0.0, "learning_rate": 6.175502742230348e-06, "loss": 0.0, "step": 10860 }, { "epoch": 3.974405850091408, "grad_norm": 0.0, "learning_rate": 6.153564899451554e-06, "loss": 0.0, "step": 10870 }, { "epoch": 3.9780621572212067, "grad_norm": 0.0, "learning_rate": 6.131627056672761e-06, "loss": 0.0, "step": 10880 }, { "epoch": 3.9817184643510055, "grad_norm": 0.0, "learning_rate": 6.109689213893967e-06, "loss": 0.0, "step": 10890 }, { "epoch": 3.9853747714808043, "grad_norm": 0.0, "learning_rate": 6.087751371115174e-06, "loss": 0.0, "step": 10900 }, { "epoch": 3.989031078610603, "grad_norm": 0.0, "learning_rate": 6.065813528336381e-06, "loss": 0.0, "step": 10910 }, { "epoch": 3.9926873857404024, "grad_norm": 0.0, "learning_rate": 6.043875685557587e-06, "loss": 0.0, "step": 10920 }, { "epoch": 3.996343692870201, "grad_norm": 0.0, "learning_rate": 6.021937842778794e-06, "loss": 0.0, "step": 10930 }, { "epoch": 4.0, "grad_norm": 0.0, "learning_rate": 6e-06, "loss": 0.0, "step": 10940 }, { "epoch": 4.003656307129799, "grad_norm": 0.0, "learning_rate": 5.978062157221207e-06, "loss": 0.0, "step": 10950 }, { "epoch": 4.007312614259598, "grad_norm": 0.0, "learning_rate": 5.956124314442413e-06, "loss": 0.0, "step": 10960 }, { "epoch": 4.010968921389397, "grad_norm": 0.0, "learning_rate": 5.934186471663619e-06, "loss": 0.0, "step": 10970 }, { "epoch": 4.014625228519195, "grad_norm": 0.0, "learning_rate": 5.912248628884826e-06, "loss": 0.0, "step": 10980 }, { "epoch": 4.0182815356489945, "grad_norm": 0.0, "learning_rate": 5.8903107861060326e-06, "loss": 0.0, "step": 10990 }, { "epoch": 4.021937842778794, "grad_norm": 0.0, "learning_rate": 5.8683729433272395e-06, "loss": 0.0, "step": 11000 }, { "epoch": 4.025594149908592, "grad_norm": 0.0, "learning_rate": 5.8464351005484465e-06, "loss": 0.0, "step": 11010 }, { "epoch": 4.029250457038391, "grad_norm": 0.0, "learning_rate": 5.824497257769653e-06, "loss": 0.0, "step": 11020 }, { "epoch": 4.03290676416819, "grad_norm": 0.0, "learning_rate": 5.80255941499086e-06, "loss": 0.0, "step": 11030 }, { "epoch": 4.036563071297989, "grad_norm": 0.0, "learning_rate": 5.780621572212066e-06, "loss": 0.0, "step": 11040 }, { "epoch": 4.040219378427788, "grad_norm": 0.0, "learning_rate": 5.758683729433273e-06, "loss": 0.0, "step": 11050 }, { "epoch": 4.043875685557587, "grad_norm": 0.0, "learning_rate": 5.73674588665448e-06, "loss": 0.0, "step": 11060 }, { "epoch": 4.047531992687386, "grad_norm": 0.0, "learning_rate": 5.714808043875686e-06, "loss": 0.0, "step": 11070 }, { "epoch": 4.051188299817184, "grad_norm": 0.0, "learning_rate": 5.692870201096892e-06, "loss": 0.0, "step": 11080 }, { "epoch": 4.0548446069469835, "grad_norm": 0.0, "learning_rate": 5.670932358318098e-06, "loss": 0.0, "step": 11090 }, { "epoch": 4.058500914076783, "grad_norm": 0.0, "learning_rate": 5.648994515539305e-06, "loss": 0.0, "step": 11100 }, { "epoch": 4.062157221206581, "grad_norm": 0.0, "learning_rate": 5.627056672760512e-06, "loss": 0.0, "step": 11110 }, { "epoch": 4.06581352833638, "grad_norm": 0.0, "learning_rate": 5.605118829981718e-06, "loss": 0.0, "step": 11120 }, { "epoch": 4.06946983546618, "grad_norm": 0.0, "learning_rate": 5.583180987202925e-06, "loss": 0.0, "step": 11130 }, { "epoch": 4.073126142595978, "grad_norm": 0.0, "learning_rate": 5.5612431444241314e-06, "loss": 0.0, "step": 11140 }, { "epoch": 4.076782449725777, "grad_norm": 0.0, "learning_rate": 5.5393053016453384e-06, "loss": 0.0, "step": 11150 }, { "epoch": 4.0804387568555756, "grad_norm": 0.0, "learning_rate": 5.517367458866545e-06, "loss": 0.0, "step": 11160 }, { "epoch": 4.084095063985375, "grad_norm": 0.0, "learning_rate": 5.4954296160877516e-06, "loss": 0.0, "step": 11170 }, { "epoch": 4.087751371115174, "grad_norm": 0.0, "learning_rate": 5.4734917733089585e-06, "loss": 0.0, "step": 11180 }, { "epoch": 4.091407678244972, "grad_norm": 0.0, "learning_rate": 5.451553930530164e-06, "loss": 0.0, "step": 11190 }, { "epoch": 4.095063985374772, "grad_norm": 0.0, "learning_rate": 5.429616087751371e-06, "loss": 0.0, "step": 11200 }, { "epoch": 4.09872029250457, "grad_norm": 0.0, "learning_rate": 5.407678244972578e-06, "loss": 0.0, "step": 11210 }, { "epoch": 4.102376599634369, "grad_norm": 0.0, "learning_rate": 5.385740402193784e-06, "loss": 0.0, "step": 11220 }, { "epoch": 4.1060329067641685, "grad_norm": 0.0, "learning_rate": 5.363802559414991e-06, "loss": 0.0, "step": 11230 }, { "epoch": 4.109689213893967, "grad_norm": 0.0, "learning_rate": 5.341864716636198e-06, "loss": 0.0, "step": 11240 }, { "epoch": 4.113345521023766, "grad_norm": 0.0, "learning_rate": 5.319926873857404e-06, "loss": 0.0, "step": 11250 }, { "epoch": 4.1170018281535645, "grad_norm": 0.0, "learning_rate": 5.297989031078611e-06, "loss": 0.0, "step": 11260 }, { "epoch": 4.120658135283364, "grad_norm": 0.0, "learning_rate": 5.276051188299817e-06, "loss": 0.0, "step": 11270 }, { "epoch": 4.124314442413163, "grad_norm": 0.0, "learning_rate": 5.254113345521024e-06, "loss": 0.0, "step": 11280 }, { "epoch": 4.127970749542961, "grad_norm": 0.0, "learning_rate": 5.232175502742231e-06, "loss": 0.0, "step": 11290 }, { "epoch": 4.131627056672761, "grad_norm": 0.0, "learning_rate": 5.210237659963437e-06, "loss": 0.0, "step": 11300 }, { "epoch": 4.135283363802559, "grad_norm": 0.0, "learning_rate": 5.1882998171846435e-06, "loss": 0.0, "step": 11310 }, { "epoch": 4.138939670932358, "grad_norm": 0.0, "learning_rate": 5.16636197440585e-06, "loss": 0.0, "step": 11320 }, { "epoch": 4.1425959780621575, "grad_norm": 0.0, "learning_rate": 5.144424131627057e-06, "loss": 0.0, "step": 11330 }, { "epoch": 4.146252285191956, "grad_norm": 0.0, "learning_rate": 5.122486288848264e-06, "loss": 0.0, "step": 11340 }, { "epoch": 4.149908592321755, "grad_norm": 0.0, "learning_rate": 5.10054844606947e-06, "loss": 0.0, "step": 11350 }, { "epoch": 4.153564899451554, "grad_norm": 0.0, "learning_rate": 5.078610603290677e-06, "loss": 0.0, "step": 11360 }, { "epoch": 4.157221206581353, "grad_norm": 0.0, "learning_rate": 5.056672760511883e-06, "loss": 0.0, "step": 11370 }, { "epoch": 4.160877513711152, "grad_norm": 0.0, "learning_rate": 5.03473491773309e-06, "loss": 0.0, "step": 11380 }, { "epoch": 4.16453382084095, "grad_norm": 0.0, "learning_rate": 5.012797074954297e-06, "loss": 0.0, "step": 11390 }, { "epoch": 4.16819012797075, "grad_norm": 0.0, "learning_rate": 4.990859232175503e-06, "loss": 0.0, "step": 11400 }, { "epoch": 4.171846435100549, "grad_norm": 0.0, "learning_rate": 4.96892138939671e-06, "loss": 0.0, "step": 11410 }, { "epoch": 4.175502742230347, "grad_norm": 0.0, "learning_rate": 4.946983546617916e-06, "loss": 0.0, "step": 11420 }, { "epoch": 4.1791590493601465, "grad_norm": 0.0, "learning_rate": 4.925045703839122e-06, "loss": 0.0, "step": 11430 }, { "epoch": 4.182815356489945, "grad_norm": 0.0, "learning_rate": 4.903107861060329e-06, "loss": 0.0, "step": 11440 }, { "epoch": 4.186471663619744, "grad_norm": 0.0, "learning_rate": 4.881170018281535e-06, "loss": 0.0, "step": 11450 }, { "epoch": 4.190127970749543, "grad_norm": 0.0, "learning_rate": 4.859232175502742e-06, "loss": 0.0, "step": 11460 }, { "epoch": 4.193784277879342, "grad_norm": 0.0, "learning_rate": 4.8372943327239485e-06, "loss": 0.0, "step": 11470 }, { "epoch": 4.197440585009141, "grad_norm": 0.0, "learning_rate": 4.8153564899451555e-06, "loss": 0.0, "step": 11480 }, { "epoch": 4.201096892138939, "grad_norm": 0.0, "learning_rate": 4.7934186471663625e-06, "loss": 0.0, "step": 11490 }, { "epoch": 4.204753199268739, "grad_norm": 0.0, "learning_rate": 4.771480804387569e-06, "loss": 0.0, "step": 11500 }, { "epoch": 4.208409506398538, "grad_norm": 0.0, "learning_rate": 4.749542961608776e-06, "loss": 0.0, "step": 11510 }, { "epoch": 4.212065813528336, "grad_norm": 0.0, "learning_rate": 4.727605118829982e-06, "loss": 0.0, "step": 11520 }, { "epoch": 4.2157221206581355, "grad_norm": 0.0, "learning_rate": 4.705667276051189e-06, "loss": 0.0, "step": 11530 }, { "epoch": 4.219378427787934, "grad_norm": 0.0, "learning_rate": 4.683729433272396e-06, "loss": 0.0, "step": 11540 }, { "epoch": 4.223034734917733, "grad_norm": 0.0, "learning_rate": 4.661791590493601e-06, "loss": 0.0, "step": 11550 }, { "epoch": 4.226691042047532, "grad_norm": 0.0, "learning_rate": 4.639853747714808e-06, "loss": 0.0, "step": 11560 }, { "epoch": 4.230347349177331, "grad_norm": 0.0, "learning_rate": 4.617915904936014e-06, "loss": 0.0, "step": 11570 }, { "epoch": 4.23400365630713, "grad_norm": 0.0, "learning_rate": 4.595978062157221e-06, "loss": 0.0, "step": 11580 }, { "epoch": 4.237659963436928, "grad_norm": 0.0, "learning_rate": 4.574040219378428e-06, "loss": 0.0, "step": 11590 }, { "epoch": 4.2413162705667276, "grad_norm": 0.0, "learning_rate": 4.552102376599634e-06, "loss": 0.0, "step": 11600 }, { "epoch": 4.244972577696527, "grad_norm": 0.0, "learning_rate": 4.530164533820841e-06, "loss": 0.0, "step": 11610 }, { "epoch": 4.248628884826325, "grad_norm": 0.0, "learning_rate": 4.508226691042047e-06, "loss": 0.0, "step": 11620 }, { "epoch": 4.252285191956124, "grad_norm": 0.0, "learning_rate": 4.486288848263254e-06, "loss": 0.0, "step": 11630 }, { "epoch": 4.255941499085923, "grad_norm": 0.0, "learning_rate": 4.464351005484461e-06, "loss": 0.0, "step": 11640 }, { "epoch": 4.259597806215722, "grad_norm": 0.0, "learning_rate": 4.4424131627056675e-06, "loss": 0.0, "step": 11650 }, { "epoch": 4.263254113345521, "grad_norm": 0.0, "learning_rate": 4.4204753199268745e-06, "loss": 0.0, "step": 11660 }, { "epoch": 4.26691042047532, "grad_norm": 0.0, "learning_rate": 4.39853747714808e-06, "loss": 0.0, "step": 11670 }, { "epoch": 4.270566727605119, "grad_norm": 0.0, "learning_rate": 4.376599634369287e-06, "loss": 0.0, "step": 11680 }, { "epoch": 4.274223034734918, "grad_norm": 0.0, "learning_rate": 4.354661791590494e-06, "loss": 0.0, "step": 11690 }, { "epoch": 4.2778793418647165, "grad_norm": 0.0, "learning_rate": 4.3327239488117e-06, "loss": 0.0, "step": 11700 }, { "epoch": 4.281535648994516, "grad_norm": 0.0, "learning_rate": 4.310786106032907e-06, "loss": 0.0, "step": 11710 }, { "epoch": 4.285191956124314, "grad_norm": 0.0, "learning_rate": 4.288848263254113e-06, "loss": 0.0, "step": 11720 }, { "epoch": 4.288848263254113, "grad_norm": 0.0, "learning_rate": 4.26691042047532e-06, "loss": 0.0, "step": 11730 }, { "epoch": 4.292504570383913, "grad_norm": 0.0, "learning_rate": 4.244972577696527e-06, "loss": 0.0, "step": 11740 }, { "epoch": 4.296160877513711, "grad_norm": 0.0, "learning_rate": 4.223034734917733e-06, "loss": 0.0, "step": 11750 }, { "epoch": 4.29981718464351, "grad_norm": 0.0, "learning_rate": 4.20109689213894e-06, "loss": 0.0, "step": 11760 }, { "epoch": 4.303473491773309, "grad_norm": 0.0, "learning_rate": 4.179159049360146e-06, "loss": 0.0, "step": 11770 }, { "epoch": 4.307129798903108, "grad_norm": 0.0, "learning_rate": 4.157221206581353e-06, "loss": 0.0, "step": 11780 }, { "epoch": 4.310786106032907, "grad_norm": 0.0, "learning_rate": 4.135283363802559e-06, "loss": 0.0, "step": 11790 }, { "epoch": 4.3144424131627055, "grad_norm": 0.0, "learning_rate": 4.1133455210237655e-06, "loss": 0.0, "step": 11800 }, { "epoch": 4.318098720292505, "grad_norm": 0.0, "learning_rate": 4.0914076782449725e-06, "loss": 0.0, "step": 11810 }, { "epoch": 4.321755027422303, "grad_norm": 0.0, "learning_rate": 4.0694698354661795e-06, "loss": 0.0, "step": 11820 }, { "epoch": 4.325411334552102, "grad_norm": 0.0, "learning_rate": 4.047531992687386e-06, "loss": 0.0, "step": 11830 }, { "epoch": 4.329067641681902, "grad_norm": 0.0, "learning_rate": 4.025594149908593e-06, "loss": 0.0, "step": 11840 }, { "epoch": 4.3327239488117, "grad_norm": 0.0, "learning_rate": 4.003656307129799e-06, "loss": 0.0, "step": 11850 }, { "epoch": 4.336380255941499, "grad_norm": 0.0, "learning_rate": 3.981718464351006e-06, "loss": 0.0, "step": 11860 }, { "epoch": 4.340036563071298, "grad_norm": 0.0, "learning_rate": 3.959780621572213e-06, "loss": 0.0, "step": 11870 }, { "epoch": 4.343692870201097, "grad_norm": 0.0, "learning_rate": 3.937842778793419e-06, "loss": 0.0, "step": 11880 }, { "epoch": 4.347349177330896, "grad_norm": 0.0, "learning_rate": 3.915904936014626e-06, "loss": 0.0, "step": 11890 }, { "epoch": 4.3510054844606945, "grad_norm": 0.0, "learning_rate": 3.893967093235831e-06, "loss": 0.0, "step": 11900 }, { "epoch": 4.354661791590494, "grad_norm": 0.0, "learning_rate": 3.872029250457038e-06, "loss": 0.0, "step": 11910 }, { "epoch": 4.358318098720293, "grad_norm": 0.0, "learning_rate": 3.850091407678245e-06, "loss": 0.0, "step": 11920 }, { "epoch": 4.361974405850091, "grad_norm": 0.0, "learning_rate": 3.828153564899451e-06, "loss": 0.0, "step": 11930 }, { "epoch": 4.365630712979891, "grad_norm": 0.0, "learning_rate": 3.8062157221206583e-06, "loss": 0.0, "step": 11940 }, { "epoch": 4.369287020109689, "grad_norm": 0.0, "learning_rate": 3.784277879341865e-06, "loss": 0.0, "step": 11950 }, { "epoch": 4.372943327239488, "grad_norm": 0.0, "learning_rate": 3.7623400365630714e-06, "loss": 0.0, "step": 11960 }, { "epoch": 4.376599634369287, "grad_norm": 0.0, "learning_rate": 3.740402193784278e-06, "loss": 0.0, "step": 11970 }, { "epoch": 4.380255941499086, "grad_norm": 0.0, "learning_rate": 3.7184643510054846e-06, "loss": 0.0, "step": 11980 }, { "epoch": 4.383912248628885, "grad_norm": 0.0, "learning_rate": 3.696526508226691e-06, "loss": 0.0, "step": 11990 }, { "epoch": 4.387568555758683, "grad_norm": 0.0, "learning_rate": 3.6745886654478977e-06, "loss": 0.0, "step": 12000 } ], "logging_steps": 10, "max_steps": 13675, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }