{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 25688, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9992214263469324e-05, "loss": 3.6476, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9984428526938652e-05, "loss": 0.3029, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9976642790407974e-05, "loss": 0.4554, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.9968857053877296e-05, "loss": 0.3546, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.9961071317346625e-05, "loss": 0.4369, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9953285580815947e-05, "loss": 0.4187, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.9945499844285272e-05, "loss": 0.3758, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.9937714107754594e-05, "loss": 0.442, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.992992837122392e-05, "loss": 0.3408, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.9922142634693244e-05, "loss": 0.2933, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.9914356898162566e-05, "loss": 0.5627, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.990657116163189e-05, "loss": 0.406, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.9898785425101217e-05, "loss": 0.3029, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.989099968857054e-05, "loss": 0.3583, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.9883213952039864e-05, "loss": 0.4022, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.987542821550919e-05, "loss": 0.3508, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.986764247897851e-05, "loss": 0.3816, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.9859856742447837e-05, "loss": 0.422, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.9852071005917162e-05, "loss": 0.4494, "step": 190 }, { "epoch": 0.02, "learning_rate": 1.9844285269386484e-05, "loss": 0.3588, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.983649953285581e-05, "loss": 0.4859, "step": 210 }, { "epoch": 0.02, "learning_rate": 1.9828713796325135e-05, "loss": 0.4338, "step": 220 }, { "epoch": 0.02, "learning_rate": 1.9820928059794456e-05, "loss": 0.4002, "step": 230 }, { "epoch": 0.02, "learning_rate": 1.9813142323263782e-05, "loss": 0.4142, "step": 240 }, { "epoch": 0.02, "learning_rate": 1.9805356586733107e-05, "loss": 0.369, "step": 250 }, { "epoch": 0.02, "learning_rate": 1.979757085020243e-05, "loss": 0.2912, "step": 260 }, { "epoch": 0.02, "learning_rate": 1.9789785113671754e-05, "loss": 0.4347, "step": 270 }, { "epoch": 0.02, "learning_rate": 1.978199937714108e-05, "loss": 0.3762, "step": 280 }, { "epoch": 0.02, "learning_rate": 1.97742136406104e-05, "loss": 0.2991, "step": 290 }, { "epoch": 0.02, "learning_rate": 1.9766427904079727e-05, "loss": 0.355, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.9758642167549052e-05, "loss": 0.3871, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.9750856431018377e-05, "loss": 0.3469, "step": 320 }, { "epoch": 0.03, "learning_rate": 1.97430706944877e-05, "loss": 0.3732, "step": 330 }, { "epoch": 0.03, "learning_rate": 1.9735284957957025e-05, "loss": 0.3906, "step": 340 }, { "epoch": 0.03, "learning_rate": 1.972749922142635e-05, "loss": 0.3773, "step": 350 }, { "epoch": 0.03, "learning_rate": 1.9719713484895672e-05, "loss": 0.4073, "step": 360 }, { "epoch": 0.03, "learning_rate": 1.9711927748364997e-05, "loss": 0.3961, "step": 370 }, { "epoch": 0.03, "learning_rate": 1.9704142011834322e-05, "loss": 0.4076, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.9696356275303644e-05, "loss": 0.4342, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.968857053877297e-05, "loss": 0.4247, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.9680784802242295e-05, "loss": 0.4857, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.9672999065711617e-05, "loss": 0.3507, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.9665213329180942e-05, "loss": 0.447, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.9657427592650267e-05, "loss": 0.4799, "step": 440 }, { "epoch": 0.04, "learning_rate": 1.964964185611959e-05, "loss": 0.4324, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.9641856119588915e-05, "loss": 0.3724, "step": 460 }, { "epoch": 0.04, "learning_rate": 1.963407038305824e-05, "loss": 0.3797, "step": 470 }, { "epoch": 0.04, "learning_rate": 1.9626284646527562e-05, "loss": 0.342, "step": 480 }, { "epoch": 0.04, "learning_rate": 1.9618498909996887e-05, "loss": 0.3696, "step": 490 }, { "epoch": 0.04, "learning_rate": 1.9610713173466212e-05, "loss": 0.3531, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.9602927436935534e-05, "loss": 0.3707, "step": 510 }, { "epoch": 0.04, "learning_rate": 1.959514170040486e-05, "loss": 0.3054, "step": 520 }, { "epoch": 0.04, "learning_rate": 1.9587355963874185e-05, "loss": 0.333, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.9579570227343507e-05, "loss": 0.4072, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.9571784490812832e-05, "loss": 0.3363, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.9563998754282157e-05, "loss": 0.3723, "step": 560 }, { "epoch": 0.04, "learning_rate": 1.9556213017751483e-05, "loss": 0.4651, "step": 570 }, { "epoch": 0.05, "learning_rate": 1.9548427281220805e-05, "loss": 0.3766, "step": 580 }, { "epoch": 0.05, "learning_rate": 1.954064154469013e-05, "loss": 0.3247, "step": 590 }, { "epoch": 0.05, "learning_rate": 1.9532855808159455e-05, "loss": 0.4209, "step": 600 }, { "epoch": 0.05, "learning_rate": 1.9525070071628777e-05, "loss": 0.4038, "step": 610 }, { "epoch": 0.05, "learning_rate": 1.9517284335098102e-05, "loss": 0.305, "step": 620 }, { "epoch": 0.05, "learning_rate": 1.9509498598567428e-05, "loss": 0.4737, "step": 630 }, { "epoch": 0.05, "learning_rate": 1.950171286203675e-05, "loss": 0.3816, "step": 640 }, { "epoch": 0.05, "learning_rate": 1.9493927125506075e-05, "loss": 0.3406, "step": 650 }, { "epoch": 0.05, "learning_rate": 1.94861413889754e-05, "loss": 0.4612, "step": 660 }, { "epoch": 0.05, "learning_rate": 1.9478355652444722e-05, "loss": 0.396, "step": 670 }, { "epoch": 0.05, "learning_rate": 1.9470569915914047e-05, "loss": 0.2885, "step": 680 }, { "epoch": 0.05, "learning_rate": 1.9462784179383373e-05, "loss": 0.372, "step": 690 }, { "epoch": 0.05, "learning_rate": 1.9454998442852695e-05, "loss": 0.3646, "step": 700 }, { "epoch": 0.06, "learning_rate": 1.944721270632202e-05, "loss": 0.3517, "step": 710 }, { "epoch": 0.06, "learning_rate": 1.9439426969791345e-05, "loss": 0.3244, "step": 720 }, { "epoch": 0.06, "learning_rate": 1.9431641233260667e-05, "loss": 0.3107, "step": 730 }, { "epoch": 0.06, "learning_rate": 1.9423855496729992e-05, "loss": 0.4312, "step": 740 }, { "epoch": 0.06, "learning_rate": 1.9416069760199318e-05, "loss": 0.3501, "step": 750 }, { "epoch": 0.06, "learning_rate": 1.940828402366864e-05, "loss": 0.3394, "step": 760 }, { "epoch": 0.06, "learning_rate": 1.9400498287137965e-05, "loss": 0.2721, "step": 770 }, { "epoch": 0.06, "learning_rate": 1.939271255060729e-05, "loss": 0.3824, "step": 780 }, { "epoch": 0.06, "learning_rate": 1.9384926814076612e-05, "loss": 0.3357, "step": 790 }, { "epoch": 0.06, "learning_rate": 1.9377141077545937e-05, "loss": 0.5051, "step": 800 }, { "epoch": 0.06, "learning_rate": 1.9369355341015263e-05, "loss": 0.3467, "step": 810 }, { "epoch": 0.06, "learning_rate": 1.9361569604484588e-05, "loss": 0.3079, "step": 820 }, { "epoch": 0.06, "learning_rate": 1.935378386795391e-05, "loss": 0.3362, "step": 830 }, { "epoch": 0.07, "learning_rate": 1.9345998131423235e-05, "loss": 0.3535, "step": 840 }, { "epoch": 0.07, "learning_rate": 1.933821239489256e-05, "loss": 0.3338, "step": 850 }, { "epoch": 0.07, "learning_rate": 1.9330426658361883e-05, "loss": 0.3705, "step": 860 }, { "epoch": 0.07, "learning_rate": 1.9322640921831204e-05, "loss": 0.3507, "step": 870 }, { "epoch": 0.07, "learning_rate": 1.9314855185300533e-05, "loss": 0.2796, "step": 880 }, { "epoch": 0.07, "learning_rate": 1.9307069448769855e-05, "loss": 0.3828, "step": 890 }, { "epoch": 0.07, "learning_rate": 1.9299283712239177e-05, "loss": 0.3091, "step": 900 }, { "epoch": 0.07, "learning_rate": 1.9291497975708506e-05, "loss": 0.294, "step": 910 }, { "epoch": 0.07, "learning_rate": 1.9283712239177828e-05, "loss": 0.3353, "step": 920 }, { "epoch": 0.07, "learning_rate": 1.927592650264715e-05, "loss": 0.4199, "step": 930 }, { "epoch": 0.07, "learning_rate": 1.9268140766116478e-05, "loss": 0.4175, "step": 940 }, { "epoch": 0.07, "learning_rate": 1.92603550295858e-05, "loss": 0.396, "step": 950 }, { "epoch": 0.07, "learning_rate": 1.9252569293055125e-05, "loss": 0.3726, "step": 960 }, { "epoch": 0.08, "learning_rate": 1.924478355652445e-05, "loss": 0.4031, "step": 970 }, { "epoch": 0.08, "learning_rate": 1.9236997819993773e-05, "loss": 0.3575, "step": 980 }, { "epoch": 0.08, "learning_rate": 1.9229212083463098e-05, "loss": 0.3118, "step": 990 }, { "epoch": 0.08, "learning_rate": 1.922142634693242e-05, "loss": 0.272, "step": 1000 }, { "epoch": 0.08, "learning_rate": 1.9213640610401745e-05, "loss": 0.3698, "step": 1010 }, { "epoch": 0.08, "learning_rate": 1.920585487387107e-05, "loss": 0.3232, "step": 1020 }, { "epoch": 0.08, "learning_rate": 1.9198069137340392e-05, "loss": 0.4195, "step": 1030 }, { "epoch": 0.08, "learning_rate": 1.9190283400809718e-05, "loss": 0.377, "step": 1040 }, { "epoch": 0.08, "learning_rate": 1.9182497664279043e-05, "loss": 0.371, "step": 1050 }, { "epoch": 0.08, "learning_rate": 1.9174711927748365e-05, "loss": 0.3674, "step": 1060 }, { "epoch": 0.08, "learning_rate": 1.9166926191217693e-05, "loss": 0.3503, "step": 1070 }, { "epoch": 0.08, "learning_rate": 1.9159140454687015e-05, "loss": 0.3805, "step": 1080 }, { "epoch": 0.08, "learning_rate": 1.9151354718156337e-05, "loss": 0.3474, "step": 1090 }, { "epoch": 0.09, "learning_rate": 1.9143568981625663e-05, "loss": 0.3868, "step": 1100 }, { "epoch": 0.09, "learning_rate": 1.9135783245094988e-05, "loss": 0.3652, "step": 1110 }, { "epoch": 0.09, "learning_rate": 1.912799750856431e-05, "loss": 0.3521, "step": 1120 }, { "epoch": 0.09, "learning_rate": 1.9120211772033635e-05, "loss": 0.3004, "step": 1130 }, { "epoch": 0.09, "learning_rate": 1.911242603550296e-05, "loss": 0.3605, "step": 1140 }, { "epoch": 0.09, "learning_rate": 1.9104640298972282e-05, "loss": 0.2845, "step": 1150 }, { "epoch": 0.09, "learning_rate": 1.9096854562441608e-05, "loss": 0.3038, "step": 1160 }, { "epoch": 0.09, "learning_rate": 1.9089068825910933e-05, "loss": 0.3712, "step": 1170 }, { "epoch": 0.09, "learning_rate": 1.9081283089380255e-05, "loss": 0.303, "step": 1180 }, { "epoch": 0.09, "learning_rate": 1.907349735284958e-05, "loss": 0.2412, "step": 1190 }, { "epoch": 0.09, "learning_rate": 1.9065711616318905e-05, "loss": 0.3342, "step": 1200 }, { "epoch": 0.09, "learning_rate": 1.905792587978823e-05, "loss": 0.2995, "step": 1210 }, { "epoch": 0.09, "learning_rate": 1.9050140143257553e-05, "loss": 0.4164, "step": 1220 }, { "epoch": 0.1, "learning_rate": 1.9042354406726878e-05, "loss": 0.3316, "step": 1230 }, { "epoch": 0.1, "learning_rate": 1.9034568670196203e-05, "loss": 0.3575, "step": 1240 }, { "epoch": 0.1, "learning_rate": 1.9026782933665525e-05, "loss": 0.3399, "step": 1250 }, { "epoch": 0.1, "learning_rate": 1.901899719713485e-05, "loss": 0.3008, "step": 1260 }, { "epoch": 0.1, "learning_rate": 1.9011211460604176e-05, "loss": 0.3817, "step": 1270 }, { "epoch": 0.1, "learning_rate": 1.9003425724073498e-05, "loss": 0.2931, "step": 1280 }, { "epoch": 0.1, "learning_rate": 1.8995639987542823e-05, "loss": 0.3154, "step": 1290 }, { "epoch": 0.1, "learning_rate": 1.8987854251012148e-05, "loss": 0.3168, "step": 1300 }, { "epoch": 0.1, "learning_rate": 1.898006851448147e-05, "loss": 0.2592, "step": 1310 }, { "epoch": 0.1, "learning_rate": 1.8972282777950795e-05, "loss": 0.3804, "step": 1320 }, { "epoch": 0.1, "learning_rate": 1.896449704142012e-05, "loss": 0.3438, "step": 1330 }, { "epoch": 0.1, "learning_rate": 1.8956711304889443e-05, "loss": 0.3112, "step": 1340 }, { "epoch": 0.11, "learning_rate": 1.8948925568358768e-05, "loss": 0.3406, "step": 1350 }, { "epoch": 0.11, "learning_rate": 1.8941139831828093e-05, "loss": 0.4156, "step": 1360 }, { "epoch": 0.11, "learning_rate": 1.8933354095297415e-05, "loss": 0.3699, "step": 1370 }, { "epoch": 0.11, "learning_rate": 1.892556835876674e-05, "loss": 0.3685, "step": 1380 }, { "epoch": 0.11, "learning_rate": 1.8917782622236066e-05, "loss": 0.3602, "step": 1390 }, { "epoch": 0.11, "learning_rate": 1.8909996885705388e-05, "loss": 0.3602, "step": 1400 }, { "epoch": 0.11, "learning_rate": 1.8902211149174713e-05, "loss": 0.399, "step": 1410 }, { "epoch": 0.11, "learning_rate": 1.8894425412644038e-05, "loss": 0.3202, "step": 1420 }, { "epoch": 0.11, "learning_rate": 1.888663967611336e-05, "loss": 0.4622, "step": 1430 }, { "epoch": 0.11, "learning_rate": 1.8878853939582685e-05, "loss": 0.3126, "step": 1440 }, { "epoch": 0.11, "learning_rate": 1.887106820305201e-05, "loss": 0.3729, "step": 1450 }, { "epoch": 0.11, "learning_rate": 1.8863282466521336e-05, "loss": 0.345, "step": 1460 }, { "epoch": 0.11, "learning_rate": 1.8855496729990658e-05, "loss": 0.3146, "step": 1470 }, { "epoch": 0.12, "learning_rate": 1.8847710993459983e-05, "loss": 0.3615, "step": 1480 }, { "epoch": 0.12, "learning_rate": 1.883992525692931e-05, "loss": 0.3023, "step": 1490 }, { "epoch": 0.12, "learning_rate": 1.883213952039863e-05, "loss": 0.3833, "step": 1500 }, { "epoch": 0.12, "learning_rate": 1.8824353783867956e-05, "loss": 0.3573, "step": 1510 }, { "epoch": 0.12, "learning_rate": 1.881656804733728e-05, "loss": 0.2883, "step": 1520 }, { "epoch": 0.12, "learning_rate": 1.8808782310806603e-05, "loss": 0.4392, "step": 1530 }, { "epoch": 0.12, "learning_rate": 1.880099657427593e-05, "loss": 0.3942, "step": 1540 }, { "epoch": 0.12, "learning_rate": 1.8793210837745254e-05, "loss": 0.3247, "step": 1550 }, { "epoch": 0.12, "learning_rate": 1.8785425101214576e-05, "loss": 0.378, "step": 1560 }, { "epoch": 0.12, "learning_rate": 1.87776393646839e-05, "loss": 0.3076, "step": 1570 }, { "epoch": 0.12, "learning_rate": 1.8769853628153226e-05, "loss": 0.3093, "step": 1580 }, { "epoch": 0.12, "learning_rate": 1.8762067891622548e-05, "loss": 0.2823, "step": 1590 }, { "epoch": 0.12, "learning_rate": 1.8754282155091873e-05, "loss": 0.2736, "step": 1600 }, { "epoch": 0.13, "learning_rate": 1.87464964185612e-05, "loss": 0.2913, "step": 1610 }, { "epoch": 0.13, "learning_rate": 1.873871068203052e-05, "loss": 0.3343, "step": 1620 }, { "epoch": 0.13, "learning_rate": 1.8730924945499846e-05, "loss": 0.3567, "step": 1630 }, { "epoch": 0.13, "learning_rate": 1.872313920896917e-05, "loss": 0.3122, "step": 1640 }, { "epoch": 0.13, "learning_rate": 1.8715353472438493e-05, "loss": 0.2749, "step": 1650 }, { "epoch": 0.13, "learning_rate": 1.870756773590782e-05, "loss": 0.3274, "step": 1660 }, { "epoch": 0.13, "learning_rate": 1.8699781999377144e-05, "loss": 0.4423, "step": 1670 }, { "epoch": 0.13, "learning_rate": 1.8691996262846466e-05, "loss": 0.2746, "step": 1680 }, { "epoch": 0.13, "learning_rate": 1.868421052631579e-05, "loss": 0.3538, "step": 1690 }, { "epoch": 0.13, "learning_rate": 1.8676424789785116e-05, "loss": 0.3878, "step": 1700 }, { "epoch": 0.13, "learning_rate": 1.866863905325444e-05, "loss": 0.3795, "step": 1710 }, { "epoch": 0.13, "learning_rate": 1.8660853316723763e-05, "loss": 0.3302, "step": 1720 }, { "epoch": 0.13, "learning_rate": 1.865306758019309e-05, "loss": 0.3231, "step": 1730 }, { "epoch": 0.14, "learning_rate": 1.8645281843662414e-05, "loss": 0.3356, "step": 1740 }, { "epoch": 0.14, "learning_rate": 1.8637496107131736e-05, "loss": 0.3151, "step": 1750 }, { "epoch": 0.14, "learning_rate": 1.862971037060106e-05, "loss": 0.3759, "step": 1760 }, { "epoch": 0.14, "learning_rate": 1.8621924634070386e-05, "loss": 0.3534, "step": 1770 }, { "epoch": 0.14, "learning_rate": 1.861413889753971e-05, "loss": 0.303, "step": 1780 }, { "epoch": 0.14, "learning_rate": 1.860635316100903e-05, "loss": 0.2945, "step": 1790 }, { "epoch": 0.14, "learning_rate": 1.859856742447836e-05, "loss": 0.349, "step": 1800 }, { "epoch": 0.14, "learning_rate": 1.859078168794768e-05, "loss": 0.3398, "step": 1810 }, { "epoch": 0.14, "learning_rate": 1.8582995951417006e-05, "loss": 0.2744, "step": 1820 }, { "epoch": 0.14, "learning_rate": 1.857521021488633e-05, "loss": 0.3276, "step": 1830 }, { "epoch": 0.14, "learning_rate": 1.8567424478355653e-05, "loss": 0.2523, "step": 1840 }, { "epoch": 0.14, "learning_rate": 1.855963874182498e-05, "loss": 0.3286, "step": 1850 }, { "epoch": 0.14, "learning_rate": 1.8551853005294304e-05, "loss": 0.3119, "step": 1860 }, { "epoch": 0.15, "learning_rate": 1.8544067268763626e-05, "loss": 0.3293, "step": 1870 }, { "epoch": 0.15, "learning_rate": 1.853628153223295e-05, "loss": 0.3766, "step": 1880 }, { "epoch": 0.15, "learning_rate": 1.8528495795702277e-05, "loss": 0.2765, "step": 1890 }, { "epoch": 0.15, "learning_rate": 1.85207100591716e-05, "loss": 0.4001, "step": 1900 }, { "epoch": 0.15, "learning_rate": 1.8512924322640924e-05, "loss": 0.3359, "step": 1910 }, { "epoch": 0.15, "learning_rate": 1.8505138586110246e-05, "loss": 0.3543, "step": 1920 }, { "epoch": 0.15, "learning_rate": 1.849735284957957e-05, "loss": 0.3326, "step": 1930 }, { "epoch": 0.15, "learning_rate": 1.8489567113048896e-05, "loss": 0.2847, "step": 1940 }, { "epoch": 0.15, "learning_rate": 1.8481781376518218e-05, "loss": 0.3548, "step": 1950 }, { "epoch": 0.15, "learning_rate": 1.8473995639987547e-05, "loss": 0.3486, "step": 1960 }, { "epoch": 0.15, "learning_rate": 1.846620990345687e-05, "loss": 0.3246, "step": 1970 }, { "epoch": 0.15, "learning_rate": 1.845842416692619e-05, "loss": 0.4146, "step": 1980 }, { "epoch": 0.15, "learning_rate": 1.845063843039552e-05, "loss": 0.3075, "step": 1990 }, { "epoch": 0.16, "learning_rate": 1.844285269386484e-05, "loss": 0.3088, "step": 2000 }, { "epoch": 0.16, "learning_rate": 1.8435066957334163e-05, "loss": 0.3123, "step": 2010 }, { "epoch": 0.16, "learning_rate": 1.842728122080349e-05, "loss": 0.3547, "step": 2020 }, { "epoch": 0.16, "learning_rate": 1.8419495484272814e-05, "loss": 0.2637, "step": 2030 }, { "epoch": 0.16, "learning_rate": 1.8411709747742136e-05, "loss": 0.3146, "step": 2040 }, { "epoch": 0.16, "learning_rate": 1.840392401121146e-05, "loss": 0.2766, "step": 2050 }, { "epoch": 0.16, "learning_rate": 1.8396138274680786e-05, "loss": 0.3732, "step": 2060 }, { "epoch": 0.16, "learning_rate": 1.838835253815011e-05, "loss": 0.263, "step": 2070 }, { "epoch": 0.16, "learning_rate": 1.8380566801619433e-05, "loss": 0.3383, "step": 2080 }, { "epoch": 0.16, "learning_rate": 1.837278106508876e-05, "loss": 0.3495, "step": 2090 }, { "epoch": 0.16, "learning_rate": 1.8364995328558084e-05, "loss": 0.3986, "step": 2100 }, { "epoch": 0.16, "learning_rate": 1.8357209592027406e-05, "loss": 0.3264, "step": 2110 }, { "epoch": 0.17, "learning_rate": 1.834942385549673e-05, "loss": 0.3782, "step": 2120 }, { "epoch": 0.17, "learning_rate": 1.8341638118966057e-05, "loss": 0.3523, "step": 2130 }, { "epoch": 0.17, "learning_rate": 1.833385238243538e-05, "loss": 0.2724, "step": 2140 }, { "epoch": 0.17, "learning_rate": 1.8326066645904704e-05, "loss": 0.3245, "step": 2150 }, { "epoch": 0.17, "learning_rate": 1.831828090937403e-05, "loss": 0.259, "step": 2160 }, { "epoch": 0.17, "learning_rate": 1.831049517284335e-05, "loss": 0.4271, "step": 2170 }, { "epoch": 0.17, "learning_rate": 1.8302709436312676e-05, "loss": 0.2523, "step": 2180 }, { "epoch": 0.17, "learning_rate": 1.8294923699782e-05, "loss": 0.267, "step": 2190 }, { "epoch": 0.17, "learning_rate": 1.8287137963251324e-05, "loss": 0.3008, "step": 2200 }, { "epoch": 0.17, "learning_rate": 1.827935222672065e-05, "loss": 0.3555, "step": 2210 }, { "epoch": 0.17, "learning_rate": 1.8271566490189974e-05, "loss": 0.3355, "step": 2220 }, { "epoch": 0.17, "learning_rate": 1.8263780753659296e-05, "loss": 0.2799, "step": 2230 }, { "epoch": 0.17, "learning_rate": 1.825599501712862e-05, "loss": 0.3444, "step": 2240 }, { "epoch": 0.18, "learning_rate": 1.8248209280597947e-05, "loss": 0.2557, "step": 2250 }, { "epoch": 0.18, "learning_rate": 1.824042354406727e-05, "loss": 0.316, "step": 2260 }, { "epoch": 0.18, "learning_rate": 1.8232637807536594e-05, "loss": 0.2644, "step": 2270 }, { "epoch": 0.18, "learning_rate": 1.822485207100592e-05, "loss": 0.3134, "step": 2280 }, { "epoch": 0.18, "learning_rate": 1.821706633447524e-05, "loss": 0.243, "step": 2290 }, { "epoch": 0.18, "learning_rate": 1.8209280597944566e-05, "loss": 0.4135, "step": 2300 }, { "epoch": 0.18, "learning_rate": 1.820149486141389e-05, "loss": 0.2781, "step": 2310 }, { "epoch": 0.18, "learning_rate": 1.8193709124883217e-05, "loss": 0.2512, "step": 2320 }, { "epoch": 0.18, "learning_rate": 1.818592338835254e-05, "loss": 0.2923, "step": 2330 }, { "epoch": 0.18, "learning_rate": 1.8178137651821864e-05, "loss": 0.3151, "step": 2340 }, { "epoch": 0.18, "learning_rate": 1.817035191529119e-05, "loss": 0.2766, "step": 2350 }, { "epoch": 0.18, "learning_rate": 1.816256617876051e-05, "loss": 0.3372, "step": 2360 }, { "epoch": 0.18, "learning_rate": 1.8154780442229837e-05, "loss": 0.3924, "step": 2370 }, { "epoch": 0.19, "learning_rate": 1.8146994705699162e-05, "loss": 0.2954, "step": 2380 }, { "epoch": 0.19, "learning_rate": 1.8139208969168484e-05, "loss": 0.2981, "step": 2390 }, { "epoch": 0.19, "learning_rate": 1.813142323263781e-05, "loss": 0.3011, "step": 2400 }, { "epoch": 0.19, "learning_rate": 1.8123637496107134e-05, "loss": 0.2724, "step": 2410 }, { "epoch": 0.19, "learning_rate": 1.8115851759576456e-05, "loss": 0.3791, "step": 2420 }, { "epoch": 0.19, "learning_rate": 1.8108066023045782e-05, "loss": 0.3037, "step": 2430 }, { "epoch": 0.19, "learning_rate": 1.8100280286515107e-05, "loss": 0.2986, "step": 2440 }, { "epoch": 0.19, "learning_rate": 1.809249454998443e-05, "loss": 0.3418, "step": 2450 }, { "epoch": 0.19, "learning_rate": 1.8084708813453754e-05, "loss": 0.3293, "step": 2460 }, { "epoch": 0.19, "learning_rate": 1.807692307692308e-05, "loss": 0.3038, "step": 2470 }, { "epoch": 0.19, "learning_rate": 1.80691373403924e-05, "loss": 0.3099, "step": 2480 }, { "epoch": 0.19, "learning_rate": 1.8061351603861727e-05, "loss": 0.3628, "step": 2490 }, { "epoch": 0.19, "learning_rate": 1.8053565867331052e-05, "loss": 0.3277, "step": 2500 }, { "epoch": 0.2, "learning_rate": 1.8045780130800374e-05, "loss": 0.2927, "step": 2510 }, { "epoch": 0.2, "learning_rate": 1.80379943942697e-05, "loss": 0.2666, "step": 2520 }, { "epoch": 0.2, "learning_rate": 1.8030208657739025e-05, "loss": 0.4419, "step": 2530 }, { "epoch": 0.2, "learning_rate": 1.8022422921208346e-05, "loss": 0.3165, "step": 2540 }, { "epoch": 0.2, "learning_rate": 1.8014637184677672e-05, "loss": 0.3474, "step": 2550 }, { "epoch": 0.2, "learning_rate": 1.8006851448146997e-05, "loss": 0.3313, "step": 2560 }, { "epoch": 0.2, "learning_rate": 1.7999065711616322e-05, "loss": 0.4063, "step": 2570 }, { "epoch": 0.2, "learning_rate": 1.7991279975085644e-05, "loss": 0.3396, "step": 2580 }, { "epoch": 0.2, "learning_rate": 1.798349423855497e-05, "loss": 0.2792, "step": 2590 }, { "epoch": 0.2, "learning_rate": 1.7975708502024295e-05, "loss": 0.3595, "step": 2600 }, { "epoch": 0.2, "learning_rate": 1.7967922765493617e-05, "loss": 0.2986, "step": 2610 }, { "epoch": 0.2, "learning_rate": 1.7960137028962942e-05, "loss": 0.3442, "step": 2620 }, { "epoch": 0.2, "learning_rate": 1.7952351292432267e-05, "loss": 0.3373, "step": 2630 }, { "epoch": 0.21, "learning_rate": 1.794456555590159e-05, "loss": 0.273, "step": 2640 }, { "epoch": 0.21, "learning_rate": 1.7936779819370915e-05, "loss": 0.3987, "step": 2650 }, { "epoch": 0.21, "learning_rate": 1.792899408284024e-05, "loss": 0.3173, "step": 2660 }, { "epoch": 0.21, "learning_rate": 1.7921208346309562e-05, "loss": 0.3384, "step": 2670 }, { "epoch": 0.21, "learning_rate": 1.7913422609778887e-05, "loss": 0.376, "step": 2680 }, { "epoch": 0.21, "learning_rate": 1.7905636873248212e-05, "loss": 0.2247, "step": 2690 }, { "epoch": 0.21, "learning_rate": 1.7897851136717534e-05, "loss": 0.326, "step": 2700 }, { "epoch": 0.21, "learning_rate": 1.789006540018686e-05, "loss": 0.2287, "step": 2710 }, { "epoch": 0.21, "learning_rate": 1.7882279663656185e-05, "loss": 0.3976, "step": 2720 }, { "epoch": 0.21, "learning_rate": 1.7874493927125507e-05, "loss": 0.2625, "step": 2730 }, { "epoch": 0.21, "learning_rate": 1.7866708190594832e-05, "loss": 0.3386, "step": 2740 }, { "epoch": 0.21, "learning_rate": 1.7858922454064157e-05, "loss": 0.3251, "step": 2750 }, { "epoch": 0.21, "learning_rate": 1.785113671753348e-05, "loss": 0.3312, "step": 2760 }, { "epoch": 0.22, "learning_rate": 1.7843350981002805e-05, "loss": 0.3089, "step": 2770 }, { "epoch": 0.22, "learning_rate": 1.783556524447213e-05, "loss": 0.3043, "step": 2780 }, { "epoch": 0.22, "learning_rate": 1.7827779507941452e-05, "loss": 0.3096, "step": 2790 }, { "epoch": 0.22, "learning_rate": 1.7819993771410777e-05, "loss": 0.282, "step": 2800 }, { "epoch": 0.22, "learning_rate": 1.7812208034880102e-05, "loss": 0.3608, "step": 2810 }, { "epoch": 0.22, "learning_rate": 1.7804422298349428e-05, "loss": 0.3096, "step": 2820 }, { "epoch": 0.22, "learning_rate": 1.779663656181875e-05, "loss": 0.3158, "step": 2830 }, { "epoch": 0.22, "learning_rate": 1.778885082528807e-05, "loss": 0.3508, "step": 2840 }, { "epoch": 0.22, "learning_rate": 1.77810650887574e-05, "loss": 0.2838, "step": 2850 }, { "epoch": 0.22, "learning_rate": 1.7773279352226722e-05, "loss": 0.2463, "step": 2860 }, { "epoch": 0.22, "learning_rate": 1.7765493615696044e-05, "loss": 0.4101, "step": 2870 }, { "epoch": 0.22, "learning_rate": 1.7757707879165373e-05, "loss": 0.3024, "step": 2880 }, { "epoch": 0.23, "learning_rate": 1.7749922142634695e-05, "loss": 0.3887, "step": 2890 }, { "epoch": 0.23, "learning_rate": 1.7742136406104017e-05, "loss": 0.3184, "step": 2900 }, { "epoch": 0.23, "learning_rate": 1.7734350669573345e-05, "loss": 0.3213, "step": 2910 }, { "epoch": 0.23, "learning_rate": 1.7726564933042667e-05, "loss": 0.3363, "step": 2920 }, { "epoch": 0.23, "learning_rate": 1.771877919651199e-05, "loss": 0.3635, "step": 2930 }, { "epoch": 0.23, "learning_rate": 1.7710993459981314e-05, "loss": 0.2902, "step": 2940 }, { "epoch": 0.23, "learning_rate": 1.770320772345064e-05, "loss": 0.3068, "step": 2950 }, { "epoch": 0.23, "learning_rate": 1.7695421986919965e-05, "loss": 0.2164, "step": 2960 }, { "epoch": 0.23, "learning_rate": 1.7687636250389287e-05, "loss": 0.3048, "step": 2970 }, { "epoch": 0.23, "learning_rate": 1.7679850513858612e-05, "loss": 0.3578, "step": 2980 }, { "epoch": 0.23, "learning_rate": 1.7672064777327937e-05, "loss": 0.3142, "step": 2990 }, { "epoch": 0.23, "learning_rate": 1.766427904079726e-05, "loss": 0.3153, "step": 3000 }, { "epoch": 0.23, "learning_rate": 1.7656493304266585e-05, "loss": 0.254, "step": 3010 }, { "epoch": 0.24, "learning_rate": 1.764870756773591e-05, "loss": 0.3416, "step": 3020 }, { "epoch": 0.24, "learning_rate": 1.7640921831205232e-05, "loss": 0.3015, "step": 3030 }, { "epoch": 0.24, "learning_rate": 1.7633136094674557e-05, "loss": 0.306, "step": 3040 }, { "epoch": 0.24, "learning_rate": 1.7625350358143882e-05, "loss": 0.3475, "step": 3050 }, { "epoch": 0.24, "learning_rate": 1.7617564621613204e-05, "loss": 0.2974, "step": 3060 }, { "epoch": 0.24, "learning_rate": 1.760977888508253e-05, "loss": 0.3741, "step": 3070 }, { "epoch": 0.24, "learning_rate": 1.7601993148551855e-05, "loss": 0.3203, "step": 3080 }, { "epoch": 0.24, "learning_rate": 1.7594207412021177e-05, "loss": 0.3682, "step": 3090 }, { "epoch": 0.24, "learning_rate": 1.7586421675490502e-05, "loss": 0.3191, "step": 3100 }, { "epoch": 0.24, "learning_rate": 1.7578635938959828e-05, "loss": 0.2515, "step": 3110 }, { "epoch": 0.24, "learning_rate": 1.757085020242915e-05, "loss": 0.394, "step": 3120 }, { "epoch": 0.24, "learning_rate": 1.7563064465898475e-05, "loss": 0.317, "step": 3130 }, { "epoch": 0.24, "learning_rate": 1.75552787293678e-05, "loss": 0.2698, "step": 3140 }, { "epoch": 0.25, "learning_rate": 1.7547492992837122e-05, "loss": 0.3299, "step": 3150 }, { "epoch": 0.25, "learning_rate": 1.7539707256306447e-05, "loss": 0.3183, "step": 3160 }, { "epoch": 0.25, "learning_rate": 1.7531921519775773e-05, "loss": 0.3598, "step": 3170 }, { "epoch": 0.25, "learning_rate": 1.7524135783245094e-05, "loss": 0.3637, "step": 3180 }, { "epoch": 0.25, "learning_rate": 1.751635004671442e-05, "loss": 0.2586, "step": 3190 }, { "epoch": 0.25, "learning_rate": 1.7508564310183745e-05, "loss": 0.3151, "step": 3200 }, { "epoch": 0.25, "learning_rate": 1.750077857365307e-05, "loss": 0.2587, "step": 3210 }, { "epoch": 0.25, "learning_rate": 1.7492992837122392e-05, "loss": 0.342, "step": 3220 }, { "epoch": 0.25, "learning_rate": 1.7485207100591718e-05, "loss": 0.305, "step": 3230 }, { "epoch": 0.25, "learning_rate": 1.7477421364061043e-05, "loss": 0.3429, "step": 3240 }, { "epoch": 0.25, "learning_rate": 1.7469635627530365e-05, "loss": 0.2381, "step": 3250 }, { "epoch": 0.25, "learning_rate": 1.746184989099969e-05, "loss": 0.2777, "step": 3260 }, { "epoch": 0.25, "learning_rate": 1.7454064154469015e-05, "loss": 0.3232, "step": 3270 }, { "epoch": 0.26, "learning_rate": 1.7446278417938337e-05, "loss": 0.2442, "step": 3280 }, { "epoch": 0.26, "learning_rate": 1.7438492681407663e-05, "loss": 0.2881, "step": 3290 }, { "epoch": 0.26, "learning_rate": 1.7430706944876988e-05, "loss": 0.299, "step": 3300 }, { "epoch": 0.26, "learning_rate": 1.742292120834631e-05, "loss": 0.2971, "step": 3310 }, { "epoch": 0.26, "learning_rate": 1.7415135471815635e-05, "loss": 0.2872, "step": 3320 }, { "epoch": 0.26, "learning_rate": 1.740734973528496e-05, "loss": 0.2707, "step": 3330 }, { "epoch": 0.26, "learning_rate": 1.7399563998754282e-05, "loss": 0.3922, "step": 3340 }, { "epoch": 0.26, "learning_rate": 1.7391778262223608e-05, "loss": 0.371, "step": 3350 }, { "epoch": 0.26, "learning_rate": 1.7383992525692933e-05, "loss": 0.2891, "step": 3360 }, { "epoch": 0.26, "learning_rate": 1.7376206789162255e-05, "loss": 0.3711, "step": 3370 }, { "epoch": 0.26, "learning_rate": 1.736842105263158e-05, "loss": 0.2751, "step": 3380 }, { "epoch": 0.26, "learning_rate": 1.7360635316100905e-05, "loss": 0.3483, "step": 3390 }, { "epoch": 0.26, "learning_rate": 1.7352849579570227e-05, "loss": 0.3248, "step": 3400 }, { "epoch": 0.27, "learning_rate": 1.7345063843039553e-05, "loss": 0.2927, "step": 3410 }, { "epoch": 0.27, "learning_rate": 1.7337278106508878e-05, "loss": 0.3026, "step": 3420 }, { "epoch": 0.27, "learning_rate": 1.73294923699782e-05, "loss": 0.2577, "step": 3430 }, { "epoch": 0.27, "learning_rate": 1.7321706633447525e-05, "loss": 0.2536, "step": 3440 }, { "epoch": 0.27, "learning_rate": 1.731392089691685e-05, "loss": 0.3338, "step": 3450 }, { "epoch": 0.27, "learning_rate": 1.7306135160386176e-05, "loss": 0.3278, "step": 3460 }, { "epoch": 0.27, "learning_rate": 1.7298349423855498e-05, "loss": 0.3179, "step": 3470 }, { "epoch": 0.27, "learning_rate": 1.7290563687324823e-05, "loss": 0.3449, "step": 3480 }, { "epoch": 0.27, "learning_rate": 1.7282777950794148e-05, "loss": 0.3133, "step": 3490 }, { "epoch": 0.27, "learning_rate": 1.727499221426347e-05, "loss": 0.3097, "step": 3500 }, { "epoch": 0.27, "learning_rate": 1.7267206477732795e-05, "loss": 0.3252, "step": 3510 }, { "epoch": 0.27, "learning_rate": 1.725942074120212e-05, "loss": 0.3041, "step": 3520 }, { "epoch": 0.27, "learning_rate": 1.7251635004671443e-05, "loss": 0.2786, "step": 3530 }, { "epoch": 0.28, "learning_rate": 1.7243849268140768e-05, "loss": 0.2478, "step": 3540 }, { "epoch": 0.28, "learning_rate": 1.7236063531610093e-05, "loss": 0.2706, "step": 3550 }, { "epoch": 0.28, "learning_rate": 1.7228277795079415e-05, "loss": 0.3272, "step": 3560 }, { "epoch": 0.28, "learning_rate": 1.722049205854874e-05, "loss": 0.3018, "step": 3570 }, { "epoch": 0.28, "learning_rate": 1.7212706322018066e-05, "loss": 0.2802, "step": 3580 }, { "epoch": 0.28, "learning_rate": 1.7204920585487388e-05, "loss": 0.274, "step": 3590 }, { "epoch": 0.28, "learning_rate": 1.7197134848956713e-05, "loss": 0.3024, "step": 3600 }, { "epoch": 0.28, "learning_rate": 1.7189349112426038e-05, "loss": 0.3475, "step": 3610 }, { "epoch": 0.28, "learning_rate": 1.718156337589536e-05, "loss": 0.2996, "step": 3620 }, { "epoch": 0.28, "learning_rate": 1.7173777639364685e-05, "loss": 0.2765, "step": 3630 }, { "epoch": 0.28, "learning_rate": 1.716599190283401e-05, "loss": 0.2422, "step": 3640 }, { "epoch": 0.28, "learning_rate": 1.7158206166303333e-05, "loss": 0.3942, "step": 3650 }, { "epoch": 0.28, "learning_rate": 1.7150420429772658e-05, "loss": 0.2361, "step": 3660 }, { "epoch": 0.29, "learning_rate": 1.7142634693241983e-05, "loss": 0.3354, "step": 3670 }, { "epoch": 0.29, "learning_rate": 1.7134848956711305e-05, "loss": 0.2544, "step": 3680 }, { "epoch": 0.29, "learning_rate": 1.712706322018063e-05, "loss": 0.3437, "step": 3690 }, { "epoch": 0.29, "learning_rate": 1.7119277483649956e-05, "loss": 0.2315, "step": 3700 }, { "epoch": 0.29, "learning_rate": 1.711149174711928e-05, "loss": 0.296, "step": 3710 }, { "epoch": 0.29, "learning_rate": 1.7103706010588603e-05, "loss": 0.2921, "step": 3720 }, { "epoch": 0.29, "learning_rate": 1.709592027405793e-05, "loss": 0.33, "step": 3730 }, { "epoch": 0.29, "learning_rate": 1.7088134537527254e-05, "loss": 0.2921, "step": 3740 }, { "epoch": 0.29, "learning_rate": 1.7080348800996576e-05, "loss": 0.3615, "step": 3750 }, { "epoch": 0.29, "learning_rate": 1.7072563064465897e-05, "loss": 0.3131, "step": 3760 }, { "epoch": 0.29, "learning_rate": 1.7064777327935226e-05, "loss": 0.372, "step": 3770 }, { "epoch": 0.29, "learning_rate": 1.7056991591404548e-05, "loss": 0.2867, "step": 3780 }, { "epoch": 0.3, "learning_rate": 1.704920585487387e-05, "loss": 0.3098, "step": 3790 }, { "epoch": 0.3, "learning_rate": 1.70414201183432e-05, "loss": 0.2592, "step": 3800 }, { "epoch": 0.3, "learning_rate": 1.703363438181252e-05, "loss": 0.3017, "step": 3810 }, { "epoch": 0.3, "learning_rate": 1.7025848645281846e-05, "loss": 0.3378, "step": 3820 }, { "epoch": 0.3, "learning_rate": 1.701806290875117e-05, "loss": 0.2605, "step": 3830 }, { "epoch": 0.3, "learning_rate": 1.7010277172220493e-05, "loss": 0.2793, "step": 3840 }, { "epoch": 0.3, "learning_rate": 1.700249143568982e-05, "loss": 0.2787, "step": 3850 }, { "epoch": 0.3, "learning_rate": 1.699470569915914e-05, "loss": 0.3085, "step": 3860 }, { "epoch": 0.3, "learning_rate": 1.6986919962628466e-05, "loss": 0.2703, "step": 3870 }, { "epoch": 0.3, "learning_rate": 1.697913422609779e-05, "loss": 0.3528, "step": 3880 }, { "epoch": 0.3, "learning_rate": 1.6971348489567113e-05, "loss": 0.2685, "step": 3890 }, { "epoch": 0.3, "learning_rate": 1.6963562753036438e-05, "loss": 0.3184, "step": 3900 }, { "epoch": 0.3, "learning_rate": 1.6955777016505763e-05, "loss": 0.2761, "step": 3910 }, { "epoch": 0.31, "learning_rate": 1.6947991279975085e-05, "loss": 0.2261, "step": 3920 }, { "epoch": 0.31, "learning_rate": 1.694020554344441e-05, "loss": 0.3712, "step": 3930 }, { "epoch": 0.31, "learning_rate": 1.6932419806913736e-05, "loss": 0.2567, "step": 3940 }, { "epoch": 0.31, "learning_rate": 1.6924634070383058e-05, "loss": 0.2936, "step": 3950 }, { "epoch": 0.31, "learning_rate": 1.6916848333852386e-05, "loss": 0.2667, "step": 3960 }, { "epoch": 0.31, "learning_rate": 1.690906259732171e-05, "loss": 0.35, "step": 3970 }, { "epoch": 0.31, "learning_rate": 1.690127686079103e-05, "loss": 0.2335, "step": 3980 }, { "epoch": 0.31, "learning_rate": 1.6893491124260356e-05, "loss": 0.2586, "step": 3990 }, { "epoch": 0.31, "learning_rate": 1.688570538772968e-05, "loss": 0.3377, "step": 4000 }, { "epoch": 0.31, "learning_rate": 1.6877919651199003e-05, "loss": 0.2969, "step": 4010 }, { "epoch": 0.31, "learning_rate": 1.6870133914668328e-05, "loss": 0.2893, "step": 4020 }, { "epoch": 0.31, "learning_rate": 1.6862348178137653e-05, "loss": 0.3195, "step": 4030 }, { "epoch": 0.31, "learning_rate": 1.6854562441606975e-05, "loss": 0.3279, "step": 4040 }, { "epoch": 0.32, "learning_rate": 1.68467767050763e-05, "loss": 0.2271, "step": 4050 }, { "epoch": 0.32, "learning_rate": 1.6838990968545626e-05, "loss": 0.3524, "step": 4060 }, { "epoch": 0.32, "learning_rate": 1.683120523201495e-05, "loss": 0.2838, "step": 4070 }, { "epoch": 0.32, "learning_rate": 1.6823419495484273e-05, "loss": 0.364, "step": 4080 }, { "epoch": 0.32, "learning_rate": 1.68156337589536e-05, "loss": 0.2356, "step": 4090 }, { "epoch": 0.32, "learning_rate": 1.6807848022422924e-05, "loss": 0.33, "step": 4100 }, { "epoch": 0.32, "learning_rate": 1.6800062285892246e-05, "loss": 0.3097, "step": 4110 }, { "epoch": 0.32, "learning_rate": 1.679227654936157e-05, "loss": 0.3582, "step": 4120 }, { "epoch": 0.32, "learning_rate": 1.6784490812830896e-05, "loss": 0.2895, "step": 4130 }, { "epoch": 0.32, "learning_rate": 1.6776705076300218e-05, "loss": 0.2226, "step": 4140 }, { "epoch": 0.32, "learning_rate": 1.6768919339769543e-05, "loss": 0.255, "step": 4150 }, { "epoch": 0.32, "learning_rate": 1.676113360323887e-05, "loss": 0.2919, "step": 4160 }, { "epoch": 0.32, "learning_rate": 1.675334786670819e-05, "loss": 0.3014, "step": 4170 }, { "epoch": 0.33, "learning_rate": 1.6745562130177516e-05, "loss": 0.3285, "step": 4180 }, { "epoch": 0.33, "learning_rate": 1.673777639364684e-05, "loss": 0.3023, "step": 4190 }, { "epoch": 0.33, "learning_rate": 1.6729990657116163e-05, "loss": 0.2802, "step": 4200 }, { "epoch": 0.33, "learning_rate": 1.672220492058549e-05, "loss": 0.2365, "step": 4210 }, { "epoch": 0.33, "learning_rate": 1.6714419184054814e-05, "loss": 0.2338, "step": 4220 }, { "epoch": 0.33, "learning_rate": 1.6706633447524136e-05, "loss": 0.3102, "step": 4230 }, { "epoch": 0.33, "learning_rate": 1.669884771099346e-05, "loss": 0.3002, "step": 4240 }, { "epoch": 0.33, "learning_rate": 1.6691061974462786e-05, "loss": 0.2361, "step": 4250 }, { "epoch": 0.33, "learning_rate": 1.6683276237932108e-05, "loss": 0.2159, "step": 4260 }, { "epoch": 0.33, "learning_rate": 1.6675490501401433e-05, "loss": 0.2764, "step": 4270 }, { "epoch": 0.33, "learning_rate": 1.666770476487076e-05, "loss": 0.2654, "step": 4280 }, { "epoch": 0.33, "learning_rate": 1.665991902834008e-05, "loss": 0.2997, "step": 4290 }, { "epoch": 0.33, "learning_rate": 1.6652133291809406e-05, "loss": 0.257, "step": 4300 }, { "epoch": 0.34, "learning_rate": 1.664434755527873e-05, "loss": 0.3513, "step": 4310 }, { "epoch": 0.34, "learning_rate": 1.6636561818748057e-05, "loss": 0.2621, "step": 4320 }, { "epoch": 0.34, "learning_rate": 1.662877608221738e-05, "loss": 0.2865, "step": 4330 }, { "epoch": 0.34, "learning_rate": 1.6620990345686704e-05, "loss": 0.3538, "step": 4340 }, { "epoch": 0.34, "learning_rate": 1.661320460915603e-05, "loss": 0.2507, "step": 4350 }, { "epoch": 0.34, "learning_rate": 1.660541887262535e-05, "loss": 0.2413, "step": 4360 }, { "epoch": 0.34, "learning_rate": 1.6597633136094676e-05, "loss": 0.2306, "step": 4370 }, { "epoch": 0.34, "learning_rate": 1.6589847399564e-05, "loss": 0.3176, "step": 4380 }, { "epoch": 0.34, "learning_rate": 1.6582061663033324e-05, "loss": 0.2894, "step": 4390 }, { "epoch": 0.34, "learning_rate": 1.657427592650265e-05, "loss": 0.2636, "step": 4400 }, { "epoch": 0.34, "learning_rate": 1.6566490189971974e-05, "loss": 0.4195, "step": 4410 }, { "epoch": 0.34, "learning_rate": 1.6558704453441296e-05, "loss": 0.3316, "step": 4420 }, { "epoch": 0.34, "learning_rate": 1.655091871691062e-05, "loss": 0.2798, "step": 4430 }, { "epoch": 0.35, "learning_rate": 1.6543132980379947e-05, "loss": 0.3231, "step": 4440 }, { "epoch": 0.35, "learning_rate": 1.653534724384927e-05, "loss": 0.2856, "step": 4450 }, { "epoch": 0.35, "learning_rate": 1.6527561507318594e-05, "loss": 0.444, "step": 4460 }, { "epoch": 0.35, "learning_rate": 1.651977577078792e-05, "loss": 0.2894, "step": 4470 }, { "epoch": 0.35, "learning_rate": 1.651199003425724e-05, "loss": 0.2602, "step": 4480 }, { "epoch": 0.35, "learning_rate": 1.6504204297726566e-05, "loss": 0.2079, "step": 4490 }, { "epoch": 0.35, "learning_rate": 1.649641856119589e-05, "loss": 0.3006, "step": 4500 }, { "epoch": 0.35, "learning_rate": 1.6488632824665214e-05, "loss": 0.2701, "step": 4510 }, { "epoch": 0.35, "learning_rate": 1.648084708813454e-05, "loss": 0.2697, "step": 4520 }, { "epoch": 0.35, "learning_rate": 1.6473061351603864e-05, "loss": 0.3324, "step": 4530 }, { "epoch": 0.35, "learning_rate": 1.6465275615073186e-05, "loss": 0.3033, "step": 4540 }, { "epoch": 0.35, "learning_rate": 1.645748987854251e-05, "loss": 0.3216, "step": 4550 }, { "epoch": 0.36, "learning_rate": 1.6449704142011837e-05, "loss": 0.2848, "step": 4560 }, { "epoch": 0.36, "learning_rate": 1.6441918405481162e-05, "loss": 0.2466, "step": 4570 }, { "epoch": 0.36, "learning_rate": 1.6434132668950484e-05, "loss": 0.3451, "step": 4580 }, { "epoch": 0.36, "learning_rate": 1.642634693241981e-05, "loss": 0.3165, "step": 4590 }, { "epoch": 0.36, "learning_rate": 1.6418561195889134e-05, "loss": 0.3146, "step": 4600 }, { "epoch": 0.36, "learning_rate": 1.6410775459358456e-05, "loss": 0.1862, "step": 4610 }, { "epoch": 0.36, "learning_rate": 1.640298972282778e-05, "loss": 0.255, "step": 4620 }, { "epoch": 0.36, "learning_rate": 1.6395203986297107e-05, "loss": 0.3015, "step": 4630 }, { "epoch": 0.36, "learning_rate": 1.638741824976643e-05, "loss": 0.2665, "step": 4640 }, { "epoch": 0.36, "learning_rate": 1.6379632513235754e-05, "loss": 0.234, "step": 4650 }, { "epoch": 0.36, "learning_rate": 1.637184677670508e-05, "loss": 0.2748, "step": 4660 }, { "epoch": 0.36, "learning_rate": 1.63640610401744e-05, "loss": 0.2846, "step": 4670 }, { "epoch": 0.36, "learning_rate": 1.6356275303643723e-05, "loss": 0.2349, "step": 4680 }, { "epoch": 0.37, "learning_rate": 1.6348489567113052e-05, "loss": 0.2729, "step": 4690 }, { "epoch": 0.37, "learning_rate": 1.6340703830582374e-05, "loss": 0.1976, "step": 4700 }, { "epoch": 0.37, "learning_rate": 1.63329180940517e-05, "loss": 0.3199, "step": 4710 }, { "epoch": 0.37, "learning_rate": 1.6325132357521025e-05, "loss": 0.4013, "step": 4720 }, { "epoch": 0.37, "learning_rate": 1.6317346620990346e-05, "loss": 0.2417, "step": 4730 }, { "epoch": 0.37, "learning_rate": 1.6309560884459672e-05, "loss": 0.2593, "step": 4740 }, { "epoch": 0.37, "learning_rate": 1.6301775147928997e-05, "loss": 0.2663, "step": 4750 }, { "epoch": 0.37, "learning_rate": 1.629398941139832e-05, "loss": 0.2613, "step": 4760 }, { "epoch": 0.37, "learning_rate": 1.6286203674867644e-05, "loss": 0.3266, "step": 4770 }, { "epoch": 0.37, "learning_rate": 1.6278417938336966e-05, "loss": 0.3107, "step": 4780 }, { "epoch": 0.37, "learning_rate": 1.627063220180629e-05, "loss": 0.278, "step": 4790 }, { "epoch": 0.37, "learning_rate": 1.6262846465275617e-05, "loss": 0.3524, "step": 4800 }, { "epoch": 0.37, "learning_rate": 1.625506072874494e-05, "loss": 0.2858, "step": 4810 }, { "epoch": 0.38, "learning_rate": 1.6247274992214267e-05, "loss": 0.292, "step": 4820 }, { "epoch": 0.38, "learning_rate": 1.623948925568359e-05, "loss": 0.2983, "step": 4830 }, { "epoch": 0.38, "learning_rate": 1.623170351915291e-05, "loss": 0.2949, "step": 4840 }, { "epoch": 0.38, "learning_rate": 1.622391778262224e-05, "loss": 0.3657, "step": 4850 }, { "epoch": 0.38, "learning_rate": 1.6216132046091562e-05, "loss": 0.3057, "step": 4860 }, { "epoch": 0.38, "learning_rate": 1.6208346309560884e-05, "loss": 0.2418, "step": 4870 }, { "epoch": 0.38, "learning_rate": 1.6200560573030212e-05, "loss": 0.3026, "step": 4880 }, { "epoch": 0.38, "learning_rate": 1.6192774836499534e-05, "loss": 0.2972, "step": 4890 }, { "epoch": 0.38, "learning_rate": 1.6184989099968856e-05, "loss": 0.2747, "step": 4900 }, { "epoch": 0.38, "learning_rate": 1.617720336343818e-05, "loss": 0.2987, "step": 4910 }, { "epoch": 0.38, "learning_rate": 1.6169417626907507e-05, "loss": 0.3031, "step": 4920 }, { "epoch": 0.38, "learning_rate": 1.616163189037683e-05, "loss": 0.3612, "step": 4930 }, { "epoch": 0.38, "learning_rate": 1.6153846153846154e-05, "loss": 0.2909, "step": 4940 }, { "epoch": 0.39, "learning_rate": 1.614606041731548e-05, "loss": 0.2469, "step": 4950 }, { "epoch": 0.39, "learning_rate": 1.6138274680784805e-05, "loss": 0.2322, "step": 4960 }, { "epoch": 0.39, "learning_rate": 1.6130488944254127e-05, "loss": 0.3777, "step": 4970 }, { "epoch": 0.39, "learning_rate": 1.6122703207723452e-05, "loss": 0.2769, "step": 4980 }, { "epoch": 0.39, "learning_rate": 1.6114917471192777e-05, "loss": 0.3218, "step": 4990 }, { "epoch": 0.39, "learning_rate": 1.61071317346621e-05, "loss": 0.3272, "step": 5000 }, { "epoch": 0.39, "learning_rate": 1.6099345998131424e-05, "loss": 0.3187, "step": 5010 }, { "epoch": 0.39, "learning_rate": 1.609156026160075e-05, "loss": 0.228, "step": 5020 }, { "epoch": 0.39, "learning_rate": 1.608377452507007e-05, "loss": 0.2892, "step": 5030 }, { "epoch": 0.39, "learning_rate": 1.6075988788539397e-05, "loss": 0.3135, "step": 5040 }, { "epoch": 0.39, "learning_rate": 1.6068203052008722e-05, "loss": 0.2711, "step": 5050 }, { "epoch": 0.39, "learning_rate": 1.6060417315478044e-05, "loss": 0.2548, "step": 5060 }, { "epoch": 0.39, "learning_rate": 1.605263157894737e-05, "loss": 0.256, "step": 5070 }, { "epoch": 0.4, "learning_rate": 1.6044845842416695e-05, "loss": 0.3005, "step": 5080 }, { "epoch": 0.4, "learning_rate": 1.6037060105886017e-05, "loss": 0.3019, "step": 5090 }, { "epoch": 0.4, "learning_rate": 1.6029274369355342e-05, "loss": 0.2936, "step": 5100 }, { "epoch": 0.4, "learning_rate": 1.6021488632824667e-05, "loss": 0.3083, "step": 5110 }, { "epoch": 0.4, "learning_rate": 1.601370289629399e-05, "loss": 0.2696, "step": 5120 }, { "epoch": 0.4, "learning_rate": 1.6005917159763314e-05, "loss": 0.3029, "step": 5130 }, { "epoch": 0.4, "learning_rate": 1.599813142323264e-05, "loss": 0.2439, "step": 5140 }, { "epoch": 0.4, "learning_rate": 1.599034568670196e-05, "loss": 0.2714, "step": 5150 }, { "epoch": 0.4, "learning_rate": 1.5982559950171287e-05, "loss": 0.3455, "step": 5160 }, { "epoch": 0.4, "learning_rate": 1.5974774213640612e-05, "loss": 0.273, "step": 5170 }, { "epoch": 0.4, "learning_rate": 1.5966988477109934e-05, "loss": 0.216, "step": 5180 }, { "epoch": 0.4, "learning_rate": 1.595920274057926e-05, "loss": 0.3535, "step": 5190 }, { "epoch": 0.4, "learning_rate": 1.5951417004048585e-05, "loss": 0.2712, "step": 5200 }, { "epoch": 0.41, "learning_rate": 1.594363126751791e-05, "loss": 0.4113, "step": 5210 }, { "epoch": 0.41, "learning_rate": 1.5935845530987232e-05, "loss": 0.2541, "step": 5220 }, { "epoch": 0.41, "learning_rate": 1.5928059794456557e-05, "loss": 0.2662, "step": 5230 }, { "epoch": 0.41, "learning_rate": 1.5920274057925882e-05, "loss": 0.2713, "step": 5240 }, { "epoch": 0.41, "learning_rate": 1.5912488321395204e-05, "loss": 0.252, "step": 5250 }, { "epoch": 0.41, "learning_rate": 1.590470258486453e-05, "loss": 0.3018, "step": 5260 }, { "epoch": 0.41, "learning_rate": 1.5896916848333855e-05, "loss": 0.2971, "step": 5270 }, { "epoch": 0.41, "learning_rate": 1.5889131111803177e-05, "loss": 0.2898, "step": 5280 }, { "epoch": 0.41, "learning_rate": 1.5881345375272502e-05, "loss": 0.3558, "step": 5290 }, { "epoch": 0.41, "learning_rate": 1.5873559638741827e-05, "loss": 0.3142, "step": 5300 }, { "epoch": 0.41, "learning_rate": 1.586577390221115e-05, "loss": 0.308, "step": 5310 }, { "epoch": 0.41, "learning_rate": 1.5857988165680475e-05, "loss": 0.2551, "step": 5320 }, { "epoch": 0.41, "learning_rate": 1.58502024291498e-05, "loss": 0.3259, "step": 5330 }, { "epoch": 0.42, "learning_rate": 1.5842416692619122e-05, "loss": 0.3212, "step": 5340 }, { "epoch": 0.42, "learning_rate": 1.5834630956088447e-05, "loss": 0.3083, "step": 5350 }, { "epoch": 0.42, "learning_rate": 1.5826845219557773e-05, "loss": 0.2442, "step": 5360 }, { "epoch": 0.42, "learning_rate": 1.5819059483027094e-05, "loss": 0.2715, "step": 5370 }, { "epoch": 0.42, "learning_rate": 1.581127374649642e-05, "loss": 0.314, "step": 5380 }, { "epoch": 0.42, "learning_rate": 1.5803488009965745e-05, "loss": 0.3391, "step": 5390 }, { "epoch": 0.42, "learning_rate": 1.5795702273435067e-05, "loss": 0.3402, "step": 5400 }, { "epoch": 0.42, "learning_rate": 1.5787916536904392e-05, "loss": 0.2857, "step": 5410 }, { "epoch": 0.42, "learning_rate": 1.5780130800373718e-05, "loss": 0.2099, "step": 5420 }, { "epoch": 0.42, "learning_rate": 1.577234506384304e-05, "loss": 0.2864, "step": 5430 }, { "epoch": 0.42, "learning_rate": 1.5764559327312365e-05, "loss": 0.3086, "step": 5440 }, { "epoch": 0.42, "learning_rate": 1.575677359078169e-05, "loss": 0.2425, "step": 5450 }, { "epoch": 0.43, "learning_rate": 1.5748987854251015e-05, "loss": 0.3098, "step": 5460 }, { "epoch": 0.43, "learning_rate": 1.5741202117720337e-05, "loss": 0.2194, "step": 5470 }, { "epoch": 0.43, "learning_rate": 1.5733416381189663e-05, "loss": 0.3205, "step": 5480 }, { "epoch": 0.43, "learning_rate": 1.5725630644658988e-05, "loss": 0.3202, "step": 5490 }, { "epoch": 0.43, "learning_rate": 1.571784490812831e-05, "loss": 0.3467, "step": 5500 }, { "epoch": 0.43, "learning_rate": 1.5710059171597635e-05, "loss": 0.2815, "step": 5510 }, { "epoch": 0.43, "learning_rate": 1.570227343506696e-05, "loss": 0.2683, "step": 5520 }, { "epoch": 0.43, "learning_rate": 1.5694487698536282e-05, "loss": 0.3093, "step": 5530 }, { "epoch": 0.43, "learning_rate": 1.5686701962005608e-05, "loss": 0.2464, "step": 5540 }, { "epoch": 0.43, "learning_rate": 1.5678916225474933e-05, "loss": 0.3131, "step": 5550 }, { "epoch": 0.43, "learning_rate": 1.5671130488944255e-05, "loss": 0.262, "step": 5560 }, { "epoch": 0.43, "learning_rate": 1.566334475241358e-05, "loss": 0.329, "step": 5570 }, { "epoch": 0.43, "learning_rate": 1.5655559015882905e-05, "loss": 0.3237, "step": 5580 }, { "epoch": 0.44, "learning_rate": 1.5647773279352227e-05, "loss": 0.2645, "step": 5590 }, { "epoch": 0.44, "learning_rate": 1.5639987542821553e-05, "loss": 0.3115, "step": 5600 }, { "epoch": 0.44, "learning_rate": 1.5632201806290878e-05, "loss": 0.2472, "step": 5610 }, { "epoch": 0.44, "learning_rate": 1.56244160697602e-05, "loss": 0.2549, "step": 5620 }, { "epoch": 0.44, "learning_rate": 1.5616630333229525e-05, "loss": 0.3139, "step": 5630 }, { "epoch": 0.44, "learning_rate": 1.560884459669885e-05, "loss": 0.2782, "step": 5640 }, { "epoch": 0.44, "learning_rate": 1.5601058860168172e-05, "loss": 0.2747, "step": 5650 }, { "epoch": 0.44, "learning_rate": 1.5593273123637498e-05, "loss": 0.2519, "step": 5660 }, { "epoch": 0.44, "learning_rate": 1.5585487387106823e-05, "loss": 0.3232, "step": 5670 }, { "epoch": 0.44, "learning_rate": 1.5577701650576145e-05, "loss": 0.281, "step": 5680 }, { "epoch": 0.44, "learning_rate": 1.556991591404547e-05, "loss": 0.2814, "step": 5690 }, { "epoch": 0.44, "learning_rate": 1.5562130177514792e-05, "loss": 0.3394, "step": 5700 }, { "epoch": 0.44, "learning_rate": 1.555434444098412e-05, "loss": 0.257, "step": 5710 }, { "epoch": 0.45, "learning_rate": 1.5546558704453443e-05, "loss": 0.2698, "step": 5720 }, { "epoch": 0.45, "learning_rate": 1.5538772967922765e-05, "loss": 0.3376, "step": 5730 }, { "epoch": 0.45, "learning_rate": 1.5530987231392093e-05, "loss": 0.2416, "step": 5740 }, { "epoch": 0.45, "learning_rate": 1.5523201494861415e-05, "loss": 0.3062, "step": 5750 }, { "epoch": 0.45, "learning_rate": 1.5515415758330737e-05, "loss": 0.264, "step": 5760 }, { "epoch": 0.45, "learning_rate": 1.5507630021800066e-05, "loss": 0.2184, "step": 5770 }, { "epoch": 0.45, "learning_rate": 1.5499844285269388e-05, "loss": 0.2969, "step": 5780 }, { "epoch": 0.45, "learning_rate": 1.549205854873871e-05, "loss": 0.2512, "step": 5790 }, { "epoch": 0.45, "learning_rate": 1.5484272812208038e-05, "loss": 0.2614, "step": 5800 }, { "epoch": 0.45, "learning_rate": 1.547648707567736e-05, "loss": 0.2285, "step": 5810 }, { "epoch": 0.45, "learning_rate": 1.5468701339146685e-05, "loss": 0.254, "step": 5820 }, { "epoch": 0.45, "learning_rate": 1.5460915602616007e-05, "loss": 0.2227, "step": 5830 }, { "epoch": 0.45, "learning_rate": 1.5453129866085333e-05, "loss": 0.2734, "step": 5840 }, { "epoch": 0.46, "learning_rate": 1.5445344129554658e-05, "loss": 0.2874, "step": 5850 }, { "epoch": 0.46, "learning_rate": 1.543755839302398e-05, "loss": 0.2869, "step": 5860 }, { "epoch": 0.46, "learning_rate": 1.5429772656493305e-05, "loss": 0.2445, "step": 5870 }, { "epoch": 0.46, "learning_rate": 1.542198691996263e-05, "loss": 0.331, "step": 5880 }, { "epoch": 0.46, "learning_rate": 1.5414201183431952e-05, "loss": 0.2387, "step": 5890 }, { "epoch": 0.46, "learning_rate": 1.5406415446901278e-05, "loss": 0.2279, "step": 5900 }, { "epoch": 0.46, "learning_rate": 1.5398629710370603e-05, "loss": 0.2091, "step": 5910 }, { "epoch": 0.46, "learning_rate": 1.5390843973839925e-05, "loss": 0.2894, "step": 5920 }, { "epoch": 0.46, "learning_rate": 1.538305823730925e-05, "loss": 0.3092, "step": 5930 }, { "epoch": 0.46, "learning_rate": 1.5375272500778576e-05, "loss": 0.2876, "step": 5940 }, { "epoch": 0.46, "learning_rate": 1.5367486764247897e-05, "loss": 0.2665, "step": 5950 }, { "epoch": 0.46, "learning_rate": 1.5359701027717223e-05, "loss": 0.2591, "step": 5960 }, { "epoch": 0.46, "learning_rate": 1.5351915291186548e-05, "loss": 0.3199, "step": 5970 }, { "epoch": 0.47, "learning_rate": 1.534412955465587e-05, "loss": 0.2275, "step": 5980 }, { "epoch": 0.47, "learning_rate": 1.5336343818125195e-05, "loss": 0.2238, "step": 5990 }, { "epoch": 0.47, "learning_rate": 1.532855808159452e-05, "loss": 0.2585, "step": 6000 }, { "epoch": 0.47, "learning_rate": 1.5320772345063842e-05, "loss": 0.241, "step": 6010 }, { "epoch": 0.47, "learning_rate": 1.5312986608533168e-05, "loss": 0.2876, "step": 6020 }, { "epoch": 0.47, "learning_rate": 1.5305200872002493e-05, "loss": 0.2868, "step": 6030 }, { "epoch": 0.47, "learning_rate": 1.5297415135471815e-05, "loss": 0.2299, "step": 6040 }, { "epoch": 0.47, "learning_rate": 1.528962939894114e-05, "loss": 0.3401, "step": 6050 }, { "epoch": 0.47, "learning_rate": 1.5281843662410466e-05, "loss": 0.2593, "step": 6060 }, { "epoch": 0.47, "learning_rate": 1.527405792587979e-05, "loss": 0.2513, "step": 6070 }, { "epoch": 0.47, "learning_rate": 1.5266272189349113e-05, "loss": 0.3075, "step": 6080 }, { "epoch": 0.47, "learning_rate": 1.5258486452818438e-05, "loss": 0.3274, "step": 6090 }, { "epoch": 0.47, "learning_rate": 1.5250700716287763e-05, "loss": 0.2581, "step": 6100 }, { "epoch": 0.48, "learning_rate": 1.5242914979757087e-05, "loss": 0.2692, "step": 6110 }, { "epoch": 0.48, "learning_rate": 1.523512924322641e-05, "loss": 0.3244, "step": 6120 }, { "epoch": 0.48, "learning_rate": 1.5227343506695736e-05, "loss": 0.2415, "step": 6130 }, { "epoch": 0.48, "learning_rate": 1.521955777016506e-05, "loss": 0.2424, "step": 6140 }, { "epoch": 0.48, "learning_rate": 1.5211772033634381e-05, "loss": 0.2477, "step": 6150 }, { "epoch": 0.48, "learning_rate": 1.5203986297103708e-05, "loss": 0.2988, "step": 6160 }, { "epoch": 0.48, "learning_rate": 1.5196200560573032e-05, "loss": 0.2726, "step": 6170 }, { "epoch": 0.48, "learning_rate": 1.5188414824042354e-05, "loss": 0.3212, "step": 6180 }, { "epoch": 0.48, "learning_rate": 1.5180629087511681e-05, "loss": 0.2562, "step": 6190 }, { "epoch": 0.48, "learning_rate": 1.5172843350981003e-05, "loss": 0.2075, "step": 6200 }, { "epoch": 0.48, "learning_rate": 1.516505761445033e-05, "loss": 0.3107, "step": 6210 }, { "epoch": 0.48, "learning_rate": 1.5157271877919653e-05, "loss": 0.267, "step": 6220 }, { "epoch": 0.49, "learning_rate": 1.5149486141388975e-05, "loss": 0.2532, "step": 6230 }, { "epoch": 0.49, "learning_rate": 1.5141700404858302e-05, "loss": 0.2778, "step": 6240 }, { "epoch": 0.49, "learning_rate": 1.5133914668327624e-05, "loss": 0.3576, "step": 6250 }, { "epoch": 0.49, "learning_rate": 1.5126128931796948e-05, "loss": 0.2662, "step": 6260 }, { "epoch": 0.49, "learning_rate": 1.5118343195266275e-05, "loss": 0.2563, "step": 6270 }, { "epoch": 0.49, "learning_rate": 1.5110557458735597e-05, "loss": 0.293, "step": 6280 }, { "epoch": 0.49, "learning_rate": 1.510277172220492e-05, "loss": 0.279, "step": 6290 }, { "epoch": 0.49, "learning_rate": 1.5094985985674246e-05, "loss": 0.2837, "step": 6300 }, { "epoch": 0.49, "learning_rate": 1.508720024914357e-05, "loss": 0.2893, "step": 6310 }, { "epoch": 0.49, "learning_rate": 1.5079414512612896e-05, "loss": 0.3511, "step": 6320 }, { "epoch": 0.49, "learning_rate": 1.5071628776082218e-05, "loss": 0.378, "step": 6330 }, { "epoch": 0.49, "learning_rate": 1.5063843039551542e-05, "loss": 0.3159, "step": 6340 }, { "epoch": 0.49, "learning_rate": 1.5056057303020867e-05, "loss": 0.2108, "step": 6350 }, { "epoch": 0.5, "learning_rate": 1.504827156649019e-05, "loss": 0.2183, "step": 6360 }, { "epoch": 0.5, "learning_rate": 1.5040485829959514e-05, "loss": 0.2395, "step": 6370 }, { "epoch": 0.5, "learning_rate": 1.503270009342884e-05, "loss": 0.3082, "step": 6380 }, { "epoch": 0.5, "learning_rate": 1.5024914356898163e-05, "loss": 0.2743, "step": 6390 }, { "epoch": 0.5, "learning_rate": 1.5017128620367487e-05, "loss": 0.3109, "step": 6400 }, { "epoch": 0.5, "learning_rate": 1.5009342883836812e-05, "loss": 0.2441, "step": 6410 }, { "epoch": 0.5, "learning_rate": 1.5001557147306136e-05, "loss": 0.2637, "step": 6420 }, { "epoch": 0.5, "learning_rate": 1.499377141077546e-05, "loss": 0.2905, "step": 6430 }, { "epoch": 0.5, "learning_rate": 1.4985985674244785e-05, "loss": 0.2632, "step": 6440 }, { "epoch": 0.5, "learning_rate": 1.4978199937714108e-05, "loss": 0.217, "step": 6450 }, { "epoch": 0.5, "learning_rate": 1.4970414201183433e-05, "loss": 0.2873, "step": 6460 }, { "epoch": 0.5, "learning_rate": 1.4962628464652757e-05, "loss": 0.279, "step": 6470 }, { "epoch": 0.5, "learning_rate": 1.495484272812208e-05, "loss": 0.2589, "step": 6480 }, { "epoch": 0.51, "learning_rate": 1.4947056991591406e-05, "loss": 0.3188, "step": 6490 }, { "epoch": 0.51, "learning_rate": 1.493927125506073e-05, "loss": 0.2329, "step": 6500 }, { "epoch": 0.51, "learning_rate": 1.4931485518530053e-05, "loss": 0.2875, "step": 6510 }, { "epoch": 0.51, "learning_rate": 1.4923699781999378e-05, "loss": 0.2437, "step": 6520 }, { "epoch": 0.51, "learning_rate": 1.4915914045468702e-05, "loss": 0.2798, "step": 6530 }, { "epoch": 0.51, "learning_rate": 1.4908128308938026e-05, "loss": 0.2659, "step": 6540 }, { "epoch": 0.51, "learning_rate": 1.4900342572407351e-05, "loss": 0.2605, "step": 6550 }, { "epoch": 0.51, "learning_rate": 1.4892556835876675e-05, "loss": 0.3059, "step": 6560 }, { "epoch": 0.51, "learning_rate": 1.4884771099346e-05, "loss": 0.1881, "step": 6570 }, { "epoch": 0.51, "learning_rate": 1.4876985362815324e-05, "loss": 0.268, "step": 6580 }, { "epoch": 0.51, "learning_rate": 1.4869199626284647e-05, "loss": 0.2027, "step": 6590 }, { "epoch": 0.51, "learning_rate": 1.4861413889753972e-05, "loss": 0.2267, "step": 6600 }, { "epoch": 0.51, "learning_rate": 1.4853628153223296e-05, "loss": 0.2685, "step": 6610 }, { "epoch": 0.52, "learning_rate": 1.484584241669262e-05, "loss": 0.2352, "step": 6620 }, { "epoch": 0.52, "learning_rate": 1.4838056680161945e-05, "loss": 0.2105, "step": 6630 }, { "epoch": 0.52, "learning_rate": 1.4830270943631269e-05, "loss": 0.2554, "step": 6640 }, { "epoch": 0.52, "learning_rate": 1.4822485207100592e-05, "loss": 0.3014, "step": 6650 }, { "epoch": 0.52, "learning_rate": 1.4814699470569917e-05, "loss": 0.2456, "step": 6660 }, { "epoch": 0.52, "learning_rate": 1.4806913734039241e-05, "loss": 0.2588, "step": 6670 }, { "epoch": 0.52, "learning_rate": 1.4799127997508565e-05, "loss": 0.233, "step": 6680 }, { "epoch": 0.52, "learning_rate": 1.479134226097789e-05, "loss": 0.2118, "step": 6690 }, { "epoch": 0.52, "learning_rate": 1.4783556524447214e-05, "loss": 0.3248, "step": 6700 }, { "epoch": 0.52, "learning_rate": 1.4775770787916539e-05, "loss": 0.2562, "step": 6710 }, { "epoch": 0.52, "learning_rate": 1.4767985051385862e-05, "loss": 0.3039, "step": 6720 }, { "epoch": 0.52, "learning_rate": 1.4760199314855186e-05, "loss": 0.2707, "step": 6730 }, { "epoch": 0.52, "learning_rate": 1.4752413578324511e-05, "loss": 0.2062, "step": 6740 }, { "epoch": 0.53, "learning_rate": 1.4744627841793835e-05, "loss": 0.2664, "step": 6750 }, { "epoch": 0.53, "learning_rate": 1.4736842105263159e-05, "loss": 0.2372, "step": 6760 }, { "epoch": 0.53, "learning_rate": 1.4729056368732484e-05, "loss": 0.3098, "step": 6770 }, { "epoch": 0.53, "learning_rate": 1.4721270632201807e-05, "loss": 0.2494, "step": 6780 }, { "epoch": 0.53, "learning_rate": 1.4713484895671131e-05, "loss": 0.3575, "step": 6790 }, { "epoch": 0.53, "learning_rate": 1.4705699159140456e-05, "loss": 0.2152, "step": 6800 }, { "epoch": 0.53, "learning_rate": 1.469791342260978e-05, "loss": 0.2933, "step": 6810 }, { "epoch": 0.53, "learning_rate": 1.4690127686079105e-05, "loss": 0.2765, "step": 6820 }, { "epoch": 0.53, "learning_rate": 1.4682341949548429e-05, "loss": 0.2998, "step": 6830 }, { "epoch": 0.53, "learning_rate": 1.4674556213017752e-05, "loss": 0.2421, "step": 6840 }, { "epoch": 0.53, "learning_rate": 1.4666770476487078e-05, "loss": 0.2567, "step": 6850 }, { "epoch": 0.53, "learning_rate": 1.4658984739956401e-05, "loss": 0.2217, "step": 6860 }, { "epoch": 0.53, "learning_rate": 1.4651199003425725e-05, "loss": 0.2064, "step": 6870 }, { "epoch": 0.54, "learning_rate": 1.464341326689505e-05, "loss": 0.2982, "step": 6880 }, { "epoch": 0.54, "learning_rate": 1.4635627530364374e-05, "loss": 0.2313, "step": 6890 }, { "epoch": 0.54, "learning_rate": 1.4627841793833698e-05, "loss": 0.3445, "step": 6900 }, { "epoch": 0.54, "learning_rate": 1.4620056057303023e-05, "loss": 0.3044, "step": 6910 }, { "epoch": 0.54, "learning_rate": 1.4612270320772346e-05, "loss": 0.2654, "step": 6920 }, { "epoch": 0.54, "learning_rate": 1.460448458424167e-05, "loss": 0.2996, "step": 6930 }, { "epoch": 0.54, "learning_rate": 1.4596698847710995e-05, "loss": 0.2435, "step": 6940 }, { "epoch": 0.54, "learning_rate": 1.4588913111180319e-05, "loss": 0.348, "step": 6950 }, { "epoch": 0.54, "learning_rate": 1.4581127374649644e-05, "loss": 0.2329, "step": 6960 }, { "epoch": 0.54, "learning_rate": 1.4573341638118968e-05, "loss": 0.2526, "step": 6970 }, { "epoch": 0.54, "learning_rate": 1.4565555901588291e-05, "loss": 0.1841, "step": 6980 }, { "epoch": 0.54, "learning_rate": 1.4557770165057617e-05, "loss": 0.2716, "step": 6990 }, { "epoch": 0.55, "learning_rate": 1.454998442852694e-05, "loss": 0.2528, "step": 7000 }, { "epoch": 0.55, "learning_rate": 1.4542198691996264e-05, "loss": 0.2268, "step": 7010 }, { "epoch": 0.55, "learning_rate": 1.453441295546559e-05, "loss": 0.2281, "step": 7020 }, { "epoch": 0.55, "learning_rate": 1.4526627218934913e-05, "loss": 0.2526, "step": 7030 }, { "epoch": 0.55, "learning_rate": 1.4518841482404236e-05, "loss": 0.3006, "step": 7040 }, { "epoch": 0.55, "learning_rate": 1.4511055745873562e-05, "loss": 0.3228, "step": 7050 }, { "epoch": 0.55, "learning_rate": 1.4503270009342885e-05, "loss": 0.2666, "step": 7060 }, { "epoch": 0.55, "learning_rate": 1.449548427281221e-05, "loss": 0.2406, "step": 7070 }, { "epoch": 0.55, "learning_rate": 1.4487698536281534e-05, "loss": 0.2733, "step": 7080 }, { "epoch": 0.55, "learning_rate": 1.4479912799750858e-05, "loss": 0.2883, "step": 7090 }, { "epoch": 0.55, "learning_rate": 1.4472127063220183e-05, "loss": 0.2873, "step": 7100 }, { "epoch": 0.55, "learning_rate": 1.4464341326689507e-05, "loss": 0.2031, "step": 7110 }, { "epoch": 0.55, "learning_rate": 1.4456555590158829e-05, "loss": 0.2494, "step": 7120 }, { "epoch": 0.56, "learning_rate": 1.4448769853628156e-05, "loss": 0.2951, "step": 7130 }, { "epoch": 0.56, "learning_rate": 1.444098411709748e-05, "loss": 0.3056, "step": 7140 }, { "epoch": 0.56, "learning_rate": 1.4433198380566801e-05, "loss": 0.3215, "step": 7150 }, { "epoch": 0.56, "learning_rate": 1.4425412644036128e-05, "loss": 0.2782, "step": 7160 }, { "epoch": 0.56, "learning_rate": 1.441762690750545e-05, "loss": 0.28, "step": 7170 }, { "epoch": 0.56, "learning_rate": 1.4409841170974774e-05, "loss": 0.2594, "step": 7180 }, { "epoch": 0.56, "learning_rate": 1.44020554344441e-05, "loss": 0.2574, "step": 7190 }, { "epoch": 0.56, "learning_rate": 1.4394269697913423e-05, "loss": 0.2778, "step": 7200 }, { "epoch": 0.56, "learning_rate": 1.438648396138275e-05, "loss": 0.3185, "step": 7210 }, { "epoch": 0.56, "learning_rate": 1.4378698224852072e-05, "loss": 0.2715, "step": 7220 }, { "epoch": 0.56, "learning_rate": 1.4370912488321395e-05, "loss": 0.2555, "step": 7230 }, { "epoch": 0.56, "learning_rate": 1.4363126751790722e-05, "loss": 0.2159, "step": 7240 }, { "epoch": 0.56, "learning_rate": 1.4355341015260044e-05, "loss": 0.3302, "step": 7250 }, { "epoch": 0.57, "learning_rate": 1.4347555278729368e-05, "loss": 0.1962, "step": 7260 }, { "epoch": 0.57, "learning_rate": 1.4339769542198693e-05, "loss": 0.223, "step": 7270 }, { "epoch": 0.57, "learning_rate": 1.4331983805668017e-05, "loss": 0.2492, "step": 7280 }, { "epoch": 0.57, "learning_rate": 1.432419806913734e-05, "loss": 0.2742, "step": 7290 }, { "epoch": 0.57, "learning_rate": 1.4316412332606665e-05, "loss": 0.2691, "step": 7300 }, { "epoch": 0.57, "learning_rate": 1.4308626596075989e-05, "loss": 0.2728, "step": 7310 }, { "epoch": 0.57, "learning_rate": 1.4300840859545316e-05, "loss": 0.2205, "step": 7320 }, { "epoch": 0.57, "learning_rate": 1.4293055123014638e-05, "loss": 0.2697, "step": 7330 }, { "epoch": 0.57, "learning_rate": 1.4285269386483962e-05, "loss": 0.2413, "step": 7340 }, { "epoch": 0.57, "learning_rate": 1.4277483649953287e-05, "loss": 0.2632, "step": 7350 }, { "epoch": 0.57, "learning_rate": 1.426969791342261e-05, "loss": 0.2231, "step": 7360 }, { "epoch": 0.57, "learning_rate": 1.4261912176891934e-05, "loss": 0.2651, "step": 7370 }, { "epoch": 0.57, "learning_rate": 1.425412644036126e-05, "loss": 0.3421, "step": 7380 }, { "epoch": 0.58, "learning_rate": 1.4246340703830583e-05, "loss": 0.2889, "step": 7390 }, { "epoch": 0.58, "learning_rate": 1.4238554967299907e-05, "loss": 0.2374, "step": 7400 }, { "epoch": 0.58, "learning_rate": 1.4230769230769232e-05, "loss": 0.3111, "step": 7410 }, { "epoch": 0.58, "learning_rate": 1.4222983494238555e-05, "loss": 0.2267, "step": 7420 }, { "epoch": 0.58, "learning_rate": 1.4215197757707879e-05, "loss": 0.3142, "step": 7430 }, { "epoch": 0.58, "learning_rate": 1.4207412021177204e-05, "loss": 0.2473, "step": 7440 }, { "epoch": 0.58, "learning_rate": 1.4199626284646528e-05, "loss": 0.3039, "step": 7450 }, { "epoch": 0.58, "learning_rate": 1.4191840548115853e-05, "loss": 0.2859, "step": 7460 }, { "epoch": 0.58, "learning_rate": 1.4184054811585177e-05, "loss": 0.1975, "step": 7470 }, { "epoch": 0.58, "learning_rate": 1.41762690750545e-05, "loss": 0.247, "step": 7480 }, { "epoch": 0.58, "learning_rate": 1.4168483338523826e-05, "loss": 0.1781, "step": 7490 }, { "epoch": 0.58, "learning_rate": 1.416069760199315e-05, "loss": 0.2485, "step": 7500 }, { "epoch": 0.58, "learning_rate": 1.4152911865462473e-05, "loss": 0.3146, "step": 7510 }, { "epoch": 0.59, "learning_rate": 1.4145126128931798e-05, "loss": 0.2644, "step": 7520 }, { "epoch": 0.59, "learning_rate": 1.4137340392401122e-05, "loss": 0.2583, "step": 7530 }, { "epoch": 0.59, "learning_rate": 1.4129554655870446e-05, "loss": 0.2273, "step": 7540 }, { "epoch": 0.59, "learning_rate": 1.412176891933977e-05, "loss": 0.2436, "step": 7550 }, { "epoch": 0.59, "learning_rate": 1.4113983182809094e-05, "loss": 0.2849, "step": 7560 }, { "epoch": 0.59, "learning_rate": 1.410619744627842e-05, "loss": 0.2887, "step": 7570 }, { "epoch": 0.59, "learning_rate": 1.4098411709747743e-05, "loss": 0.2867, "step": 7580 }, { "epoch": 0.59, "learning_rate": 1.4090625973217067e-05, "loss": 0.2833, "step": 7590 }, { "epoch": 0.59, "learning_rate": 1.4082840236686392e-05, "loss": 0.2558, "step": 7600 }, { "epoch": 0.59, "learning_rate": 1.4075054500155716e-05, "loss": 0.2488, "step": 7610 }, { "epoch": 0.59, "learning_rate": 1.406726876362504e-05, "loss": 0.2157, "step": 7620 }, { "epoch": 0.59, "learning_rate": 1.4059483027094365e-05, "loss": 0.2722, "step": 7630 }, { "epoch": 0.59, "learning_rate": 1.4051697290563688e-05, "loss": 0.2399, "step": 7640 }, { "epoch": 0.6, "learning_rate": 1.4043911554033012e-05, "loss": 0.254, "step": 7650 }, { "epoch": 0.6, "learning_rate": 1.4036125817502337e-05, "loss": 0.2834, "step": 7660 }, { "epoch": 0.6, "learning_rate": 1.4028340080971661e-05, "loss": 0.2653, "step": 7670 }, { "epoch": 0.6, "learning_rate": 1.4020554344440984e-05, "loss": 0.3492, "step": 7680 }, { "epoch": 0.6, "learning_rate": 1.401276860791031e-05, "loss": 0.2323, "step": 7690 }, { "epoch": 0.6, "learning_rate": 1.4004982871379633e-05, "loss": 0.2731, "step": 7700 }, { "epoch": 0.6, "learning_rate": 1.3997197134848959e-05, "loss": 0.2051, "step": 7710 }, { "epoch": 0.6, "learning_rate": 1.3989411398318282e-05, "loss": 0.2671, "step": 7720 }, { "epoch": 0.6, "learning_rate": 1.3981625661787606e-05, "loss": 0.2759, "step": 7730 }, { "epoch": 0.6, "learning_rate": 1.3973839925256931e-05, "loss": 0.2552, "step": 7740 }, { "epoch": 0.6, "learning_rate": 1.3966054188726255e-05, "loss": 0.2265, "step": 7750 }, { "epoch": 0.6, "learning_rate": 1.3958268452195578e-05, "loss": 0.2216, "step": 7760 }, { "epoch": 0.6, "learning_rate": 1.3950482715664904e-05, "loss": 0.2281, "step": 7770 }, { "epoch": 0.61, "learning_rate": 1.3942696979134227e-05, "loss": 0.1875, "step": 7780 }, { "epoch": 0.61, "learning_rate": 1.3934911242603551e-05, "loss": 0.2821, "step": 7790 }, { "epoch": 0.61, "learning_rate": 1.3927125506072876e-05, "loss": 0.2133, "step": 7800 }, { "epoch": 0.61, "learning_rate": 1.39193397695422e-05, "loss": 0.2294, "step": 7810 }, { "epoch": 0.61, "learning_rate": 1.3911554033011525e-05, "loss": 0.2857, "step": 7820 }, { "epoch": 0.61, "learning_rate": 1.3903768296480849e-05, "loss": 0.2717, "step": 7830 }, { "epoch": 0.61, "learning_rate": 1.3895982559950172e-05, "loss": 0.2796, "step": 7840 }, { "epoch": 0.61, "learning_rate": 1.3888196823419498e-05, "loss": 0.268, "step": 7850 }, { "epoch": 0.61, "learning_rate": 1.3880411086888821e-05, "loss": 0.2628, "step": 7860 }, { "epoch": 0.61, "learning_rate": 1.3872625350358145e-05, "loss": 0.2389, "step": 7870 }, { "epoch": 0.61, "learning_rate": 1.386483961382747e-05, "loss": 0.2932, "step": 7880 }, { "epoch": 0.61, "learning_rate": 1.3857053877296794e-05, "loss": 0.2538, "step": 7890 }, { "epoch": 0.62, "learning_rate": 1.3849268140766117e-05, "loss": 0.2351, "step": 7900 }, { "epoch": 0.62, "learning_rate": 1.3841482404235443e-05, "loss": 0.2099, "step": 7910 }, { "epoch": 0.62, "learning_rate": 1.3833696667704766e-05, "loss": 0.1891, "step": 7920 }, { "epoch": 0.62, "learning_rate": 1.382591093117409e-05, "loss": 0.2529, "step": 7930 }, { "epoch": 0.62, "learning_rate": 1.3818125194643415e-05, "loss": 0.2546, "step": 7940 }, { "epoch": 0.62, "learning_rate": 1.3810339458112739e-05, "loss": 0.2655, "step": 7950 }, { "epoch": 0.62, "learning_rate": 1.3802553721582064e-05, "loss": 0.243, "step": 7960 }, { "epoch": 0.62, "learning_rate": 1.3794767985051388e-05, "loss": 0.2579, "step": 7970 }, { "epoch": 0.62, "learning_rate": 1.3786982248520711e-05, "loss": 0.2958, "step": 7980 }, { "epoch": 0.62, "learning_rate": 1.3779196511990037e-05, "loss": 0.2424, "step": 7990 }, { "epoch": 0.62, "learning_rate": 1.377141077545936e-05, "loss": 0.2509, "step": 8000 }, { "epoch": 0.62, "learning_rate": 1.3763625038928684e-05, "loss": 0.2683, "step": 8010 }, { "epoch": 0.62, "learning_rate": 1.3755839302398009e-05, "loss": 0.1821, "step": 8020 }, { "epoch": 0.63, "learning_rate": 1.3748053565867333e-05, "loss": 0.2214, "step": 8030 }, { "epoch": 0.63, "learning_rate": 1.3740267829336655e-05, "loss": 0.258, "step": 8040 }, { "epoch": 0.63, "learning_rate": 1.3732482092805982e-05, "loss": 0.2458, "step": 8050 }, { "epoch": 0.63, "learning_rate": 1.3724696356275305e-05, "loss": 0.2039, "step": 8060 }, { "epoch": 0.63, "learning_rate": 1.371691061974463e-05, "loss": 0.2078, "step": 8070 }, { "epoch": 0.63, "learning_rate": 1.3709124883213954e-05, "loss": 0.2607, "step": 8080 }, { "epoch": 0.63, "learning_rate": 1.3701339146683276e-05, "loss": 0.2988, "step": 8090 }, { "epoch": 0.63, "learning_rate": 1.3693553410152603e-05, "loss": 0.2406, "step": 8100 }, { "epoch": 0.63, "learning_rate": 1.3685767673621927e-05, "loss": 0.2635, "step": 8110 }, { "epoch": 0.63, "learning_rate": 1.3677981937091248e-05, "loss": 0.2343, "step": 8120 }, { "epoch": 0.63, "learning_rate": 1.3670196200560575e-05, "loss": 0.2251, "step": 8130 }, { "epoch": 0.63, "learning_rate": 1.3662410464029897e-05, "loss": 0.2247, "step": 8140 }, { "epoch": 0.63, "learning_rate": 1.3654624727499221e-05, "loss": 0.2332, "step": 8150 }, { "epoch": 0.64, "learning_rate": 1.3646838990968548e-05, "loss": 0.2405, "step": 8160 }, { "epoch": 0.64, "learning_rate": 1.363905325443787e-05, "loss": 0.3167, "step": 8170 }, { "epoch": 0.64, "learning_rate": 1.3631267517907194e-05, "loss": 0.293, "step": 8180 }, { "epoch": 0.64, "learning_rate": 1.362348178137652e-05, "loss": 0.2745, "step": 8190 }, { "epoch": 0.64, "learning_rate": 1.3615696044845842e-05, "loss": 0.201, "step": 8200 }, { "epoch": 0.64, "learning_rate": 1.360791030831517e-05, "loss": 0.2546, "step": 8210 }, { "epoch": 0.64, "learning_rate": 1.3600124571784491e-05, "loss": 0.219, "step": 8220 }, { "epoch": 0.64, "learning_rate": 1.3592338835253815e-05, "loss": 0.2208, "step": 8230 }, { "epoch": 0.64, "learning_rate": 1.3584553098723142e-05, "loss": 0.2707, "step": 8240 }, { "epoch": 0.64, "learning_rate": 1.3576767362192464e-05, "loss": 0.3258, "step": 8250 }, { "epoch": 0.64, "learning_rate": 1.3568981625661787e-05, "loss": 0.2519, "step": 8260 }, { "epoch": 0.64, "learning_rate": 1.3561195889131113e-05, "loss": 0.236, "step": 8270 }, { "epoch": 0.64, "learning_rate": 1.3553410152600436e-05, "loss": 0.2893, "step": 8280 }, { "epoch": 0.65, "learning_rate": 1.354562441606976e-05, "loss": 0.2164, "step": 8290 }, { "epoch": 0.65, "learning_rate": 1.3537838679539085e-05, "loss": 0.2337, "step": 8300 }, { "epoch": 0.65, "learning_rate": 1.3530052943008409e-05, "loss": 0.2531, "step": 8310 }, { "epoch": 0.65, "learning_rate": 1.3522267206477734e-05, "loss": 0.2735, "step": 8320 }, { "epoch": 0.65, "learning_rate": 1.3514481469947058e-05, "loss": 0.2704, "step": 8330 }, { "epoch": 0.65, "learning_rate": 1.3506695733416381e-05, "loss": 0.2442, "step": 8340 }, { "epoch": 0.65, "learning_rate": 1.3498909996885707e-05, "loss": 0.2818, "step": 8350 }, { "epoch": 0.65, "learning_rate": 1.349112426035503e-05, "loss": 0.2394, "step": 8360 }, { "epoch": 0.65, "learning_rate": 1.3483338523824354e-05, "loss": 0.2207, "step": 8370 }, { "epoch": 0.65, "learning_rate": 1.347555278729368e-05, "loss": 0.2092, "step": 8380 }, { "epoch": 0.65, "learning_rate": 1.3467767050763003e-05, "loss": 0.2016, "step": 8390 }, { "epoch": 0.65, "learning_rate": 1.3459981314232326e-05, "loss": 0.2382, "step": 8400 }, { "epoch": 0.65, "learning_rate": 1.3452195577701652e-05, "loss": 0.2259, "step": 8410 }, { "epoch": 0.66, "learning_rate": 1.3444409841170975e-05, "loss": 0.2597, "step": 8420 }, { "epoch": 0.66, "learning_rate": 1.3436624104640299e-05, "loss": 0.1858, "step": 8430 }, { "epoch": 0.66, "learning_rate": 1.3428838368109624e-05, "loss": 0.2511, "step": 8440 }, { "epoch": 0.66, "learning_rate": 1.3421052631578948e-05, "loss": 0.2191, "step": 8450 }, { "epoch": 0.66, "learning_rate": 1.3413266895048273e-05, "loss": 0.259, "step": 8460 }, { "epoch": 0.66, "learning_rate": 1.3405481158517597e-05, "loss": 0.2004, "step": 8470 }, { "epoch": 0.66, "learning_rate": 1.339769542198692e-05, "loss": 0.2483, "step": 8480 }, { "epoch": 0.66, "learning_rate": 1.3389909685456246e-05, "loss": 0.214, "step": 8490 }, { "epoch": 0.66, "learning_rate": 1.338212394892557e-05, "loss": 0.2958, "step": 8500 }, { "epoch": 0.66, "learning_rate": 1.3374338212394893e-05, "loss": 0.3244, "step": 8510 }, { "epoch": 0.66, "learning_rate": 1.3366552475864218e-05, "loss": 0.2065, "step": 8520 }, { "epoch": 0.66, "learning_rate": 1.3358766739333542e-05, "loss": 0.249, "step": 8530 }, { "epoch": 0.66, "learning_rate": 1.3350981002802865e-05, "loss": 0.2386, "step": 8540 }, { "epoch": 0.67, "learning_rate": 1.334319526627219e-05, "loss": 0.1867, "step": 8550 }, { "epoch": 0.67, "learning_rate": 1.3335409529741514e-05, "loss": 0.2001, "step": 8560 }, { "epoch": 0.67, "learning_rate": 1.332762379321084e-05, "loss": 0.2559, "step": 8570 }, { "epoch": 0.67, "learning_rate": 1.3319838056680163e-05, "loss": 0.2514, "step": 8580 }, { "epoch": 0.67, "learning_rate": 1.3312052320149487e-05, "loss": 0.2386, "step": 8590 }, { "epoch": 0.67, "learning_rate": 1.3304266583618812e-05, "loss": 0.3099, "step": 8600 }, { "epoch": 0.67, "learning_rate": 1.3296480847088136e-05, "loss": 0.1941, "step": 8610 }, { "epoch": 0.67, "learning_rate": 1.328869511055746e-05, "loss": 0.1931, "step": 8620 }, { "epoch": 0.67, "learning_rate": 1.3280909374026785e-05, "loss": 0.2676, "step": 8630 }, { "epoch": 0.67, "learning_rate": 1.3273123637496108e-05, "loss": 0.2689, "step": 8640 }, { "epoch": 0.67, "learning_rate": 1.3265337900965432e-05, "loss": 0.2499, "step": 8650 }, { "epoch": 0.67, "learning_rate": 1.3257552164434757e-05, "loss": 0.1597, "step": 8660 }, { "epoch": 0.68, "learning_rate": 1.324976642790408e-05, "loss": 0.2385, "step": 8670 }, { "epoch": 0.68, "learning_rate": 1.3241980691373404e-05, "loss": 0.3206, "step": 8680 }, { "epoch": 0.68, "learning_rate": 1.323419495484273e-05, "loss": 0.2182, "step": 8690 }, { "epoch": 0.68, "learning_rate": 1.3226409218312053e-05, "loss": 0.2932, "step": 8700 }, { "epoch": 0.68, "learning_rate": 1.3218623481781378e-05, "loss": 0.1684, "step": 8710 }, { "epoch": 0.68, "learning_rate": 1.3210837745250702e-05, "loss": 0.2822, "step": 8720 }, { "epoch": 0.68, "learning_rate": 1.3203052008720026e-05, "loss": 0.2572, "step": 8730 }, { "epoch": 0.68, "learning_rate": 1.3195266272189351e-05, "loss": 0.2549, "step": 8740 }, { "epoch": 0.68, "learning_rate": 1.3187480535658675e-05, "loss": 0.2183, "step": 8750 }, { "epoch": 0.68, "learning_rate": 1.3179694799127998e-05, "loss": 0.3112, "step": 8760 }, { "epoch": 0.68, "learning_rate": 1.3171909062597323e-05, "loss": 0.2417, "step": 8770 }, { "epoch": 0.68, "learning_rate": 1.3164123326066647e-05, "loss": 0.2393, "step": 8780 }, { "epoch": 0.68, "learning_rate": 1.315633758953597e-05, "loss": 0.2481, "step": 8790 }, { "epoch": 0.69, "learning_rate": 1.3148551853005296e-05, "loss": 0.2245, "step": 8800 }, { "epoch": 0.69, "learning_rate": 1.314076611647462e-05, "loss": 0.2221, "step": 8810 }, { "epoch": 0.69, "learning_rate": 1.3132980379943945e-05, "loss": 0.2406, "step": 8820 }, { "epoch": 0.69, "learning_rate": 1.3125194643413269e-05, "loss": 0.3024, "step": 8830 }, { "epoch": 0.69, "learning_rate": 1.3117408906882592e-05, "loss": 0.2108, "step": 8840 }, { "epoch": 0.69, "learning_rate": 1.3109623170351917e-05, "loss": 0.281, "step": 8850 }, { "epoch": 0.69, "learning_rate": 1.3101837433821241e-05, "loss": 0.2789, "step": 8860 }, { "epoch": 0.69, "learning_rate": 1.3094051697290565e-05, "loss": 0.1919, "step": 8870 }, { "epoch": 0.69, "learning_rate": 1.308626596075989e-05, "loss": 0.3249, "step": 8880 }, { "epoch": 0.69, "learning_rate": 1.3078480224229214e-05, "loss": 0.2109, "step": 8890 }, { "epoch": 0.69, "learning_rate": 1.3070694487698537e-05, "loss": 0.3476, "step": 8900 }, { "epoch": 0.69, "learning_rate": 1.3062908751167862e-05, "loss": 0.2504, "step": 8910 }, { "epoch": 0.69, "learning_rate": 1.3055123014637186e-05, "loss": 0.2318, "step": 8920 }, { "epoch": 0.7, "learning_rate": 1.304733727810651e-05, "loss": 0.196, "step": 8930 }, { "epoch": 0.7, "learning_rate": 1.3039551541575835e-05, "loss": 0.2634, "step": 8940 }, { "epoch": 0.7, "learning_rate": 1.3031765805045159e-05, "loss": 0.2615, "step": 8950 }, { "epoch": 0.7, "learning_rate": 1.3023980068514484e-05, "loss": 0.1575, "step": 8960 }, { "epoch": 0.7, "learning_rate": 1.3016194331983807e-05, "loss": 0.2671, "step": 8970 }, { "epoch": 0.7, "learning_rate": 1.3008408595453131e-05, "loss": 0.2605, "step": 8980 }, { "epoch": 0.7, "learning_rate": 1.3000622858922456e-05, "loss": 0.2248, "step": 8990 }, { "epoch": 0.7, "learning_rate": 1.299283712239178e-05, "loss": 0.3365, "step": 9000 }, { "epoch": 0.7, "learning_rate": 1.2985051385861102e-05, "loss": 0.1897, "step": 9010 }, { "epoch": 0.7, "learning_rate": 1.2977265649330429e-05, "loss": 0.1951, "step": 9020 }, { "epoch": 0.7, "learning_rate": 1.2969479912799752e-05, "loss": 0.2807, "step": 9030 }, { "epoch": 0.7, "learning_rate": 1.2961694176269074e-05, "loss": 0.2551, "step": 9040 }, { "epoch": 0.7, "learning_rate": 1.2953908439738401e-05, "loss": 0.1983, "step": 9050 }, { "epoch": 0.71, "learning_rate": 1.2946122703207723e-05, "loss": 0.2754, "step": 9060 }, { "epoch": 0.71, "learning_rate": 1.293833696667705e-05, "loss": 0.2593, "step": 9070 }, { "epoch": 0.71, "learning_rate": 1.2930551230146374e-05, "loss": 0.1994, "step": 9080 }, { "epoch": 0.71, "learning_rate": 1.2922765493615696e-05, "loss": 0.2162, "step": 9090 }, { "epoch": 0.71, "learning_rate": 1.2914979757085023e-05, "loss": 0.2505, "step": 9100 }, { "epoch": 0.71, "learning_rate": 1.2907194020554346e-05, "loss": 0.2122, "step": 9110 }, { "epoch": 0.71, "learning_rate": 1.2899408284023668e-05, "loss": 0.2839, "step": 9120 }, { "epoch": 0.71, "learning_rate": 1.2891622547492995e-05, "loss": 0.2256, "step": 9130 }, { "epoch": 0.71, "learning_rate": 1.2883836810962317e-05, "loss": 0.2733, "step": 9140 }, { "epoch": 0.71, "learning_rate": 1.287605107443164e-05, "loss": 0.2434, "step": 9150 }, { "epoch": 0.71, "learning_rate": 1.2868265337900968e-05, "loss": 0.2805, "step": 9160 }, { "epoch": 0.71, "learning_rate": 1.286047960137029e-05, "loss": 0.235, "step": 9170 }, { "epoch": 0.71, "learning_rate": 1.2852693864839613e-05, "loss": 0.3376, "step": 9180 }, { "epoch": 0.72, "learning_rate": 1.2844908128308939e-05, "loss": 0.2328, "step": 9190 }, { "epoch": 0.72, "learning_rate": 1.2837122391778262e-05, "loss": 0.2354, "step": 9200 }, { "epoch": 0.72, "learning_rate": 1.282933665524759e-05, "loss": 0.1856, "step": 9210 }, { "epoch": 0.72, "learning_rate": 1.2821550918716911e-05, "loss": 0.2262, "step": 9220 }, { "epoch": 0.72, "learning_rate": 1.2813765182186235e-05, "loss": 0.2725, "step": 9230 }, { "epoch": 0.72, "learning_rate": 1.280597944565556e-05, "loss": 0.2022, "step": 9240 }, { "epoch": 0.72, "learning_rate": 1.2798193709124884e-05, "loss": 0.2515, "step": 9250 }, { "epoch": 0.72, "learning_rate": 1.2790407972594207e-05, "loss": 0.2786, "step": 9260 }, { "epoch": 0.72, "learning_rate": 1.2782622236063533e-05, "loss": 0.1973, "step": 9270 }, { "epoch": 0.72, "learning_rate": 1.2774836499532856e-05, "loss": 0.275, "step": 9280 }, { "epoch": 0.72, "learning_rate": 1.276705076300218e-05, "loss": 0.1834, "step": 9290 }, { "epoch": 0.72, "learning_rate": 1.2759265026471505e-05, "loss": 0.2149, "step": 9300 }, { "epoch": 0.72, "learning_rate": 1.2751479289940829e-05, "loss": 0.2387, "step": 9310 }, { "epoch": 0.73, "learning_rate": 1.2743693553410154e-05, "loss": 0.195, "step": 9320 }, { "epoch": 0.73, "learning_rate": 1.2735907816879478e-05, "loss": 0.2405, "step": 9330 }, { "epoch": 0.73, "learning_rate": 1.2728122080348801e-05, "loss": 0.2592, "step": 9340 }, { "epoch": 0.73, "learning_rate": 1.2720336343818126e-05, "loss": 0.3315, "step": 9350 }, { "epoch": 0.73, "learning_rate": 1.271255060728745e-05, "loss": 0.1888, "step": 9360 }, { "epoch": 0.73, "learning_rate": 1.2704764870756774e-05, "loss": 0.2336, "step": 9370 }, { "epoch": 0.73, "learning_rate": 1.2696979134226099e-05, "loss": 0.1942, "step": 9380 }, { "epoch": 0.73, "learning_rate": 1.2689193397695423e-05, "loss": 0.1785, "step": 9390 }, { "epoch": 0.73, "learning_rate": 1.2681407661164746e-05, "loss": 0.3373, "step": 9400 }, { "epoch": 0.73, "learning_rate": 1.2673621924634071e-05, "loss": 0.2567, "step": 9410 }, { "epoch": 0.73, "learning_rate": 1.2665836188103395e-05, "loss": 0.1999, "step": 9420 }, { "epoch": 0.73, "learning_rate": 1.2658050451572719e-05, "loss": 0.2082, "step": 9430 }, { "epoch": 0.73, "learning_rate": 1.2650264715042044e-05, "loss": 0.2549, "step": 9440 }, { "epoch": 0.74, "learning_rate": 1.2642478978511368e-05, "loss": 0.2606, "step": 9450 }, { "epoch": 0.74, "learning_rate": 1.2634693241980693e-05, "loss": 0.2784, "step": 9460 }, { "epoch": 0.74, "learning_rate": 1.2626907505450017e-05, "loss": 0.2298, "step": 9470 }, { "epoch": 0.74, "learning_rate": 1.261912176891934e-05, "loss": 0.2609, "step": 9480 }, { "epoch": 0.74, "learning_rate": 1.2611336032388665e-05, "loss": 0.229, "step": 9490 }, { "epoch": 0.74, "learning_rate": 1.2603550295857989e-05, "loss": 0.1955, "step": 9500 }, { "epoch": 0.74, "learning_rate": 1.2595764559327313e-05, "loss": 0.2197, "step": 9510 }, { "epoch": 0.74, "learning_rate": 1.2587978822796638e-05, "loss": 0.2211, "step": 9520 }, { "epoch": 0.74, "learning_rate": 1.2580193086265962e-05, "loss": 0.2508, "step": 9530 }, { "epoch": 0.74, "learning_rate": 1.2572407349735285e-05, "loss": 0.2029, "step": 9540 }, { "epoch": 0.74, "learning_rate": 1.256462161320461e-05, "loss": 0.2052, "step": 9550 }, { "epoch": 0.74, "learning_rate": 1.2556835876673934e-05, "loss": 0.1875, "step": 9560 }, { "epoch": 0.75, "learning_rate": 1.254905014014326e-05, "loss": 0.2552, "step": 9570 }, { "epoch": 0.75, "learning_rate": 1.2541264403612583e-05, "loss": 0.2349, "step": 9580 }, { "epoch": 0.75, "learning_rate": 1.2533478667081907e-05, "loss": 0.2103, "step": 9590 }, { "epoch": 0.75, "learning_rate": 1.2525692930551232e-05, "loss": 0.2292, "step": 9600 }, { "epoch": 0.75, "learning_rate": 1.2517907194020555e-05, "loss": 0.2763, "step": 9610 }, { "epoch": 0.75, "learning_rate": 1.2510121457489879e-05, "loss": 0.186, "step": 9620 }, { "epoch": 0.75, "learning_rate": 1.2502335720959204e-05, "loss": 0.1946, "step": 9630 }, { "epoch": 0.75, "learning_rate": 1.2494549984428528e-05, "loss": 0.2341, "step": 9640 }, { "epoch": 0.75, "learning_rate": 1.2486764247897852e-05, "loss": 0.2072, "step": 9650 }, { "epoch": 0.75, "learning_rate": 1.2478978511367177e-05, "loss": 0.2128, "step": 9660 }, { "epoch": 0.75, "learning_rate": 1.24711927748365e-05, "loss": 0.2115, "step": 9670 }, { "epoch": 0.75, "learning_rate": 1.2463407038305824e-05, "loss": 0.2096, "step": 9680 }, { "epoch": 0.75, "learning_rate": 1.245562130177515e-05, "loss": 0.2418, "step": 9690 }, { "epoch": 0.76, "learning_rate": 1.2447835565244473e-05, "loss": 0.2884, "step": 9700 }, { "epoch": 0.76, "learning_rate": 1.2440049828713798e-05, "loss": 0.3362, "step": 9710 }, { "epoch": 0.76, "learning_rate": 1.2432264092183122e-05, "loss": 0.1847, "step": 9720 }, { "epoch": 0.76, "learning_rate": 1.2424478355652445e-05, "loss": 0.2206, "step": 9730 }, { "epoch": 0.76, "learning_rate": 1.241669261912177e-05, "loss": 0.2059, "step": 9740 }, { "epoch": 0.76, "learning_rate": 1.2408906882591094e-05, "loss": 0.2209, "step": 9750 }, { "epoch": 0.76, "learning_rate": 1.2401121146060418e-05, "loss": 0.2191, "step": 9760 }, { "epoch": 0.76, "learning_rate": 1.2393335409529743e-05, "loss": 0.2439, "step": 9770 }, { "epoch": 0.76, "learning_rate": 1.2385549672999067e-05, "loss": 0.2058, "step": 9780 }, { "epoch": 0.76, "learning_rate": 1.237776393646839e-05, "loss": 0.1416, "step": 9790 }, { "epoch": 0.76, "learning_rate": 1.2369978199937716e-05, "loss": 0.2964, "step": 9800 }, { "epoch": 0.76, "learning_rate": 1.236219246340704e-05, "loss": 0.2656, "step": 9810 }, { "epoch": 0.76, "learning_rate": 1.2354406726876365e-05, "loss": 0.2732, "step": 9820 }, { "epoch": 0.77, "learning_rate": 1.2346620990345688e-05, "loss": 0.2273, "step": 9830 }, { "epoch": 0.77, "learning_rate": 1.2338835253815012e-05, "loss": 0.245, "step": 9840 }, { "epoch": 0.77, "learning_rate": 1.2331049517284337e-05, "loss": 0.1903, "step": 9850 }, { "epoch": 0.77, "learning_rate": 1.232326378075366e-05, "loss": 0.219, "step": 9860 }, { "epoch": 0.77, "learning_rate": 1.2315478044222984e-05, "loss": 0.2203, "step": 9870 }, { "epoch": 0.77, "learning_rate": 1.230769230769231e-05, "loss": 0.3253, "step": 9880 }, { "epoch": 0.77, "learning_rate": 1.2299906571161633e-05, "loss": 0.1897, "step": 9890 }, { "epoch": 0.77, "learning_rate": 1.2292120834630957e-05, "loss": 0.3111, "step": 9900 }, { "epoch": 0.77, "learning_rate": 1.2284335098100282e-05, "loss": 0.2446, "step": 9910 }, { "epoch": 0.77, "learning_rate": 1.2276549361569606e-05, "loss": 0.2078, "step": 9920 }, { "epoch": 0.77, "learning_rate": 1.2268763625038928e-05, "loss": 0.1927, "step": 9930 }, { "epoch": 0.77, "learning_rate": 1.2260977888508255e-05, "loss": 0.2107, "step": 9940 }, { "epoch": 0.77, "learning_rate": 1.2253192151977578e-05, "loss": 0.1688, "step": 9950 }, { "epoch": 0.78, "learning_rate": 1.2245406415446904e-05, "loss": 0.2085, "step": 9960 }, { "epoch": 0.78, "learning_rate": 1.2237620678916227e-05, "loss": 0.1623, "step": 9970 }, { "epoch": 0.78, "learning_rate": 1.222983494238555e-05, "loss": 0.2193, "step": 9980 }, { "epoch": 0.78, "learning_rate": 1.2222049205854876e-05, "loss": 0.2748, "step": 9990 }, { "epoch": 0.78, "learning_rate": 1.22142634693242e-05, "loss": 0.2054, "step": 10000 }, { "epoch": 0.78, "learning_rate": 1.2206477732793522e-05, "loss": 0.1815, "step": 10010 }, { "epoch": 0.78, "learning_rate": 1.2198691996262849e-05, "loss": 0.2605, "step": 10020 }, { "epoch": 0.78, "learning_rate": 1.2190906259732172e-05, "loss": 0.2719, "step": 10030 }, { "epoch": 0.78, "learning_rate": 1.2183120523201494e-05, "loss": 0.2284, "step": 10040 }, { "epoch": 0.78, "learning_rate": 1.2175334786670821e-05, "loss": 0.2895, "step": 10050 }, { "epoch": 0.78, "learning_rate": 1.2167549050140143e-05, "loss": 0.2278, "step": 10060 }, { "epoch": 0.78, "learning_rate": 1.215976331360947e-05, "loss": 0.2383, "step": 10070 }, { "epoch": 0.78, "learning_rate": 1.2151977577078794e-05, "loss": 0.2191, "step": 10080 }, { "epoch": 0.79, "learning_rate": 1.2144191840548116e-05, "loss": 0.1865, "step": 10090 }, { "epoch": 0.79, "learning_rate": 1.2136406104017443e-05, "loss": 0.3054, "step": 10100 }, { "epoch": 0.79, "learning_rate": 1.2128620367486765e-05, "loss": 0.245, "step": 10110 }, { "epoch": 0.79, "learning_rate": 1.2120834630956088e-05, "loss": 0.2497, "step": 10120 }, { "epoch": 0.79, "learning_rate": 1.2113048894425415e-05, "loss": 0.2181, "step": 10130 }, { "epoch": 0.79, "learning_rate": 1.2105263157894737e-05, "loss": 0.1896, "step": 10140 }, { "epoch": 0.79, "learning_rate": 1.209747742136406e-05, "loss": 0.2177, "step": 10150 }, { "epoch": 0.79, "learning_rate": 1.2089691684833386e-05, "loss": 0.2672, "step": 10160 }, { "epoch": 0.79, "learning_rate": 1.208190594830271e-05, "loss": 0.2449, "step": 10170 }, { "epoch": 0.79, "learning_rate": 1.2074120211772033e-05, "loss": 0.2086, "step": 10180 }, { "epoch": 0.79, "learning_rate": 1.2066334475241358e-05, "loss": 0.2144, "step": 10190 }, { "epoch": 0.79, "learning_rate": 1.2058548738710682e-05, "loss": 0.212, "step": 10200 }, { "epoch": 0.79, "learning_rate": 1.2050763002180007e-05, "loss": 0.3234, "step": 10210 }, { "epoch": 0.8, "learning_rate": 1.2042977265649331e-05, "loss": 0.23, "step": 10220 }, { "epoch": 0.8, "learning_rate": 1.2035191529118655e-05, "loss": 0.2608, "step": 10230 }, { "epoch": 0.8, "learning_rate": 1.202740579258798e-05, "loss": 0.2053, "step": 10240 }, { "epoch": 0.8, "learning_rate": 1.2019620056057303e-05, "loss": 0.1606, "step": 10250 }, { "epoch": 0.8, "learning_rate": 1.2011834319526627e-05, "loss": 0.2766, "step": 10260 }, { "epoch": 0.8, "learning_rate": 1.2004048582995952e-05, "loss": 0.2145, "step": 10270 }, { "epoch": 0.8, "learning_rate": 1.1996262846465276e-05, "loss": 0.2709, "step": 10280 }, { "epoch": 0.8, "learning_rate": 1.19884771099346e-05, "loss": 0.2038, "step": 10290 }, { "epoch": 0.8, "learning_rate": 1.1980691373403925e-05, "loss": 0.2132, "step": 10300 }, { "epoch": 0.8, "learning_rate": 1.1972905636873248e-05, "loss": 0.2122, "step": 10310 }, { "epoch": 0.8, "learning_rate": 1.1965119900342574e-05, "loss": 0.2436, "step": 10320 }, { "epoch": 0.8, "learning_rate": 1.1957334163811897e-05, "loss": 0.2474, "step": 10330 }, { "epoch": 0.81, "learning_rate": 1.1949548427281221e-05, "loss": 0.2912, "step": 10340 }, { "epoch": 0.81, "learning_rate": 1.1941762690750546e-05, "loss": 0.1962, "step": 10350 }, { "epoch": 0.81, "learning_rate": 1.193397695421987e-05, "loss": 0.1368, "step": 10360 }, { "epoch": 0.81, "learning_rate": 1.1926191217689194e-05, "loss": 0.2864, "step": 10370 }, { "epoch": 0.81, "learning_rate": 1.1918405481158519e-05, "loss": 0.152, "step": 10380 }, { "epoch": 0.81, "learning_rate": 1.1910619744627842e-05, "loss": 0.1998, "step": 10390 }, { "epoch": 0.81, "learning_rate": 1.1902834008097166e-05, "loss": 0.256, "step": 10400 }, { "epoch": 0.81, "learning_rate": 1.1895048271566491e-05, "loss": 0.1882, "step": 10410 }, { "epoch": 0.81, "learning_rate": 1.1887262535035815e-05, "loss": 0.1613, "step": 10420 }, { "epoch": 0.81, "learning_rate": 1.1879476798505139e-05, "loss": 0.257, "step": 10430 }, { "epoch": 0.81, "learning_rate": 1.1871691061974464e-05, "loss": 0.253, "step": 10440 }, { "epoch": 0.81, "learning_rate": 1.1863905325443787e-05, "loss": 0.2488, "step": 10450 }, { "epoch": 0.81, "learning_rate": 1.1856119588913113e-05, "loss": 0.2032, "step": 10460 }, { "epoch": 0.82, "learning_rate": 1.1848333852382436e-05, "loss": 0.1911, "step": 10470 }, { "epoch": 0.82, "learning_rate": 1.184054811585176e-05, "loss": 0.1919, "step": 10480 }, { "epoch": 0.82, "learning_rate": 1.1832762379321085e-05, "loss": 0.2179, "step": 10490 }, { "epoch": 0.82, "learning_rate": 1.1824976642790409e-05, "loss": 0.1749, "step": 10500 }, { "epoch": 0.82, "learning_rate": 1.1817190906259732e-05, "loss": 0.2601, "step": 10510 }, { "epoch": 0.82, "learning_rate": 1.1809405169729058e-05, "loss": 0.1821, "step": 10520 }, { "epoch": 0.82, "learning_rate": 1.1801619433198381e-05, "loss": 0.1973, "step": 10530 }, { "epoch": 0.82, "learning_rate": 1.1793833696667705e-05, "loss": 0.2062, "step": 10540 }, { "epoch": 0.82, "learning_rate": 1.178604796013703e-05, "loss": 0.2057, "step": 10550 }, { "epoch": 0.82, "learning_rate": 1.1778262223606354e-05, "loss": 0.1592, "step": 10560 }, { "epoch": 0.82, "learning_rate": 1.177047648707568e-05, "loss": 0.2175, "step": 10570 }, { "epoch": 0.82, "learning_rate": 1.1762690750545003e-05, "loss": 0.1738, "step": 10580 }, { "epoch": 0.82, "learning_rate": 1.1754905014014326e-05, "loss": 0.2378, "step": 10590 }, { "epoch": 0.83, "learning_rate": 1.1747119277483652e-05, "loss": 0.1743, "step": 10600 }, { "epoch": 0.83, "learning_rate": 1.1739333540952975e-05, "loss": 0.2354, "step": 10610 }, { "epoch": 0.83, "learning_rate": 1.1731547804422299e-05, "loss": 0.1741, "step": 10620 }, { "epoch": 0.83, "learning_rate": 1.1723762067891624e-05, "loss": 0.1684, "step": 10630 }, { "epoch": 0.83, "learning_rate": 1.1715976331360948e-05, "loss": 0.2199, "step": 10640 }, { "epoch": 0.83, "learning_rate": 1.1708190594830271e-05, "loss": 0.2124, "step": 10650 }, { "epoch": 0.83, "learning_rate": 1.1700404858299597e-05, "loss": 0.2659, "step": 10660 }, { "epoch": 0.83, "learning_rate": 1.169261912176892e-05, "loss": 0.2294, "step": 10670 }, { "epoch": 0.83, "learning_rate": 1.1684833385238244e-05, "loss": 0.1339, "step": 10680 }, { "epoch": 0.83, "learning_rate": 1.167704764870757e-05, "loss": 0.2392, "step": 10690 }, { "epoch": 0.83, "learning_rate": 1.1669261912176893e-05, "loss": 0.1526, "step": 10700 }, { "epoch": 0.83, "learning_rate": 1.1661476175646218e-05, "loss": 0.1836, "step": 10710 }, { "epoch": 0.83, "learning_rate": 1.1653690439115542e-05, "loss": 0.2258, "step": 10720 }, { "epoch": 0.84, "learning_rate": 1.1645904702584865e-05, "loss": 0.1856, "step": 10730 }, { "epoch": 0.84, "learning_rate": 1.163811896605419e-05, "loss": 0.2287, "step": 10740 }, { "epoch": 0.84, "learning_rate": 1.1630333229523514e-05, "loss": 0.2017, "step": 10750 }, { "epoch": 0.84, "learning_rate": 1.1622547492992838e-05, "loss": 0.1682, "step": 10760 }, { "epoch": 0.84, "learning_rate": 1.1614761756462163e-05, "loss": 0.2564, "step": 10770 }, { "epoch": 0.84, "learning_rate": 1.1606976019931487e-05, "loss": 0.1868, "step": 10780 }, { "epoch": 0.84, "learning_rate": 1.159919028340081e-05, "loss": 0.1721, "step": 10790 }, { "epoch": 0.84, "learning_rate": 1.1591404546870136e-05, "loss": 0.2094, "step": 10800 }, { "epoch": 0.84, "learning_rate": 1.158361881033946e-05, "loss": 0.2318, "step": 10810 }, { "epoch": 0.84, "learning_rate": 1.1575833073808785e-05, "loss": 0.2198, "step": 10820 }, { "epoch": 0.84, "learning_rate": 1.1568047337278108e-05, "loss": 0.2162, "step": 10830 }, { "epoch": 0.84, "learning_rate": 1.1560261600747432e-05, "loss": 0.2424, "step": 10840 }, { "epoch": 0.84, "learning_rate": 1.1552475864216757e-05, "loss": 0.2163, "step": 10850 }, { "epoch": 0.85, "learning_rate": 1.154469012768608e-05, "loss": 0.1945, "step": 10860 }, { "epoch": 0.85, "learning_rate": 1.1536904391155404e-05, "loss": 0.1453, "step": 10870 }, { "epoch": 0.85, "learning_rate": 1.152911865462473e-05, "loss": 0.2658, "step": 10880 }, { "epoch": 0.85, "learning_rate": 1.1521332918094053e-05, "loss": 0.1744, "step": 10890 }, { "epoch": 0.85, "learning_rate": 1.1513547181563375e-05, "loss": 0.1922, "step": 10900 }, { "epoch": 0.85, "learning_rate": 1.1505761445032702e-05, "loss": 0.2594, "step": 10910 }, { "epoch": 0.85, "learning_rate": 1.1497975708502026e-05, "loss": 0.2348, "step": 10920 }, { "epoch": 0.85, "learning_rate": 1.1490189971971348e-05, "loss": 0.2265, "step": 10930 }, { "epoch": 0.85, "learning_rate": 1.1482404235440675e-05, "loss": 0.231, "step": 10940 }, { "epoch": 0.85, "learning_rate": 1.1474618498909998e-05, "loss": 0.2046, "step": 10950 }, { "epoch": 0.85, "learning_rate": 1.1466832762379323e-05, "loss": 0.2101, "step": 10960 }, { "epoch": 0.85, "learning_rate": 1.1459047025848647e-05, "loss": 0.224, "step": 10970 }, { "epoch": 0.85, "learning_rate": 1.1451261289317969e-05, "loss": 0.1564, "step": 10980 }, { "epoch": 0.86, "learning_rate": 1.1443475552787296e-05, "loss": 0.2457, "step": 10990 }, { "epoch": 0.86, "learning_rate": 1.143568981625662e-05, "loss": 0.2481, "step": 11000 }, { "epoch": 0.86, "learning_rate": 1.1427904079725942e-05, "loss": 0.2572, "step": 11010 }, { "epoch": 0.86, "learning_rate": 1.1420118343195268e-05, "loss": 0.1696, "step": 11020 }, { "epoch": 0.86, "learning_rate": 1.141233260666459e-05, "loss": 0.247, "step": 11030 }, { "epoch": 0.86, "learning_rate": 1.1404546870133914e-05, "loss": 0.2109, "step": 11040 }, { "epoch": 0.86, "learning_rate": 1.1396761133603241e-05, "loss": 0.1912, "step": 11050 }, { "epoch": 0.86, "learning_rate": 1.1388975397072563e-05, "loss": 0.2224, "step": 11060 }, { "epoch": 0.86, "learning_rate": 1.138118966054189e-05, "loss": 0.1924, "step": 11070 }, { "epoch": 0.86, "learning_rate": 1.1373403924011212e-05, "loss": 0.2314, "step": 11080 }, { "epoch": 0.86, "learning_rate": 1.1365618187480535e-05, "loss": 0.256, "step": 11090 }, { "epoch": 0.86, "learning_rate": 1.1357832450949862e-05, "loss": 0.2141, "step": 11100 }, { "epoch": 0.86, "learning_rate": 1.1350046714419184e-05, "loss": 0.1559, "step": 11110 }, { "epoch": 0.87, "learning_rate": 1.1342260977888508e-05, "loss": 0.1879, "step": 11120 }, { "epoch": 0.87, "learning_rate": 1.1334475241357833e-05, "loss": 0.1849, "step": 11130 }, { "epoch": 0.87, "learning_rate": 1.1326689504827157e-05, "loss": 0.1812, "step": 11140 }, { "epoch": 0.87, "learning_rate": 1.131890376829648e-05, "loss": 0.1766, "step": 11150 }, { "epoch": 0.87, "learning_rate": 1.1311118031765806e-05, "loss": 0.1857, "step": 11160 }, { "epoch": 0.87, "learning_rate": 1.130333229523513e-05, "loss": 0.1891, "step": 11170 }, { "epoch": 0.87, "learning_rate": 1.1295546558704453e-05, "loss": 0.2172, "step": 11180 }, { "epoch": 0.87, "learning_rate": 1.1287760822173778e-05, "loss": 0.2251, "step": 11190 }, { "epoch": 0.87, "learning_rate": 1.1279975085643102e-05, "loss": 0.2082, "step": 11200 }, { "epoch": 0.87, "learning_rate": 1.1272189349112427e-05, "loss": 0.1885, "step": 11210 }, { "epoch": 0.87, "learning_rate": 1.126440361258175e-05, "loss": 0.2178, "step": 11220 }, { "epoch": 0.87, "learning_rate": 1.1256617876051074e-05, "loss": 0.1842, "step": 11230 }, { "epoch": 0.88, "learning_rate": 1.12488321395204e-05, "loss": 0.221, "step": 11240 }, { "epoch": 0.88, "learning_rate": 1.1241046402989723e-05, "loss": 0.2308, "step": 11250 }, { "epoch": 0.88, "learning_rate": 1.1233260666459047e-05, "loss": 0.178, "step": 11260 }, { "epoch": 0.88, "learning_rate": 1.1225474929928372e-05, "loss": 0.266, "step": 11270 }, { "epoch": 0.88, "learning_rate": 1.1217689193397696e-05, "loss": 0.2185, "step": 11280 }, { "epoch": 0.88, "learning_rate": 1.120990345686702e-05, "loss": 0.1754, "step": 11290 }, { "epoch": 0.88, "learning_rate": 1.1202117720336345e-05, "loss": 0.181, "step": 11300 }, { "epoch": 0.88, "learning_rate": 1.1194331983805668e-05, "loss": 0.2556, "step": 11310 }, { "epoch": 0.88, "learning_rate": 1.1186546247274994e-05, "loss": 0.1735, "step": 11320 }, { "epoch": 0.88, "learning_rate": 1.1178760510744317e-05, "loss": 0.1829, "step": 11330 }, { "epoch": 0.88, "learning_rate": 1.117097477421364e-05, "loss": 0.2738, "step": 11340 }, { "epoch": 0.88, "learning_rate": 1.1163189037682966e-05, "loss": 0.2314, "step": 11350 }, { "epoch": 0.88, "learning_rate": 1.115540330115229e-05, "loss": 0.2023, "step": 11360 }, { "epoch": 0.89, "learning_rate": 1.1147617564621613e-05, "loss": 0.2612, "step": 11370 }, { "epoch": 0.89, "learning_rate": 1.1139831828090939e-05, "loss": 0.298, "step": 11380 }, { "epoch": 0.89, "learning_rate": 1.1132046091560262e-05, "loss": 0.2385, "step": 11390 }, { "epoch": 0.89, "learning_rate": 1.1124260355029586e-05, "loss": 0.2713, "step": 11400 }, { "epoch": 0.89, "learning_rate": 1.1116474618498911e-05, "loss": 0.1887, "step": 11410 }, { "epoch": 0.89, "learning_rate": 1.1108688881968235e-05, "loss": 0.2056, "step": 11420 }, { "epoch": 0.89, "learning_rate": 1.1100903145437558e-05, "loss": 0.2475, "step": 11430 }, { "epoch": 0.89, "learning_rate": 1.1093117408906884e-05, "loss": 0.1947, "step": 11440 }, { "epoch": 0.89, "learning_rate": 1.1085331672376207e-05, "loss": 0.2345, "step": 11450 }, { "epoch": 0.89, "learning_rate": 1.1077545935845533e-05, "loss": 0.2265, "step": 11460 }, { "epoch": 0.89, "learning_rate": 1.1069760199314856e-05, "loss": 0.2264, "step": 11470 }, { "epoch": 0.89, "learning_rate": 1.106197446278418e-05, "loss": 0.1302, "step": 11480 }, { "epoch": 0.89, "learning_rate": 1.1054188726253505e-05, "loss": 0.2255, "step": 11490 }, { "epoch": 0.9, "learning_rate": 1.1046402989722829e-05, "loss": 0.1996, "step": 11500 }, { "epoch": 0.9, "learning_rate": 1.1038617253192152e-05, "loss": 0.1856, "step": 11510 }, { "epoch": 0.9, "learning_rate": 1.1030831516661478e-05, "loss": 0.2494, "step": 11520 }, { "epoch": 0.9, "learning_rate": 1.1023045780130801e-05, "loss": 0.1817, "step": 11530 }, { "epoch": 0.9, "learning_rate": 1.1015260043600125e-05, "loss": 0.2428, "step": 11540 }, { "epoch": 0.9, "learning_rate": 1.100747430706945e-05, "loss": 0.1772, "step": 11550 }, { "epoch": 0.9, "learning_rate": 1.0999688570538774e-05, "loss": 0.1547, "step": 11560 }, { "epoch": 0.9, "learning_rate": 1.0991902834008099e-05, "loss": 0.1982, "step": 11570 }, { "epoch": 0.9, "learning_rate": 1.0984117097477423e-05, "loss": 0.1974, "step": 11580 }, { "epoch": 0.9, "learning_rate": 1.0976331360946746e-05, "loss": 0.2009, "step": 11590 }, { "epoch": 0.9, "learning_rate": 1.0968545624416071e-05, "loss": 0.1843, "step": 11600 }, { "epoch": 0.9, "learning_rate": 1.0960759887885395e-05, "loss": 0.2159, "step": 11610 }, { "epoch": 0.9, "learning_rate": 1.0952974151354719e-05, "loss": 0.2529, "step": 11620 }, { "epoch": 0.91, "learning_rate": 1.0945188414824044e-05, "loss": 0.1819, "step": 11630 }, { "epoch": 0.91, "learning_rate": 1.0937402678293368e-05, "loss": 0.1801, "step": 11640 }, { "epoch": 0.91, "learning_rate": 1.0929616941762691e-05, "loss": 0.1975, "step": 11650 }, { "epoch": 0.91, "learning_rate": 1.0921831205232017e-05, "loss": 0.2526, "step": 11660 }, { "epoch": 0.91, "learning_rate": 1.091404546870134e-05, "loss": 0.1886, "step": 11670 }, { "epoch": 0.91, "learning_rate": 1.0906259732170664e-05, "loss": 0.2286, "step": 11680 }, { "epoch": 0.91, "learning_rate": 1.0898473995639989e-05, "loss": 0.1451, "step": 11690 }, { "epoch": 0.91, "learning_rate": 1.0890688259109313e-05, "loss": 0.2952, "step": 11700 }, { "epoch": 0.91, "learning_rate": 1.0882902522578638e-05, "loss": 0.2533, "step": 11710 }, { "epoch": 0.91, "learning_rate": 1.0875116786047962e-05, "loss": 0.2076, "step": 11720 }, { "epoch": 0.91, "learning_rate": 1.0867331049517285e-05, "loss": 0.1807, "step": 11730 }, { "epoch": 0.91, "learning_rate": 1.085954531298661e-05, "loss": 0.2167, "step": 11740 }, { "epoch": 0.91, "learning_rate": 1.0851759576455934e-05, "loss": 0.2054, "step": 11750 }, { "epoch": 0.92, "learning_rate": 1.0843973839925258e-05, "loss": 0.2179, "step": 11760 }, { "epoch": 0.92, "learning_rate": 1.0836188103394583e-05, "loss": 0.2269, "step": 11770 }, { "epoch": 0.92, "learning_rate": 1.0828402366863907e-05, "loss": 0.166, "step": 11780 }, { "epoch": 0.92, "learning_rate": 1.082061663033323e-05, "loss": 0.2171, "step": 11790 }, { "epoch": 0.92, "learning_rate": 1.0812830893802555e-05, "loss": 0.1731, "step": 11800 }, { "epoch": 0.92, "learning_rate": 1.0805045157271879e-05, "loss": 0.2032, "step": 11810 }, { "epoch": 0.92, "learning_rate": 1.0797259420741204e-05, "loss": 0.1816, "step": 11820 }, { "epoch": 0.92, "learning_rate": 1.0789473684210528e-05, "loss": 0.1449, "step": 11830 }, { "epoch": 0.92, "learning_rate": 1.0781687947679852e-05, "loss": 0.275, "step": 11840 }, { "epoch": 0.92, "learning_rate": 1.0773902211149177e-05, "loss": 0.2051, "step": 11850 }, { "epoch": 0.92, "learning_rate": 1.07661164746185e-05, "loss": 0.2313, "step": 11860 }, { "epoch": 0.92, "learning_rate": 1.0758330738087824e-05, "loss": 0.218, "step": 11870 }, { "epoch": 0.92, "learning_rate": 1.075054500155715e-05, "loss": 0.2417, "step": 11880 }, { "epoch": 0.93, "learning_rate": 1.0742759265026473e-05, "loss": 0.2445, "step": 11890 }, { "epoch": 0.93, "learning_rate": 1.0734973528495795e-05, "loss": 0.1577, "step": 11900 }, { "epoch": 0.93, "learning_rate": 1.0727187791965122e-05, "loss": 0.1642, "step": 11910 }, { "epoch": 0.93, "learning_rate": 1.0719402055434445e-05, "loss": 0.1732, "step": 11920 }, { "epoch": 0.93, "learning_rate": 1.0711616318903767e-05, "loss": 0.2135, "step": 11930 }, { "epoch": 0.93, "learning_rate": 1.0703830582373094e-05, "loss": 0.2901, "step": 11940 }, { "epoch": 0.93, "learning_rate": 1.0696044845842416e-05, "loss": 0.205, "step": 11950 }, { "epoch": 0.93, "learning_rate": 1.0688259109311743e-05, "loss": 0.1833, "step": 11960 }, { "epoch": 0.93, "learning_rate": 1.0680473372781067e-05, "loss": 0.1664, "step": 11970 }, { "epoch": 0.93, "learning_rate": 1.0672687636250389e-05, "loss": 0.1935, "step": 11980 }, { "epoch": 0.93, "learning_rate": 1.0664901899719716e-05, "loss": 0.2205, "step": 11990 }, { "epoch": 0.93, "learning_rate": 1.0657116163189038e-05, "loss": 0.1953, "step": 12000 }, { "epoch": 0.94, "learning_rate": 1.0649330426658361e-05, "loss": 0.1638, "step": 12010 }, { "epoch": 0.94, "learning_rate": 1.0641544690127688e-05, "loss": 0.2432, "step": 12020 }, { "epoch": 0.94, "learning_rate": 1.063375895359701e-05, "loss": 0.1405, "step": 12030 }, { "epoch": 0.94, "learning_rate": 1.0625973217066334e-05, "loss": 0.1838, "step": 12040 }, { "epoch": 0.94, "learning_rate": 1.0618187480535659e-05, "loss": 0.2223, "step": 12050 }, { "epoch": 0.94, "learning_rate": 1.0610401744004983e-05, "loss": 0.1746, "step": 12060 }, { "epoch": 0.94, "learning_rate": 1.060261600747431e-05, "loss": 0.2268, "step": 12070 }, { "epoch": 0.94, "learning_rate": 1.0594830270943632e-05, "loss": 0.1739, "step": 12080 }, { "epoch": 0.94, "learning_rate": 1.0587044534412955e-05, "loss": 0.1762, "step": 12090 }, { "epoch": 0.94, "learning_rate": 1.057925879788228e-05, "loss": 0.2067, "step": 12100 }, { "epoch": 0.94, "learning_rate": 1.0571473061351604e-05, "loss": 0.1499, "step": 12110 }, { "epoch": 0.94, "learning_rate": 1.0563687324820928e-05, "loss": 0.1828, "step": 12120 }, { "epoch": 0.94, "learning_rate": 1.0555901588290253e-05, "loss": 0.1995, "step": 12130 }, { "epoch": 0.95, "learning_rate": 1.0548115851759577e-05, "loss": 0.2062, "step": 12140 }, { "epoch": 0.95, "learning_rate": 1.05403301152289e-05, "loss": 0.2472, "step": 12150 }, { "epoch": 0.95, "learning_rate": 1.0532544378698226e-05, "loss": 0.2155, "step": 12160 }, { "epoch": 0.95, "learning_rate": 1.052475864216755e-05, "loss": 0.2805, "step": 12170 }, { "epoch": 0.95, "learning_rate": 1.0516972905636873e-05, "loss": 0.1677, "step": 12180 }, { "epoch": 0.95, "learning_rate": 1.0509187169106198e-05, "loss": 0.1805, "step": 12190 }, { "epoch": 0.95, "learning_rate": 1.0501401432575522e-05, "loss": 0.2232, "step": 12200 }, { "epoch": 0.95, "learning_rate": 1.0493615696044847e-05, "loss": 0.2086, "step": 12210 }, { "epoch": 0.95, "learning_rate": 1.048582995951417e-05, "loss": 0.1905, "step": 12220 }, { "epoch": 0.95, "learning_rate": 1.0478044222983494e-05, "loss": 0.2523, "step": 12230 }, { "epoch": 0.95, "learning_rate": 1.047025848645282e-05, "loss": 0.1813, "step": 12240 }, { "epoch": 0.95, "learning_rate": 1.0462472749922143e-05, "loss": 0.1759, "step": 12250 }, { "epoch": 0.95, "learning_rate": 1.0454687013391467e-05, "loss": 0.1958, "step": 12260 }, { "epoch": 0.96, "learning_rate": 1.0446901276860792e-05, "loss": 0.143, "step": 12270 }, { "epoch": 0.96, "learning_rate": 1.0439115540330116e-05, "loss": 0.2086, "step": 12280 }, { "epoch": 0.96, "learning_rate": 1.043132980379944e-05, "loss": 0.2147, "step": 12290 }, { "epoch": 0.96, "learning_rate": 1.0423544067268765e-05, "loss": 0.2334, "step": 12300 }, { "epoch": 0.96, "learning_rate": 1.0415758330738088e-05, "loss": 0.1718, "step": 12310 }, { "epoch": 0.96, "learning_rate": 1.0407972594207413e-05, "loss": 0.2122, "step": 12320 }, { "epoch": 0.96, "learning_rate": 1.0400186857676737e-05, "loss": 0.2273, "step": 12330 }, { "epoch": 0.96, "learning_rate": 1.039240112114606e-05, "loss": 0.1885, "step": 12340 }, { "epoch": 0.96, "learning_rate": 1.0384615384615386e-05, "loss": 0.217, "step": 12350 }, { "epoch": 0.96, "learning_rate": 1.037682964808471e-05, "loss": 0.2304, "step": 12360 }, { "epoch": 0.96, "learning_rate": 1.0369043911554033e-05, "loss": 0.178, "step": 12370 }, { "epoch": 0.96, "learning_rate": 1.0361258175023358e-05, "loss": 0.1575, "step": 12380 }, { "epoch": 0.96, "learning_rate": 1.0353472438492682e-05, "loss": 0.227, "step": 12390 }, { "epoch": 0.97, "learning_rate": 1.0345686701962006e-05, "loss": 0.2079, "step": 12400 }, { "epoch": 0.97, "learning_rate": 1.0337900965431331e-05, "loss": 0.1606, "step": 12410 }, { "epoch": 0.97, "learning_rate": 1.0330115228900655e-05, "loss": 0.2617, "step": 12420 }, { "epoch": 0.97, "learning_rate": 1.0322329492369978e-05, "loss": 0.1823, "step": 12430 }, { "epoch": 0.97, "learning_rate": 1.0314543755839303e-05, "loss": 0.1926, "step": 12440 }, { "epoch": 0.97, "learning_rate": 1.0306758019308627e-05, "loss": 0.183, "step": 12450 }, { "epoch": 0.97, "learning_rate": 1.0298972282777952e-05, "loss": 0.1285, "step": 12460 }, { "epoch": 0.97, "learning_rate": 1.0291186546247276e-05, "loss": 0.1676, "step": 12470 }, { "epoch": 0.97, "learning_rate": 1.02834008097166e-05, "loss": 0.2193, "step": 12480 }, { "epoch": 0.97, "learning_rate": 1.0275615073185925e-05, "loss": 0.1855, "step": 12490 }, { "epoch": 0.97, "learning_rate": 1.0267829336655248e-05, "loss": 0.1543, "step": 12500 }, { "epoch": 0.97, "learning_rate": 1.0260043600124572e-05, "loss": 0.1837, "step": 12510 }, { "epoch": 0.97, "learning_rate": 1.0252257863593897e-05, "loss": 0.2156, "step": 12520 }, { "epoch": 0.98, "learning_rate": 1.0244472127063221e-05, "loss": 0.2071, "step": 12530 }, { "epoch": 0.98, "learning_rate": 1.0236686390532545e-05, "loss": 0.2326, "step": 12540 }, { "epoch": 0.98, "learning_rate": 1.022890065400187e-05, "loss": 0.1803, "step": 12550 }, { "epoch": 0.98, "learning_rate": 1.0221114917471193e-05, "loss": 0.206, "step": 12560 }, { "epoch": 0.98, "learning_rate": 1.0213329180940519e-05, "loss": 0.2131, "step": 12570 }, { "epoch": 0.98, "learning_rate": 1.0205543444409842e-05, "loss": 0.2522, "step": 12580 }, { "epoch": 0.98, "learning_rate": 1.0197757707879166e-05, "loss": 0.1994, "step": 12590 }, { "epoch": 0.98, "learning_rate": 1.0189971971348491e-05, "loss": 0.1764, "step": 12600 }, { "epoch": 0.98, "learning_rate": 1.0182186234817815e-05, "loss": 0.1961, "step": 12610 }, { "epoch": 0.98, "learning_rate": 1.0174400498287139e-05, "loss": 0.2062, "step": 12620 }, { "epoch": 0.98, "learning_rate": 1.0166614761756464e-05, "loss": 0.1866, "step": 12630 }, { "epoch": 0.98, "learning_rate": 1.0158829025225787e-05, "loss": 0.2196, "step": 12640 }, { "epoch": 0.98, "learning_rate": 1.0151043288695111e-05, "loss": 0.1873, "step": 12650 }, { "epoch": 0.99, "learning_rate": 1.0143257552164436e-05, "loss": 0.1491, "step": 12660 }, { "epoch": 0.99, "learning_rate": 1.013547181563376e-05, "loss": 0.2145, "step": 12670 }, { "epoch": 0.99, "learning_rate": 1.0127686079103084e-05, "loss": 0.2338, "step": 12680 }, { "epoch": 0.99, "learning_rate": 1.0119900342572409e-05, "loss": 0.1597, "step": 12690 }, { "epoch": 0.99, "learning_rate": 1.0112114606041732e-05, "loss": 0.1908, "step": 12700 }, { "epoch": 0.99, "learning_rate": 1.0104328869511058e-05, "loss": 0.1368, "step": 12710 }, { "epoch": 0.99, "learning_rate": 1.0096543132980381e-05, "loss": 0.2238, "step": 12720 }, { "epoch": 0.99, "learning_rate": 1.0088757396449705e-05, "loss": 0.1706, "step": 12730 }, { "epoch": 0.99, "learning_rate": 1.008097165991903e-05, "loss": 0.1616, "step": 12740 }, { "epoch": 0.99, "learning_rate": 1.0073185923388354e-05, "loss": 0.1904, "step": 12750 }, { "epoch": 0.99, "learning_rate": 1.0065400186857677e-05, "loss": 0.2075, "step": 12760 }, { "epoch": 0.99, "learning_rate": 1.0057614450327003e-05, "loss": 0.1753, "step": 12770 }, { "epoch": 1.0, "learning_rate": 1.0049828713796326e-05, "loss": 0.2173, "step": 12780 }, { "epoch": 1.0, "learning_rate": 1.004204297726565e-05, "loss": 0.1809, "step": 12790 }, { "epoch": 1.0, "learning_rate": 1.0034257240734975e-05, "loss": 0.1903, "step": 12800 }, { "epoch": 1.0, "learning_rate": 1.0026471504204299e-05, "loss": 0.1887, "step": 12810 }, { "epoch": 1.0, "learning_rate": 1.0018685767673624e-05, "loss": 0.203, "step": 12820 }, { "epoch": 1.0, "learning_rate": 1.0010900031142948e-05, "loss": 0.2308, "step": 12830 }, { "epoch": 1.0, "learning_rate": 1.0003114294612271e-05, "loss": 0.1726, "step": 12840 }, { "epoch": 1.0, "learning_rate": 9.995328558081595e-06, "loss": 0.1402, "step": 12850 }, { "epoch": 1.0, "learning_rate": 9.98754282155092e-06, "loss": 0.0868, "step": 12860 }, { "epoch": 1.0, "learning_rate": 9.979757085020244e-06, "loss": 0.1362, "step": 12870 }, { "epoch": 1.0, "learning_rate": 9.971971348489567e-06, "loss": 0.0793, "step": 12880 }, { "epoch": 1.0, "learning_rate": 9.964185611958893e-06, "loss": 0.1042, "step": 12890 }, { "epoch": 1.0, "learning_rate": 9.956399875428216e-06, "loss": 0.0904, "step": 12900 }, { "epoch": 1.01, "learning_rate": 9.948614138897542e-06, "loss": 0.1634, "step": 12910 }, { "epoch": 1.01, "learning_rate": 9.940828402366864e-06, "loss": 0.1025, "step": 12920 }, { "epoch": 1.01, "learning_rate": 9.933042665836189e-06, "loss": 0.1222, "step": 12930 }, { "epoch": 1.01, "learning_rate": 9.925256929305514e-06, "loss": 0.1175, "step": 12940 }, { "epoch": 1.01, "learning_rate": 9.917471192774836e-06, "loss": 0.084, "step": 12950 }, { "epoch": 1.01, "learning_rate": 9.909685456244161e-06, "loss": 0.0964, "step": 12960 }, { "epoch": 1.01, "learning_rate": 9.901899719713485e-06, "loss": 0.0858, "step": 12970 }, { "epoch": 1.01, "learning_rate": 9.89411398318281e-06, "loss": 0.117, "step": 12980 }, { "epoch": 1.01, "learning_rate": 9.886328246652134e-06, "loss": 0.089, "step": 12990 }, { "epoch": 1.01, "learning_rate": 9.878542510121458e-06, "loss": 0.0906, "step": 13000 }, { "epoch": 1.01, "learning_rate": 9.870756773590783e-06, "loss": 0.0955, "step": 13010 }, { "epoch": 1.01, "learning_rate": 9.862971037060106e-06, "loss": 0.0976, "step": 13020 }, { "epoch": 1.01, "learning_rate": 9.85518530052943e-06, "loss": 0.1087, "step": 13030 }, { "epoch": 1.02, "learning_rate": 9.847399563998755e-06, "loss": 0.1139, "step": 13040 }, { "epoch": 1.02, "learning_rate": 9.839613827468079e-06, "loss": 0.1174, "step": 13050 }, { "epoch": 1.02, "learning_rate": 9.831828090937403e-06, "loss": 0.1103, "step": 13060 }, { "epoch": 1.02, "learning_rate": 9.824042354406728e-06, "loss": 0.148, "step": 13070 }, { "epoch": 1.02, "learning_rate": 9.816256617876051e-06, "loss": 0.0997, "step": 13080 }, { "epoch": 1.02, "learning_rate": 9.808470881345377e-06, "loss": 0.0874, "step": 13090 }, { "epoch": 1.02, "learning_rate": 9.8006851448147e-06, "loss": 0.1347, "step": 13100 }, { "epoch": 1.02, "learning_rate": 9.792899408284024e-06, "loss": 0.1064, "step": 13110 }, { "epoch": 1.02, "learning_rate": 9.78511367175335e-06, "loss": 0.09, "step": 13120 }, { "epoch": 1.02, "learning_rate": 9.777327935222673e-06, "loss": 0.1006, "step": 13130 }, { "epoch": 1.02, "learning_rate": 9.769542198691996e-06, "loss": 0.0901, "step": 13140 }, { "epoch": 1.02, "learning_rate": 9.761756462161322e-06, "loss": 0.0818, "step": 13150 }, { "epoch": 1.02, "learning_rate": 9.753970725630645e-06, "loss": 0.1102, "step": 13160 }, { "epoch": 1.03, "learning_rate": 9.746184989099969e-06, "loss": 0.1036, "step": 13170 }, { "epoch": 1.03, "learning_rate": 9.738399252569294e-06, "loss": 0.0712, "step": 13180 }, { "epoch": 1.03, "learning_rate": 9.730613516038618e-06, "loss": 0.1214, "step": 13190 }, { "epoch": 1.03, "learning_rate": 9.722827779507941e-06, "loss": 0.0889, "step": 13200 }, { "epoch": 1.03, "learning_rate": 9.715042042977267e-06, "loss": 0.1578, "step": 13210 }, { "epoch": 1.03, "learning_rate": 9.70725630644659e-06, "loss": 0.1033, "step": 13220 }, { "epoch": 1.03, "learning_rate": 9.699470569915916e-06, "loss": 0.0833, "step": 13230 }, { "epoch": 1.03, "learning_rate": 9.69168483338524e-06, "loss": 0.0876, "step": 13240 }, { "epoch": 1.03, "learning_rate": 9.683899096854563e-06, "loss": 0.0916, "step": 13250 }, { "epoch": 1.03, "learning_rate": 9.676113360323888e-06, "loss": 0.0962, "step": 13260 }, { "epoch": 1.03, "learning_rate": 9.668327623793212e-06, "loss": 0.075, "step": 13270 }, { "epoch": 1.03, "learning_rate": 9.660541887262535e-06, "loss": 0.1243, "step": 13280 }, { "epoch": 1.03, "learning_rate": 9.65275615073186e-06, "loss": 0.1184, "step": 13290 }, { "epoch": 1.04, "learning_rate": 9.644970414201184e-06, "loss": 0.0915, "step": 13300 }, { "epoch": 1.04, "learning_rate": 9.637184677670508e-06, "loss": 0.0818, "step": 13310 }, { "epoch": 1.04, "learning_rate": 9.629398941139833e-06, "loss": 0.0907, "step": 13320 }, { "epoch": 1.04, "learning_rate": 9.621613204609157e-06, "loss": 0.1174, "step": 13330 }, { "epoch": 1.04, "learning_rate": 9.613827468078482e-06, "loss": 0.0875, "step": 13340 }, { "epoch": 1.04, "learning_rate": 9.606041731547806e-06, "loss": 0.1012, "step": 13350 }, { "epoch": 1.04, "learning_rate": 9.59825599501713e-06, "loss": 0.1012, "step": 13360 }, { "epoch": 1.04, "learning_rate": 9.590470258486455e-06, "loss": 0.1122, "step": 13370 }, { "epoch": 1.04, "learning_rate": 9.582684521955777e-06, "loss": 0.1086, "step": 13380 }, { "epoch": 1.04, "learning_rate": 9.574898785425102e-06, "loss": 0.1006, "step": 13390 }, { "epoch": 1.04, "learning_rate": 9.567113048894427e-06, "loss": 0.0776, "step": 13400 }, { "epoch": 1.04, "learning_rate": 9.55932731236375e-06, "loss": 0.0899, "step": 13410 }, { "epoch": 1.04, "learning_rate": 9.551541575833074e-06, "loss": 0.1003, "step": 13420 }, { "epoch": 1.05, "learning_rate": 9.543755839302398e-06, "loss": 0.0925, "step": 13430 }, { "epoch": 1.05, "learning_rate": 9.535970102771723e-06, "loss": 0.1014, "step": 13440 }, { "epoch": 1.05, "learning_rate": 9.528184366241047e-06, "loss": 0.098, "step": 13450 }, { "epoch": 1.05, "learning_rate": 9.52039862971037e-06, "loss": 0.1273, "step": 13460 }, { "epoch": 1.05, "learning_rate": 9.512612893179696e-06, "loss": 0.101, "step": 13470 }, { "epoch": 1.05, "learning_rate": 9.50482715664902e-06, "loss": 0.0918, "step": 13480 }, { "epoch": 1.05, "learning_rate": 9.497041420118343e-06, "loss": 0.1491, "step": 13490 }, { "epoch": 1.05, "learning_rate": 9.489255683587668e-06, "loss": 0.096, "step": 13500 }, { "epoch": 1.05, "learning_rate": 9.481469947056992e-06, "loss": 0.1107, "step": 13510 }, { "epoch": 1.05, "learning_rate": 9.473684210526315e-06, "loss": 0.0973, "step": 13520 }, { "epoch": 1.05, "learning_rate": 9.46589847399564e-06, "loss": 0.1056, "step": 13530 }, { "epoch": 1.05, "learning_rate": 9.458112737464964e-06, "loss": 0.1128, "step": 13540 }, { "epoch": 1.05, "learning_rate": 9.45032700093429e-06, "loss": 0.1049, "step": 13550 }, { "epoch": 1.06, "learning_rate": 9.442541264403613e-06, "loss": 0.1248, "step": 13560 }, { "epoch": 1.06, "learning_rate": 9.434755527872937e-06, "loss": 0.1062, "step": 13570 }, { "epoch": 1.06, "learning_rate": 9.426969791342262e-06, "loss": 0.1301, "step": 13580 }, { "epoch": 1.06, "learning_rate": 9.419184054811586e-06, "loss": 0.0943, "step": 13590 }, { "epoch": 1.06, "learning_rate": 9.41139831828091e-06, "loss": 0.099, "step": 13600 }, { "epoch": 1.06, "learning_rate": 9.403612581750235e-06, "loss": 0.0961, "step": 13610 }, { "epoch": 1.06, "learning_rate": 9.395826845219558e-06, "loss": 0.0944, "step": 13620 }, { "epoch": 1.06, "learning_rate": 9.388041108688882e-06, "loss": 0.0837, "step": 13630 }, { "epoch": 1.06, "learning_rate": 9.380255372158207e-06, "loss": 0.117, "step": 13640 }, { "epoch": 1.06, "learning_rate": 9.37246963562753e-06, "loss": 0.1119, "step": 13650 }, { "epoch": 1.06, "learning_rate": 9.364683899096856e-06, "loss": 0.1023, "step": 13660 }, { "epoch": 1.06, "learning_rate": 9.35689816256618e-06, "loss": 0.1201, "step": 13670 }, { "epoch": 1.07, "learning_rate": 9.349112426035503e-06, "loss": 0.0881, "step": 13680 }, { "epoch": 1.07, "learning_rate": 9.341326689504829e-06, "loss": 0.1116, "step": 13690 }, { "epoch": 1.07, "learning_rate": 9.333540952974152e-06, "loss": 0.1148, "step": 13700 }, { "epoch": 1.07, "learning_rate": 9.325755216443476e-06, "loss": 0.1727, "step": 13710 }, { "epoch": 1.07, "learning_rate": 9.317969479912801e-06, "loss": 0.0856, "step": 13720 }, { "epoch": 1.07, "learning_rate": 9.310183743382125e-06, "loss": 0.109, "step": 13730 }, { "epoch": 1.07, "learning_rate": 9.302398006851448e-06, "loss": 0.1113, "step": 13740 }, { "epoch": 1.07, "learning_rate": 9.294612270320774e-06, "loss": 0.1035, "step": 13750 }, { "epoch": 1.07, "learning_rate": 9.286826533790097e-06, "loss": 0.1176, "step": 13760 }, { "epoch": 1.07, "learning_rate": 9.279040797259421e-06, "loss": 0.1036, "step": 13770 }, { "epoch": 1.07, "learning_rate": 9.271255060728746e-06, "loss": 0.0757, "step": 13780 }, { "epoch": 1.07, "learning_rate": 9.26346932419807e-06, "loss": 0.0792, "step": 13790 }, { "epoch": 1.07, "learning_rate": 9.255683587667395e-06, "loss": 0.0927, "step": 13800 }, { "epoch": 1.08, "learning_rate": 9.247897851136719e-06, "loss": 0.0981, "step": 13810 }, { "epoch": 1.08, "learning_rate": 9.240112114606042e-06, "loss": 0.0994, "step": 13820 }, { "epoch": 1.08, "learning_rate": 9.232326378075368e-06, "loss": 0.1099, "step": 13830 }, { "epoch": 1.08, "learning_rate": 9.224540641544691e-06, "loss": 0.0999, "step": 13840 }, { "epoch": 1.08, "learning_rate": 9.216754905014015e-06, "loss": 0.0755, "step": 13850 }, { "epoch": 1.08, "learning_rate": 9.20896916848334e-06, "loss": 0.1006, "step": 13860 }, { "epoch": 1.08, "learning_rate": 9.201183431952664e-06, "loss": 0.0706, "step": 13870 }, { "epoch": 1.08, "learning_rate": 9.193397695421987e-06, "loss": 0.0903, "step": 13880 }, { "epoch": 1.08, "learning_rate": 9.185611958891311e-06, "loss": 0.1197, "step": 13890 }, { "epoch": 1.08, "learning_rate": 9.177826222360636e-06, "loss": 0.1052, "step": 13900 }, { "epoch": 1.08, "learning_rate": 9.170040485829962e-06, "loss": 0.0887, "step": 13910 }, { "epoch": 1.08, "learning_rate": 9.162254749299283e-06, "loss": 0.1081, "step": 13920 }, { "epoch": 1.08, "learning_rate": 9.154469012768609e-06, "loss": 0.1074, "step": 13930 }, { "epoch": 1.09, "learning_rate": 9.146683276237934e-06, "loss": 0.072, "step": 13940 }, { "epoch": 1.09, "learning_rate": 9.138897539707256e-06, "loss": 0.1217, "step": 13950 }, { "epoch": 1.09, "learning_rate": 9.131111803176581e-06, "loss": 0.0872, "step": 13960 }, { "epoch": 1.09, "learning_rate": 9.123326066645905e-06, "loss": 0.0789, "step": 13970 }, { "epoch": 1.09, "learning_rate": 9.11554033011523e-06, "loss": 0.0576, "step": 13980 }, { "epoch": 1.09, "learning_rate": 9.107754593584554e-06, "loss": 0.0967, "step": 13990 }, { "epoch": 1.09, "learning_rate": 9.099968857053877e-06, "loss": 0.0982, "step": 14000 }, { "epoch": 1.09, "learning_rate": 9.092183120523203e-06, "loss": 0.1194, "step": 14010 }, { "epoch": 1.09, "learning_rate": 9.084397383992526e-06, "loss": 0.0902, "step": 14020 }, { "epoch": 1.09, "learning_rate": 9.07661164746185e-06, "loss": 0.0964, "step": 14030 }, { "epoch": 1.09, "learning_rate": 9.068825910931175e-06, "loss": 0.1205, "step": 14040 }, { "epoch": 1.09, "learning_rate": 9.061040174400499e-06, "loss": 0.1087, "step": 14050 }, { "epoch": 1.09, "learning_rate": 9.053254437869822e-06, "loss": 0.1053, "step": 14060 }, { "epoch": 1.1, "learning_rate": 9.045468701339148e-06, "loss": 0.1332, "step": 14070 }, { "epoch": 1.1, "learning_rate": 9.037682964808471e-06, "loss": 0.1147, "step": 14080 }, { "epoch": 1.1, "learning_rate": 9.029897228277797e-06, "loss": 0.1198, "step": 14090 }, { "epoch": 1.1, "learning_rate": 9.02211149174712e-06, "loss": 0.0964, "step": 14100 }, { "epoch": 1.1, "learning_rate": 9.014325755216444e-06, "loss": 0.0887, "step": 14110 }, { "epoch": 1.1, "learning_rate": 9.006540018685769e-06, "loss": 0.0665, "step": 14120 }, { "epoch": 1.1, "learning_rate": 8.998754282155093e-06, "loss": 0.1118, "step": 14130 }, { "epoch": 1.1, "learning_rate": 8.990968545624416e-06, "loss": 0.1073, "step": 14140 }, { "epoch": 1.1, "learning_rate": 8.983182809093742e-06, "loss": 0.1199, "step": 14150 }, { "epoch": 1.1, "learning_rate": 8.975397072563065e-06, "loss": 0.1028, "step": 14160 }, { "epoch": 1.1, "learning_rate": 8.967611336032389e-06, "loss": 0.0827, "step": 14170 }, { "epoch": 1.1, "learning_rate": 8.959825599501714e-06, "loss": 0.0956, "step": 14180 }, { "epoch": 1.1, "learning_rate": 8.952039862971038e-06, "loss": 0.0765, "step": 14190 }, { "epoch": 1.11, "learning_rate": 8.944254126440361e-06, "loss": 0.0938, "step": 14200 }, { "epoch": 1.11, "learning_rate": 8.936468389909687e-06, "loss": 0.0877, "step": 14210 }, { "epoch": 1.11, "learning_rate": 8.92868265337901e-06, "loss": 0.0644, "step": 14220 }, { "epoch": 1.11, "learning_rate": 8.920896916848336e-06, "loss": 0.1061, "step": 14230 }, { "epoch": 1.11, "learning_rate": 8.913111180317659e-06, "loss": 0.0974, "step": 14240 }, { "epoch": 1.11, "learning_rate": 8.905325443786983e-06, "loss": 0.096, "step": 14250 }, { "epoch": 1.11, "learning_rate": 8.897539707256308e-06, "loss": 0.0791, "step": 14260 }, { "epoch": 1.11, "learning_rate": 8.889753970725632e-06, "loss": 0.0857, "step": 14270 }, { "epoch": 1.11, "learning_rate": 8.881968234194955e-06, "loss": 0.1289, "step": 14280 }, { "epoch": 1.11, "learning_rate": 8.87418249766428e-06, "loss": 0.0815, "step": 14290 }, { "epoch": 1.11, "learning_rate": 8.866396761133604e-06, "loss": 0.0872, "step": 14300 }, { "epoch": 1.11, "learning_rate": 8.858611024602928e-06, "loss": 0.1081, "step": 14310 }, { "epoch": 1.11, "learning_rate": 8.850825288072253e-06, "loss": 0.0825, "step": 14320 }, { "epoch": 1.12, "learning_rate": 8.843039551541577e-06, "loss": 0.0949, "step": 14330 }, { "epoch": 1.12, "learning_rate": 8.835253815010902e-06, "loss": 0.1213, "step": 14340 }, { "epoch": 1.12, "learning_rate": 8.827468078480224e-06, "loss": 0.0725, "step": 14350 }, { "epoch": 1.12, "learning_rate": 8.81968234194955e-06, "loss": 0.0984, "step": 14360 }, { "epoch": 1.12, "learning_rate": 8.811896605418874e-06, "loss": 0.1204, "step": 14370 }, { "epoch": 1.12, "learning_rate": 8.804110868888196e-06, "loss": 0.1016, "step": 14380 }, { "epoch": 1.12, "learning_rate": 8.796325132357522e-06, "loss": 0.0758, "step": 14390 }, { "epoch": 1.12, "learning_rate": 8.788539395826847e-06, "loss": 0.0979, "step": 14400 }, { "epoch": 1.12, "learning_rate": 8.78075365929617e-06, "loss": 0.0934, "step": 14410 }, { "epoch": 1.12, "learning_rate": 8.772967922765494e-06, "loss": 0.1046, "step": 14420 }, { "epoch": 1.12, "learning_rate": 8.765182186234818e-06, "loss": 0.0962, "step": 14430 }, { "epoch": 1.12, "learning_rate": 8.757396449704143e-06, "loss": 0.1194, "step": 14440 }, { "epoch": 1.13, "learning_rate": 8.749610713173467e-06, "loss": 0.1216, "step": 14450 }, { "epoch": 1.13, "learning_rate": 8.74182497664279e-06, "loss": 0.1329, "step": 14460 }, { "epoch": 1.13, "learning_rate": 8.734039240112116e-06, "loss": 0.0969, "step": 14470 }, { "epoch": 1.13, "learning_rate": 8.72625350358144e-06, "loss": 0.0835, "step": 14480 }, { "epoch": 1.13, "learning_rate": 8.718467767050763e-06, "loss": 0.1152, "step": 14490 }, { "epoch": 1.13, "learning_rate": 8.710682030520088e-06, "loss": 0.0784, "step": 14500 }, { "epoch": 1.13, "learning_rate": 8.702896293989412e-06, "loss": 0.1241, "step": 14510 }, { "epoch": 1.13, "learning_rate": 8.695110557458735e-06, "loss": 0.1151, "step": 14520 }, { "epoch": 1.13, "learning_rate": 8.68732482092806e-06, "loss": 0.0946, "step": 14530 }, { "epoch": 1.13, "learning_rate": 8.679539084397384e-06, "loss": 0.1031, "step": 14540 }, { "epoch": 1.13, "learning_rate": 8.67175334786671e-06, "loss": 0.1101, "step": 14550 }, { "epoch": 1.13, "learning_rate": 8.663967611336033e-06, "loss": 0.0964, "step": 14560 }, { "epoch": 1.13, "learning_rate": 8.656181874805357e-06, "loss": 0.106, "step": 14570 }, { "epoch": 1.14, "learning_rate": 8.648396138274682e-06, "loss": 0.0775, "step": 14580 }, { "epoch": 1.14, "learning_rate": 8.640610401744006e-06, "loss": 0.0869, "step": 14590 }, { "epoch": 1.14, "learning_rate": 8.63282466521333e-06, "loss": 0.096, "step": 14600 }, { "epoch": 1.14, "learning_rate": 8.625038928682655e-06, "loss": 0.0728, "step": 14610 }, { "epoch": 1.14, "learning_rate": 8.617253192151978e-06, "loss": 0.0959, "step": 14620 }, { "epoch": 1.14, "learning_rate": 8.609467455621302e-06, "loss": 0.0824, "step": 14630 }, { "epoch": 1.14, "learning_rate": 8.601681719090627e-06, "loss": 0.0838, "step": 14640 }, { "epoch": 1.14, "learning_rate": 8.59389598255995e-06, "loss": 0.1209, "step": 14650 }, { "epoch": 1.14, "learning_rate": 8.586110246029276e-06, "loss": 0.1263, "step": 14660 }, { "epoch": 1.14, "learning_rate": 8.5783245094986e-06, "loss": 0.0975, "step": 14670 }, { "epoch": 1.14, "learning_rate": 8.570538772967923e-06, "loss": 0.0879, "step": 14680 }, { "epoch": 1.14, "learning_rate": 8.562753036437248e-06, "loss": 0.0806, "step": 14690 }, { "epoch": 1.14, "learning_rate": 8.554967299906572e-06, "loss": 0.0818, "step": 14700 }, { "epoch": 1.15, "learning_rate": 8.547181563375896e-06, "loss": 0.1075, "step": 14710 }, { "epoch": 1.15, "learning_rate": 8.539395826845221e-06, "loss": 0.1059, "step": 14720 }, { "epoch": 1.15, "learning_rate": 8.531610090314545e-06, "loss": 0.0742, "step": 14730 }, { "epoch": 1.15, "learning_rate": 8.523824353783868e-06, "loss": 0.1018, "step": 14740 }, { "epoch": 1.15, "learning_rate": 8.516038617253193e-06, "loss": 0.1284, "step": 14750 }, { "epoch": 1.15, "learning_rate": 8.508252880722517e-06, "loss": 0.0703, "step": 14760 }, { "epoch": 1.15, "learning_rate": 8.50046714419184e-06, "loss": 0.1002, "step": 14770 }, { "epoch": 1.15, "learning_rate": 8.492681407661166e-06, "loss": 0.0786, "step": 14780 }, { "epoch": 1.15, "learning_rate": 8.48489567113049e-06, "loss": 0.0997, "step": 14790 }, { "epoch": 1.15, "learning_rate": 8.477109934599815e-06, "loss": 0.0951, "step": 14800 }, { "epoch": 1.15, "learning_rate": 8.469324198069137e-06, "loss": 0.0839, "step": 14810 }, { "epoch": 1.15, "learning_rate": 8.461538461538462e-06, "loss": 0.0955, "step": 14820 }, { "epoch": 1.15, "learning_rate": 8.453752725007787e-06, "loss": 0.11, "step": 14830 }, { "epoch": 1.16, "learning_rate": 8.445966988477111e-06, "loss": 0.137, "step": 14840 }, { "epoch": 1.16, "learning_rate": 8.438181251946435e-06, "loss": 0.0971, "step": 14850 }, { "epoch": 1.16, "learning_rate": 8.43039551541576e-06, "loss": 0.0825, "step": 14860 }, { "epoch": 1.16, "learning_rate": 8.422609778885084e-06, "loss": 0.0767, "step": 14870 }, { "epoch": 1.16, "learning_rate": 8.414824042354407e-06, "loss": 0.0661, "step": 14880 }, { "epoch": 1.16, "learning_rate": 8.40703830582373e-06, "loss": 0.1102, "step": 14890 }, { "epoch": 1.16, "learning_rate": 8.399252569293056e-06, "loss": 0.1012, "step": 14900 }, { "epoch": 1.16, "learning_rate": 8.391466832762381e-06, "loss": 0.1119, "step": 14910 }, { "epoch": 1.16, "learning_rate": 8.383681096231703e-06, "loss": 0.1084, "step": 14920 }, { "epoch": 1.16, "learning_rate": 8.375895359701029e-06, "loss": 0.1127, "step": 14930 }, { "epoch": 1.16, "learning_rate": 8.368109623170352e-06, "loss": 0.0896, "step": 14940 }, { "epoch": 1.16, "learning_rate": 8.360323886639676e-06, "loss": 0.132, "step": 14950 }, { "epoch": 1.16, "learning_rate": 8.352538150109001e-06, "loss": 0.1157, "step": 14960 }, { "epoch": 1.17, "learning_rate": 8.344752413578325e-06, "loss": 0.079, "step": 14970 }, { "epoch": 1.17, "learning_rate": 8.33696667704765e-06, "loss": 0.0762, "step": 14980 }, { "epoch": 1.17, "learning_rate": 8.329180940516974e-06, "loss": 0.116, "step": 14990 }, { "epoch": 1.17, "learning_rate": 8.321395203986297e-06, "loss": 0.1088, "step": 15000 }, { "epoch": 1.17, "learning_rate": 8.313609467455622e-06, "loss": 0.081, "step": 15010 }, { "epoch": 1.17, "learning_rate": 8.305823730924946e-06, "loss": 0.1092, "step": 15020 }, { "epoch": 1.17, "learning_rate": 8.29803799439427e-06, "loss": 0.0923, "step": 15030 }, { "epoch": 1.17, "learning_rate": 8.290252257863595e-06, "loss": 0.0755, "step": 15040 }, { "epoch": 1.17, "learning_rate": 8.282466521332919e-06, "loss": 0.0827, "step": 15050 }, { "epoch": 1.17, "learning_rate": 8.274680784802242e-06, "loss": 0.0851, "step": 15060 }, { "epoch": 1.17, "learning_rate": 8.266895048271567e-06, "loss": 0.1001, "step": 15070 }, { "epoch": 1.17, "learning_rate": 8.259109311740891e-06, "loss": 0.0813, "step": 15080 }, { "epoch": 1.17, "learning_rate": 8.251323575210216e-06, "loss": 0.0875, "step": 15090 }, { "epoch": 1.18, "learning_rate": 8.24353783867954e-06, "loss": 0.0938, "step": 15100 }, { "epoch": 1.18, "learning_rate": 8.235752102148864e-06, "loss": 0.0932, "step": 15110 }, { "epoch": 1.18, "learning_rate": 8.227966365618189e-06, "loss": 0.1039, "step": 15120 }, { "epoch": 1.18, "learning_rate": 8.220180629087512e-06, "loss": 0.1193, "step": 15130 }, { "epoch": 1.18, "learning_rate": 8.212394892556836e-06, "loss": 0.0989, "step": 15140 }, { "epoch": 1.18, "learning_rate": 8.204609156026161e-06, "loss": 0.1035, "step": 15150 }, { "epoch": 1.18, "learning_rate": 8.196823419495485e-06, "loss": 0.1034, "step": 15160 }, { "epoch": 1.18, "learning_rate": 8.189037682964809e-06, "loss": 0.0728, "step": 15170 }, { "epoch": 1.18, "learning_rate": 8.181251946434134e-06, "loss": 0.0792, "step": 15180 }, { "epoch": 1.18, "learning_rate": 8.173466209903458e-06, "loss": 0.0954, "step": 15190 }, { "epoch": 1.18, "learning_rate": 8.165680473372781e-06, "loss": 0.0944, "step": 15200 }, { "epoch": 1.18, "learning_rate": 8.157894736842106e-06, "loss": 0.1044, "step": 15210 }, { "epoch": 1.18, "learning_rate": 8.15010900031143e-06, "loss": 0.0921, "step": 15220 }, { "epoch": 1.19, "learning_rate": 8.142323263780755e-06, "loss": 0.107, "step": 15230 }, { "epoch": 1.19, "learning_rate": 8.134537527250079e-06, "loss": 0.0823, "step": 15240 }, { "epoch": 1.19, "learning_rate": 8.126751790719403e-06, "loss": 0.0961, "step": 15250 }, { "epoch": 1.19, "learning_rate": 8.118966054188728e-06, "loss": 0.0944, "step": 15260 }, { "epoch": 1.19, "learning_rate": 8.11118031765805e-06, "loss": 0.1194, "step": 15270 }, { "epoch": 1.19, "learning_rate": 8.103394581127375e-06, "loss": 0.0987, "step": 15280 }, { "epoch": 1.19, "learning_rate": 8.0956088445967e-06, "loss": 0.1077, "step": 15290 }, { "epoch": 1.19, "learning_rate": 8.087823108066024e-06, "loss": 0.0814, "step": 15300 }, { "epoch": 1.19, "learning_rate": 8.080037371535348e-06, "loss": 0.0841, "step": 15310 }, { "epoch": 1.19, "learning_rate": 8.072251635004673e-06, "loss": 0.0788, "step": 15320 }, { "epoch": 1.19, "learning_rate": 8.064465898473996e-06, "loss": 0.099, "step": 15330 }, { "epoch": 1.19, "learning_rate": 8.056680161943322e-06, "loss": 0.0757, "step": 15340 }, { "epoch": 1.2, "learning_rate": 8.048894425412644e-06, "loss": 0.0975, "step": 15350 }, { "epoch": 1.2, "learning_rate": 8.041108688881969e-06, "loss": 0.1129, "step": 15360 }, { "epoch": 1.2, "learning_rate": 8.033322952351294e-06, "loss": 0.0953, "step": 15370 }, { "epoch": 1.2, "learning_rate": 8.025537215820616e-06, "loss": 0.0825, "step": 15380 }, { "epoch": 1.2, "learning_rate": 8.017751479289941e-06, "loss": 0.1078, "step": 15390 }, { "epoch": 1.2, "learning_rate": 8.009965742759265e-06, "loss": 0.1187, "step": 15400 }, { "epoch": 1.2, "learning_rate": 8.00218000622859e-06, "loss": 0.0976, "step": 15410 }, { "epoch": 1.2, "learning_rate": 7.994394269697914e-06, "loss": 0.0899, "step": 15420 }, { "epoch": 1.2, "learning_rate": 7.986608533167238e-06, "loss": 0.0967, "step": 15430 }, { "epoch": 1.2, "learning_rate": 7.978822796636563e-06, "loss": 0.0962, "step": 15440 }, { "epoch": 1.2, "learning_rate": 7.971037060105886e-06, "loss": 0.0989, "step": 15450 }, { "epoch": 1.2, "learning_rate": 7.96325132357521e-06, "loss": 0.1049, "step": 15460 }, { "epoch": 1.2, "learning_rate": 7.955465587044535e-06, "loss": 0.0862, "step": 15470 }, { "epoch": 1.21, "learning_rate": 7.947679850513859e-06, "loss": 0.0736, "step": 15480 }, { "epoch": 1.21, "learning_rate": 7.939894113983183e-06, "loss": 0.0788, "step": 15490 }, { "epoch": 1.21, "learning_rate": 7.932108377452508e-06, "loss": 0.1044, "step": 15500 }, { "epoch": 1.21, "learning_rate": 7.924322640921832e-06, "loss": 0.0755, "step": 15510 }, { "epoch": 1.21, "learning_rate": 7.916536904391155e-06, "loss": 0.102, "step": 15520 }, { "epoch": 1.21, "learning_rate": 7.90875116786048e-06, "loss": 0.0904, "step": 15530 }, { "epoch": 1.21, "learning_rate": 7.900965431329804e-06, "loss": 0.0756, "step": 15540 }, { "epoch": 1.21, "learning_rate": 7.89317969479913e-06, "loss": 0.133, "step": 15550 }, { "epoch": 1.21, "learning_rate": 7.885393958268453e-06, "loss": 0.0754, "step": 15560 }, { "epoch": 1.21, "learning_rate": 7.877608221737777e-06, "loss": 0.0811, "step": 15570 }, { "epoch": 1.21, "learning_rate": 7.869822485207102e-06, "loss": 0.0692, "step": 15580 }, { "epoch": 1.21, "learning_rate": 7.862036748676425e-06, "loss": 0.0918, "step": 15590 }, { "epoch": 1.21, "learning_rate": 7.854251012145749e-06, "loss": 0.0908, "step": 15600 }, { "epoch": 1.22, "learning_rate": 7.846465275615074e-06, "loss": 0.1203, "step": 15610 }, { "epoch": 1.22, "learning_rate": 7.838679539084398e-06, "loss": 0.1036, "step": 15620 }, { "epoch": 1.22, "learning_rate": 7.830893802553722e-06, "loss": 0.089, "step": 15630 }, { "epoch": 1.22, "learning_rate": 7.823108066023047e-06, "loss": 0.0854, "step": 15640 }, { "epoch": 1.22, "learning_rate": 7.81532232949237e-06, "loss": 0.0985, "step": 15650 }, { "epoch": 1.22, "learning_rate": 7.807536592961696e-06, "loss": 0.1006, "step": 15660 }, { "epoch": 1.22, "learning_rate": 7.79975085643102e-06, "loss": 0.0798, "step": 15670 }, { "epoch": 1.22, "learning_rate": 7.791965119900343e-06, "loss": 0.0875, "step": 15680 }, { "epoch": 1.22, "learning_rate": 7.784179383369668e-06, "loss": 0.0901, "step": 15690 }, { "epoch": 1.22, "learning_rate": 7.776393646838992e-06, "loss": 0.0961, "step": 15700 }, { "epoch": 1.22, "learning_rate": 7.768607910308315e-06, "loss": 0.0895, "step": 15710 }, { "epoch": 1.22, "learning_rate": 7.76082217377764e-06, "loss": 0.1057, "step": 15720 }, { "epoch": 1.22, "learning_rate": 7.753036437246964e-06, "loss": 0.088, "step": 15730 }, { "epoch": 1.23, "learning_rate": 7.745250700716288e-06, "loss": 0.0826, "step": 15740 }, { "epoch": 1.23, "learning_rate": 7.737464964185613e-06, "loss": 0.0609, "step": 15750 }, { "epoch": 1.23, "learning_rate": 7.729679227654937e-06, "loss": 0.1069, "step": 15760 }, { "epoch": 1.23, "learning_rate": 7.72189349112426e-06, "loss": 0.0939, "step": 15770 }, { "epoch": 1.23, "learning_rate": 7.714107754593586e-06, "loss": 0.0867, "step": 15780 }, { "epoch": 1.23, "learning_rate": 7.70632201806291e-06, "loss": 0.0757, "step": 15790 }, { "epoch": 1.23, "learning_rate": 7.698536281532235e-06, "loss": 0.1131, "step": 15800 }, { "epoch": 1.23, "learning_rate": 7.690750545001557e-06, "loss": 0.086, "step": 15810 }, { "epoch": 1.23, "learning_rate": 7.682964808470882e-06, "loss": 0.0866, "step": 15820 }, { "epoch": 1.23, "learning_rate": 7.675179071940207e-06, "loss": 0.0794, "step": 15830 }, { "epoch": 1.23, "learning_rate": 7.66739333540953e-06, "loss": 0.064, "step": 15840 }, { "epoch": 1.23, "learning_rate": 7.659607598878854e-06, "loss": 0.1027, "step": 15850 }, { "epoch": 1.23, "learning_rate": 7.651821862348178e-06, "loss": 0.09, "step": 15860 }, { "epoch": 1.24, "learning_rate": 7.644036125817503e-06, "loss": 0.0792, "step": 15870 }, { "epoch": 1.24, "learning_rate": 7.636250389286827e-06, "loss": 0.1208, "step": 15880 }, { "epoch": 1.24, "learning_rate": 7.628464652756151e-06, "loss": 0.0938, "step": 15890 }, { "epoch": 1.24, "learning_rate": 7.620678916225476e-06, "loss": 0.0949, "step": 15900 }, { "epoch": 1.24, "learning_rate": 7.6128931796948e-06, "loss": 0.094, "step": 15910 }, { "epoch": 1.24, "learning_rate": 7.605107443164124e-06, "loss": 0.1024, "step": 15920 }, { "epoch": 1.24, "learning_rate": 7.597321706633448e-06, "loss": 0.1052, "step": 15930 }, { "epoch": 1.24, "learning_rate": 7.589535970102773e-06, "loss": 0.1041, "step": 15940 }, { "epoch": 1.24, "learning_rate": 7.581750233572096e-06, "loss": 0.0907, "step": 15950 }, { "epoch": 1.24, "learning_rate": 7.573964497041421e-06, "loss": 0.1208, "step": 15960 }, { "epoch": 1.24, "learning_rate": 7.566178760510745e-06, "loss": 0.0868, "step": 15970 }, { "epoch": 1.24, "learning_rate": 7.55839302398007e-06, "loss": 0.0865, "step": 15980 }, { "epoch": 1.24, "learning_rate": 7.550607287449393e-06, "loss": 0.0884, "step": 15990 }, { "epoch": 1.25, "learning_rate": 7.542821550918718e-06, "loss": 0.066, "step": 16000 }, { "epoch": 1.25, "learning_rate": 7.535035814388042e-06, "loss": 0.0754, "step": 16010 }, { "epoch": 1.25, "learning_rate": 7.527250077857365e-06, "loss": 0.0815, "step": 16020 }, { "epoch": 1.25, "learning_rate": 7.5194643413266895e-06, "loss": 0.0702, "step": 16030 }, { "epoch": 1.25, "learning_rate": 7.511678604796015e-06, "loss": 0.1028, "step": 16040 }, { "epoch": 1.25, "learning_rate": 7.503892868265339e-06, "loss": 0.0928, "step": 16050 }, { "epoch": 1.25, "learning_rate": 7.496107131734662e-06, "loss": 0.1022, "step": 16060 }, { "epoch": 1.25, "learning_rate": 7.4883213952039864e-06, "loss": 0.0959, "step": 16070 }, { "epoch": 1.25, "learning_rate": 7.480535658673312e-06, "loss": 0.0732, "step": 16080 }, { "epoch": 1.25, "learning_rate": 7.472749922142636e-06, "loss": 0.0803, "step": 16090 }, { "epoch": 1.25, "learning_rate": 7.464964185611959e-06, "loss": 0.0852, "step": 16100 }, { "epoch": 1.25, "learning_rate": 7.457178449081283e-06, "loss": 0.1012, "step": 16110 }, { "epoch": 1.26, "learning_rate": 7.449392712550608e-06, "loss": 0.0867, "step": 16120 }, { "epoch": 1.26, "learning_rate": 7.4416069760199315e-06, "loss": 0.075, "step": 16130 }, { "epoch": 1.26, "learning_rate": 7.433821239489256e-06, "loss": 0.0927, "step": 16140 }, { "epoch": 1.26, "learning_rate": 7.42603550295858e-06, "loss": 0.111, "step": 16150 }, { "epoch": 1.26, "learning_rate": 7.418249766427905e-06, "loss": 0.0784, "step": 16160 }, { "epoch": 1.26, "learning_rate": 7.4104640298972284e-06, "loss": 0.0985, "step": 16170 }, { "epoch": 1.26, "learning_rate": 7.402678293366553e-06, "loss": 0.0681, "step": 16180 }, { "epoch": 1.26, "learning_rate": 7.394892556835877e-06, "loss": 0.1078, "step": 16190 }, { "epoch": 1.26, "learning_rate": 7.387106820305201e-06, "loss": 0.0726, "step": 16200 }, { "epoch": 1.26, "learning_rate": 7.379321083774525e-06, "loss": 0.1075, "step": 16210 }, { "epoch": 1.26, "learning_rate": 7.37153534724385e-06, "loss": 0.0938, "step": 16220 }, { "epoch": 1.26, "learning_rate": 7.363749610713174e-06, "loss": 0.1013, "step": 16230 }, { "epoch": 1.26, "learning_rate": 7.355963874182498e-06, "loss": 0.1037, "step": 16240 }, { "epoch": 1.27, "learning_rate": 7.348178137651822e-06, "loss": 0.0574, "step": 16250 }, { "epoch": 1.27, "learning_rate": 7.340392401121147e-06, "loss": 0.0783, "step": 16260 }, { "epoch": 1.27, "learning_rate": 7.33260666459047e-06, "loss": 0.0768, "step": 16270 }, { "epoch": 1.27, "learning_rate": 7.324820928059795e-06, "loss": 0.0784, "step": 16280 }, { "epoch": 1.27, "learning_rate": 7.317035191529119e-06, "loss": 0.1078, "step": 16290 }, { "epoch": 1.27, "learning_rate": 7.309249454998444e-06, "loss": 0.0976, "step": 16300 }, { "epoch": 1.27, "learning_rate": 7.301463718467767e-06, "loss": 0.0803, "step": 16310 }, { "epoch": 1.27, "learning_rate": 7.293677981937092e-06, "loss": 0.0937, "step": 16320 }, { "epoch": 1.27, "learning_rate": 7.285892245406416e-06, "loss": 0.1217, "step": 16330 }, { "epoch": 1.27, "learning_rate": 7.278106508875741e-06, "loss": 0.0702, "step": 16340 }, { "epoch": 1.27, "learning_rate": 7.270320772345064e-06, "loss": 0.0911, "step": 16350 }, { "epoch": 1.27, "learning_rate": 7.262535035814389e-06, "loss": 0.097, "step": 16360 }, { "epoch": 1.27, "learning_rate": 7.254749299283713e-06, "loss": 0.0741, "step": 16370 }, { "epoch": 1.28, "learning_rate": 7.246963562753037e-06, "loss": 0.1093, "step": 16380 }, { "epoch": 1.28, "learning_rate": 7.239177826222361e-06, "loss": 0.0761, "step": 16390 }, { "epoch": 1.28, "learning_rate": 7.231392089691686e-06, "loss": 0.0818, "step": 16400 }, { "epoch": 1.28, "learning_rate": 7.22360635316101e-06, "loss": 0.102, "step": 16410 }, { "epoch": 1.28, "learning_rate": 7.215820616630334e-06, "loss": 0.0706, "step": 16420 }, { "epoch": 1.28, "learning_rate": 7.208034880099658e-06, "loss": 0.0957, "step": 16430 }, { "epoch": 1.28, "learning_rate": 7.200249143568983e-06, "loss": 0.1091, "step": 16440 }, { "epoch": 1.28, "learning_rate": 7.192463407038306e-06, "loss": 0.0743, "step": 16450 }, { "epoch": 1.28, "learning_rate": 7.184677670507631e-06, "loss": 0.0681, "step": 16460 }, { "epoch": 1.28, "learning_rate": 7.176891933976955e-06, "loss": 0.0797, "step": 16470 }, { "epoch": 1.28, "learning_rate": 7.16910619744628e-06, "loss": 0.0789, "step": 16480 }, { "epoch": 1.28, "learning_rate": 7.1613204609156024e-06, "loss": 0.0784, "step": 16490 }, { "epoch": 1.28, "learning_rate": 7.153534724384928e-06, "loss": 0.1042, "step": 16500 }, { "epoch": 1.29, "learning_rate": 7.145748987854252e-06, "loss": 0.0847, "step": 16510 }, { "epoch": 1.29, "learning_rate": 7.137963251323575e-06, "loss": 0.062, "step": 16520 }, { "epoch": 1.29, "learning_rate": 7.130177514792899e-06, "loss": 0.1022, "step": 16530 }, { "epoch": 1.29, "learning_rate": 7.122391778262225e-06, "loss": 0.0677, "step": 16540 }, { "epoch": 1.29, "learning_rate": 7.114606041731549e-06, "loss": 0.0943, "step": 16550 }, { "epoch": 1.29, "learning_rate": 7.106820305200872e-06, "loss": 0.0742, "step": 16560 }, { "epoch": 1.29, "learning_rate": 7.099034568670196e-06, "loss": 0.0905, "step": 16570 }, { "epoch": 1.29, "learning_rate": 7.091248832139521e-06, "loss": 0.1186, "step": 16580 }, { "epoch": 1.29, "learning_rate": 7.083463095608846e-06, "loss": 0.0707, "step": 16590 }, { "epoch": 1.29, "learning_rate": 7.075677359078169e-06, "loss": 0.1026, "step": 16600 }, { "epoch": 1.29, "learning_rate": 7.067891622547493e-06, "loss": 0.0956, "step": 16610 }, { "epoch": 1.29, "learning_rate": 7.060105886016818e-06, "loss": 0.0849, "step": 16620 }, { "epoch": 1.29, "learning_rate": 7.052320149486141e-06, "loss": 0.1157, "step": 16630 }, { "epoch": 1.3, "learning_rate": 7.044534412955466e-06, "loss": 0.1055, "step": 16640 }, { "epoch": 1.3, "learning_rate": 7.03674867642479e-06, "loss": 0.0805, "step": 16650 }, { "epoch": 1.3, "learning_rate": 7.028962939894115e-06, "loss": 0.0785, "step": 16660 }, { "epoch": 1.3, "learning_rate": 7.021177203363438e-06, "loss": 0.0964, "step": 16670 }, { "epoch": 1.3, "learning_rate": 7.013391466832763e-06, "loss": 0.0818, "step": 16680 }, { "epoch": 1.3, "learning_rate": 7.005605730302087e-06, "loss": 0.0897, "step": 16690 }, { "epoch": 1.3, "learning_rate": 6.997819993771411e-06, "loss": 0.0734, "step": 16700 }, { "epoch": 1.3, "learning_rate": 6.990034257240735e-06, "loss": 0.0798, "step": 16710 }, { "epoch": 1.3, "learning_rate": 6.98224852071006e-06, "loss": 0.1029, "step": 16720 }, { "epoch": 1.3, "learning_rate": 6.974462784179384e-06, "loss": 0.0951, "step": 16730 }, { "epoch": 1.3, "learning_rate": 6.966677047648708e-06, "loss": 0.0863, "step": 16740 }, { "epoch": 1.3, "learning_rate": 6.958891311118032e-06, "loss": 0.1014, "step": 16750 }, { "epoch": 1.3, "learning_rate": 6.951105574587357e-06, "loss": 0.0684, "step": 16760 }, { "epoch": 1.31, "learning_rate": 6.94331983805668e-06, "loss": 0.0848, "step": 16770 }, { "epoch": 1.31, "learning_rate": 6.935534101526005e-06, "loss": 0.1228, "step": 16780 }, { "epoch": 1.31, "learning_rate": 6.927748364995329e-06, "loss": 0.0871, "step": 16790 }, { "epoch": 1.31, "learning_rate": 6.919962628464654e-06, "loss": 0.0718, "step": 16800 }, { "epoch": 1.31, "learning_rate": 6.912176891933977e-06, "loss": 0.0939, "step": 16810 }, { "epoch": 1.31, "learning_rate": 6.904391155403302e-06, "loss": 0.0972, "step": 16820 }, { "epoch": 1.31, "learning_rate": 6.896605418872626e-06, "loss": 0.0572, "step": 16830 }, { "epoch": 1.31, "learning_rate": 6.888819682341951e-06, "loss": 0.0753, "step": 16840 }, { "epoch": 1.31, "learning_rate": 6.881033945811274e-06, "loss": 0.0941, "step": 16850 }, { "epoch": 1.31, "learning_rate": 6.873248209280599e-06, "loss": 0.1028, "step": 16860 }, { "epoch": 1.31, "learning_rate": 6.865462472749923e-06, "loss": 0.0869, "step": 16870 }, { "epoch": 1.31, "learning_rate": 6.857676736219247e-06, "loss": 0.0681, "step": 16880 }, { "epoch": 1.32, "learning_rate": 6.849890999688571e-06, "loss": 0.0623, "step": 16890 }, { "epoch": 1.32, "learning_rate": 6.842105263157896e-06, "loss": 0.0851, "step": 16900 }, { "epoch": 1.32, "learning_rate": 6.83431952662722e-06, "loss": 0.0821, "step": 16910 }, { "epoch": 1.32, "learning_rate": 6.826533790096544e-06, "loss": 0.0962, "step": 16920 }, { "epoch": 1.32, "learning_rate": 6.818748053565868e-06, "loss": 0.0576, "step": 16930 }, { "epoch": 1.32, "learning_rate": 6.810962317035193e-06, "loss": 0.0677, "step": 16940 }, { "epoch": 1.32, "learning_rate": 6.803176580504515e-06, "loss": 0.0845, "step": 16950 }, { "epoch": 1.32, "learning_rate": 6.795390843973841e-06, "loss": 0.0849, "step": 16960 }, { "epoch": 1.32, "learning_rate": 6.787605107443165e-06, "loss": 0.0757, "step": 16970 }, { "epoch": 1.32, "learning_rate": 6.77981937091249e-06, "loss": 0.0836, "step": 16980 }, { "epoch": 1.32, "learning_rate": 6.772033634381812e-06, "loss": 0.0912, "step": 16990 }, { "epoch": 1.32, "learning_rate": 6.764247897851138e-06, "loss": 0.1112, "step": 17000 }, { "epoch": 1.32, "learning_rate": 6.756462161320462e-06, "loss": 0.0829, "step": 17010 }, { "epoch": 1.33, "learning_rate": 6.748676424789785e-06, "loss": 0.0699, "step": 17020 }, { "epoch": 1.33, "learning_rate": 6.740890688259109e-06, "loss": 0.0592, "step": 17030 }, { "epoch": 1.33, "learning_rate": 6.733104951728434e-06, "loss": 0.091, "step": 17040 }, { "epoch": 1.33, "learning_rate": 6.725319215197759e-06, "loss": 0.0726, "step": 17050 }, { "epoch": 1.33, "learning_rate": 6.717533478667082e-06, "loss": 0.0809, "step": 17060 }, { "epoch": 1.33, "learning_rate": 6.709747742136406e-06, "loss": 0.1029, "step": 17070 }, { "epoch": 1.33, "learning_rate": 6.701962005605731e-06, "loss": 0.0697, "step": 17080 }, { "epoch": 1.33, "learning_rate": 6.694176269075055e-06, "loss": 0.1216, "step": 17090 }, { "epoch": 1.33, "learning_rate": 6.686390532544379e-06, "loss": 0.0974, "step": 17100 }, { "epoch": 1.33, "learning_rate": 6.678604796013703e-06, "loss": 0.063, "step": 17110 }, { "epoch": 1.33, "learning_rate": 6.670819059483028e-06, "loss": 0.0926, "step": 17120 }, { "epoch": 1.33, "learning_rate": 6.663033322952351e-06, "loss": 0.0712, "step": 17130 }, { "epoch": 1.33, "learning_rate": 6.655247586421676e-06, "loss": 0.076, "step": 17140 }, { "epoch": 1.34, "learning_rate": 6.647461849891e-06, "loss": 0.0821, "step": 17150 }, { "epoch": 1.34, "learning_rate": 6.639676113360325e-06, "loss": 0.0927, "step": 17160 }, { "epoch": 1.34, "learning_rate": 6.631890376829648e-06, "loss": 0.0978, "step": 17170 }, { "epoch": 1.34, "learning_rate": 6.624104640298973e-06, "loss": 0.0637, "step": 17180 }, { "epoch": 1.34, "learning_rate": 6.616318903768297e-06, "loss": 0.1203, "step": 17190 }, { "epoch": 1.34, "learning_rate": 6.608533167237621e-06, "loss": 0.0763, "step": 17200 }, { "epoch": 1.34, "learning_rate": 6.600747430706945e-06, "loss": 0.0763, "step": 17210 }, { "epoch": 1.34, "learning_rate": 6.59296169417627e-06, "loss": 0.0827, "step": 17220 }, { "epoch": 1.34, "learning_rate": 6.585175957645594e-06, "loss": 0.1012, "step": 17230 }, { "epoch": 1.34, "learning_rate": 6.577390221114918e-06, "loss": 0.0608, "step": 17240 }, { "epoch": 1.34, "learning_rate": 6.569604484584242e-06, "loss": 0.0909, "step": 17250 }, { "epoch": 1.34, "learning_rate": 6.561818748053567e-06, "loss": 0.0718, "step": 17260 }, { "epoch": 1.34, "learning_rate": 6.55403301152289e-06, "loss": 0.0828, "step": 17270 }, { "epoch": 1.35, "learning_rate": 6.546247274992215e-06, "loss": 0.0703, "step": 17280 }, { "epoch": 1.35, "learning_rate": 6.538461538461539e-06, "loss": 0.112, "step": 17290 }, { "epoch": 1.35, "learning_rate": 6.530675801930864e-06, "loss": 0.0572, "step": 17300 }, { "epoch": 1.35, "learning_rate": 6.522890065400187e-06, "loss": 0.0934, "step": 17310 }, { "epoch": 1.35, "learning_rate": 6.515104328869512e-06, "loss": 0.0591, "step": 17320 }, { "epoch": 1.35, "learning_rate": 6.507318592338836e-06, "loss": 0.0934, "step": 17330 }, { "epoch": 1.35, "learning_rate": 6.4995328558081605e-06, "loss": 0.092, "step": 17340 }, { "epoch": 1.35, "learning_rate": 6.491747119277484e-06, "loss": 0.0971, "step": 17350 }, { "epoch": 1.35, "learning_rate": 6.483961382746809e-06, "loss": 0.0846, "step": 17360 }, { "epoch": 1.35, "learning_rate": 6.476175646216133e-06, "loss": 0.0944, "step": 17370 }, { "epoch": 1.35, "learning_rate": 6.468389909685457e-06, "loss": 0.0879, "step": 17380 }, { "epoch": 1.35, "learning_rate": 6.460604173154781e-06, "loss": 0.0787, "step": 17390 }, { "epoch": 1.35, "learning_rate": 6.4528184366241056e-06, "loss": 0.0859, "step": 17400 }, { "epoch": 1.36, "learning_rate": 6.44503270009343e-06, "loss": 0.1081, "step": 17410 }, { "epoch": 1.36, "learning_rate": 6.437246963562754e-06, "loss": 0.0907, "step": 17420 }, { "epoch": 1.36, "learning_rate": 6.429461227032078e-06, "loss": 0.0704, "step": 17430 }, { "epoch": 1.36, "learning_rate": 6.4216754905014025e-06, "loss": 0.0748, "step": 17440 }, { "epoch": 1.36, "learning_rate": 6.413889753970725e-06, "loss": 0.0854, "step": 17450 }, { "epoch": 1.36, "learning_rate": 6.406104017440051e-06, "loss": 0.1008, "step": 17460 }, { "epoch": 1.36, "learning_rate": 6.398318280909375e-06, "loss": 0.0992, "step": 17470 }, { "epoch": 1.36, "learning_rate": 6.3905325443786995e-06, "loss": 0.1136, "step": 17480 }, { "epoch": 1.36, "learning_rate": 6.382746807848022e-06, "loss": 0.0753, "step": 17490 }, { "epoch": 1.36, "learning_rate": 6.374961071317347e-06, "loss": 0.0752, "step": 17500 }, { "epoch": 1.36, "learning_rate": 6.367175334786672e-06, "loss": 0.1051, "step": 17510 }, { "epoch": 1.36, "learning_rate": 6.359389598255995e-06, "loss": 0.0922, "step": 17520 }, { "epoch": 1.36, "learning_rate": 6.351603861725319e-06, "loss": 0.1027, "step": 17530 }, { "epoch": 1.37, "learning_rate": 6.343818125194644e-06, "loss": 0.1113, "step": 17540 }, { "epoch": 1.37, "learning_rate": 6.336032388663968e-06, "loss": 0.1094, "step": 17550 }, { "epoch": 1.37, "learning_rate": 6.328246652133292e-06, "loss": 0.0837, "step": 17560 }, { "epoch": 1.37, "learning_rate": 6.320460915602616e-06, "loss": 0.0749, "step": 17570 }, { "epoch": 1.37, "learning_rate": 6.312675179071941e-06, "loss": 0.1025, "step": 17580 }, { "epoch": 1.37, "learning_rate": 6.304889442541265e-06, "loss": 0.0718, "step": 17590 }, { "epoch": 1.37, "learning_rate": 6.297103706010589e-06, "loss": 0.0668, "step": 17600 }, { "epoch": 1.37, "learning_rate": 6.289317969479913e-06, "loss": 0.0659, "step": 17610 }, { "epoch": 1.37, "learning_rate": 6.281532232949238e-06, "loss": 0.096, "step": 17620 }, { "epoch": 1.37, "learning_rate": 6.273746496418561e-06, "loss": 0.0612, "step": 17630 }, { "epoch": 1.37, "learning_rate": 6.265960759887886e-06, "loss": 0.0721, "step": 17640 }, { "epoch": 1.37, "learning_rate": 6.25817502335721e-06, "loss": 0.0804, "step": 17650 }, { "epoch": 1.37, "learning_rate": 6.2503892868265345e-06, "loss": 0.1048, "step": 17660 }, { "epoch": 1.38, "learning_rate": 6.242603550295858e-06, "loss": 0.0688, "step": 17670 }, { "epoch": 1.38, "learning_rate": 6.234817813765183e-06, "loss": 0.072, "step": 17680 }, { "epoch": 1.38, "learning_rate": 6.227032077234507e-06, "loss": 0.0769, "step": 17690 }, { "epoch": 1.38, "learning_rate": 6.219246340703831e-06, "loss": 0.0929, "step": 17700 }, { "epoch": 1.38, "learning_rate": 6.211460604173155e-06, "loss": 0.0908, "step": 17710 }, { "epoch": 1.38, "learning_rate": 6.2036748676424796e-06, "loss": 0.0712, "step": 17720 }, { "epoch": 1.38, "learning_rate": 6.195889131111804e-06, "loss": 0.0786, "step": 17730 }, { "epoch": 1.38, "learning_rate": 6.188103394581128e-06, "loss": 0.1326, "step": 17740 }, { "epoch": 1.38, "learning_rate": 6.180317658050452e-06, "loss": 0.0685, "step": 17750 }, { "epoch": 1.38, "learning_rate": 6.1725319215197765e-06, "loss": 0.0841, "step": 17760 }, { "epoch": 1.38, "learning_rate": 6.1647461849891e-06, "loss": 0.096, "step": 17770 }, { "epoch": 1.38, "learning_rate": 6.156960448458425e-06, "loss": 0.0683, "step": 17780 }, { "epoch": 1.39, "learning_rate": 6.149174711927749e-06, "loss": 0.0958, "step": 17790 }, { "epoch": 1.39, "learning_rate": 6.1413889753970735e-06, "loss": 0.0853, "step": 17800 }, { "epoch": 1.39, "learning_rate": 6.133603238866397e-06, "loss": 0.0882, "step": 17810 }, { "epoch": 1.39, "learning_rate": 6.1258175023357215e-06, "loss": 0.0825, "step": 17820 }, { "epoch": 1.39, "learning_rate": 6.118031765805046e-06, "loss": 0.0723, "step": 17830 }, { "epoch": 1.39, "learning_rate": 6.1102460292743704e-06, "loss": 0.1124, "step": 17840 }, { "epoch": 1.39, "learning_rate": 6.102460292743694e-06, "loss": 0.0929, "step": 17850 }, { "epoch": 1.39, "learning_rate": 6.0946745562130185e-06, "loss": 0.0874, "step": 17860 }, { "epoch": 1.39, "learning_rate": 6.086888819682343e-06, "loss": 0.073, "step": 17870 }, { "epoch": 1.39, "learning_rate": 6.0791030831516666e-06, "loss": 0.079, "step": 17880 }, { "epoch": 1.39, "learning_rate": 6.071317346620991e-06, "loss": 0.0863, "step": 17890 }, { "epoch": 1.39, "learning_rate": 6.0635316100903155e-06, "loss": 0.1066, "step": 17900 }, { "epoch": 1.39, "learning_rate": 6.05574587355964e-06, "loss": 0.1092, "step": 17910 }, { "epoch": 1.4, "learning_rate": 6.0479601370289635e-06, "loss": 0.0614, "step": 17920 }, { "epoch": 1.4, "learning_rate": 6.040174400498288e-06, "loss": 0.0825, "step": 17930 }, { "epoch": 1.4, "learning_rate": 6.0323886639676124e-06, "loss": 0.0989, "step": 17940 }, { "epoch": 1.4, "learning_rate": 6.024602927436935e-06, "loss": 0.0559, "step": 17950 }, { "epoch": 1.4, "learning_rate": 6.01681719090626e-06, "loss": 0.0648, "step": 17960 }, { "epoch": 1.4, "learning_rate": 6.009031454375585e-06, "loss": 0.0726, "step": 17970 }, { "epoch": 1.4, "learning_rate": 6.001245717844909e-06, "loss": 0.0872, "step": 17980 }, { "epoch": 1.4, "learning_rate": 5.993459981314232e-06, "loss": 0.0937, "step": 17990 }, { "epoch": 1.4, "learning_rate": 5.985674244783557e-06, "loss": 0.0985, "step": 18000 }, { "epoch": 1.4, "learning_rate": 5.977888508252881e-06, "loss": 0.0808, "step": 18010 }, { "epoch": 1.4, "learning_rate": 5.970102771722205e-06, "loss": 0.0798, "step": 18020 }, { "epoch": 1.4, "learning_rate": 5.962317035191529e-06, "loss": 0.0668, "step": 18030 }, { "epoch": 1.4, "learning_rate": 5.9545312986608536e-06, "loss": 0.075, "step": 18040 }, { "epoch": 1.41, "learning_rate": 5.946745562130178e-06, "loss": 0.0632, "step": 18050 }, { "epoch": 1.41, "learning_rate": 5.938959825599502e-06, "loss": 0.0637, "step": 18060 }, { "epoch": 1.41, "learning_rate": 5.931174089068826e-06, "loss": 0.0821, "step": 18070 }, { "epoch": 1.41, "learning_rate": 5.9233883525381505e-06, "loss": 0.0734, "step": 18080 }, { "epoch": 1.41, "learning_rate": 5.915602616007475e-06, "loss": 0.0938, "step": 18090 }, { "epoch": 1.41, "learning_rate": 5.907816879476799e-06, "loss": 0.0668, "step": 18100 }, { "epoch": 1.41, "learning_rate": 5.900031142946123e-06, "loss": 0.0949, "step": 18110 }, { "epoch": 1.41, "learning_rate": 5.8922454064154475e-06, "loss": 0.0734, "step": 18120 }, { "epoch": 1.41, "learning_rate": 5.884459669884771e-06, "loss": 0.1135, "step": 18130 }, { "epoch": 1.41, "learning_rate": 5.8766739333540955e-06, "loss": 0.0805, "step": 18140 }, { "epoch": 1.41, "learning_rate": 5.86888819682342e-06, "loss": 0.0736, "step": 18150 }, { "epoch": 1.41, "learning_rate": 5.8611024602927444e-06, "loss": 0.0609, "step": 18160 }, { "epoch": 1.41, "learning_rate": 5.853316723762068e-06, "loss": 0.0842, "step": 18170 }, { "epoch": 1.42, "learning_rate": 5.8455309872313925e-06, "loss": 0.0629, "step": 18180 }, { "epoch": 1.42, "learning_rate": 5.837745250700717e-06, "loss": 0.0642, "step": 18190 }, { "epoch": 1.42, "learning_rate": 5.8299595141700406e-06, "loss": 0.077, "step": 18200 }, { "epoch": 1.42, "learning_rate": 5.822173777639365e-06, "loss": 0.069, "step": 18210 }, { "epoch": 1.42, "learning_rate": 5.8143880411086895e-06, "loss": 0.0733, "step": 18220 }, { "epoch": 1.42, "learning_rate": 5.806602304578014e-06, "loss": 0.0902, "step": 18230 }, { "epoch": 1.42, "learning_rate": 5.7988165680473375e-06, "loss": 0.0662, "step": 18240 }, { "epoch": 1.42, "learning_rate": 5.791030831516662e-06, "loss": 0.0762, "step": 18250 }, { "epoch": 1.42, "learning_rate": 5.7832450949859864e-06, "loss": 0.0728, "step": 18260 }, { "epoch": 1.42, "learning_rate": 5.77545935845531e-06, "loss": 0.0933, "step": 18270 }, { "epoch": 1.42, "learning_rate": 5.7676736219246345e-06, "loss": 0.0765, "step": 18280 }, { "epoch": 1.42, "learning_rate": 5.759887885393959e-06, "loss": 0.0787, "step": 18290 }, { "epoch": 1.42, "learning_rate": 5.752102148863283e-06, "loss": 0.0859, "step": 18300 }, { "epoch": 1.43, "learning_rate": 5.744316412332607e-06, "loss": 0.0879, "step": 18310 }, { "epoch": 1.43, "learning_rate": 5.7365306758019315e-06, "loss": 0.0838, "step": 18320 }, { "epoch": 1.43, "learning_rate": 5.728744939271256e-06, "loss": 0.0876, "step": 18330 }, { "epoch": 1.43, "learning_rate": 5.72095920274058e-06, "loss": 0.0734, "step": 18340 }, { "epoch": 1.43, "learning_rate": 5.713173466209904e-06, "loss": 0.0803, "step": 18350 }, { "epoch": 1.43, "learning_rate": 5.705387729679228e-06, "loss": 0.076, "step": 18360 }, { "epoch": 1.43, "learning_rate": 5.697601993148553e-06, "loss": 0.0562, "step": 18370 }, { "epoch": 1.43, "learning_rate": 5.6898162566178765e-06, "loss": 0.0943, "step": 18380 }, { "epoch": 1.43, "learning_rate": 5.682030520087201e-06, "loss": 0.0991, "step": 18390 }, { "epoch": 1.43, "learning_rate": 5.674244783556525e-06, "loss": 0.0848, "step": 18400 }, { "epoch": 1.43, "learning_rate": 5.66645904702585e-06, "loss": 0.0821, "step": 18410 }, { "epoch": 1.43, "learning_rate": 5.658673310495173e-06, "loss": 0.0894, "step": 18420 }, { "epoch": 1.43, "learning_rate": 5.650887573964498e-06, "loss": 0.0679, "step": 18430 }, { "epoch": 1.44, "learning_rate": 5.643101837433822e-06, "loss": 0.1185, "step": 18440 }, { "epoch": 1.44, "learning_rate": 5.635316100903145e-06, "loss": 0.0814, "step": 18450 }, { "epoch": 1.44, "learning_rate": 5.6275303643724695e-06, "loss": 0.1106, "step": 18460 }, { "epoch": 1.44, "learning_rate": 5.619744627841794e-06, "loss": 0.1136, "step": 18470 }, { "epoch": 1.44, "learning_rate": 5.611958891311119e-06, "loss": 0.0627, "step": 18480 }, { "epoch": 1.44, "learning_rate": 5.604173154780442e-06, "loss": 0.0728, "step": 18490 }, { "epoch": 1.44, "learning_rate": 5.5963874182497665e-06, "loss": 0.0866, "step": 18500 }, { "epoch": 1.44, "learning_rate": 5.588601681719091e-06, "loss": 0.0613, "step": 18510 }, { "epoch": 1.44, "learning_rate": 5.5808159451884146e-06, "loss": 0.0974, "step": 18520 }, { "epoch": 1.44, "learning_rate": 5.573030208657739e-06, "loss": 0.0717, "step": 18530 }, { "epoch": 1.44, "learning_rate": 5.5652444721270635e-06, "loss": 0.0602, "step": 18540 }, { "epoch": 1.44, "learning_rate": 5.557458735596388e-06, "loss": 0.071, "step": 18550 }, { "epoch": 1.45, "learning_rate": 5.5496729990657115e-06, "loss": 0.0881, "step": 18560 }, { "epoch": 1.45, "learning_rate": 5.541887262535036e-06, "loss": 0.0925, "step": 18570 }, { "epoch": 1.45, "learning_rate": 5.5341015260043604e-06, "loss": 0.0794, "step": 18580 }, { "epoch": 1.45, "learning_rate": 5.526315789473685e-06, "loss": 0.06, "step": 18590 }, { "epoch": 1.45, "learning_rate": 5.5185300529430085e-06, "loss": 0.0724, "step": 18600 }, { "epoch": 1.45, "learning_rate": 5.510744316412333e-06, "loss": 0.083, "step": 18610 }, { "epoch": 1.45, "learning_rate": 5.502958579881657e-06, "loss": 0.0701, "step": 18620 }, { "epoch": 1.45, "learning_rate": 5.495172843350981e-06, "loss": 0.0612, "step": 18630 }, { "epoch": 1.45, "learning_rate": 5.4873871068203055e-06, "loss": 0.0599, "step": 18640 }, { "epoch": 1.45, "learning_rate": 5.47960137028963e-06, "loss": 0.108, "step": 18650 }, { "epoch": 1.45, "learning_rate": 5.471815633758954e-06, "loss": 0.0808, "step": 18660 }, { "epoch": 1.45, "learning_rate": 5.464029897228278e-06, "loss": 0.0859, "step": 18670 }, { "epoch": 1.45, "learning_rate": 5.456244160697602e-06, "loss": 0.075, "step": 18680 }, { "epoch": 1.46, "learning_rate": 5.448458424166927e-06, "loss": 0.0868, "step": 18690 }, { "epoch": 1.46, "learning_rate": 5.4406726876362505e-06, "loss": 0.0535, "step": 18700 }, { "epoch": 1.46, "learning_rate": 5.432886951105575e-06, "loss": 0.0705, "step": 18710 }, { "epoch": 1.46, "learning_rate": 5.425101214574899e-06, "loss": 0.0977, "step": 18720 }, { "epoch": 1.46, "learning_rate": 5.417315478044224e-06, "loss": 0.0805, "step": 18730 }, { "epoch": 1.46, "learning_rate": 5.4095297415135474e-06, "loss": 0.0824, "step": 18740 }, { "epoch": 1.46, "learning_rate": 5.401744004982872e-06, "loss": 0.0734, "step": 18750 }, { "epoch": 1.46, "learning_rate": 5.393958268452196e-06, "loss": 0.0822, "step": 18760 }, { "epoch": 1.46, "learning_rate": 5.38617253192152e-06, "loss": 0.0888, "step": 18770 }, { "epoch": 1.46, "learning_rate": 5.378386795390844e-06, "loss": 0.0985, "step": 18780 }, { "epoch": 1.46, "learning_rate": 5.370601058860169e-06, "loss": 0.0934, "step": 18790 }, { "epoch": 1.46, "learning_rate": 5.362815322329493e-06, "loss": 0.0651, "step": 18800 }, { "epoch": 1.46, "learning_rate": 5.355029585798817e-06, "loss": 0.0787, "step": 18810 }, { "epoch": 1.47, "learning_rate": 5.347243849268141e-06, "loss": 0.0637, "step": 18820 }, { "epoch": 1.47, "learning_rate": 5.339458112737466e-06, "loss": 0.0994, "step": 18830 }, { "epoch": 1.47, "learning_rate": 5.33167237620679e-06, "loss": 0.1053, "step": 18840 }, { "epoch": 1.47, "learning_rate": 5.323886639676114e-06, "loss": 0.0963, "step": 18850 }, { "epoch": 1.47, "learning_rate": 5.316100903145438e-06, "loss": 0.0792, "step": 18860 }, { "epoch": 1.47, "learning_rate": 5.308315166614763e-06, "loss": 0.0894, "step": 18870 }, { "epoch": 1.47, "learning_rate": 5.3005294300840855e-06, "loss": 0.0799, "step": 18880 }, { "epoch": 1.47, "learning_rate": 5.292743693553411e-06, "loss": 0.0885, "step": 18890 }, { "epoch": 1.47, "learning_rate": 5.284957957022735e-06, "loss": 0.0689, "step": 18900 }, { "epoch": 1.47, "learning_rate": 5.27717222049206e-06, "loss": 0.0646, "step": 18910 }, { "epoch": 1.47, "learning_rate": 5.2693864839613825e-06, "loss": 0.0984, "step": 18920 }, { "epoch": 1.47, "learning_rate": 5.261600747430707e-06, "loss": 0.0806, "step": 18930 }, { "epoch": 1.47, "learning_rate": 5.253815010900032e-06, "loss": 0.0835, "step": 18940 }, { "epoch": 1.48, "learning_rate": 5.246029274369355e-06, "loss": 0.0663, "step": 18950 }, { "epoch": 1.48, "learning_rate": 5.2382435378386795e-06, "loss": 0.0779, "step": 18960 }, { "epoch": 1.48, "learning_rate": 5.230457801308004e-06, "loss": 0.0765, "step": 18970 }, { "epoch": 1.48, "learning_rate": 5.222672064777329e-06, "loss": 0.0733, "step": 18980 }, { "epoch": 1.48, "learning_rate": 5.214886328246652e-06, "loss": 0.0609, "step": 18990 }, { "epoch": 1.48, "learning_rate": 5.207100591715976e-06, "loss": 0.0666, "step": 19000 }, { "epoch": 1.48, "learning_rate": 5.199314855185301e-06, "loss": 0.0554, "step": 19010 }, { "epoch": 1.48, "learning_rate": 5.1915291186546245e-06, "loss": 0.1113, "step": 19020 }, { "epoch": 1.48, "learning_rate": 5.183743382123949e-06, "loss": 0.0761, "step": 19030 }, { "epoch": 1.48, "learning_rate": 5.175957645593273e-06, "loss": 0.0847, "step": 19040 }, { "epoch": 1.48, "learning_rate": 5.168171909062598e-06, "loss": 0.0709, "step": 19050 }, { "epoch": 1.48, "learning_rate": 5.1603861725319214e-06, "loss": 0.09, "step": 19060 }, { "epoch": 1.48, "learning_rate": 5.152600436001246e-06, "loss": 0.0829, "step": 19070 }, { "epoch": 1.49, "learning_rate": 5.14481469947057e-06, "loss": 0.0785, "step": 19080 }, { "epoch": 1.49, "learning_rate": 5.137028962939895e-06, "loss": 0.083, "step": 19090 }, { "epoch": 1.49, "learning_rate": 5.129243226409218e-06, "loss": 0.0889, "step": 19100 }, { "epoch": 1.49, "learning_rate": 5.121457489878543e-06, "loss": 0.0777, "step": 19110 }, { "epoch": 1.49, "learning_rate": 5.113671753347867e-06, "loss": 0.0712, "step": 19120 }, { "epoch": 1.49, "learning_rate": 5.105886016817191e-06, "loss": 0.0776, "step": 19130 }, { "epoch": 1.49, "learning_rate": 5.098100280286515e-06, "loss": 0.082, "step": 19140 }, { "epoch": 1.49, "learning_rate": 5.09031454375584e-06, "loss": 0.0936, "step": 19150 }, { "epoch": 1.49, "learning_rate": 5.082528807225164e-06, "loss": 0.0829, "step": 19160 }, { "epoch": 1.49, "learning_rate": 5.074743070694488e-06, "loss": 0.0602, "step": 19170 }, { "epoch": 1.49, "learning_rate": 5.066957334163812e-06, "loss": 0.0754, "step": 19180 }, { "epoch": 1.49, "learning_rate": 5.059171597633137e-06, "loss": 0.0742, "step": 19190 }, { "epoch": 1.49, "learning_rate": 5.05138586110246e-06, "loss": 0.0701, "step": 19200 }, { "epoch": 1.5, "learning_rate": 5.043600124571785e-06, "loss": 0.0626, "step": 19210 }, { "epoch": 1.5, "learning_rate": 5.035814388041109e-06, "loss": 0.0862, "step": 19220 }, { "epoch": 1.5, "learning_rate": 5.028028651510434e-06, "loss": 0.0889, "step": 19230 }, { "epoch": 1.5, "learning_rate": 5.020242914979757e-06, "loss": 0.0613, "step": 19240 }, { "epoch": 1.5, "learning_rate": 5.012457178449082e-06, "loss": 0.0709, "step": 19250 }, { "epoch": 1.5, "learning_rate": 5.004671441918406e-06, "loss": 0.0782, "step": 19260 }, { "epoch": 1.5, "learning_rate": 4.99688570538773e-06, "loss": 0.0766, "step": 19270 }, { "epoch": 1.5, "learning_rate": 4.989099968857054e-06, "loss": 0.0827, "step": 19280 }, { "epoch": 1.5, "learning_rate": 4.981314232326379e-06, "loss": 0.0721, "step": 19290 }, { "epoch": 1.5, "learning_rate": 4.973528495795702e-06, "loss": 0.0705, "step": 19300 }, { "epoch": 1.5, "learning_rate": 4.965742759265027e-06, "loss": 0.0664, "step": 19310 }, { "epoch": 1.5, "learning_rate": 4.957957022734351e-06, "loss": 0.0729, "step": 19320 }, { "epoch": 1.5, "learning_rate": 4.950171286203676e-06, "loss": 0.0542, "step": 19330 }, { "epoch": 1.51, "learning_rate": 4.942385549672999e-06, "loss": 0.0768, "step": 19340 }, { "epoch": 1.51, "learning_rate": 4.934599813142324e-06, "loss": 0.0725, "step": 19350 }, { "epoch": 1.51, "learning_rate": 4.926814076611648e-06, "loss": 0.0694, "step": 19360 }, { "epoch": 1.51, "learning_rate": 4.919028340080972e-06, "loss": 0.0858, "step": 19370 }, { "epoch": 1.51, "learning_rate": 4.911242603550296e-06, "loss": 0.072, "step": 19380 }, { "epoch": 1.51, "learning_rate": 4.903456867019621e-06, "loss": 0.0589, "step": 19390 }, { "epoch": 1.51, "learning_rate": 4.895671130488945e-06, "loss": 0.0718, "step": 19400 }, { "epoch": 1.51, "learning_rate": 4.887885393958269e-06, "loss": 0.06, "step": 19410 }, { "epoch": 1.51, "learning_rate": 4.880099657427593e-06, "loss": 0.0718, "step": 19420 }, { "epoch": 1.51, "learning_rate": 4.872313920896917e-06, "loss": 0.0861, "step": 19430 }, { "epoch": 1.51, "learning_rate": 4.864528184366241e-06, "loss": 0.1108, "step": 19440 }, { "epoch": 1.51, "learning_rate": 4.856742447835566e-06, "loss": 0.093, "step": 19450 }, { "epoch": 1.52, "learning_rate": 4.848956711304889e-06, "loss": 0.0642, "step": 19460 }, { "epoch": 1.52, "learning_rate": 4.841170974774214e-06, "loss": 0.06, "step": 19470 }, { "epoch": 1.52, "learning_rate": 4.833385238243538e-06, "loss": 0.0794, "step": 19480 }, { "epoch": 1.52, "learning_rate": 4.825599501712863e-06, "loss": 0.0722, "step": 19490 }, { "epoch": 1.52, "learning_rate": 4.817813765182186e-06, "loss": 0.0821, "step": 19500 }, { "epoch": 1.52, "learning_rate": 4.810028028651511e-06, "loss": 0.0646, "step": 19510 }, { "epoch": 1.52, "learning_rate": 4.802242292120835e-06, "loss": 0.0491, "step": 19520 }, { "epoch": 1.52, "learning_rate": 4.794456555590159e-06, "loss": 0.0593, "step": 19530 }, { "epoch": 1.52, "learning_rate": 4.786670819059483e-06, "loss": 0.0873, "step": 19540 }, { "epoch": 1.52, "learning_rate": 4.778885082528808e-06, "loss": 0.0623, "step": 19550 }, { "epoch": 1.52, "learning_rate": 4.771099345998132e-06, "loss": 0.0547, "step": 19560 }, { "epoch": 1.52, "learning_rate": 4.763313609467456e-06, "loss": 0.0675, "step": 19570 }, { "epoch": 1.52, "learning_rate": 4.75552787293678e-06, "loss": 0.0766, "step": 19580 }, { "epoch": 1.53, "learning_rate": 4.747742136406105e-06, "loss": 0.0863, "step": 19590 }, { "epoch": 1.53, "learning_rate": 4.739956399875428e-06, "loss": 0.0744, "step": 19600 }, { "epoch": 1.53, "learning_rate": 4.732170663344753e-06, "loss": 0.07, "step": 19610 }, { "epoch": 1.53, "learning_rate": 4.724384926814077e-06, "loss": 0.0729, "step": 19620 }, { "epoch": 1.53, "learning_rate": 4.716599190283402e-06, "loss": 0.0503, "step": 19630 }, { "epoch": 1.53, "learning_rate": 4.708813453752725e-06, "loss": 0.1092, "step": 19640 }, { "epoch": 1.53, "learning_rate": 4.70102771722205e-06, "loss": 0.0741, "step": 19650 }, { "epoch": 1.53, "learning_rate": 4.693241980691373e-06, "loss": 0.0693, "step": 19660 }, { "epoch": 1.53, "learning_rate": 4.685456244160699e-06, "loss": 0.0544, "step": 19670 }, { "epoch": 1.53, "learning_rate": 4.677670507630022e-06, "loss": 0.097, "step": 19680 }, { "epoch": 1.53, "learning_rate": 4.669884771099346e-06, "loss": 0.0594, "step": 19690 }, { "epoch": 1.53, "learning_rate": 4.66209903456867e-06, "loss": 0.0591, "step": 19700 }, { "epoch": 1.53, "learning_rate": 4.654313298037995e-06, "loss": 0.0565, "step": 19710 }, { "epoch": 1.54, "learning_rate": 4.646527561507319e-06, "loss": 0.0606, "step": 19720 }, { "epoch": 1.54, "learning_rate": 4.638741824976643e-06, "loss": 0.108, "step": 19730 }, { "epoch": 1.54, "learning_rate": 4.630956088445967e-06, "loss": 0.0692, "step": 19740 }, { "epoch": 1.54, "learning_rate": 4.623170351915292e-06, "loss": 0.065, "step": 19750 }, { "epoch": 1.54, "learning_rate": 4.615384615384616e-06, "loss": 0.0743, "step": 19760 }, { "epoch": 1.54, "learning_rate": 4.60759887885394e-06, "loss": 0.0921, "step": 19770 }, { "epoch": 1.54, "learning_rate": 4.599813142323264e-06, "loss": 0.085, "step": 19780 }, { "epoch": 1.54, "learning_rate": 4.592027405792589e-06, "loss": 0.0714, "step": 19790 }, { "epoch": 1.54, "learning_rate": 4.584241669261912e-06, "loss": 0.0638, "step": 19800 }, { "epoch": 1.54, "learning_rate": 4.576455932731237e-06, "loss": 0.0683, "step": 19810 }, { "epoch": 1.54, "learning_rate": 4.568670196200561e-06, "loss": 0.062, "step": 19820 }, { "epoch": 1.54, "learning_rate": 4.560884459669886e-06, "loss": 0.0723, "step": 19830 }, { "epoch": 1.54, "learning_rate": 4.553098723139209e-06, "loss": 0.0721, "step": 19840 }, { "epoch": 1.55, "learning_rate": 4.545312986608534e-06, "loss": 0.0679, "step": 19850 }, { "epoch": 1.55, "learning_rate": 4.537527250077858e-06, "loss": 0.104, "step": 19860 }, { "epoch": 1.55, "learning_rate": 4.529741513547182e-06, "loss": 0.0858, "step": 19870 }, { "epoch": 1.55, "learning_rate": 4.521955777016506e-06, "loss": 0.1106, "step": 19880 }, { "epoch": 1.55, "learning_rate": 4.51417004048583e-06, "loss": 0.0747, "step": 19890 }, { "epoch": 1.55, "learning_rate": 4.506384303955155e-06, "loss": 0.0624, "step": 19900 }, { "epoch": 1.55, "learning_rate": 4.498598567424479e-06, "loss": 0.0819, "step": 19910 }, { "epoch": 1.55, "learning_rate": 4.490812830893803e-06, "loss": 0.0693, "step": 19920 }, { "epoch": 1.55, "learning_rate": 4.483027094363127e-06, "loss": 0.0673, "step": 19930 }, { "epoch": 1.55, "learning_rate": 4.475241357832451e-06, "loss": 0.0689, "step": 19940 }, { "epoch": 1.55, "learning_rate": 4.467455621301776e-06, "loss": 0.0653, "step": 19950 }, { "epoch": 1.55, "learning_rate": 4.459669884771099e-06, "loss": 0.08, "step": 19960 }, { "epoch": 1.55, "learning_rate": 4.451884148240424e-06, "loss": 0.0639, "step": 19970 }, { "epoch": 1.56, "learning_rate": 4.444098411709748e-06, "loss": 0.0538, "step": 19980 }, { "epoch": 1.56, "learning_rate": 4.436312675179073e-06, "loss": 0.0622, "step": 19990 }, { "epoch": 1.56, "learning_rate": 4.428526938648396e-06, "loss": 0.0897, "step": 20000 }, { "epoch": 1.56, "learning_rate": 4.420741202117721e-06, "loss": 0.0705, "step": 20010 }, { "epoch": 1.56, "learning_rate": 4.412955465587045e-06, "loss": 0.0757, "step": 20020 }, { "epoch": 1.56, "learning_rate": 4.405169729056369e-06, "loss": 0.0572, "step": 20030 }, { "epoch": 1.56, "learning_rate": 4.397383992525693e-06, "loss": 0.0789, "step": 20040 }, { "epoch": 1.56, "learning_rate": 4.389598255995018e-06, "loss": 0.059, "step": 20050 }, { "epoch": 1.56, "learning_rate": 4.381812519464342e-06, "loss": 0.0831, "step": 20060 }, { "epoch": 1.56, "learning_rate": 4.374026782933666e-06, "loss": 0.0875, "step": 20070 }, { "epoch": 1.56, "learning_rate": 4.36624104640299e-06, "loss": 0.0861, "step": 20080 }, { "epoch": 1.56, "learning_rate": 4.358455309872315e-06, "loss": 0.0651, "step": 20090 }, { "epoch": 1.56, "learning_rate": 4.350669573341639e-06, "loss": 0.0564, "step": 20100 }, { "epoch": 1.57, "learning_rate": 4.342883836810963e-06, "loss": 0.0737, "step": 20110 }, { "epoch": 1.57, "learning_rate": 4.335098100280286e-06, "loss": 0.076, "step": 20120 }, { "epoch": 1.57, "learning_rate": 4.3273123637496116e-06, "loss": 0.0597, "step": 20130 }, { "epoch": 1.57, "learning_rate": 4.319526627218935e-06, "loss": 0.1011, "step": 20140 }, { "epoch": 1.57, "learning_rate": 4.31174089068826e-06, "loss": 0.0547, "step": 20150 }, { "epoch": 1.57, "learning_rate": 4.303955154157583e-06, "loss": 0.0597, "step": 20160 }, { "epoch": 1.57, "learning_rate": 4.296169417626908e-06, "loss": 0.0533, "step": 20170 }, { "epoch": 1.57, "learning_rate": 4.288383681096232e-06, "loss": 0.0823, "step": 20180 }, { "epoch": 1.57, "learning_rate": 4.280597944565556e-06, "loss": 0.0749, "step": 20190 }, { "epoch": 1.57, "learning_rate": 4.27281220803488e-06, "loss": 0.0603, "step": 20200 }, { "epoch": 1.57, "learning_rate": 4.265026471504205e-06, "loss": 0.0547, "step": 20210 }, { "epoch": 1.57, "learning_rate": 4.257240734973529e-06, "loss": 0.0781, "step": 20220 }, { "epoch": 1.58, "learning_rate": 4.249454998442853e-06, "loss": 0.0612, "step": 20230 }, { "epoch": 1.58, "learning_rate": 4.241669261912177e-06, "loss": 0.0592, "step": 20240 }, { "epoch": 1.58, "learning_rate": 4.233883525381502e-06, "loss": 0.0739, "step": 20250 }, { "epoch": 1.58, "learning_rate": 4.226097788850826e-06, "loss": 0.074, "step": 20260 }, { "epoch": 1.58, "learning_rate": 4.21831205232015e-06, "loss": 0.0622, "step": 20270 }, { "epoch": 1.58, "learning_rate": 4.210526315789474e-06, "loss": 0.0701, "step": 20280 }, { "epoch": 1.58, "learning_rate": 4.2027405792587986e-06, "loss": 0.0865, "step": 20290 }, { "epoch": 1.58, "learning_rate": 4.194954842728122e-06, "loss": 0.073, "step": 20300 }, { "epoch": 1.58, "learning_rate": 4.187169106197447e-06, "loss": 0.07, "step": 20310 }, { "epoch": 1.58, "learning_rate": 4.179383369666771e-06, "loss": 0.0903, "step": 20320 }, { "epoch": 1.58, "learning_rate": 4.1715976331360955e-06, "loss": 0.0775, "step": 20330 }, { "epoch": 1.58, "learning_rate": 4.163811896605419e-06, "loss": 0.0615, "step": 20340 }, { "epoch": 1.58, "learning_rate": 4.156026160074744e-06, "loss": 0.0602, "step": 20350 }, { "epoch": 1.59, "learning_rate": 4.148240423544068e-06, "loss": 0.0716, "step": 20360 }, { "epoch": 1.59, "learning_rate": 4.140454687013392e-06, "loss": 0.0571, "step": 20370 }, { "epoch": 1.59, "learning_rate": 4.132668950482716e-06, "loss": 0.0653, "step": 20380 }, { "epoch": 1.59, "learning_rate": 4.12488321395204e-06, "loss": 0.0731, "step": 20390 }, { "epoch": 1.59, "learning_rate": 4.117097477421364e-06, "loss": 0.0984, "step": 20400 }, { "epoch": 1.59, "learning_rate": 4.109311740890689e-06, "loss": 0.0659, "step": 20410 }, { "epoch": 1.59, "learning_rate": 4.101526004360013e-06, "loss": 0.0587, "step": 20420 }, { "epoch": 1.59, "learning_rate": 4.093740267829337e-06, "loss": 0.0577, "step": 20430 }, { "epoch": 1.59, "learning_rate": 4.085954531298661e-06, "loss": 0.0841, "step": 20440 }, { "epoch": 1.59, "learning_rate": 4.0781687947679856e-06, "loss": 0.0606, "step": 20450 }, { "epoch": 1.59, "learning_rate": 4.070383058237309e-06, "loss": 0.0598, "step": 20460 }, { "epoch": 1.59, "learning_rate": 4.062597321706634e-06, "loss": 0.066, "step": 20470 }, { "epoch": 1.59, "learning_rate": 4.054811585175958e-06, "loss": 0.0769, "step": 20480 }, { "epoch": 1.6, "learning_rate": 4.0470258486452825e-06, "loss": 0.1022, "step": 20490 }, { "epoch": 1.6, "learning_rate": 4.039240112114606e-06, "loss": 0.0523, "step": 20500 }, { "epoch": 1.6, "learning_rate": 4.031454375583931e-06, "loss": 0.057, "step": 20510 }, { "epoch": 1.6, "learning_rate": 4.023668639053255e-06, "loss": 0.0895, "step": 20520 }, { "epoch": 1.6, "learning_rate": 4.015882902522579e-06, "loss": 0.0652, "step": 20530 }, { "epoch": 1.6, "learning_rate": 4.008097165991903e-06, "loss": 0.0703, "step": 20540 }, { "epoch": 1.6, "learning_rate": 4.0003114294612276e-06, "loss": 0.0669, "step": 20550 }, { "epoch": 1.6, "learning_rate": 3.992525692930552e-06, "loss": 0.0819, "step": 20560 }, { "epoch": 1.6, "learning_rate": 3.984739956399876e-06, "loss": 0.0562, "step": 20570 }, { "epoch": 1.6, "learning_rate": 3.9769542198692e-06, "loss": 0.0699, "step": 20580 }, { "epoch": 1.6, "learning_rate": 3.9691684833385245e-06, "loss": 0.0727, "step": 20590 }, { "epoch": 1.6, "learning_rate": 3.961382746807849e-06, "loss": 0.0525, "step": 20600 }, { "epoch": 1.6, "learning_rate": 3.9535970102771726e-06, "loss": 0.1218, "step": 20610 }, { "epoch": 1.61, "learning_rate": 3.945811273746496e-06, "loss": 0.0535, "step": 20620 }, { "epoch": 1.61, "learning_rate": 3.938025537215821e-06, "loss": 0.0886, "step": 20630 }, { "epoch": 1.61, "learning_rate": 3.930239800685145e-06, "loss": 0.0605, "step": 20640 }, { "epoch": 1.61, "learning_rate": 3.9224540641544695e-06, "loss": 0.0641, "step": 20650 }, { "epoch": 1.61, "learning_rate": 3.914668327623793e-06, "loss": 0.0815, "step": 20660 }, { "epoch": 1.61, "learning_rate": 3.906882591093118e-06, "loss": 0.0974, "step": 20670 }, { "epoch": 1.61, "learning_rate": 3.899096854562442e-06, "loss": 0.0513, "step": 20680 }, { "epoch": 1.61, "learning_rate": 3.891311118031766e-06, "loss": 0.0919, "step": 20690 }, { "epoch": 1.61, "learning_rate": 3.88352538150109e-06, "loss": 0.0689, "step": 20700 }, { "epoch": 1.61, "learning_rate": 3.8757396449704146e-06, "loss": 0.0691, "step": 20710 }, { "epoch": 1.61, "learning_rate": 3.867953908439739e-06, "loss": 0.0676, "step": 20720 }, { "epoch": 1.61, "learning_rate": 3.860168171909063e-06, "loss": 0.0675, "step": 20730 }, { "epoch": 1.61, "learning_rate": 3.852382435378387e-06, "loss": 0.0807, "step": 20740 }, { "epoch": 1.62, "learning_rate": 3.8445966988477115e-06, "loss": 0.0686, "step": 20750 }, { "epoch": 1.62, "learning_rate": 3.836810962317036e-06, "loss": 0.0761, "step": 20760 }, { "epoch": 1.62, "learning_rate": 3.8290252257863596e-06, "loss": 0.0639, "step": 20770 }, { "epoch": 1.62, "learning_rate": 3.821239489255684e-06, "loss": 0.0606, "step": 20780 }, { "epoch": 1.62, "learning_rate": 3.813453752725008e-06, "loss": 0.0722, "step": 20790 }, { "epoch": 1.62, "learning_rate": 3.805668016194332e-06, "loss": 0.0785, "step": 20800 }, { "epoch": 1.62, "learning_rate": 3.7978822796636565e-06, "loss": 0.0789, "step": 20810 }, { "epoch": 1.62, "learning_rate": 3.7900965431329806e-06, "loss": 0.0773, "step": 20820 }, { "epoch": 1.62, "learning_rate": 3.782310806602305e-06, "loss": 0.0637, "step": 20830 }, { "epoch": 1.62, "learning_rate": 3.774525070071629e-06, "loss": 0.0512, "step": 20840 }, { "epoch": 1.62, "learning_rate": 3.7667393335409535e-06, "loss": 0.0735, "step": 20850 }, { "epoch": 1.62, "learning_rate": 3.7589535970102775e-06, "loss": 0.0617, "step": 20860 }, { "epoch": 1.62, "learning_rate": 3.7511678604796016e-06, "loss": 0.0548, "step": 20870 }, { "epoch": 1.63, "learning_rate": 3.743382123948926e-06, "loss": 0.0762, "step": 20880 }, { "epoch": 1.63, "learning_rate": 3.73559638741825e-06, "loss": 0.0502, "step": 20890 }, { "epoch": 1.63, "learning_rate": 3.7278106508875745e-06, "loss": 0.0757, "step": 20900 }, { "epoch": 1.63, "learning_rate": 3.7200249143568985e-06, "loss": 0.0631, "step": 20910 }, { "epoch": 1.63, "learning_rate": 3.712239177826223e-06, "loss": 0.0561, "step": 20920 }, { "epoch": 1.63, "learning_rate": 3.704453441295547e-06, "loss": 0.0517, "step": 20930 }, { "epoch": 1.63, "learning_rate": 3.6966677047648706e-06, "loss": 0.048, "step": 20940 }, { "epoch": 1.63, "learning_rate": 3.6888819682341955e-06, "loss": 0.0684, "step": 20950 }, { "epoch": 1.63, "learning_rate": 3.681096231703519e-06, "loss": 0.0685, "step": 20960 }, { "epoch": 1.63, "learning_rate": 3.673310495172844e-06, "loss": 0.0593, "step": 20970 }, { "epoch": 1.63, "learning_rate": 3.6655247586421676e-06, "loss": 0.0667, "step": 20980 }, { "epoch": 1.63, "learning_rate": 3.6577390221114924e-06, "loss": 0.068, "step": 20990 }, { "epoch": 1.64, "learning_rate": 3.649953285580816e-06, "loss": 0.059, "step": 21000 }, { "epoch": 1.64, "learning_rate": 3.6421675490501405e-06, "loss": 0.0627, "step": 21010 }, { "epoch": 1.64, "learning_rate": 3.6343818125194645e-06, "loss": 0.0708, "step": 21020 }, { "epoch": 1.64, "learning_rate": 3.6265960759887886e-06, "loss": 0.0581, "step": 21030 }, { "epoch": 1.64, "learning_rate": 3.618810339458113e-06, "loss": 0.0885, "step": 21040 }, { "epoch": 1.64, "learning_rate": 3.611024602927437e-06, "loss": 0.0644, "step": 21050 }, { "epoch": 1.64, "learning_rate": 3.6032388663967615e-06, "loss": 0.0666, "step": 21060 }, { "epoch": 1.64, "learning_rate": 3.5954531298660855e-06, "loss": 0.0569, "step": 21070 }, { "epoch": 1.64, "learning_rate": 3.58766739333541e-06, "loss": 0.0582, "step": 21080 }, { "epoch": 1.64, "learning_rate": 3.579881656804734e-06, "loss": 0.0694, "step": 21090 }, { "epoch": 1.64, "learning_rate": 3.5720959202740585e-06, "loss": 0.075, "step": 21100 }, { "epoch": 1.64, "learning_rate": 3.5643101837433825e-06, "loss": 0.0667, "step": 21110 }, { "epoch": 1.64, "learning_rate": 3.5565244472127065e-06, "loss": 0.047, "step": 21120 }, { "epoch": 1.65, "learning_rate": 3.548738710682031e-06, "loss": 0.0575, "step": 21130 }, { "epoch": 1.65, "learning_rate": 3.540952974151355e-06, "loss": 0.0633, "step": 21140 }, { "epoch": 1.65, "learning_rate": 3.5331672376206794e-06, "loss": 0.0762, "step": 21150 }, { "epoch": 1.65, "learning_rate": 3.5253815010900035e-06, "loss": 0.0584, "step": 21160 }, { "epoch": 1.65, "learning_rate": 3.517595764559328e-06, "loss": 0.0767, "step": 21170 }, { "epoch": 1.65, "learning_rate": 3.509810028028652e-06, "loss": 0.0565, "step": 21180 }, { "epoch": 1.65, "learning_rate": 3.5020242914979756e-06, "loss": 0.0597, "step": 21190 }, { "epoch": 1.65, "learning_rate": 3.4942385549673004e-06, "loss": 0.0615, "step": 21200 }, { "epoch": 1.65, "learning_rate": 3.486452818436624e-06, "loss": 0.0759, "step": 21210 }, { "epoch": 1.65, "learning_rate": 3.478667081905949e-06, "loss": 0.0709, "step": 21220 }, { "epoch": 1.65, "learning_rate": 3.4708813453752725e-06, "loss": 0.0482, "step": 21230 }, { "epoch": 1.65, "learning_rate": 3.463095608844597e-06, "loss": 0.0536, "step": 21240 }, { "epoch": 1.65, "learning_rate": 3.455309872313921e-06, "loss": 0.0625, "step": 21250 }, { "epoch": 1.66, "learning_rate": 3.4475241357832455e-06, "loss": 0.0656, "step": 21260 }, { "epoch": 1.66, "learning_rate": 3.4397383992525695e-06, "loss": 0.0585, "step": 21270 }, { "epoch": 1.66, "learning_rate": 3.4319526627218935e-06, "loss": 0.0724, "step": 21280 }, { "epoch": 1.66, "learning_rate": 3.424166926191218e-06, "loss": 0.0817, "step": 21290 }, { "epoch": 1.66, "learning_rate": 3.416381189660542e-06, "loss": 0.0846, "step": 21300 }, { "epoch": 1.66, "learning_rate": 3.4085954531298664e-06, "loss": 0.0793, "step": 21310 }, { "epoch": 1.66, "learning_rate": 3.4008097165991905e-06, "loss": 0.0558, "step": 21320 }, { "epoch": 1.66, "learning_rate": 3.393023980068515e-06, "loss": 0.0817, "step": 21330 }, { "epoch": 1.66, "learning_rate": 3.385238243537839e-06, "loss": 0.0643, "step": 21340 }, { "epoch": 1.66, "learning_rate": 3.3774525070071634e-06, "loss": 0.0625, "step": 21350 }, { "epoch": 1.66, "learning_rate": 3.3696667704764874e-06, "loss": 0.052, "step": 21360 }, { "epoch": 1.66, "learning_rate": 3.3618810339458115e-06, "loss": 0.0811, "step": 21370 }, { "epoch": 1.66, "learning_rate": 3.354095297415136e-06, "loss": 0.0507, "step": 21380 }, { "epoch": 1.67, "learning_rate": 3.34630956088446e-06, "loss": 0.0736, "step": 21390 }, { "epoch": 1.67, "learning_rate": 3.3385238243537844e-06, "loss": 0.0615, "step": 21400 }, { "epoch": 1.67, "learning_rate": 3.3307380878231084e-06, "loss": 0.0487, "step": 21410 }, { "epoch": 1.67, "learning_rate": 3.322952351292433e-06, "loss": 0.0606, "step": 21420 }, { "epoch": 1.67, "learning_rate": 3.315166614761757e-06, "loss": 0.06, "step": 21430 }, { "epoch": 1.67, "learning_rate": 3.3073808782310805e-06, "loss": 0.0644, "step": 21440 }, { "epoch": 1.67, "learning_rate": 3.2995951417004054e-06, "loss": 0.0583, "step": 21450 }, { "epoch": 1.67, "learning_rate": 3.291809405169729e-06, "loss": 0.0763, "step": 21460 }, { "epoch": 1.67, "learning_rate": 3.2840236686390534e-06, "loss": 0.059, "step": 21470 }, { "epoch": 1.67, "learning_rate": 3.2762379321083775e-06, "loss": 0.0718, "step": 21480 }, { "epoch": 1.67, "learning_rate": 3.268452195577702e-06, "loss": 0.0708, "step": 21490 }, { "epoch": 1.67, "learning_rate": 3.260666459047026e-06, "loss": 0.08, "step": 21500 }, { "epoch": 1.67, "learning_rate": 3.2528807225163504e-06, "loss": 0.069, "step": 21510 }, { "epoch": 1.68, "learning_rate": 3.2450949859856744e-06, "loss": 0.0592, "step": 21520 }, { "epoch": 1.68, "learning_rate": 3.2373092494549985e-06, "loss": 0.0589, "step": 21530 }, { "epoch": 1.68, "learning_rate": 3.229523512924323e-06, "loss": 0.0703, "step": 21540 }, { "epoch": 1.68, "learning_rate": 3.221737776393647e-06, "loss": 0.0549, "step": 21550 }, { "epoch": 1.68, "learning_rate": 3.2139520398629714e-06, "loss": 0.0663, "step": 21560 }, { "epoch": 1.68, "learning_rate": 3.2061663033322954e-06, "loss": 0.0913, "step": 21570 }, { "epoch": 1.68, "learning_rate": 3.19838056680162e-06, "loss": 0.0677, "step": 21580 }, { "epoch": 1.68, "learning_rate": 3.190594830270944e-06, "loss": 0.0821, "step": 21590 }, { "epoch": 1.68, "learning_rate": 3.1828090937402684e-06, "loss": 0.0569, "step": 21600 }, { "epoch": 1.68, "learning_rate": 3.1750233572095924e-06, "loss": 0.0686, "step": 21610 }, { "epoch": 1.68, "learning_rate": 3.1672376206789164e-06, "loss": 0.0636, "step": 21620 }, { "epoch": 1.68, "learning_rate": 3.159451884148241e-06, "loss": 0.0526, "step": 21630 }, { "epoch": 1.68, "learning_rate": 3.151666147617565e-06, "loss": 0.0579, "step": 21640 }, { "epoch": 1.69, "learning_rate": 3.1438804110868893e-06, "loss": 0.0526, "step": 21650 }, { "epoch": 1.69, "learning_rate": 3.1360946745562134e-06, "loss": 0.0538, "step": 21660 }, { "epoch": 1.69, "learning_rate": 3.128308938025538e-06, "loss": 0.0636, "step": 21670 }, { "epoch": 1.69, "learning_rate": 3.120523201494862e-06, "loss": 0.0528, "step": 21680 }, { "epoch": 1.69, "learning_rate": 3.1127374649641855e-06, "loss": 0.0524, "step": 21690 }, { "epoch": 1.69, "learning_rate": 3.10495172843351e-06, "loss": 0.0692, "step": 21700 }, { "epoch": 1.69, "learning_rate": 3.097165991902834e-06, "loss": 0.059, "step": 21710 }, { "epoch": 1.69, "learning_rate": 3.0893802553721584e-06, "loss": 0.0491, "step": 21720 }, { "epoch": 1.69, "learning_rate": 3.0815945188414824e-06, "loss": 0.0622, "step": 21730 }, { "epoch": 1.69, "learning_rate": 3.073808782310807e-06, "loss": 0.0707, "step": 21740 }, { "epoch": 1.69, "learning_rate": 3.066023045780131e-06, "loss": 0.0717, "step": 21750 }, { "epoch": 1.69, "learning_rate": 3.0582373092494554e-06, "loss": 0.0597, "step": 21760 }, { "epoch": 1.69, "learning_rate": 3.0504515727187794e-06, "loss": 0.0566, "step": 21770 }, { "epoch": 1.7, "learning_rate": 3.0426658361881034e-06, "loss": 0.0611, "step": 21780 }, { "epoch": 1.7, "learning_rate": 3.034880099657428e-06, "loss": 0.0599, "step": 21790 }, { "epoch": 1.7, "learning_rate": 3.027094363126752e-06, "loss": 0.0624, "step": 21800 }, { "epoch": 1.7, "learning_rate": 3.0193086265960763e-06, "loss": 0.0705, "step": 21810 }, { "epoch": 1.7, "learning_rate": 3.0115228900654004e-06, "loss": 0.0663, "step": 21820 }, { "epoch": 1.7, "learning_rate": 3.003737153534725e-06, "loss": 0.0571, "step": 21830 }, { "epoch": 1.7, "learning_rate": 2.995951417004049e-06, "loss": 0.0521, "step": 21840 }, { "epoch": 1.7, "learning_rate": 2.9881656804733733e-06, "loss": 0.072, "step": 21850 }, { "epoch": 1.7, "learning_rate": 2.9803799439426973e-06, "loss": 0.0653, "step": 21860 }, { "epoch": 1.7, "learning_rate": 2.9725942074120214e-06, "loss": 0.0642, "step": 21870 }, { "epoch": 1.7, "learning_rate": 2.964808470881346e-06, "loss": 0.0493, "step": 21880 }, { "epoch": 1.7, "learning_rate": 2.95702273435067e-06, "loss": 0.0645, "step": 21890 }, { "epoch": 1.71, "learning_rate": 2.9492369978199943e-06, "loss": 0.0505, "step": 21900 }, { "epoch": 1.71, "learning_rate": 2.9414512612893183e-06, "loss": 0.0873, "step": 21910 }, { "epoch": 1.71, "learning_rate": 2.9336655247586428e-06, "loss": 0.0579, "step": 21920 }, { "epoch": 1.71, "learning_rate": 2.9258797882279664e-06, "loss": 0.088, "step": 21930 }, { "epoch": 1.71, "learning_rate": 2.9180940516972904e-06, "loss": 0.0714, "step": 21940 }, { "epoch": 1.71, "learning_rate": 2.910308315166615e-06, "loss": 0.0774, "step": 21950 }, { "epoch": 1.71, "learning_rate": 2.902522578635939e-06, "loss": 0.0496, "step": 21960 }, { "epoch": 1.71, "learning_rate": 2.8947368421052634e-06, "loss": 0.0519, "step": 21970 }, { "epoch": 1.71, "learning_rate": 2.8869511055745874e-06, "loss": 0.0596, "step": 21980 }, { "epoch": 1.71, "learning_rate": 2.879165369043912e-06, "loss": 0.0627, "step": 21990 }, { "epoch": 1.71, "learning_rate": 2.871379632513236e-06, "loss": 0.0714, "step": 22000 }, { "epoch": 1.71, "learning_rate": 2.8635938959825603e-06, "loss": 0.0571, "step": 22010 }, { "epoch": 1.71, "learning_rate": 2.8558081594518843e-06, "loss": 0.0609, "step": 22020 }, { "epoch": 1.72, "learning_rate": 2.8480224229212084e-06, "loss": 0.0658, "step": 22030 }, { "epoch": 1.72, "learning_rate": 2.840236686390533e-06, "loss": 0.0533, "step": 22040 }, { "epoch": 1.72, "learning_rate": 2.832450949859857e-06, "loss": 0.0456, "step": 22050 }, { "epoch": 1.72, "learning_rate": 2.8246652133291813e-06, "loss": 0.06, "step": 22060 }, { "epoch": 1.72, "learning_rate": 2.8168794767985053e-06, "loss": 0.0509, "step": 22070 }, { "epoch": 1.72, "learning_rate": 2.8090937402678298e-06, "loss": 0.0561, "step": 22080 }, { "epoch": 1.72, "learning_rate": 2.801308003737154e-06, "loss": 0.0552, "step": 22090 }, { "epoch": 1.72, "learning_rate": 2.7935222672064783e-06, "loss": 0.0636, "step": 22100 }, { "epoch": 1.72, "learning_rate": 2.7857365306758023e-06, "loss": 0.0457, "step": 22110 }, { "epoch": 1.72, "learning_rate": 2.7779507941451263e-06, "loss": 0.0634, "step": 22120 }, { "epoch": 1.72, "learning_rate": 2.7701650576144508e-06, "loss": 0.0594, "step": 22130 }, { "epoch": 1.72, "learning_rate": 2.762379321083775e-06, "loss": 0.0687, "step": 22140 }, { "epoch": 1.72, "learning_rate": 2.7545935845530993e-06, "loss": 0.0634, "step": 22150 }, { "epoch": 1.73, "learning_rate": 2.746807848022423e-06, "loss": 0.0563, "step": 22160 }, { "epoch": 1.73, "learning_rate": 2.7390221114917477e-06, "loss": 0.0745, "step": 22170 }, { "epoch": 1.73, "learning_rate": 2.7312363749610713e-06, "loss": 0.0735, "step": 22180 }, { "epoch": 1.73, "learning_rate": 2.7234506384303954e-06, "loss": 0.0593, "step": 22190 }, { "epoch": 1.73, "learning_rate": 2.71566490189972e-06, "loss": 0.0509, "step": 22200 }, { "epoch": 1.73, "learning_rate": 2.707879165369044e-06, "loss": 0.0707, "step": 22210 }, { "epoch": 1.73, "learning_rate": 2.7000934288383683e-06, "loss": 0.0653, "step": 22220 }, { "epoch": 1.73, "learning_rate": 2.6923076923076923e-06, "loss": 0.0514, "step": 22230 }, { "epoch": 1.73, "learning_rate": 2.6845219557770168e-06, "loss": 0.0593, "step": 22240 }, { "epoch": 1.73, "learning_rate": 2.676736219246341e-06, "loss": 0.043, "step": 22250 }, { "epoch": 1.73, "learning_rate": 2.6689504827156653e-06, "loss": 0.0839, "step": 22260 }, { "epoch": 1.73, "learning_rate": 2.6611647461849893e-06, "loss": 0.0555, "step": 22270 }, { "epoch": 1.73, "learning_rate": 2.6533790096543133e-06, "loss": 0.0646, "step": 22280 }, { "epoch": 1.74, "learning_rate": 2.6455932731236378e-06, "loss": 0.063, "step": 22290 }, { "epoch": 1.74, "learning_rate": 2.637807536592962e-06, "loss": 0.0461, "step": 22300 }, { "epoch": 1.74, "learning_rate": 2.6300218000622863e-06, "loss": 0.0611, "step": 22310 }, { "epoch": 1.74, "learning_rate": 2.6222360635316103e-06, "loss": 0.0546, "step": 22320 }, { "epoch": 1.74, "learning_rate": 2.6144503270009347e-06, "loss": 0.0607, "step": 22330 }, { "epoch": 1.74, "learning_rate": 2.6066645904702588e-06, "loss": 0.0552, "step": 22340 }, { "epoch": 1.74, "learning_rate": 2.5988788539395832e-06, "loss": 0.0628, "step": 22350 }, { "epoch": 1.74, "learning_rate": 2.5910931174089072e-06, "loss": 0.0504, "step": 22360 }, { "epoch": 1.74, "learning_rate": 2.5833073808782313e-06, "loss": 0.0537, "step": 22370 }, { "epoch": 1.74, "learning_rate": 2.5755216443475557e-06, "loss": 0.0548, "step": 22380 }, { "epoch": 1.74, "learning_rate": 2.5677359078168793e-06, "loss": 0.0621, "step": 22390 }, { "epoch": 1.74, "learning_rate": 2.559950171286204e-06, "loss": 0.0773, "step": 22400 }, { "epoch": 1.74, "learning_rate": 2.552164434755528e-06, "loss": 0.0585, "step": 22410 }, { "epoch": 1.75, "learning_rate": 2.5443786982248527e-06, "loss": 0.0711, "step": 22420 }, { "epoch": 1.75, "learning_rate": 2.5365929616941763e-06, "loss": 0.0469, "step": 22430 }, { "epoch": 1.75, "learning_rate": 2.5288072251635003e-06, "loss": 0.0724, "step": 22440 }, { "epoch": 1.75, "learning_rate": 2.5210214886328248e-06, "loss": 0.0616, "step": 22450 }, { "epoch": 1.75, "learning_rate": 2.513235752102149e-06, "loss": 0.0506, "step": 22460 }, { "epoch": 1.75, "learning_rate": 2.5054500155714733e-06, "loss": 0.0653, "step": 22470 }, { "epoch": 1.75, "learning_rate": 2.4976642790407973e-06, "loss": 0.0543, "step": 22480 }, { "epoch": 1.75, "learning_rate": 2.4898785425101217e-06, "loss": 0.0484, "step": 22490 }, { "epoch": 1.75, "learning_rate": 2.4820928059794458e-06, "loss": 0.0549, "step": 22500 }, { "epoch": 1.75, "learning_rate": 2.4743070694487702e-06, "loss": 0.0575, "step": 22510 }, { "epoch": 1.75, "learning_rate": 2.4665213329180942e-06, "loss": 0.0619, "step": 22520 }, { "epoch": 1.75, "learning_rate": 2.4587355963874183e-06, "loss": 0.0482, "step": 22530 }, { "epoch": 1.75, "learning_rate": 2.4509498598567427e-06, "loss": 0.0511, "step": 22540 }, { "epoch": 1.76, "learning_rate": 2.4431641233260668e-06, "loss": 0.0605, "step": 22550 }, { "epoch": 1.76, "learning_rate": 2.435378386795391e-06, "loss": 0.0646, "step": 22560 }, { "epoch": 1.76, "learning_rate": 2.4275926502647152e-06, "loss": 0.0593, "step": 22570 }, { "epoch": 1.76, "learning_rate": 2.4198069137340393e-06, "loss": 0.0577, "step": 22580 }, { "epoch": 1.76, "learning_rate": 2.4120211772033637e-06, "loss": 0.0608, "step": 22590 }, { "epoch": 1.76, "learning_rate": 2.4042354406726877e-06, "loss": 0.0519, "step": 22600 }, { "epoch": 1.76, "learning_rate": 2.396449704142012e-06, "loss": 0.049, "step": 22610 }, { "epoch": 1.76, "learning_rate": 2.3886639676113362e-06, "loss": 0.0477, "step": 22620 }, { "epoch": 1.76, "learning_rate": 2.3808782310806607e-06, "loss": 0.0564, "step": 22630 }, { "epoch": 1.76, "learning_rate": 2.3730924945499847e-06, "loss": 0.05, "step": 22640 }, { "epoch": 1.76, "learning_rate": 2.3653067580193087e-06, "loss": 0.06, "step": 22650 }, { "epoch": 1.76, "learning_rate": 2.3575210214886328e-06, "loss": 0.0576, "step": 22660 }, { "epoch": 1.77, "learning_rate": 2.3497352849579572e-06, "loss": 0.072, "step": 22670 }, { "epoch": 1.77, "learning_rate": 2.3419495484272812e-06, "loss": 0.0507, "step": 22680 }, { "epoch": 1.77, "learning_rate": 2.3341638118966057e-06, "loss": 0.0557, "step": 22690 }, { "epoch": 1.77, "learning_rate": 2.3263780753659297e-06, "loss": 0.0553, "step": 22700 }, { "epoch": 1.77, "learning_rate": 2.318592338835254e-06, "loss": 0.0493, "step": 22710 }, { "epoch": 1.77, "learning_rate": 2.310806602304578e-06, "loss": 0.0688, "step": 22720 }, { "epoch": 1.77, "learning_rate": 2.3030208657739022e-06, "loss": 0.0801, "step": 22730 }, { "epoch": 1.77, "learning_rate": 2.2952351292432267e-06, "loss": 0.0608, "step": 22740 }, { "epoch": 1.77, "learning_rate": 2.2874493927125507e-06, "loss": 0.0542, "step": 22750 }, { "epoch": 1.77, "learning_rate": 2.279663656181875e-06, "loss": 0.0521, "step": 22760 }, { "epoch": 1.77, "learning_rate": 2.271877919651199e-06, "loss": 0.059, "step": 22770 }, { "epoch": 1.77, "learning_rate": 2.2640921831205232e-06, "loss": 0.0754, "step": 22780 }, { "epoch": 1.77, "learning_rate": 2.2563064465898477e-06, "loss": 0.0577, "step": 22790 }, { "epoch": 1.78, "learning_rate": 2.2485207100591717e-06, "loss": 0.0455, "step": 22800 }, { "epoch": 1.78, "learning_rate": 2.240734973528496e-06, "loss": 0.046, "step": 22810 }, { "epoch": 1.78, "learning_rate": 2.23294923699782e-06, "loss": 0.0575, "step": 22820 }, { "epoch": 1.78, "learning_rate": 2.2251635004671442e-06, "loss": 0.0499, "step": 22830 }, { "epoch": 1.78, "learning_rate": 2.2173777639364687e-06, "loss": 0.0864, "step": 22840 }, { "epoch": 1.78, "learning_rate": 2.2095920274057927e-06, "loss": 0.056, "step": 22850 }, { "epoch": 1.78, "learning_rate": 2.201806290875117e-06, "loss": 0.0756, "step": 22860 }, { "epoch": 1.78, "learning_rate": 2.194020554344441e-06, "loss": 0.049, "step": 22870 }, { "epoch": 1.78, "learning_rate": 2.1862348178137656e-06, "loss": 0.0531, "step": 22880 }, { "epoch": 1.78, "learning_rate": 2.1784490812830897e-06, "loss": 0.0495, "step": 22890 }, { "epoch": 1.78, "learning_rate": 2.1706633447524137e-06, "loss": 0.0526, "step": 22900 }, { "epoch": 1.78, "learning_rate": 2.1628776082217377e-06, "loss": 0.0588, "step": 22910 }, { "epoch": 1.78, "learning_rate": 2.155091871691062e-06, "loss": 0.0513, "step": 22920 }, { "epoch": 1.79, "learning_rate": 2.147306135160386e-06, "loss": 0.0512, "step": 22930 }, { "epoch": 1.79, "learning_rate": 2.1395203986297107e-06, "loss": 0.0594, "step": 22940 }, { "epoch": 1.79, "learning_rate": 2.1317346620990347e-06, "loss": 0.0626, "step": 22950 }, { "epoch": 1.79, "learning_rate": 2.123948925568359e-06, "loss": 0.0845, "step": 22960 }, { "epoch": 1.79, "learning_rate": 2.116163189037683e-06, "loss": 0.0489, "step": 22970 }, { "epoch": 1.79, "learning_rate": 2.108377452507007e-06, "loss": 0.0525, "step": 22980 }, { "epoch": 1.79, "learning_rate": 2.1005917159763316e-06, "loss": 0.0653, "step": 22990 }, { "epoch": 1.79, "learning_rate": 2.0928059794456557e-06, "loss": 0.0467, "step": 23000 }, { "epoch": 1.79, "learning_rate": 2.0850202429149797e-06, "loss": 0.0558, "step": 23010 }, { "epoch": 1.79, "learning_rate": 2.077234506384304e-06, "loss": 0.0544, "step": 23020 }, { "epoch": 1.79, "learning_rate": 2.069448769853628e-06, "loss": 0.0781, "step": 23030 }, { "epoch": 1.79, "learning_rate": 2.0616630333229526e-06, "loss": 0.0646, "step": 23040 }, { "epoch": 1.79, "learning_rate": 2.0538772967922767e-06, "loss": 0.0495, "step": 23050 }, { "epoch": 1.8, "learning_rate": 2.046091560261601e-06, "loss": 0.0606, "step": 23060 }, { "epoch": 1.8, "learning_rate": 2.038305823730925e-06, "loss": 0.0693, "step": 23070 }, { "epoch": 1.8, "learning_rate": 2.030520087200249e-06, "loss": 0.0549, "step": 23080 }, { "epoch": 1.8, "learning_rate": 2.0227343506695736e-06, "loss": 0.0592, "step": 23090 }, { "epoch": 1.8, "learning_rate": 2.0149486141388977e-06, "loss": 0.0567, "step": 23100 }, { "epoch": 1.8, "learning_rate": 2.007162877608222e-06, "loss": 0.0647, "step": 23110 }, { "epoch": 1.8, "learning_rate": 1.999377141077546e-06, "loss": 0.0504, "step": 23120 }, { "epoch": 1.8, "learning_rate": 1.9915914045468706e-06, "loss": 0.0566, "step": 23130 }, { "epoch": 1.8, "learning_rate": 1.9838056680161946e-06, "loss": 0.0621, "step": 23140 }, { "epoch": 1.8, "learning_rate": 1.9760199314855186e-06, "loss": 0.0541, "step": 23150 }, { "epoch": 1.8, "learning_rate": 1.9682341949548427e-06, "loss": 0.0555, "step": 23160 }, { "epoch": 1.8, "learning_rate": 1.960448458424167e-06, "loss": 0.0583, "step": 23170 }, { "epoch": 1.8, "learning_rate": 1.952662721893491e-06, "loss": 0.047, "step": 23180 }, { "epoch": 1.81, "learning_rate": 1.9448769853628156e-06, "loss": 0.0502, "step": 23190 }, { "epoch": 1.81, "learning_rate": 1.9370912488321396e-06, "loss": 0.0535, "step": 23200 }, { "epoch": 1.81, "learning_rate": 1.929305512301464e-06, "loss": 0.0561, "step": 23210 }, { "epoch": 1.81, "learning_rate": 1.921519775770788e-06, "loss": 0.0583, "step": 23220 }, { "epoch": 1.81, "learning_rate": 1.913734039240112e-06, "loss": 0.0512, "step": 23230 }, { "epoch": 1.81, "learning_rate": 1.9059483027094364e-06, "loss": 0.0499, "step": 23240 }, { "epoch": 1.81, "learning_rate": 1.8981625661787606e-06, "loss": 0.0464, "step": 23250 }, { "epoch": 1.81, "learning_rate": 1.8903768296480849e-06, "loss": 0.057, "step": 23260 }, { "epoch": 1.81, "learning_rate": 1.8825910931174091e-06, "loss": 0.0488, "step": 23270 }, { "epoch": 1.81, "learning_rate": 1.8748053565867333e-06, "loss": 0.0562, "step": 23280 }, { "epoch": 1.81, "learning_rate": 1.8670196200560576e-06, "loss": 0.0447, "step": 23290 }, { "epoch": 1.81, "learning_rate": 1.8592338835253818e-06, "loss": 0.0522, "step": 23300 }, { "epoch": 1.81, "learning_rate": 1.851448146994706e-06, "loss": 0.0425, "step": 23310 }, { "epoch": 1.82, "learning_rate": 1.8436624104640299e-06, "loss": 0.0546, "step": 23320 }, { "epoch": 1.82, "learning_rate": 1.8358766739333541e-06, "loss": 0.0551, "step": 23330 }, { "epoch": 1.82, "learning_rate": 1.8280909374026784e-06, "loss": 0.058, "step": 23340 }, { "epoch": 1.82, "learning_rate": 1.8203052008720026e-06, "loss": 0.0714, "step": 23350 }, { "epoch": 1.82, "learning_rate": 1.8125194643413268e-06, "loss": 0.0617, "step": 23360 }, { "epoch": 1.82, "learning_rate": 1.804733727810651e-06, "loss": 0.0618, "step": 23370 }, { "epoch": 1.82, "learning_rate": 1.7969479912799753e-06, "loss": 0.0504, "step": 23380 }, { "epoch": 1.82, "learning_rate": 1.7891622547492996e-06, "loss": 0.0538, "step": 23390 }, { "epoch": 1.82, "learning_rate": 1.7813765182186236e-06, "loss": 0.0561, "step": 23400 }, { "epoch": 1.82, "learning_rate": 1.7735907816879478e-06, "loss": 0.0585, "step": 23410 }, { "epoch": 1.82, "learning_rate": 1.7658050451572719e-06, "loss": 0.0659, "step": 23420 }, { "epoch": 1.82, "learning_rate": 1.7580193086265961e-06, "loss": 0.0485, "step": 23430 }, { "epoch": 1.82, "learning_rate": 1.7502335720959204e-06, "loss": 0.055, "step": 23440 }, { "epoch": 1.83, "learning_rate": 1.7424478355652446e-06, "loss": 0.0522, "step": 23450 }, { "epoch": 1.83, "learning_rate": 1.7346620990345688e-06, "loss": 0.0438, "step": 23460 }, { "epoch": 1.83, "learning_rate": 1.726876362503893e-06, "loss": 0.0501, "step": 23470 }, { "epoch": 1.83, "learning_rate": 1.719090625973217e-06, "loss": 0.0645, "step": 23480 }, { "epoch": 1.83, "learning_rate": 1.7113048894425413e-06, "loss": 0.0577, "step": 23490 }, { "epoch": 1.83, "learning_rate": 1.7035191529118656e-06, "loss": 0.0557, "step": 23500 }, { "epoch": 1.83, "learning_rate": 1.6957334163811898e-06, "loss": 0.0464, "step": 23510 }, { "epoch": 1.83, "learning_rate": 1.687947679850514e-06, "loss": 0.0498, "step": 23520 }, { "epoch": 1.83, "learning_rate": 1.6801619433198383e-06, "loss": 0.0553, "step": 23530 }, { "epoch": 1.83, "learning_rate": 1.6723762067891625e-06, "loss": 0.0639, "step": 23540 }, { "epoch": 1.83, "learning_rate": 1.6645904702584868e-06, "loss": 0.0564, "step": 23550 }, { "epoch": 1.83, "learning_rate": 1.656804733727811e-06, "loss": 0.0635, "step": 23560 }, { "epoch": 1.84, "learning_rate": 1.6490189971971348e-06, "loss": 0.0623, "step": 23570 }, { "epoch": 1.84, "learning_rate": 1.641233260666459e-06, "loss": 0.0545, "step": 23580 }, { "epoch": 1.84, "learning_rate": 1.6334475241357833e-06, "loss": 0.0678, "step": 23590 }, { "epoch": 1.84, "learning_rate": 1.6256617876051076e-06, "loss": 0.0484, "step": 23600 }, { "epoch": 1.84, "learning_rate": 1.6178760510744318e-06, "loss": 0.0545, "step": 23610 }, { "epoch": 1.84, "learning_rate": 1.610090314543756e-06, "loss": 0.0485, "step": 23620 }, { "epoch": 1.84, "learning_rate": 1.6023045780130803e-06, "loss": 0.0558, "step": 23630 }, { "epoch": 1.84, "learning_rate": 1.5945188414824045e-06, "loss": 0.0579, "step": 23640 }, { "epoch": 1.84, "learning_rate": 1.5867331049517283e-06, "loss": 0.0686, "step": 23650 }, { "epoch": 1.84, "learning_rate": 1.5789473684210526e-06, "loss": 0.045, "step": 23660 }, { "epoch": 1.84, "learning_rate": 1.5711616318903768e-06, "loss": 0.0663, "step": 23670 }, { "epoch": 1.84, "learning_rate": 1.563375895359701e-06, "loss": 0.056, "step": 23680 }, { "epoch": 1.84, "learning_rate": 1.5555901588290253e-06, "loss": 0.0638, "step": 23690 }, { "epoch": 1.85, "learning_rate": 1.5478044222983495e-06, "loss": 0.056, "step": 23700 }, { "epoch": 1.85, "learning_rate": 1.5400186857676738e-06, "loss": 0.0558, "step": 23710 }, { "epoch": 1.85, "learning_rate": 1.532232949236998e-06, "loss": 0.0473, "step": 23720 }, { "epoch": 1.85, "learning_rate": 1.524447212706322e-06, "loss": 0.0602, "step": 23730 }, { "epoch": 1.85, "learning_rate": 1.5166614761756463e-06, "loss": 0.0457, "step": 23740 }, { "epoch": 1.85, "learning_rate": 1.5088757396449705e-06, "loss": 0.0562, "step": 23750 }, { "epoch": 1.85, "learning_rate": 1.5010900031142948e-06, "loss": 0.0479, "step": 23760 }, { "epoch": 1.85, "learning_rate": 1.493304266583619e-06, "loss": 0.0469, "step": 23770 }, { "epoch": 1.85, "learning_rate": 1.4855185300529433e-06, "loss": 0.0498, "step": 23780 }, { "epoch": 1.85, "learning_rate": 1.4777327935222675e-06, "loss": 0.0484, "step": 23790 }, { "epoch": 1.85, "learning_rate": 1.4699470569915915e-06, "loss": 0.0557, "step": 23800 }, { "epoch": 1.85, "learning_rate": 1.4621613204609158e-06, "loss": 0.0488, "step": 23810 }, { "epoch": 1.85, "learning_rate": 1.4543755839302398e-06, "loss": 0.0634, "step": 23820 }, { "epoch": 1.86, "learning_rate": 1.446589847399564e-06, "loss": 0.0568, "step": 23830 }, { "epoch": 1.86, "learning_rate": 1.4388041108688883e-06, "loss": 0.0454, "step": 23840 }, { "epoch": 1.86, "learning_rate": 1.4310183743382125e-06, "loss": 0.0615, "step": 23850 }, { "epoch": 1.86, "learning_rate": 1.4232326378075368e-06, "loss": 0.0548, "step": 23860 }, { "epoch": 1.86, "learning_rate": 1.415446901276861e-06, "loss": 0.052, "step": 23870 }, { "epoch": 1.86, "learning_rate": 1.4076611647461852e-06, "loss": 0.0526, "step": 23880 }, { "epoch": 1.86, "learning_rate": 1.3998754282155095e-06, "loss": 0.0594, "step": 23890 }, { "epoch": 1.86, "learning_rate": 1.3920896916848333e-06, "loss": 0.0643, "step": 23900 }, { "epoch": 1.86, "learning_rate": 1.3843039551541575e-06, "loss": 0.0797, "step": 23910 }, { "epoch": 1.86, "learning_rate": 1.3765182186234818e-06, "loss": 0.0487, "step": 23920 }, { "epoch": 1.86, "learning_rate": 1.368732482092806e-06, "loss": 0.0526, "step": 23930 }, { "epoch": 1.86, "learning_rate": 1.3609467455621303e-06, "loss": 0.0528, "step": 23940 }, { "epoch": 1.86, "learning_rate": 1.3531610090314545e-06, "loss": 0.0491, "step": 23950 }, { "epoch": 1.87, "learning_rate": 1.3453752725007787e-06, "loss": 0.0495, "step": 23960 }, { "epoch": 1.87, "learning_rate": 1.337589535970103e-06, "loss": 0.0685, "step": 23970 }, { "epoch": 1.87, "learning_rate": 1.329803799439427e-06, "loss": 0.0624, "step": 23980 }, { "epoch": 1.87, "learning_rate": 1.3220180629087512e-06, "loss": 0.0419, "step": 23990 }, { "epoch": 1.87, "learning_rate": 1.3142323263780755e-06, "loss": 0.0822, "step": 24000 }, { "epoch": 1.87, "learning_rate": 1.3064465898473997e-06, "loss": 0.0649, "step": 24010 }, { "epoch": 1.87, "learning_rate": 1.298660853316724e-06, "loss": 0.0521, "step": 24020 }, { "epoch": 1.87, "learning_rate": 1.290875116786048e-06, "loss": 0.0432, "step": 24030 }, { "epoch": 1.87, "learning_rate": 1.2830893802553722e-06, "loss": 0.0511, "step": 24040 }, { "epoch": 1.87, "learning_rate": 1.2753036437246965e-06, "loss": 0.0467, "step": 24050 }, { "epoch": 1.87, "learning_rate": 1.2675179071940207e-06, "loss": 0.0554, "step": 24060 }, { "epoch": 1.87, "learning_rate": 1.2597321706633447e-06, "loss": 0.0483, "step": 24070 }, { "epoch": 1.87, "learning_rate": 1.251946434132669e-06, "loss": 0.0533, "step": 24080 }, { "epoch": 1.88, "learning_rate": 1.2441606976019932e-06, "loss": 0.0481, "step": 24090 }, { "epoch": 1.88, "learning_rate": 1.2363749610713175e-06, "loss": 0.054, "step": 24100 }, { "epoch": 1.88, "learning_rate": 1.2285892245406417e-06, "loss": 0.0536, "step": 24110 }, { "epoch": 1.88, "learning_rate": 1.220803488009966e-06, "loss": 0.0509, "step": 24120 }, { "epoch": 1.88, "learning_rate": 1.21301775147929e-06, "loss": 0.0462, "step": 24130 }, { "epoch": 1.88, "learning_rate": 1.2052320149486142e-06, "loss": 0.0478, "step": 24140 }, { "epoch": 1.88, "learning_rate": 1.1974462784179385e-06, "loss": 0.0488, "step": 24150 }, { "epoch": 1.88, "learning_rate": 1.1896605418872627e-06, "loss": 0.057, "step": 24160 }, { "epoch": 1.88, "learning_rate": 1.1818748053565867e-06, "loss": 0.0469, "step": 24170 }, { "epoch": 1.88, "learning_rate": 1.174089068825911e-06, "loss": 0.054, "step": 24180 }, { "epoch": 1.88, "learning_rate": 1.1663033322952352e-06, "loss": 0.0527, "step": 24190 }, { "epoch": 1.88, "learning_rate": 1.1585175957645595e-06, "loss": 0.057, "step": 24200 }, { "epoch": 1.88, "learning_rate": 1.1507318592338835e-06, "loss": 0.0508, "step": 24210 }, { "epoch": 1.89, "learning_rate": 1.1429461227032077e-06, "loss": 0.0526, "step": 24220 }, { "epoch": 1.89, "learning_rate": 1.135160386172532e-06, "loss": 0.0578, "step": 24230 }, { "epoch": 1.89, "learning_rate": 1.1273746496418562e-06, "loss": 0.0585, "step": 24240 }, { "epoch": 1.89, "learning_rate": 1.1195889131111804e-06, "loss": 0.0459, "step": 24250 }, { "epoch": 1.89, "learning_rate": 1.1118031765805045e-06, "loss": 0.05, "step": 24260 }, { "epoch": 1.89, "learning_rate": 1.1040174400498287e-06, "loss": 0.0441, "step": 24270 }, { "epoch": 1.89, "learning_rate": 1.096231703519153e-06, "loss": 0.0597, "step": 24280 }, { "epoch": 1.89, "learning_rate": 1.0884459669884772e-06, "loss": 0.0609, "step": 24290 }, { "epoch": 1.89, "learning_rate": 1.0806602304578014e-06, "loss": 0.0493, "step": 24300 }, { "epoch": 1.89, "learning_rate": 1.0728744939271257e-06, "loss": 0.0494, "step": 24310 }, { "epoch": 1.89, "learning_rate": 1.06508875739645e-06, "loss": 0.0537, "step": 24320 }, { "epoch": 1.89, "learning_rate": 1.0573030208657742e-06, "loss": 0.0433, "step": 24330 }, { "epoch": 1.9, "learning_rate": 1.0495172843350982e-06, "loss": 0.052, "step": 24340 }, { "epoch": 1.9, "learning_rate": 1.0417315478044224e-06, "loss": 0.0463, "step": 24350 }, { "epoch": 1.9, "learning_rate": 1.0339458112737467e-06, "loss": 0.0498, "step": 24360 }, { "epoch": 1.9, "learning_rate": 1.026160074743071e-06, "loss": 0.0515, "step": 24370 }, { "epoch": 1.9, "learning_rate": 1.018374338212395e-06, "loss": 0.0493, "step": 24380 }, { "epoch": 1.9, "learning_rate": 1.0105886016817192e-06, "loss": 0.0601, "step": 24390 }, { "epoch": 1.9, "learning_rate": 1.0028028651510434e-06, "loss": 0.0566, "step": 24400 }, { "epoch": 1.9, "learning_rate": 9.950171286203677e-07, "loss": 0.0496, "step": 24410 }, { "epoch": 1.9, "learning_rate": 9.872313920896917e-07, "loss": 0.0473, "step": 24420 }, { "epoch": 1.9, "learning_rate": 9.79445655559016e-07, "loss": 0.0538, "step": 24430 }, { "epoch": 1.9, "learning_rate": 9.716599190283402e-07, "loss": 0.046, "step": 24440 }, { "epoch": 1.9, "learning_rate": 9.638741824976644e-07, "loss": 0.05, "step": 24450 }, { "epoch": 1.9, "learning_rate": 9.560884459669884e-07, "loss": 0.0656, "step": 24460 }, { "epoch": 1.91, "learning_rate": 9.483027094363127e-07, "loss": 0.0433, "step": 24470 }, { "epoch": 1.91, "learning_rate": 9.405169729056369e-07, "loss": 0.0466, "step": 24480 }, { "epoch": 1.91, "learning_rate": 9.327312363749612e-07, "loss": 0.0493, "step": 24490 }, { "epoch": 1.91, "learning_rate": 9.249454998442853e-07, "loss": 0.0453, "step": 24500 }, { "epoch": 1.91, "learning_rate": 9.171597633136095e-07, "loss": 0.0536, "step": 24510 }, { "epoch": 1.91, "learning_rate": 9.093740267829338e-07, "loss": 0.0484, "step": 24520 }, { "epoch": 1.91, "learning_rate": 9.01588290252258e-07, "loss": 0.0514, "step": 24530 }, { "epoch": 1.91, "learning_rate": 8.938025537215823e-07, "loss": 0.0488, "step": 24540 }, { "epoch": 1.91, "learning_rate": 8.860168171909063e-07, "loss": 0.0458, "step": 24550 }, { "epoch": 1.91, "learning_rate": 8.782310806602305e-07, "loss": 0.0555, "step": 24560 }, { "epoch": 1.91, "learning_rate": 8.704453441295548e-07, "loss": 0.0469, "step": 24570 }, { "epoch": 1.91, "learning_rate": 8.62659607598879e-07, "loss": 0.0444, "step": 24580 }, { "epoch": 1.91, "learning_rate": 8.54873871068203e-07, "loss": 0.0475, "step": 24590 }, { "epoch": 1.92, "learning_rate": 8.470881345375273e-07, "loss": 0.0501, "step": 24600 }, { "epoch": 1.92, "learning_rate": 8.393023980068515e-07, "loss": 0.0474, "step": 24610 }, { "epoch": 1.92, "learning_rate": 8.315166614761758e-07, "loss": 0.0438, "step": 24620 }, { "epoch": 1.92, "learning_rate": 8.237309249454999e-07, "loss": 0.0552, "step": 24630 }, { "epoch": 1.92, "learning_rate": 8.159451884148241e-07, "loss": 0.046, "step": 24640 }, { "epoch": 1.92, "learning_rate": 8.081594518841484e-07, "loss": 0.05, "step": 24650 }, { "epoch": 1.92, "learning_rate": 8.003737153534725e-07, "loss": 0.0518, "step": 24660 }, { "epoch": 1.92, "learning_rate": 7.925879788227966e-07, "loss": 0.0492, "step": 24670 }, { "epoch": 1.92, "learning_rate": 7.848022422921209e-07, "loss": 0.0509, "step": 24680 }, { "epoch": 1.92, "learning_rate": 7.770165057614451e-07, "loss": 0.0511, "step": 24690 }, { "epoch": 1.92, "learning_rate": 7.692307692307694e-07, "loss": 0.0505, "step": 24700 }, { "epoch": 1.92, "learning_rate": 7.614450327000934e-07, "loss": 0.043, "step": 24710 }, { "epoch": 1.92, "learning_rate": 7.536592961694176e-07, "loss": 0.0521, "step": 24720 }, { "epoch": 1.93, "learning_rate": 7.458735596387419e-07, "loss": 0.0427, "step": 24730 }, { "epoch": 1.93, "learning_rate": 7.380878231080661e-07, "loss": 0.0721, "step": 24740 }, { "epoch": 1.93, "learning_rate": 7.303020865773902e-07, "loss": 0.0533, "step": 24750 }, { "epoch": 1.93, "learning_rate": 7.225163500467145e-07, "loss": 0.052, "step": 24760 }, { "epoch": 1.93, "learning_rate": 7.147306135160387e-07, "loss": 0.0488, "step": 24770 }, { "epoch": 1.93, "learning_rate": 7.069448769853629e-07, "loss": 0.0465, "step": 24780 }, { "epoch": 1.93, "learning_rate": 6.991591404546871e-07, "loss": 0.0504, "step": 24790 }, { "epoch": 1.93, "learning_rate": 6.913734039240112e-07, "loss": 0.0436, "step": 24800 }, { "epoch": 1.93, "learning_rate": 6.835876673933355e-07, "loss": 0.0696, "step": 24810 }, { "epoch": 1.93, "learning_rate": 6.758019308626597e-07, "loss": 0.0497, "step": 24820 }, { "epoch": 1.93, "learning_rate": 6.68016194331984e-07, "loss": 0.0512, "step": 24830 }, { "epoch": 1.93, "learning_rate": 6.60230457801308e-07, "loss": 0.0539, "step": 24840 }, { "epoch": 1.93, "learning_rate": 6.524447212706322e-07, "loss": 0.0436, "step": 24850 }, { "epoch": 1.94, "learning_rate": 6.446589847399565e-07, "loss": 0.0455, "step": 24860 }, { "epoch": 1.94, "learning_rate": 6.368732482092807e-07, "loss": 0.0483, "step": 24870 }, { "epoch": 1.94, "learning_rate": 6.290875116786048e-07, "loss": 0.0462, "step": 24880 }, { "epoch": 1.94, "learning_rate": 6.21301775147929e-07, "loss": 0.0469, "step": 24890 }, { "epoch": 1.94, "learning_rate": 6.135160386172532e-07, "loss": 0.0565, "step": 24900 }, { "epoch": 1.94, "learning_rate": 6.057303020865775e-07, "loss": 0.0503, "step": 24910 }, { "epoch": 1.94, "learning_rate": 5.979445655559017e-07, "loss": 0.0479, "step": 24920 }, { "epoch": 1.94, "learning_rate": 5.901588290252258e-07, "loss": 0.0553, "step": 24930 }, { "epoch": 1.94, "learning_rate": 5.823730924945501e-07, "loss": 0.05, "step": 24940 }, { "epoch": 1.94, "learning_rate": 5.745873559638742e-07, "loss": 0.0501, "step": 24950 }, { "epoch": 1.94, "learning_rate": 5.668016194331984e-07, "loss": 0.0461, "step": 24960 }, { "epoch": 1.94, "learning_rate": 5.590158829025226e-07, "loss": 0.0465, "step": 24970 }, { "epoch": 1.94, "learning_rate": 5.512301463718468e-07, "loss": 0.0527, "step": 24980 }, { "epoch": 1.95, "learning_rate": 5.43444409841171e-07, "loss": 0.049, "step": 24990 }, { "epoch": 1.95, "learning_rate": 5.356586733104952e-07, "loss": 0.0606, "step": 25000 }, { "epoch": 1.95, "learning_rate": 5.278729367798194e-07, "loss": 0.0497, "step": 25010 }, { "epoch": 1.95, "learning_rate": 5.200872002491436e-07, "loss": 0.046, "step": 25020 }, { "epoch": 1.95, "learning_rate": 5.123014637184678e-07, "loss": 0.0531, "step": 25030 }, { "epoch": 1.95, "learning_rate": 5.045157271877921e-07, "loss": 0.0456, "step": 25040 }, { "epoch": 1.95, "learning_rate": 4.967299906571162e-07, "loss": 0.0539, "step": 25050 }, { "epoch": 1.95, "learning_rate": 4.889442541264404e-07, "loss": 0.0565, "step": 25060 }, { "epoch": 1.95, "learning_rate": 4.811585175957647e-07, "loss": 0.0444, "step": 25070 }, { "epoch": 1.95, "learning_rate": 4.733727810650888e-07, "loss": 0.0467, "step": 25080 }, { "epoch": 1.95, "learning_rate": 4.65587044534413e-07, "loss": 0.0465, "step": 25090 }, { "epoch": 1.95, "learning_rate": 4.578013080037372e-07, "loss": 0.0434, "step": 25100 }, { "epoch": 1.95, "learning_rate": 4.500155714730614e-07, "loss": 0.046, "step": 25110 }, { "epoch": 1.96, "learning_rate": 4.4222983494238555e-07, "loss": 0.0437, "step": 25120 }, { "epoch": 1.96, "learning_rate": 4.344440984117098e-07, "loss": 0.0507, "step": 25130 }, { "epoch": 1.96, "learning_rate": 4.26658361881034e-07, "loss": 0.046, "step": 25140 }, { "epoch": 1.96, "learning_rate": 4.1887262535035817e-07, "loss": 0.0529, "step": 25150 }, { "epoch": 1.96, "learning_rate": 4.1108688881968236e-07, "loss": 0.0452, "step": 25160 }, { "epoch": 1.96, "learning_rate": 4.033011522890066e-07, "loss": 0.0502, "step": 25170 }, { "epoch": 1.96, "learning_rate": 3.9551541575833073e-07, "loss": 0.0535, "step": 25180 }, { "epoch": 1.96, "learning_rate": 3.8772967922765497e-07, "loss": 0.0493, "step": 25190 }, { "epoch": 1.96, "learning_rate": 3.7994394269697916e-07, "loss": 0.0482, "step": 25200 }, { "epoch": 1.96, "learning_rate": 3.7215820616630335e-07, "loss": 0.0464, "step": 25210 }, { "epoch": 1.96, "learning_rate": 3.6437246963562754e-07, "loss": 0.0455, "step": 25220 }, { "epoch": 1.96, "learning_rate": 3.565867331049518e-07, "loss": 0.0492, "step": 25230 }, { "epoch": 1.97, "learning_rate": 3.488009965742759e-07, "loss": 0.0479, "step": 25240 }, { "epoch": 1.97, "learning_rate": 3.4101526004360015e-07, "loss": 0.0455, "step": 25250 }, { "epoch": 1.97, "learning_rate": 3.332295235129244e-07, "loss": 0.0465, "step": 25260 }, { "epoch": 1.97, "learning_rate": 3.254437869822485e-07, "loss": 0.0505, "step": 25270 }, { "epoch": 1.97, "learning_rate": 3.1765805045157277e-07, "loss": 0.0576, "step": 25280 }, { "epoch": 1.97, "learning_rate": 3.0987231392089695e-07, "loss": 0.0439, "step": 25290 }, { "epoch": 1.97, "learning_rate": 3.0208657739022114e-07, "loss": 0.0429, "step": 25300 }, { "epoch": 1.97, "learning_rate": 2.9430084085954533e-07, "loss": 0.0464, "step": 25310 }, { "epoch": 1.97, "learning_rate": 2.865151043288695e-07, "loss": 0.0443, "step": 25320 }, { "epoch": 1.97, "learning_rate": 2.787293677981937e-07, "loss": 0.0475, "step": 25330 }, { "epoch": 1.97, "learning_rate": 2.709436312675179e-07, "loss": 0.0553, "step": 25340 }, { "epoch": 1.97, "learning_rate": 2.6315789473684213e-07, "loss": 0.0485, "step": 25350 }, { "epoch": 1.97, "learning_rate": 2.553721582061663e-07, "loss": 0.0471, "step": 25360 }, { "epoch": 1.98, "learning_rate": 2.4758642167549056e-07, "loss": 0.0511, "step": 25370 }, { "epoch": 1.98, "learning_rate": 2.3980068514481475e-07, "loss": 0.0455, "step": 25380 }, { "epoch": 1.98, "learning_rate": 2.3201494861413894e-07, "loss": 0.0517, "step": 25390 }, { "epoch": 1.98, "learning_rate": 2.2422921208346312e-07, "loss": 0.0553, "step": 25400 }, { "epoch": 1.98, "learning_rate": 2.164434755527873e-07, "loss": 0.0453, "step": 25410 }, { "epoch": 1.98, "learning_rate": 2.0865773902211152e-07, "loss": 0.0576, "step": 25420 }, { "epoch": 1.98, "learning_rate": 2.008720024914357e-07, "loss": 0.0471, "step": 25430 }, { "epoch": 1.98, "learning_rate": 1.930862659607599e-07, "loss": 0.0471, "step": 25440 }, { "epoch": 1.98, "learning_rate": 1.8530052943008411e-07, "loss": 0.05, "step": 25450 }, { "epoch": 1.98, "learning_rate": 1.775147928994083e-07, "loss": 0.0434, "step": 25460 }, { "epoch": 1.98, "learning_rate": 1.697290563687325e-07, "loss": 0.0439, "step": 25470 }, { "epoch": 1.98, "learning_rate": 1.619433198380567e-07, "loss": 0.0468, "step": 25480 }, { "epoch": 1.98, "learning_rate": 1.541575833073809e-07, "loss": 0.0457, "step": 25490 }, { "epoch": 1.99, "learning_rate": 1.4637184677670508e-07, "loss": 0.0457, "step": 25500 }, { "epoch": 1.99, "learning_rate": 1.385861102460293e-07, "loss": 0.0485, "step": 25510 }, { "epoch": 1.99, "learning_rate": 1.3080037371535348e-07, "loss": 0.0418, "step": 25520 }, { "epoch": 1.99, "learning_rate": 1.2301463718467767e-07, "loss": 0.0558, "step": 25530 }, { "epoch": 1.99, "learning_rate": 1.1522890065400187e-07, "loss": 0.0544, "step": 25540 }, { "epoch": 1.99, "learning_rate": 1.0744316412332607e-07, "loss": 0.048, "step": 25550 }, { "epoch": 1.99, "learning_rate": 9.965742759265028e-08, "loss": 0.0439, "step": 25560 }, { "epoch": 1.99, "learning_rate": 9.187169106197447e-08, "loss": 0.0493, "step": 25570 }, { "epoch": 1.99, "learning_rate": 8.408595453129867e-08, "loss": 0.0447, "step": 25580 }, { "epoch": 1.99, "learning_rate": 7.630021800062287e-08, "loss": 0.041, "step": 25590 }, { "epoch": 1.99, "learning_rate": 6.851448146994706e-08, "loss": 0.047, "step": 25600 }, { "epoch": 1.99, "learning_rate": 6.072874493927126e-08, "loss": 0.0484, "step": 25610 }, { "epoch": 1.99, "learning_rate": 5.2943008408595454e-08, "loss": 0.0442, "step": 25620 }, { "epoch": 2.0, "learning_rate": 4.5157271877919655e-08, "loss": 0.0501, "step": 25630 }, { "epoch": 2.0, "learning_rate": 3.7371535347243856e-08, "loss": 0.0449, "step": 25640 }, { "epoch": 2.0, "learning_rate": 2.958579881656805e-08, "loss": 0.0474, "step": 25650 }, { "epoch": 2.0, "learning_rate": 2.1800062285892244e-08, "loss": 0.0452, "step": 25660 }, { "epoch": 2.0, "learning_rate": 1.4014325755216445e-08, "loss": 0.0536, "step": 25670 }, { "epoch": 2.0, "learning_rate": 6.228589224540643e-09, "loss": 0.0451, "step": 25680 } ], "max_steps": 25688, "num_train_epochs": 2, "total_flos": 1.9075085981712384e+17, "trial_name": null, "trial_params": null }