diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15424 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "global_step": 25688, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9992214263469324e-05, + "loss": 3.6476, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9984428526938652e-05, + "loss": 0.3029, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.9976642790407974e-05, + "loss": 0.4554, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.9968857053877296e-05, + "loss": 0.3546, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.9961071317346625e-05, + "loss": 0.4369, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.9953285580815947e-05, + "loss": 0.4187, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.9945499844285272e-05, + "loss": 0.3758, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.9937714107754594e-05, + "loss": 0.442, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.992992837122392e-05, + "loss": 0.3408, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.9922142634693244e-05, + "loss": 0.2933, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.9914356898162566e-05, + "loss": 0.5627, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.990657116163189e-05, + "loss": 0.406, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.9898785425101217e-05, + "loss": 0.3029, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.989099968857054e-05, + "loss": 0.3583, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.9883213952039864e-05, + "loss": 0.4022, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.987542821550919e-05, + "loss": 0.3508, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.986764247897851e-05, + "loss": 0.3816, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 1.9859856742447837e-05, + "loss": 0.422, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.9852071005917162e-05, + "loss": 0.4494, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 1.9844285269386484e-05, + "loss": 0.3588, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 1.983649953285581e-05, + "loss": 0.4859, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 1.9828713796325135e-05, + "loss": 0.4338, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 1.9820928059794456e-05, + "loss": 0.4002, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 1.9813142323263782e-05, + "loss": 0.4142, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 1.9805356586733107e-05, + "loss": 0.369, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 1.979757085020243e-05, + "loss": 0.2912, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 1.9789785113671754e-05, + "loss": 0.4347, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 1.978199937714108e-05, + "loss": 0.3762, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 1.97742136406104e-05, + "loss": 0.2991, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 1.9766427904079727e-05, + "loss": 0.355, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 1.9758642167549052e-05, + "loss": 0.3871, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 1.9750856431018377e-05, + "loss": 0.3469, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 1.97430706944877e-05, + "loss": 0.3732, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 1.9735284957957025e-05, + "loss": 0.3906, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 1.972749922142635e-05, + "loss": 0.3773, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 1.9719713484895672e-05, + "loss": 0.4073, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 1.9711927748364997e-05, + "loss": 0.3961, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 1.9704142011834322e-05, + "loss": 0.4076, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 1.9696356275303644e-05, + "loss": 0.4342, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 1.968857053877297e-05, + "loss": 0.4247, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 1.9680784802242295e-05, + "loss": 0.4857, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 1.9672999065711617e-05, + "loss": 0.3507, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 1.9665213329180942e-05, + "loss": 0.447, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 1.9657427592650267e-05, + "loss": 0.4799, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 1.964964185611959e-05, + "loss": 0.4324, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 1.9641856119588915e-05, + "loss": 0.3724, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 1.963407038305824e-05, + "loss": 0.3797, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 1.9626284646527562e-05, + "loss": 0.342, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 1.9618498909996887e-05, + "loss": 0.3696, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 1.9610713173466212e-05, + "loss": 0.3531, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9602927436935534e-05, + "loss": 0.3707, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 1.959514170040486e-05, + "loss": 0.3054, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 1.9587355963874185e-05, + "loss": 0.333, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 1.9579570227343507e-05, + "loss": 0.4072, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 1.9571784490812832e-05, + "loss": 0.3363, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 1.9563998754282157e-05, + "loss": 0.3723, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 1.9556213017751483e-05, + "loss": 0.4651, + "step": 570 + }, + { + "epoch": 0.05, + "learning_rate": 1.9548427281220805e-05, + "loss": 0.3766, + "step": 580 + }, + { + "epoch": 0.05, + "learning_rate": 1.954064154469013e-05, + "loss": 0.3247, + "step": 590 + }, + { + "epoch": 0.05, + "learning_rate": 1.9532855808159455e-05, + "loss": 0.4209, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 1.9525070071628777e-05, + "loss": 0.4038, + "step": 610 + }, + { + "epoch": 0.05, + "learning_rate": 1.9517284335098102e-05, + "loss": 0.305, + "step": 620 + }, + { + "epoch": 0.05, + "learning_rate": 1.9509498598567428e-05, + "loss": 0.4737, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 1.950171286203675e-05, + "loss": 0.3816, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 1.9493927125506075e-05, + "loss": 0.3406, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 1.94861413889754e-05, + "loss": 0.4612, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 1.9478355652444722e-05, + "loss": 0.396, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 1.9470569915914047e-05, + "loss": 0.2885, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 1.9462784179383373e-05, + "loss": 0.372, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 1.9454998442852695e-05, + "loss": 0.3646, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 1.944721270632202e-05, + "loss": 0.3517, + "step": 710 + }, + { + "epoch": 0.06, + "learning_rate": 1.9439426969791345e-05, + "loss": 0.3244, + "step": 720 + }, + { + "epoch": 0.06, + "learning_rate": 1.9431641233260667e-05, + "loss": 0.3107, + "step": 730 + }, + { + "epoch": 0.06, + "learning_rate": 1.9423855496729992e-05, + "loss": 0.4312, + "step": 740 + }, + { + "epoch": 0.06, + "learning_rate": 1.9416069760199318e-05, + "loss": 0.3501, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 1.940828402366864e-05, + "loss": 0.3394, + "step": 760 + }, + { + "epoch": 0.06, + "learning_rate": 1.9400498287137965e-05, + "loss": 0.2721, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 1.939271255060729e-05, + "loss": 0.3824, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 1.9384926814076612e-05, + "loss": 0.3357, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 1.9377141077545937e-05, + "loss": 0.5051, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 1.9369355341015263e-05, + "loss": 0.3467, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 1.9361569604484588e-05, + "loss": 0.3079, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 1.935378386795391e-05, + "loss": 0.3362, + "step": 830 + }, + { + "epoch": 0.07, + "learning_rate": 1.9345998131423235e-05, + "loss": 0.3535, + "step": 840 + }, + { + "epoch": 0.07, + "learning_rate": 1.933821239489256e-05, + "loss": 0.3338, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 1.9330426658361883e-05, + "loss": 0.3705, + "step": 860 + }, + { + "epoch": 0.07, + "learning_rate": 1.9322640921831204e-05, + "loss": 0.3507, + "step": 870 + }, + { + "epoch": 0.07, + "learning_rate": 1.9314855185300533e-05, + "loss": 0.2796, + "step": 880 + }, + { + "epoch": 0.07, + "learning_rate": 1.9307069448769855e-05, + "loss": 0.3828, + "step": 890 + }, + { + "epoch": 0.07, + "learning_rate": 1.9299283712239177e-05, + "loss": 0.3091, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 1.9291497975708506e-05, + "loss": 0.294, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 1.9283712239177828e-05, + "loss": 0.3353, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 1.927592650264715e-05, + "loss": 0.4199, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 1.9268140766116478e-05, + "loss": 0.4175, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 1.92603550295858e-05, + "loss": 0.396, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 1.9252569293055125e-05, + "loss": 0.3726, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 1.924478355652445e-05, + "loss": 0.4031, + "step": 970 + }, + { + "epoch": 0.08, + "learning_rate": 1.9236997819993773e-05, + "loss": 0.3575, + "step": 980 + }, + { + "epoch": 0.08, + "learning_rate": 1.9229212083463098e-05, + "loss": 0.3118, + "step": 990 + }, + { + "epoch": 0.08, + "learning_rate": 1.922142634693242e-05, + "loss": 0.272, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 1.9213640610401745e-05, + "loss": 0.3698, + "step": 1010 + }, + { + "epoch": 0.08, + "learning_rate": 1.920585487387107e-05, + "loss": 0.3232, + "step": 1020 + }, + { + "epoch": 0.08, + "learning_rate": 1.9198069137340392e-05, + "loss": 0.4195, + "step": 1030 + }, + { + "epoch": 0.08, + "learning_rate": 1.9190283400809718e-05, + "loss": 0.377, + "step": 1040 + }, + { + "epoch": 0.08, + "learning_rate": 1.9182497664279043e-05, + "loss": 0.371, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 1.9174711927748365e-05, + "loss": 0.3674, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 1.9166926191217693e-05, + "loss": 0.3503, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 1.9159140454687015e-05, + "loss": 0.3805, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 1.9151354718156337e-05, + "loss": 0.3474, + "step": 1090 + }, + { + "epoch": 0.09, + "learning_rate": 1.9143568981625663e-05, + "loss": 0.3868, + "step": 1100 + }, + { + "epoch": 0.09, + "learning_rate": 1.9135783245094988e-05, + "loss": 0.3652, + "step": 1110 + }, + { + "epoch": 0.09, + "learning_rate": 1.912799750856431e-05, + "loss": 0.3521, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 1.9120211772033635e-05, + "loss": 0.3004, + "step": 1130 + }, + { + "epoch": 0.09, + "learning_rate": 1.911242603550296e-05, + "loss": 0.3605, + "step": 1140 + }, + { + "epoch": 0.09, + "learning_rate": 1.9104640298972282e-05, + "loss": 0.2845, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 1.9096854562441608e-05, + "loss": 0.3038, + "step": 1160 + }, + { + "epoch": 0.09, + "learning_rate": 1.9089068825910933e-05, + "loss": 0.3712, + "step": 1170 + }, + { + "epoch": 0.09, + "learning_rate": 1.9081283089380255e-05, + "loss": 0.303, + "step": 1180 + }, + { + "epoch": 0.09, + "learning_rate": 1.907349735284958e-05, + "loss": 0.2412, + "step": 1190 + }, + { + "epoch": 0.09, + "learning_rate": 1.9065711616318905e-05, + "loss": 0.3342, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 1.905792587978823e-05, + "loss": 0.2995, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 1.9050140143257553e-05, + "loss": 0.4164, + "step": 1220 + }, + { + "epoch": 0.1, + "learning_rate": 1.9042354406726878e-05, + "loss": 0.3316, + "step": 1230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9034568670196203e-05, + "loss": 0.3575, + "step": 1240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9026782933665525e-05, + "loss": 0.3399, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 1.901899719713485e-05, + "loss": 0.3008, + "step": 1260 + }, + { + "epoch": 0.1, + "learning_rate": 1.9011211460604176e-05, + "loss": 0.3817, + "step": 1270 + }, + { + "epoch": 0.1, + "learning_rate": 1.9003425724073498e-05, + "loss": 0.2931, + "step": 1280 + }, + { + "epoch": 0.1, + "learning_rate": 1.8995639987542823e-05, + "loss": 0.3154, + "step": 1290 + }, + { + "epoch": 0.1, + "learning_rate": 1.8987854251012148e-05, + "loss": 0.3168, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 1.898006851448147e-05, + "loss": 0.2592, + "step": 1310 + }, + { + "epoch": 0.1, + "learning_rate": 1.8972282777950795e-05, + "loss": 0.3804, + "step": 1320 + }, + { + "epoch": 0.1, + "learning_rate": 1.896449704142012e-05, + "loss": 0.3438, + "step": 1330 + }, + { + "epoch": 0.1, + "learning_rate": 1.8956711304889443e-05, + "loss": 0.3112, + "step": 1340 + }, + { + "epoch": 0.11, + "learning_rate": 1.8948925568358768e-05, + "loss": 0.3406, + "step": 1350 + }, + { + "epoch": 0.11, + "learning_rate": 1.8941139831828093e-05, + "loss": 0.4156, + "step": 1360 + }, + { + "epoch": 0.11, + "learning_rate": 1.8933354095297415e-05, + "loss": 0.3699, + "step": 1370 + }, + { + "epoch": 0.11, + "learning_rate": 1.892556835876674e-05, + "loss": 0.3685, + "step": 1380 + }, + { + "epoch": 0.11, + "learning_rate": 1.8917782622236066e-05, + "loss": 0.3602, + "step": 1390 + }, + { + "epoch": 0.11, + "learning_rate": 1.8909996885705388e-05, + "loss": 0.3602, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 1.8902211149174713e-05, + "loss": 0.399, + "step": 1410 + }, + { + "epoch": 0.11, + "learning_rate": 1.8894425412644038e-05, + "loss": 0.3202, + "step": 1420 + }, + { + "epoch": 0.11, + "learning_rate": 1.888663967611336e-05, + "loss": 0.4622, + "step": 1430 + }, + { + "epoch": 0.11, + "learning_rate": 1.8878853939582685e-05, + "loss": 0.3126, + "step": 1440 + }, + { + "epoch": 0.11, + "learning_rate": 1.887106820305201e-05, + "loss": 0.3729, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 1.8863282466521336e-05, + "loss": 0.345, + "step": 1460 + }, + { + "epoch": 0.11, + "learning_rate": 1.8855496729990658e-05, + "loss": 0.3146, + "step": 1470 + }, + { + "epoch": 0.12, + "learning_rate": 1.8847710993459983e-05, + "loss": 0.3615, + "step": 1480 + }, + { + "epoch": 0.12, + "learning_rate": 1.883992525692931e-05, + "loss": 0.3023, + "step": 1490 + }, + { + "epoch": 0.12, + "learning_rate": 1.883213952039863e-05, + "loss": 0.3833, + "step": 1500 + }, + { + "epoch": 0.12, + "learning_rate": 1.8824353783867956e-05, + "loss": 0.3573, + "step": 1510 + }, + { + "epoch": 0.12, + "learning_rate": 1.881656804733728e-05, + "loss": 0.2883, + "step": 1520 + }, + { + "epoch": 0.12, + "learning_rate": 1.8808782310806603e-05, + "loss": 0.4392, + "step": 1530 + }, + { + "epoch": 0.12, + "learning_rate": 1.880099657427593e-05, + "loss": 0.3942, + "step": 1540 + }, + { + "epoch": 0.12, + "learning_rate": 1.8793210837745254e-05, + "loss": 0.3247, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 1.8785425101214576e-05, + "loss": 0.378, + "step": 1560 + }, + { + "epoch": 0.12, + "learning_rate": 1.87776393646839e-05, + "loss": 0.3076, + "step": 1570 + }, + { + "epoch": 0.12, + "learning_rate": 1.8769853628153226e-05, + "loss": 0.3093, + "step": 1580 + }, + { + "epoch": 0.12, + "learning_rate": 1.8762067891622548e-05, + "loss": 0.2823, + "step": 1590 + }, + { + "epoch": 0.12, + "learning_rate": 1.8754282155091873e-05, + "loss": 0.2736, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 1.87464964185612e-05, + "loss": 0.2913, + "step": 1610 + }, + { + "epoch": 0.13, + "learning_rate": 1.873871068203052e-05, + "loss": 0.3343, + "step": 1620 + }, + { + "epoch": 0.13, + "learning_rate": 1.8730924945499846e-05, + "loss": 0.3567, + "step": 1630 + }, + { + "epoch": 0.13, + "learning_rate": 1.872313920896917e-05, + "loss": 0.3122, + "step": 1640 + }, + { + "epoch": 0.13, + "learning_rate": 1.8715353472438493e-05, + "loss": 0.2749, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 1.870756773590782e-05, + "loss": 0.3274, + "step": 1660 + }, + { + "epoch": 0.13, + "learning_rate": 1.8699781999377144e-05, + "loss": 0.4423, + "step": 1670 + }, + { + "epoch": 0.13, + "learning_rate": 1.8691996262846466e-05, + "loss": 0.2746, + "step": 1680 + }, + { + "epoch": 0.13, + "learning_rate": 1.868421052631579e-05, + "loss": 0.3538, + "step": 1690 + }, + { + "epoch": 0.13, + "learning_rate": 1.8676424789785116e-05, + "loss": 0.3878, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 1.866863905325444e-05, + "loss": 0.3795, + "step": 1710 + }, + { + "epoch": 0.13, + "learning_rate": 1.8660853316723763e-05, + "loss": 0.3302, + "step": 1720 + }, + { + "epoch": 0.13, + "learning_rate": 1.865306758019309e-05, + "loss": 0.3231, + "step": 1730 + }, + { + "epoch": 0.14, + "learning_rate": 1.8645281843662414e-05, + "loss": 0.3356, + "step": 1740 + }, + { + "epoch": 0.14, + "learning_rate": 1.8637496107131736e-05, + "loss": 0.3151, + "step": 1750 + }, + { + "epoch": 0.14, + "learning_rate": 1.862971037060106e-05, + "loss": 0.3759, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 1.8621924634070386e-05, + "loss": 0.3534, + "step": 1770 + }, + { + "epoch": 0.14, + "learning_rate": 1.861413889753971e-05, + "loss": 0.303, + "step": 1780 + }, + { + "epoch": 0.14, + "learning_rate": 1.860635316100903e-05, + "loss": 0.2945, + "step": 1790 + }, + { + "epoch": 0.14, + "learning_rate": 1.859856742447836e-05, + "loss": 0.349, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 1.859078168794768e-05, + "loss": 0.3398, + "step": 1810 + }, + { + "epoch": 0.14, + "learning_rate": 1.8582995951417006e-05, + "loss": 0.2744, + "step": 1820 + }, + { + "epoch": 0.14, + "learning_rate": 1.857521021488633e-05, + "loss": 0.3276, + "step": 1830 + }, + { + "epoch": 0.14, + "learning_rate": 1.8567424478355653e-05, + "loss": 0.2523, + "step": 1840 + }, + { + "epoch": 0.14, + "learning_rate": 1.855963874182498e-05, + "loss": 0.3286, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 1.8551853005294304e-05, + "loss": 0.3119, + "step": 1860 + }, + { + "epoch": 0.15, + "learning_rate": 1.8544067268763626e-05, + "loss": 0.3293, + "step": 1870 + }, + { + "epoch": 0.15, + "learning_rate": 1.853628153223295e-05, + "loss": 0.3766, + "step": 1880 + }, + { + "epoch": 0.15, + "learning_rate": 1.8528495795702277e-05, + "loss": 0.2765, + "step": 1890 + }, + { + "epoch": 0.15, + "learning_rate": 1.85207100591716e-05, + "loss": 0.4001, + "step": 1900 + }, + { + "epoch": 0.15, + "learning_rate": 1.8512924322640924e-05, + "loss": 0.3359, + "step": 1910 + }, + { + "epoch": 0.15, + "learning_rate": 1.8505138586110246e-05, + "loss": 0.3543, + "step": 1920 + }, + { + "epoch": 0.15, + "learning_rate": 1.849735284957957e-05, + "loss": 0.3326, + "step": 1930 + }, + { + "epoch": 0.15, + "learning_rate": 1.8489567113048896e-05, + "loss": 0.2847, + "step": 1940 + }, + { + "epoch": 0.15, + "learning_rate": 1.8481781376518218e-05, + "loss": 0.3548, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 1.8473995639987547e-05, + "loss": 0.3486, + "step": 1960 + }, + { + "epoch": 0.15, + "learning_rate": 1.846620990345687e-05, + "loss": 0.3246, + "step": 1970 + }, + { + "epoch": 0.15, + "learning_rate": 1.845842416692619e-05, + "loss": 0.4146, + "step": 1980 + }, + { + "epoch": 0.15, + "learning_rate": 1.845063843039552e-05, + "loss": 0.3075, + "step": 1990 + }, + { + "epoch": 0.16, + "learning_rate": 1.844285269386484e-05, + "loss": 0.3088, + "step": 2000 + }, + { + "epoch": 0.16, + "learning_rate": 1.8435066957334163e-05, + "loss": 0.3123, + "step": 2010 + }, + { + "epoch": 0.16, + "learning_rate": 1.842728122080349e-05, + "loss": 0.3547, + "step": 2020 + }, + { + "epoch": 0.16, + "learning_rate": 1.8419495484272814e-05, + "loss": 0.2637, + "step": 2030 + }, + { + "epoch": 0.16, + "learning_rate": 1.8411709747742136e-05, + "loss": 0.3146, + "step": 2040 + }, + { + "epoch": 0.16, + "learning_rate": 1.840392401121146e-05, + "loss": 0.2766, + "step": 2050 + }, + { + "epoch": 0.16, + "learning_rate": 1.8396138274680786e-05, + "loss": 0.3732, + "step": 2060 + }, + { + "epoch": 0.16, + "learning_rate": 1.838835253815011e-05, + "loss": 0.263, + "step": 2070 + }, + { + "epoch": 0.16, + "learning_rate": 1.8380566801619433e-05, + "loss": 0.3383, + "step": 2080 + }, + { + "epoch": 0.16, + "learning_rate": 1.837278106508876e-05, + "loss": 0.3495, + "step": 2090 + }, + { + "epoch": 0.16, + "learning_rate": 1.8364995328558084e-05, + "loss": 0.3986, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 1.8357209592027406e-05, + "loss": 0.3264, + "step": 2110 + }, + { + "epoch": 0.17, + "learning_rate": 1.834942385549673e-05, + "loss": 0.3782, + "step": 2120 + }, + { + "epoch": 0.17, + "learning_rate": 1.8341638118966057e-05, + "loss": 0.3523, + "step": 2130 + }, + { + "epoch": 0.17, + "learning_rate": 1.833385238243538e-05, + "loss": 0.2724, + "step": 2140 + }, + { + "epoch": 0.17, + "learning_rate": 1.8326066645904704e-05, + "loss": 0.3245, + "step": 2150 + }, + { + "epoch": 0.17, + "learning_rate": 1.831828090937403e-05, + "loss": 0.259, + "step": 2160 + }, + { + "epoch": 0.17, + "learning_rate": 1.831049517284335e-05, + "loss": 0.4271, + "step": 2170 + }, + { + "epoch": 0.17, + "learning_rate": 1.8302709436312676e-05, + "loss": 0.2523, + "step": 2180 + }, + { + "epoch": 0.17, + "learning_rate": 1.8294923699782e-05, + "loss": 0.267, + "step": 2190 + }, + { + "epoch": 0.17, + "learning_rate": 1.8287137963251324e-05, + "loss": 0.3008, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 1.827935222672065e-05, + "loss": 0.3555, + "step": 2210 + }, + { + "epoch": 0.17, + "learning_rate": 1.8271566490189974e-05, + "loss": 0.3355, + "step": 2220 + }, + { + "epoch": 0.17, + "learning_rate": 1.8263780753659296e-05, + "loss": 0.2799, + "step": 2230 + }, + { + "epoch": 0.17, + "learning_rate": 1.825599501712862e-05, + "loss": 0.3444, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 1.8248209280597947e-05, + "loss": 0.2557, + "step": 2250 + }, + { + "epoch": 0.18, + "learning_rate": 1.824042354406727e-05, + "loss": 0.316, + "step": 2260 + }, + { + "epoch": 0.18, + "learning_rate": 1.8232637807536594e-05, + "loss": 0.2644, + "step": 2270 + }, + { + "epoch": 0.18, + "learning_rate": 1.822485207100592e-05, + "loss": 0.3134, + "step": 2280 + }, + { + "epoch": 0.18, + "learning_rate": 1.821706633447524e-05, + "loss": 0.243, + "step": 2290 + }, + { + "epoch": 0.18, + "learning_rate": 1.8209280597944566e-05, + "loss": 0.4135, + "step": 2300 + }, + { + "epoch": 0.18, + "learning_rate": 1.820149486141389e-05, + "loss": 0.2781, + "step": 2310 + }, + { + "epoch": 0.18, + "learning_rate": 1.8193709124883217e-05, + "loss": 0.2512, + "step": 2320 + }, + { + "epoch": 0.18, + "learning_rate": 1.818592338835254e-05, + "loss": 0.2923, + "step": 2330 + }, + { + "epoch": 0.18, + "learning_rate": 1.8178137651821864e-05, + "loss": 0.3151, + "step": 2340 + }, + { + "epoch": 0.18, + "learning_rate": 1.817035191529119e-05, + "loss": 0.2766, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 1.816256617876051e-05, + "loss": 0.3372, + "step": 2360 + }, + { + "epoch": 0.18, + "learning_rate": 1.8154780442229837e-05, + "loss": 0.3924, + "step": 2370 + }, + { + "epoch": 0.19, + "learning_rate": 1.8146994705699162e-05, + "loss": 0.2954, + "step": 2380 + }, + { + "epoch": 0.19, + "learning_rate": 1.8139208969168484e-05, + "loss": 0.2981, + "step": 2390 + }, + { + "epoch": 0.19, + "learning_rate": 1.813142323263781e-05, + "loss": 0.3011, + "step": 2400 + }, + { + "epoch": 0.19, + "learning_rate": 1.8123637496107134e-05, + "loss": 0.2724, + "step": 2410 + }, + { + "epoch": 0.19, + "learning_rate": 1.8115851759576456e-05, + "loss": 0.3791, + "step": 2420 + }, + { + "epoch": 0.19, + "learning_rate": 1.8108066023045782e-05, + "loss": 0.3037, + "step": 2430 + }, + { + "epoch": 0.19, + "learning_rate": 1.8100280286515107e-05, + "loss": 0.2986, + "step": 2440 + }, + { + "epoch": 0.19, + "learning_rate": 1.809249454998443e-05, + "loss": 0.3418, + "step": 2450 + }, + { + "epoch": 0.19, + "learning_rate": 1.8084708813453754e-05, + "loss": 0.3293, + "step": 2460 + }, + { + "epoch": 0.19, + "learning_rate": 1.807692307692308e-05, + "loss": 0.3038, + "step": 2470 + }, + { + "epoch": 0.19, + "learning_rate": 1.80691373403924e-05, + "loss": 0.3099, + "step": 2480 + }, + { + "epoch": 0.19, + "learning_rate": 1.8061351603861727e-05, + "loss": 0.3628, + "step": 2490 + }, + { + "epoch": 0.19, + "learning_rate": 1.8053565867331052e-05, + "loss": 0.3277, + "step": 2500 + }, + { + "epoch": 0.2, + "learning_rate": 1.8045780130800374e-05, + "loss": 0.2927, + "step": 2510 + }, + { + "epoch": 0.2, + "learning_rate": 1.80379943942697e-05, + "loss": 0.2666, + "step": 2520 + }, + { + "epoch": 0.2, + "learning_rate": 1.8030208657739025e-05, + "loss": 0.4419, + "step": 2530 + }, + { + "epoch": 0.2, + "learning_rate": 1.8022422921208346e-05, + "loss": 0.3165, + "step": 2540 + }, + { + "epoch": 0.2, + "learning_rate": 1.8014637184677672e-05, + "loss": 0.3474, + "step": 2550 + }, + { + "epoch": 0.2, + "learning_rate": 1.8006851448146997e-05, + "loss": 0.3313, + "step": 2560 + }, + { + "epoch": 0.2, + "learning_rate": 1.7999065711616322e-05, + "loss": 0.4063, + "step": 2570 + }, + { + "epoch": 0.2, + "learning_rate": 1.7991279975085644e-05, + "loss": 0.3396, + "step": 2580 + }, + { + "epoch": 0.2, + "learning_rate": 1.798349423855497e-05, + "loss": 0.2792, + "step": 2590 + }, + { + "epoch": 0.2, + "learning_rate": 1.7975708502024295e-05, + "loss": 0.3595, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 1.7967922765493617e-05, + "loss": 0.2986, + "step": 2610 + }, + { + "epoch": 0.2, + "learning_rate": 1.7960137028962942e-05, + "loss": 0.3442, + "step": 2620 + }, + { + "epoch": 0.2, + "learning_rate": 1.7952351292432267e-05, + "loss": 0.3373, + "step": 2630 + }, + { + "epoch": 0.21, + "learning_rate": 1.794456555590159e-05, + "loss": 0.273, + "step": 2640 + }, + { + "epoch": 0.21, + "learning_rate": 1.7936779819370915e-05, + "loss": 0.3987, + "step": 2650 + }, + { + "epoch": 0.21, + "learning_rate": 1.792899408284024e-05, + "loss": 0.3173, + "step": 2660 + }, + { + "epoch": 0.21, + "learning_rate": 1.7921208346309562e-05, + "loss": 0.3384, + "step": 2670 + }, + { + "epoch": 0.21, + "learning_rate": 1.7913422609778887e-05, + "loss": 0.376, + "step": 2680 + }, + { + "epoch": 0.21, + "learning_rate": 1.7905636873248212e-05, + "loss": 0.2247, + "step": 2690 + }, + { + "epoch": 0.21, + "learning_rate": 1.7897851136717534e-05, + "loss": 0.326, + "step": 2700 + }, + { + "epoch": 0.21, + "learning_rate": 1.789006540018686e-05, + "loss": 0.2287, + "step": 2710 + }, + { + "epoch": 0.21, + "learning_rate": 1.7882279663656185e-05, + "loss": 0.3976, + "step": 2720 + }, + { + "epoch": 0.21, + "learning_rate": 1.7874493927125507e-05, + "loss": 0.2625, + "step": 2730 + }, + { + "epoch": 0.21, + "learning_rate": 1.7866708190594832e-05, + "loss": 0.3386, + "step": 2740 + }, + { + "epoch": 0.21, + "learning_rate": 1.7858922454064157e-05, + "loss": 0.3251, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 1.785113671753348e-05, + "loss": 0.3312, + "step": 2760 + }, + { + "epoch": 0.22, + "learning_rate": 1.7843350981002805e-05, + "loss": 0.3089, + "step": 2770 + }, + { + "epoch": 0.22, + "learning_rate": 1.783556524447213e-05, + "loss": 0.3043, + "step": 2780 + }, + { + "epoch": 0.22, + "learning_rate": 1.7827779507941452e-05, + "loss": 0.3096, + "step": 2790 + }, + { + "epoch": 0.22, + "learning_rate": 1.7819993771410777e-05, + "loss": 0.282, + "step": 2800 + }, + { + "epoch": 0.22, + "learning_rate": 1.7812208034880102e-05, + "loss": 0.3608, + "step": 2810 + }, + { + "epoch": 0.22, + "learning_rate": 1.7804422298349428e-05, + "loss": 0.3096, + "step": 2820 + }, + { + "epoch": 0.22, + "learning_rate": 1.779663656181875e-05, + "loss": 0.3158, + "step": 2830 + }, + { + "epoch": 0.22, + "learning_rate": 1.778885082528807e-05, + "loss": 0.3508, + "step": 2840 + }, + { + "epoch": 0.22, + "learning_rate": 1.77810650887574e-05, + "loss": 0.2838, + "step": 2850 + }, + { + "epoch": 0.22, + "learning_rate": 1.7773279352226722e-05, + "loss": 0.2463, + "step": 2860 + }, + { + "epoch": 0.22, + "learning_rate": 1.7765493615696044e-05, + "loss": 0.4101, + "step": 2870 + }, + { + "epoch": 0.22, + "learning_rate": 1.7757707879165373e-05, + "loss": 0.3024, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 1.7749922142634695e-05, + "loss": 0.3887, + "step": 2890 + }, + { + "epoch": 0.23, + "learning_rate": 1.7742136406104017e-05, + "loss": 0.3184, + "step": 2900 + }, + { + "epoch": 0.23, + "learning_rate": 1.7734350669573345e-05, + "loss": 0.3213, + "step": 2910 + }, + { + "epoch": 0.23, + "learning_rate": 1.7726564933042667e-05, + "loss": 0.3363, + "step": 2920 + }, + { + "epoch": 0.23, + "learning_rate": 1.771877919651199e-05, + "loss": 0.3635, + "step": 2930 + }, + { + "epoch": 0.23, + "learning_rate": 1.7710993459981314e-05, + "loss": 0.2902, + "step": 2940 + }, + { + "epoch": 0.23, + "learning_rate": 1.770320772345064e-05, + "loss": 0.3068, + "step": 2950 + }, + { + "epoch": 0.23, + "learning_rate": 1.7695421986919965e-05, + "loss": 0.2164, + "step": 2960 + }, + { + "epoch": 0.23, + "learning_rate": 1.7687636250389287e-05, + "loss": 0.3048, + "step": 2970 + }, + { + "epoch": 0.23, + "learning_rate": 1.7679850513858612e-05, + "loss": 0.3578, + "step": 2980 + }, + { + "epoch": 0.23, + "learning_rate": 1.7672064777327937e-05, + "loss": 0.3142, + "step": 2990 + }, + { + "epoch": 0.23, + "learning_rate": 1.766427904079726e-05, + "loss": 0.3153, + "step": 3000 + }, + { + "epoch": 0.23, + "learning_rate": 1.7656493304266585e-05, + "loss": 0.254, + "step": 3010 + }, + { + "epoch": 0.24, + "learning_rate": 1.764870756773591e-05, + "loss": 0.3416, + "step": 3020 + }, + { + "epoch": 0.24, + "learning_rate": 1.7640921831205232e-05, + "loss": 0.3015, + "step": 3030 + }, + { + "epoch": 0.24, + "learning_rate": 1.7633136094674557e-05, + "loss": 0.306, + "step": 3040 + }, + { + "epoch": 0.24, + "learning_rate": 1.7625350358143882e-05, + "loss": 0.3475, + "step": 3050 + }, + { + "epoch": 0.24, + "learning_rate": 1.7617564621613204e-05, + "loss": 0.2974, + "step": 3060 + }, + { + "epoch": 0.24, + "learning_rate": 1.760977888508253e-05, + "loss": 0.3741, + "step": 3070 + }, + { + "epoch": 0.24, + "learning_rate": 1.7601993148551855e-05, + "loss": 0.3203, + "step": 3080 + }, + { + "epoch": 0.24, + "learning_rate": 1.7594207412021177e-05, + "loss": 0.3682, + "step": 3090 + }, + { + "epoch": 0.24, + "learning_rate": 1.7586421675490502e-05, + "loss": 0.3191, + "step": 3100 + }, + { + "epoch": 0.24, + "learning_rate": 1.7578635938959828e-05, + "loss": 0.2515, + "step": 3110 + }, + { + "epoch": 0.24, + "learning_rate": 1.757085020242915e-05, + "loss": 0.394, + "step": 3120 + }, + { + "epoch": 0.24, + "learning_rate": 1.7563064465898475e-05, + "loss": 0.317, + "step": 3130 + }, + { + "epoch": 0.24, + "learning_rate": 1.75552787293678e-05, + "loss": 0.2698, + "step": 3140 + }, + { + "epoch": 0.25, + "learning_rate": 1.7547492992837122e-05, + "loss": 0.3299, + "step": 3150 + }, + { + "epoch": 0.25, + "learning_rate": 1.7539707256306447e-05, + "loss": 0.3183, + "step": 3160 + }, + { + "epoch": 0.25, + "learning_rate": 1.7531921519775773e-05, + "loss": 0.3598, + "step": 3170 + }, + { + "epoch": 0.25, + "learning_rate": 1.7524135783245094e-05, + "loss": 0.3637, + "step": 3180 + }, + { + "epoch": 0.25, + "learning_rate": 1.751635004671442e-05, + "loss": 0.2586, + "step": 3190 + }, + { + "epoch": 0.25, + "learning_rate": 1.7508564310183745e-05, + "loss": 0.3151, + "step": 3200 + }, + { + "epoch": 0.25, + "learning_rate": 1.750077857365307e-05, + "loss": 0.2587, + "step": 3210 + }, + { + "epoch": 0.25, + "learning_rate": 1.7492992837122392e-05, + "loss": 0.342, + "step": 3220 + }, + { + "epoch": 0.25, + "learning_rate": 1.7485207100591718e-05, + "loss": 0.305, + "step": 3230 + }, + { + "epoch": 0.25, + "learning_rate": 1.7477421364061043e-05, + "loss": 0.3429, + "step": 3240 + }, + { + "epoch": 0.25, + "learning_rate": 1.7469635627530365e-05, + "loss": 0.2381, + "step": 3250 + }, + { + "epoch": 0.25, + "learning_rate": 1.746184989099969e-05, + "loss": 0.2777, + "step": 3260 + }, + { + "epoch": 0.25, + "learning_rate": 1.7454064154469015e-05, + "loss": 0.3232, + "step": 3270 + }, + { + "epoch": 0.26, + "learning_rate": 1.7446278417938337e-05, + "loss": 0.2442, + "step": 3280 + }, + { + "epoch": 0.26, + "learning_rate": 1.7438492681407663e-05, + "loss": 0.2881, + "step": 3290 + }, + { + "epoch": 0.26, + "learning_rate": 1.7430706944876988e-05, + "loss": 0.299, + "step": 3300 + }, + { + "epoch": 0.26, + "learning_rate": 1.742292120834631e-05, + "loss": 0.2971, + "step": 3310 + }, + { + "epoch": 0.26, + "learning_rate": 1.7415135471815635e-05, + "loss": 0.2872, + "step": 3320 + }, + { + "epoch": 0.26, + "learning_rate": 1.740734973528496e-05, + "loss": 0.2707, + "step": 3330 + }, + { + "epoch": 0.26, + "learning_rate": 1.7399563998754282e-05, + "loss": 0.3922, + "step": 3340 + }, + { + "epoch": 0.26, + "learning_rate": 1.7391778262223608e-05, + "loss": 0.371, + "step": 3350 + }, + { + "epoch": 0.26, + "learning_rate": 1.7383992525692933e-05, + "loss": 0.2891, + "step": 3360 + }, + { + "epoch": 0.26, + "learning_rate": 1.7376206789162255e-05, + "loss": 0.3711, + "step": 3370 + }, + { + "epoch": 0.26, + "learning_rate": 1.736842105263158e-05, + "loss": 0.2751, + "step": 3380 + }, + { + "epoch": 0.26, + "learning_rate": 1.7360635316100905e-05, + "loss": 0.3483, + "step": 3390 + }, + { + "epoch": 0.26, + "learning_rate": 1.7352849579570227e-05, + "loss": 0.3248, + "step": 3400 + }, + { + "epoch": 0.27, + "learning_rate": 1.7345063843039553e-05, + "loss": 0.2927, + "step": 3410 + }, + { + "epoch": 0.27, + "learning_rate": 1.7337278106508878e-05, + "loss": 0.3026, + "step": 3420 + }, + { + "epoch": 0.27, + "learning_rate": 1.73294923699782e-05, + "loss": 0.2577, + "step": 3430 + }, + { + "epoch": 0.27, + "learning_rate": 1.7321706633447525e-05, + "loss": 0.2536, + "step": 3440 + }, + { + "epoch": 0.27, + "learning_rate": 1.731392089691685e-05, + "loss": 0.3338, + "step": 3450 + }, + { + "epoch": 0.27, + "learning_rate": 1.7306135160386176e-05, + "loss": 0.3278, + "step": 3460 + }, + { + "epoch": 0.27, + "learning_rate": 1.7298349423855498e-05, + "loss": 0.3179, + "step": 3470 + }, + { + "epoch": 0.27, + "learning_rate": 1.7290563687324823e-05, + "loss": 0.3449, + "step": 3480 + }, + { + "epoch": 0.27, + "learning_rate": 1.7282777950794148e-05, + "loss": 0.3133, + "step": 3490 + }, + { + "epoch": 0.27, + "learning_rate": 1.727499221426347e-05, + "loss": 0.3097, + "step": 3500 + }, + { + "epoch": 0.27, + "learning_rate": 1.7267206477732795e-05, + "loss": 0.3252, + "step": 3510 + }, + { + "epoch": 0.27, + "learning_rate": 1.725942074120212e-05, + "loss": 0.3041, + "step": 3520 + }, + { + "epoch": 0.27, + "learning_rate": 1.7251635004671443e-05, + "loss": 0.2786, + "step": 3530 + }, + { + "epoch": 0.28, + "learning_rate": 1.7243849268140768e-05, + "loss": 0.2478, + "step": 3540 + }, + { + "epoch": 0.28, + "learning_rate": 1.7236063531610093e-05, + "loss": 0.2706, + "step": 3550 + }, + { + "epoch": 0.28, + "learning_rate": 1.7228277795079415e-05, + "loss": 0.3272, + "step": 3560 + }, + { + "epoch": 0.28, + "learning_rate": 1.722049205854874e-05, + "loss": 0.3018, + "step": 3570 + }, + { + "epoch": 0.28, + "learning_rate": 1.7212706322018066e-05, + "loss": 0.2802, + "step": 3580 + }, + { + "epoch": 0.28, + "learning_rate": 1.7204920585487388e-05, + "loss": 0.274, + "step": 3590 + }, + { + "epoch": 0.28, + "learning_rate": 1.7197134848956713e-05, + "loss": 0.3024, + "step": 3600 + }, + { + "epoch": 0.28, + "learning_rate": 1.7189349112426038e-05, + "loss": 0.3475, + "step": 3610 + }, + { + "epoch": 0.28, + "learning_rate": 1.718156337589536e-05, + "loss": 0.2996, + "step": 3620 + }, + { + "epoch": 0.28, + "learning_rate": 1.7173777639364685e-05, + "loss": 0.2765, + "step": 3630 + }, + { + "epoch": 0.28, + "learning_rate": 1.716599190283401e-05, + "loss": 0.2422, + "step": 3640 + }, + { + "epoch": 0.28, + "learning_rate": 1.7158206166303333e-05, + "loss": 0.3942, + "step": 3650 + }, + { + "epoch": 0.28, + "learning_rate": 1.7150420429772658e-05, + "loss": 0.2361, + "step": 3660 + }, + { + "epoch": 0.29, + "learning_rate": 1.7142634693241983e-05, + "loss": 0.3354, + "step": 3670 + }, + { + "epoch": 0.29, + "learning_rate": 1.7134848956711305e-05, + "loss": 0.2544, + "step": 3680 + }, + { + "epoch": 0.29, + "learning_rate": 1.712706322018063e-05, + "loss": 0.3437, + "step": 3690 + }, + { + "epoch": 0.29, + "learning_rate": 1.7119277483649956e-05, + "loss": 0.2315, + "step": 3700 + }, + { + "epoch": 0.29, + "learning_rate": 1.711149174711928e-05, + "loss": 0.296, + "step": 3710 + }, + { + "epoch": 0.29, + "learning_rate": 1.7103706010588603e-05, + "loss": 0.2921, + "step": 3720 + }, + { + "epoch": 0.29, + "learning_rate": 1.709592027405793e-05, + "loss": 0.33, + "step": 3730 + }, + { + "epoch": 0.29, + "learning_rate": 1.7088134537527254e-05, + "loss": 0.2921, + "step": 3740 + }, + { + "epoch": 0.29, + "learning_rate": 1.7080348800996576e-05, + "loss": 0.3615, + "step": 3750 + }, + { + "epoch": 0.29, + "learning_rate": 1.7072563064465897e-05, + "loss": 0.3131, + "step": 3760 + }, + { + "epoch": 0.29, + "learning_rate": 1.7064777327935226e-05, + "loss": 0.372, + "step": 3770 + }, + { + "epoch": 0.29, + "learning_rate": 1.7056991591404548e-05, + "loss": 0.2867, + "step": 3780 + }, + { + "epoch": 0.3, + "learning_rate": 1.704920585487387e-05, + "loss": 0.3098, + "step": 3790 + }, + { + "epoch": 0.3, + "learning_rate": 1.70414201183432e-05, + "loss": 0.2592, + "step": 3800 + }, + { + "epoch": 0.3, + "learning_rate": 1.703363438181252e-05, + "loss": 0.3017, + "step": 3810 + }, + { + "epoch": 0.3, + "learning_rate": 1.7025848645281846e-05, + "loss": 0.3378, + "step": 3820 + }, + { + "epoch": 0.3, + "learning_rate": 1.701806290875117e-05, + "loss": 0.2605, + "step": 3830 + }, + { + "epoch": 0.3, + "learning_rate": 1.7010277172220493e-05, + "loss": 0.2793, + "step": 3840 + }, + { + "epoch": 0.3, + "learning_rate": 1.700249143568982e-05, + "loss": 0.2787, + "step": 3850 + }, + { + "epoch": 0.3, + "learning_rate": 1.699470569915914e-05, + "loss": 0.3085, + "step": 3860 + }, + { + "epoch": 0.3, + "learning_rate": 1.6986919962628466e-05, + "loss": 0.2703, + "step": 3870 + }, + { + "epoch": 0.3, + "learning_rate": 1.697913422609779e-05, + "loss": 0.3528, + "step": 3880 + }, + { + "epoch": 0.3, + "learning_rate": 1.6971348489567113e-05, + "loss": 0.2685, + "step": 3890 + }, + { + "epoch": 0.3, + "learning_rate": 1.6963562753036438e-05, + "loss": 0.3184, + "step": 3900 + }, + { + "epoch": 0.3, + "learning_rate": 1.6955777016505763e-05, + "loss": 0.2761, + "step": 3910 + }, + { + "epoch": 0.31, + "learning_rate": 1.6947991279975085e-05, + "loss": 0.2261, + "step": 3920 + }, + { + "epoch": 0.31, + "learning_rate": 1.694020554344441e-05, + "loss": 0.3712, + "step": 3930 + }, + { + "epoch": 0.31, + "learning_rate": 1.6932419806913736e-05, + "loss": 0.2567, + "step": 3940 + }, + { + "epoch": 0.31, + "learning_rate": 1.6924634070383058e-05, + "loss": 0.2936, + "step": 3950 + }, + { + "epoch": 0.31, + "learning_rate": 1.6916848333852386e-05, + "loss": 0.2667, + "step": 3960 + }, + { + "epoch": 0.31, + "learning_rate": 1.690906259732171e-05, + "loss": 0.35, + "step": 3970 + }, + { + "epoch": 0.31, + "learning_rate": 1.690127686079103e-05, + "loss": 0.2335, + "step": 3980 + }, + { + "epoch": 0.31, + "learning_rate": 1.6893491124260356e-05, + "loss": 0.2586, + "step": 3990 + }, + { + "epoch": 0.31, + "learning_rate": 1.688570538772968e-05, + "loss": 0.3377, + "step": 4000 + }, + { + "epoch": 0.31, + "learning_rate": 1.6877919651199003e-05, + "loss": 0.2969, + "step": 4010 + }, + { + "epoch": 0.31, + "learning_rate": 1.6870133914668328e-05, + "loss": 0.2893, + "step": 4020 + }, + { + "epoch": 0.31, + "learning_rate": 1.6862348178137653e-05, + "loss": 0.3195, + "step": 4030 + }, + { + "epoch": 0.31, + "learning_rate": 1.6854562441606975e-05, + "loss": 0.3279, + "step": 4040 + }, + { + "epoch": 0.32, + "learning_rate": 1.68467767050763e-05, + "loss": 0.2271, + "step": 4050 + }, + { + "epoch": 0.32, + "learning_rate": 1.6838990968545626e-05, + "loss": 0.3524, + "step": 4060 + }, + { + "epoch": 0.32, + "learning_rate": 1.683120523201495e-05, + "loss": 0.2838, + "step": 4070 + }, + { + "epoch": 0.32, + "learning_rate": 1.6823419495484273e-05, + "loss": 0.364, + "step": 4080 + }, + { + "epoch": 0.32, + "learning_rate": 1.68156337589536e-05, + "loss": 0.2356, + "step": 4090 + }, + { + "epoch": 0.32, + "learning_rate": 1.6807848022422924e-05, + "loss": 0.33, + "step": 4100 + }, + { + "epoch": 0.32, + "learning_rate": 1.6800062285892246e-05, + "loss": 0.3097, + "step": 4110 + }, + { + "epoch": 0.32, + "learning_rate": 1.679227654936157e-05, + "loss": 0.3582, + "step": 4120 + }, + { + "epoch": 0.32, + "learning_rate": 1.6784490812830896e-05, + "loss": 0.2895, + "step": 4130 + }, + { + "epoch": 0.32, + "learning_rate": 1.6776705076300218e-05, + "loss": 0.2226, + "step": 4140 + }, + { + "epoch": 0.32, + "learning_rate": 1.6768919339769543e-05, + "loss": 0.255, + "step": 4150 + }, + { + "epoch": 0.32, + "learning_rate": 1.676113360323887e-05, + "loss": 0.2919, + "step": 4160 + }, + { + "epoch": 0.32, + "learning_rate": 1.675334786670819e-05, + "loss": 0.3014, + "step": 4170 + }, + { + "epoch": 0.33, + "learning_rate": 1.6745562130177516e-05, + "loss": 0.3285, + "step": 4180 + }, + { + "epoch": 0.33, + "learning_rate": 1.673777639364684e-05, + "loss": 0.3023, + "step": 4190 + }, + { + "epoch": 0.33, + "learning_rate": 1.6729990657116163e-05, + "loss": 0.2802, + "step": 4200 + }, + { + "epoch": 0.33, + "learning_rate": 1.672220492058549e-05, + "loss": 0.2365, + "step": 4210 + }, + { + "epoch": 0.33, + "learning_rate": 1.6714419184054814e-05, + "loss": 0.2338, + "step": 4220 + }, + { + "epoch": 0.33, + "learning_rate": 1.6706633447524136e-05, + "loss": 0.3102, + "step": 4230 + }, + { + "epoch": 0.33, + "learning_rate": 1.669884771099346e-05, + "loss": 0.3002, + "step": 4240 + }, + { + "epoch": 0.33, + "learning_rate": 1.6691061974462786e-05, + "loss": 0.2361, + "step": 4250 + }, + { + "epoch": 0.33, + "learning_rate": 1.6683276237932108e-05, + "loss": 0.2159, + "step": 4260 + }, + { + "epoch": 0.33, + "learning_rate": 1.6675490501401433e-05, + "loss": 0.2764, + "step": 4270 + }, + { + "epoch": 0.33, + "learning_rate": 1.666770476487076e-05, + "loss": 0.2654, + "step": 4280 + }, + { + "epoch": 0.33, + "learning_rate": 1.665991902834008e-05, + "loss": 0.2997, + "step": 4290 + }, + { + "epoch": 0.33, + "learning_rate": 1.6652133291809406e-05, + "loss": 0.257, + "step": 4300 + }, + { + "epoch": 0.34, + "learning_rate": 1.664434755527873e-05, + "loss": 0.3513, + "step": 4310 + }, + { + "epoch": 0.34, + "learning_rate": 1.6636561818748057e-05, + "loss": 0.2621, + "step": 4320 + }, + { + "epoch": 0.34, + "learning_rate": 1.662877608221738e-05, + "loss": 0.2865, + "step": 4330 + }, + { + "epoch": 0.34, + "learning_rate": 1.6620990345686704e-05, + "loss": 0.3538, + "step": 4340 + }, + { + "epoch": 0.34, + "learning_rate": 1.661320460915603e-05, + "loss": 0.2507, + "step": 4350 + }, + { + "epoch": 0.34, + "learning_rate": 1.660541887262535e-05, + "loss": 0.2413, + "step": 4360 + }, + { + "epoch": 0.34, + "learning_rate": 1.6597633136094676e-05, + "loss": 0.2306, + "step": 4370 + }, + { + "epoch": 0.34, + "learning_rate": 1.6589847399564e-05, + "loss": 0.3176, + "step": 4380 + }, + { + "epoch": 0.34, + "learning_rate": 1.6582061663033324e-05, + "loss": 0.2894, + "step": 4390 + }, + { + "epoch": 0.34, + "learning_rate": 1.657427592650265e-05, + "loss": 0.2636, + "step": 4400 + }, + { + "epoch": 0.34, + "learning_rate": 1.6566490189971974e-05, + "loss": 0.4195, + "step": 4410 + }, + { + "epoch": 0.34, + "learning_rate": 1.6558704453441296e-05, + "loss": 0.3316, + "step": 4420 + }, + { + "epoch": 0.34, + "learning_rate": 1.655091871691062e-05, + "loss": 0.2798, + "step": 4430 + }, + { + "epoch": 0.35, + "learning_rate": 1.6543132980379947e-05, + "loss": 0.3231, + "step": 4440 + }, + { + "epoch": 0.35, + "learning_rate": 1.653534724384927e-05, + "loss": 0.2856, + "step": 4450 + }, + { + "epoch": 0.35, + "learning_rate": 1.6527561507318594e-05, + "loss": 0.444, + "step": 4460 + }, + { + "epoch": 0.35, + "learning_rate": 1.651977577078792e-05, + "loss": 0.2894, + "step": 4470 + }, + { + "epoch": 0.35, + "learning_rate": 1.651199003425724e-05, + "loss": 0.2602, + "step": 4480 + }, + { + "epoch": 0.35, + "learning_rate": 1.6504204297726566e-05, + "loss": 0.2079, + "step": 4490 + }, + { + "epoch": 0.35, + "learning_rate": 1.649641856119589e-05, + "loss": 0.3006, + "step": 4500 + }, + { + "epoch": 0.35, + "learning_rate": 1.6488632824665214e-05, + "loss": 0.2701, + "step": 4510 + }, + { + "epoch": 0.35, + "learning_rate": 1.648084708813454e-05, + "loss": 0.2697, + "step": 4520 + }, + { + "epoch": 0.35, + "learning_rate": 1.6473061351603864e-05, + "loss": 0.3324, + "step": 4530 + }, + { + "epoch": 0.35, + "learning_rate": 1.6465275615073186e-05, + "loss": 0.3033, + "step": 4540 + }, + { + "epoch": 0.35, + "learning_rate": 1.645748987854251e-05, + "loss": 0.3216, + "step": 4550 + }, + { + "epoch": 0.36, + "learning_rate": 1.6449704142011837e-05, + "loss": 0.2848, + "step": 4560 + }, + { + "epoch": 0.36, + "learning_rate": 1.6441918405481162e-05, + "loss": 0.2466, + "step": 4570 + }, + { + "epoch": 0.36, + "learning_rate": 1.6434132668950484e-05, + "loss": 0.3451, + "step": 4580 + }, + { + "epoch": 0.36, + "learning_rate": 1.642634693241981e-05, + "loss": 0.3165, + "step": 4590 + }, + { + "epoch": 0.36, + "learning_rate": 1.6418561195889134e-05, + "loss": 0.3146, + "step": 4600 + }, + { + "epoch": 0.36, + "learning_rate": 1.6410775459358456e-05, + "loss": 0.1862, + "step": 4610 + }, + { + "epoch": 0.36, + "learning_rate": 1.640298972282778e-05, + "loss": 0.255, + "step": 4620 + }, + { + "epoch": 0.36, + "learning_rate": 1.6395203986297107e-05, + "loss": 0.3015, + "step": 4630 + }, + { + "epoch": 0.36, + "learning_rate": 1.638741824976643e-05, + "loss": 0.2665, + "step": 4640 + }, + { + "epoch": 0.36, + "learning_rate": 1.6379632513235754e-05, + "loss": 0.234, + "step": 4650 + }, + { + "epoch": 0.36, + "learning_rate": 1.637184677670508e-05, + "loss": 0.2748, + "step": 4660 + }, + { + "epoch": 0.36, + "learning_rate": 1.63640610401744e-05, + "loss": 0.2846, + "step": 4670 + }, + { + "epoch": 0.36, + "learning_rate": 1.6356275303643723e-05, + "loss": 0.2349, + "step": 4680 + }, + { + "epoch": 0.37, + "learning_rate": 1.6348489567113052e-05, + "loss": 0.2729, + "step": 4690 + }, + { + "epoch": 0.37, + "learning_rate": 1.6340703830582374e-05, + "loss": 0.1976, + "step": 4700 + }, + { + "epoch": 0.37, + "learning_rate": 1.63329180940517e-05, + "loss": 0.3199, + "step": 4710 + }, + { + "epoch": 0.37, + "learning_rate": 1.6325132357521025e-05, + "loss": 0.4013, + "step": 4720 + }, + { + "epoch": 0.37, + "learning_rate": 1.6317346620990346e-05, + "loss": 0.2417, + "step": 4730 + }, + { + "epoch": 0.37, + "learning_rate": 1.6309560884459672e-05, + "loss": 0.2593, + "step": 4740 + }, + { + "epoch": 0.37, + "learning_rate": 1.6301775147928997e-05, + "loss": 0.2663, + "step": 4750 + }, + { + "epoch": 0.37, + "learning_rate": 1.629398941139832e-05, + "loss": 0.2613, + "step": 4760 + }, + { + "epoch": 0.37, + "learning_rate": 1.6286203674867644e-05, + "loss": 0.3266, + "step": 4770 + }, + { + "epoch": 0.37, + "learning_rate": 1.6278417938336966e-05, + "loss": 0.3107, + "step": 4780 + }, + { + "epoch": 0.37, + "learning_rate": 1.627063220180629e-05, + "loss": 0.278, + "step": 4790 + }, + { + "epoch": 0.37, + "learning_rate": 1.6262846465275617e-05, + "loss": 0.3524, + "step": 4800 + }, + { + "epoch": 0.37, + "learning_rate": 1.625506072874494e-05, + "loss": 0.2858, + "step": 4810 + }, + { + "epoch": 0.38, + "learning_rate": 1.6247274992214267e-05, + "loss": 0.292, + "step": 4820 + }, + { + "epoch": 0.38, + "learning_rate": 1.623948925568359e-05, + "loss": 0.2983, + "step": 4830 + }, + { + "epoch": 0.38, + "learning_rate": 1.623170351915291e-05, + "loss": 0.2949, + "step": 4840 + }, + { + "epoch": 0.38, + "learning_rate": 1.622391778262224e-05, + "loss": 0.3657, + "step": 4850 + }, + { + "epoch": 0.38, + "learning_rate": 1.6216132046091562e-05, + "loss": 0.3057, + "step": 4860 + }, + { + "epoch": 0.38, + "learning_rate": 1.6208346309560884e-05, + "loss": 0.2418, + "step": 4870 + }, + { + "epoch": 0.38, + "learning_rate": 1.6200560573030212e-05, + "loss": 0.3026, + "step": 4880 + }, + { + "epoch": 0.38, + "learning_rate": 1.6192774836499534e-05, + "loss": 0.2972, + "step": 4890 + }, + { + "epoch": 0.38, + "learning_rate": 1.6184989099968856e-05, + "loss": 0.2747, + "step": 4900 + }, + { + "epoch": 0.38, + "learning_rate": 1.617720336343818e-05, + "loss": 0.2987, + "step": 4910 + }, + { + "epoch": 0.38, + "learning_rate": 1.6169417626907507e-05, + "loss": 0.3031, + "step": 4920 + }, + { + "epoch": 0.38, + "learning_rate": 1.616163189037683e-05, + "loss": 0.3612, + "step": 4930 + }, + { + "epoch": 0.38, + "learning_rate": 1.6153846153846154e-05, + "loss": 0.2909, + "step": 4940 + }, + { + "epoch": 0.39, + "learning_rate": 1.614606041731548e-05, + "loss": 0.2469, + "step": 4950 + }, + { + "epoch": 0.39, + "learning_rate": 1.6138274680784805e-05, + "loss": 0.2322, + "step": 4960 + }, + { + "epoch": 0.39, + "learning_rate": 1.6130488944254127e-05, + "loss": 0.3777, + "step": 4970 + }, + { + "epoch": 0.39, + "learning_rate": 1.6122703207723452e-05, + "loss": 0.2769, + "step": 4980 + }, + { + "epoch": 0.39, + "learning_rate": 1.6114917471192777e-05, + "loss": 0.3218, + "step": 4990 + }, + { + "epoch": 0.39, + "learning_rate": 1.61071317346621e-05, + "loss": 0.3272, + "step": 5000 + }, + { + "epoch": 0.39, + "learning_rate": 1.6099345998131424e-05, + "loss": 0.3187, + "step": 5010 + }, + { + "epoch": 0.39, + "learning_rate": 1.609156026160075e-05, + "loss": 0.228, + "step": 5020 + }, + { + "epoch": 0.39, + "learning_rate": 1.608377452507007e-05, + "loss": 0.2892, + "step": 5030 + }, + { + "epoch": 0.39, + "learning_rate": 1.6075988788539397e-05, + "loss": 0.3135, + "step": 5040 + }, + { + "epoch": 0.39, + "learning_rate": 1.6068203052008722e-05, + "loss": 0.2711, + "step": 5050 + }, + { + "epoch": 0.39, + "learning_rate": 1.6060417315478044e-05, + "loss": 0.2548, + "step": 5060 + }, + { + "epoch": 0.39, + "learning_rate": 1.605263157894737e-05, + "loss": 0.256, + "step": 5070 + }, + { + "epoch": 0.4, + "learning_rate": 1.6044845842416695e-05, + "loss": 0.3005, + "step": 5080 + }, + { + "epoch": 0.4, + "learning_rate": 1.6037060105886017e-05, + "loss": 0.3019, + "step": 5090 + }, + { + "epoch": 0.4, + "learning_rate": 1.6029274369355342e-05, + "loss": 0.2936, + "step": 5100 + }, + { + "epoch": 0.4, + "learning_rate": 1.6021488632824667e-05, + "loss": 0.3083, + "step": 5110 + }, + { + "epoch": 0.4, + "learning_rate": 1.601370289629399e-05, + "loss": 0.2696, + "step": 5120 + }, + { + "epoch": 0.4, + "learning_rate": 1.6005917159763314e-05, + "loss": 0.3029, + "step": 5130 + }, + { + "epoch": 0.4, + "learning_rate": 1.599813142323264e-05, + "loss": 0.2439, + "step": 5140 + }, + { + "epoch": 0.4, + "learning_rate": 1.599034568670196e-05, + "loss": 0.2714, + "step": 5150 + }, + { + "epoch": 0.4, + "learning_rate": 1.5982559950171287e-05, + "loss": 0.3455, + "step": 5160 + }, + { + "epoch": 0.4, + "learning_rate": 1.5974774213640612e-05, + "loss": 0.273, + "step": 5170 + }, + { + "epoch": 0.4, + "learning_rate": 1.5966988477109934e-05, + "loss": 0.216, + "step": 5180 + }, + { + "epoch": 0.4, + "learning_rate": 1.595920274057926e-05, + "loss": 0.3535, + "step": 5190 + }, + { + "epoch": 0.4, + "learning_rate": 1.5951417004048585e-05, + "loss": 0.2712, + "step": 5200 + }, + { + "epoch": 0.41, + "learning_rate": 1.594363126751791e-05, + "loss": 0.4113, + "step": 5210 + }, + { + "epoch": 0.41, + "learning_rate": 1.5935845530987232e-05, + "loss": 0.2541, + "step": 5220 + }, + { + "epoch": 0.41, + "learning_rate": 1.5928059794456557e-05, + "loss": 0.2662, + "step": 5230 + }, + { + "epoch": 0.41, + "learning_rate": 1.5920274057925882e-05, + "loss": 0.2713, + "step": 5240 + }, + { + "epoch": 0.41, + "learning_rate": 1.5912488321395204e-05, + "loss": 0.252, + "step": 5250 + }, + { + "epoch": 0.41, + "learning_rate": 1.590470258486453e-05, + "loss": 0.3018, + "step": 5260 + }, + { + "epoch": 0.41, + "learning_rate": 1.5896916848333855e-05, + "loss": 0.2971, + "step": 5270 + }, + { + "epoch": 0.41, + "learning_rate": 1.5889131111803177e-05, + "loss": 0.2898, + "step": 5280 + }, + { + "epoch": 0.41, + "learning_rate": 1.5881345375272502e-05, + "loss": 0.3558, + "step": 5290 + }, + { + "epoch": 0.41, + "learning_rate": 1.5873559638741827e-05, + "loss": 0.3142, + "step": 5300 + }, + { + "epoch": 0.41, + "learning_rate": 1.586577390221115e-05, + "loss": 0.308, + "step": 5310 + }, + { + "epoch": 0.41, + "learning_rate": 1.5857988165680475e-05, + "loss": 0.2551, + "step": 5320 + }, + { + "epoch": 0.41, + "learning_rate": 1.58502024291498e-05, + "loss": 0.3259, + "step": 5330 + }, + { + "epoch": 0.42, + "learning_rate": 1.5842416692619122e-05, + "loss": 0.3212, + "step": 5340 + }, + { + "epoch": 0.42, + "learning_rate": 1.5834630956088447e-05, + "loss": 0.3083, + "step": 5350 + }, + { + "epoch": 0.42, + "learning_rate": 1.5826845219557773e-05, + "loss": 0.2442, + "step": 5360 + }, + { + "epoch": 0.42, + "learning_rate": 1.5819059483027094e-05, + "loss": 0.2715, + "step": 5370 + }, + { + "epoch": 0.42, + "learning_rate": 1.581127374649642e-05, + "loss": 0.314, + "step": 5380 + }, + { + "epoch": 0.42, + "learning_rate": 1.5803488009965745e-05, + "loss": 0.3391, + "step": 5390 + }, + { + "epoch": 0.42, + "learning_rate": 1.5795702273435067e-05, + "loss": 0.3402, + "step": 5400 + }, + { + "epoch": 0.42, + "learning_rate": 1.5787916536904392e-05, + "loss": 0.2857, + "step": 5410 + }, + { + "epoch": 0.42, + "learning_rate": 1.5780130800373718e-05, + "loss": 0.2099, + "step": 5420 + }, + { + "epoch": 0.42, + "learning_rate": 1.577234506384304e-05, + "loss": 0.2864, + "step": 5430 + }, + { + "epoch": 0.42, + "learning_rate": 1.5764559327312365e-05, + "loss": 0.3086, + "step": 5440 + }, + { + "epoch": 0.42, + "learning_rate": 1.575677359078169e-05, + "loss": 0.2425, + "step": 5450 + }, + { + "epoch": 0.43, + "learning_rate": 1.5748987854251015e-05, + "loss": 0.3098, + "step": 5460 + }, + { + "epoch": 0.43, + "learning_rate": 1.5741202117720337e-05, + "loss": 0.2194, + "step": 5470 + }, + { + "epoch": 0.43, + "learning_rate": 1.5733416381189663e-05, + "loss": 0.3205, + "step": 5480 + }, + { + "epoch": 0.43, + "learning_rate": 1.5725630644658988e-05, + "loss": 0.3202, + "step": 5490 + }, + { + "epoch": 0.43, + "learning_rate": 1.571784490812831e-05, + "loss": 0.3467, + "step": 5500 + }, + { + "epoch": 0.43, + "learning_rate": 1.5710059171597635e-05, + "loss": 0.2815, + "step": 5510 + }, + { + "epoch": 0.43, + "learning_rate": 1.570227343506696e-05, + "loss": 0.2683, + "step": 5520 + }, + { + "epoch": 0.43, + "learning_rate": 1.5694487698536282e-05, + "loss": 0.3093, + "step": 5530 + }, + { + "epoch": 0.43, + "learning_rate": 1.5686701962005608e-05, + "loss": 0.2464, + "step": 5540 + }, + { + "epoch": 0.43, + "learning_rate": 1.5678916225474933e-05, + "loss": 0.3131, + "step": 5550 + }, + { + "epoch": 0.43, + "learning_rate": 1.5671130488944255e-05, + "loss": 0.262, + "step": 5560 + }, + { + "epoch": 0.43, + "learning_rate": 1.566334475241358e-05, + "loss": 0.329, + "step": 5570 + }, + { + "epoch": 0.43, + "learning_rate": 1.5655559015882905e-05, + "loss": 0.3237, + "step": 5580 + }, + { + "epoch": 0.44, + "learning_rate": 1.5647773279352227e-05, + "loss": 0.2645, + "step": 5590 + }, + { + "epoch": 0.44, + "learning_rate": 1.5639987542821553e-05, + "loss": 0.3115, + "step": 5600 + }, + { + "epoch": 0.44, + "learning_rate": 1.5632201806290878e-05, + "loss": 0.2472, + "step": 5610 + }, + { + "epoch": 0.44, + "learning_rate": 1.56244160697602e-05, + "loss": 0.2549, + "step": 5620 + }, + { + "epoch": 0.44, + "learning_rate": 1.5616630333229525e-05, + "loss": 0.3139, + "step": 5630 + }, + { + "epoch": 0.44, + "learning_rate": 1.560884459669885e-05, + "loss": 0.2782, + "step": 5640 + }, + { + "epoch": 0.44, + "learning_rate": 1.5601058860168172e-05, + "loss": 0.2747, + "step": 5650 + }, + { + "epoch": 0.44, + "learning_rate": 1.5593273123637498e-05, + "loss": 0.2519, + "step": 5660 + }, + { + "epoch": 0.44, + "learning_rate": 1.5585487387106823e-05, + "loss": 0.3232, + "step": 5670 + }, + { + "epoch": 0.44, + "learning_rate": 1.5577701650576145e-05, + "loss": 0.281, + "step": 5680 + }, + { + "epoch": 0.44, + "learning_rate": 1.556991591404547e-05, + "loss": 0.2814, + "step": 5690 + }, + { + "epoch": 0.44, + "learning_rate": 1.5562130177514792e-05, + "loss": 0.3394, + "step": 5700 + }, + { + "epoch": 0.44, + "learning_rate": 1.555434444098412e-05, + "loss": 0.257, + "step": 5710 + }, + { + "epoch": 0.45, + "learning_rate": 1.5546558704453443e-05, + "loss": 0.2698, + "step": 5720 + }, + { + "epoch": 0.45, + "learning_rate": 1.5538772967922765e-05, + "loss": 0.3376, + "step": 5730 + }, + { + "epoch": 0.45, + "learning_rate": 1.5530987231392093e-05, + "loss": 0.2416, + "step": 5740 + }, + { + "epoch": 0.45, + "learning_rate": 1.5523201494861415e-05, + "loss": 0.3062, + "step": 5750 + }, + { + "epoch": 0.45, + "learning_rate": 1.5515415758330737e-05, + "loss": 0.264, + "step": 5760 + }, + { + "epoch": 0.45, + "learning_rate": 1.5507630021800066e-05, + "loss": 0.2184, + "step": 5770 + }, + { + "epoch": 0.45, + "learning_rate": 1.5499844285269388e-05, + "loss": 0.2969, + "step": 5780 + }, + { + "epoch": 0.45, + "learning_rate": 1.549205854873871e-05, + "loss": 0.2512, + "step": 5790 + }, + { + "epoch": 0.45, + "learning_rate": 1.5484272812208038e-05, + "loss": 0.2614, + "step": 5800 + }, + { + "epoch": 0.45, + "learning_rate": 1.547648707567736e-05, + "loss": 0.2285, + "step": 5810 + }, + { + "epoch": 0.45, + "learning_rate": 1.5468701339146685e-05, + "loss": 0.254, + "step": 5820 + }, + { + "epoch": 0.45, + "learning_rate": 1.5460915602616007e-05, + "loss": 0.2227, + "step": 5830 + }, + { + "epoch": 0.45, + "learning_rate": 1.5453129866085333e-05, + "loss": 0.2734, + "step": 5840 + }, + { + "epoch": 0.46, + "learning_rate": 1.5445344129554658e-05, + "loss": 0.2874, + "step": 5850 + }, + { + "epoch": 0.46, + "learning_rate": 1.543755839302398e-05, + "loss": 0.2869, + "step": 5860 + }, + { + "epoch": 0.46, + "learning_rate": 1.5429772656493305e-05, + "loss": 0.2445, + "step": 5870 + }, + { + "epoch": 0.46, + "learning_rate": 1.542198691996263e-05, + "loss": 0.331, + "step": 5880 + }, + { + "epoch": 0.46, + "learning_rate": 1.5414201183431952e-05, + "loss": 0.2387, + "step": 5890 + }, + { + "epoch": 0.46, + "learning_rate": 1.5406415446901278e-05, + "loss": 0.2279, + "step": 5900 + }, + { + "epoch": 0.46, + "learning_rate": 1.5398629710370603e-05, + "loss": 0.2091, + "step": 5910 + }, + { + "epoch": 0.46, + "learning_rate": 1.5390843973839925e-05, + "loss": 0.2894, + "step": 5920 + }, + { + "epoch": 0.46, + "learning_rate": 1.538305823730925e-05, + "loss": 0.3092, + "step": 5930 + }, + { + "epoch": 0.46, + "learning_rate": 1.5375272500778576e-05, + "loss": 0.2876, + "step": 5940 + }, + { + "epoch": 0.46, + "learning_rate": 1.5367486764247897e-05, + "loss": 0.2665, + "step": 5950 + }, + { + "epoch": 0.46, + "learning_rate": 1.5359701027717223e-05, + "loss": 0.2591, + "step": 5960 + }, + { + "epoch": 0.46, + "learning_rate": 1.5351915291186548e-05, + "loss": 0.3199, + "step": 5970 + }, + { + "epoch": 0.47, + "learning_rate": 1.534412955465587e-05, + "loss": 0.2275, + "step": 5980 + }, + { + "epoch": 0.47, + "learning_rate": 1.5336343818125195e-05, + "loss": 0.2238, + "step": 5990 + }, + { + "epoch": 0.47, + "learning_rate": 1.532855808159452e-05, + "loss": 0.2585, + "step": 6000 + }, + { + "epoch": 0.47, + "learning_rate": 1.5320772345063842e-05, + "loss": 0.241, + "step": 6010 + }, + { + "epoch": 0.47, + "learning_rate": 1.5312986608533168e-05, + "loss": 0.2876, + "step": 6020 + }, + { + "epoch": 0.47, + "learning_rate": 1.5305200872002493e-05, + "loss": 0.2868, + "step": 6030 + }, + { + "epoch": 0.47, + "learning_rate": 1.5297415135471815e-05, + "loss": 0.2299, + "step": 6040 + }, + { + "epoch": 0.47, + "learning_rate": 1.528962939894114e-05, + "loss": 0.3401, + "step": 6050 + }, + { + "epoch": 0.47, + "learning_rate": 1.5281843662410466e-05, + "loss": 0.2593, + "step": 6060 + }, + { + "epoch": 0.47, + "learning_rate": 1.527405792587979e-05, + "loss": 0.2513, + "step": 6070 + }, + { + "epoch": 0.47, + "learning_rate": 1.5266272189349113e-05, + "loss": 0.3075, + "step": 6080 + }, + { + "epoch": 0.47, + "learning_rate": 1.5258486452818438e-05, + "loss": 0.3274, + "step": 6090 + }, + { + "epoch": 0.47, + "learning_rate": 1.5250700716287763e-05, + "loss": 0.2581, + "step": 6100 + }, + { + "epoch": 0.48, + "learning_rate": 1.5242914979757087e-05, + "loss": 0.2692, + "step": 6110 + }, + { + "epoch": 0.48, + "learning_rate": 1.523512924322641e-05, + "loss": 0.3244, + "step": 6120 + }, + { + "epoch": 0.48, + "learning_rate": 1.5227343506695736e-05, + "loss": 0.2415, + "step": 6130 + }, + { + "epoch": 0.48, + "learning_rate": 1.521955777016506e-05, + "loss": 0.2424, + "step": 6140 + }, + { + "epoch": 0.48, + "learning_rate": 1.5211772033634381e-05, + "loss": 0.2477, + "step": 6150 + }, + { + "epoch": 0.48, + "learning_rate": 1.5203986297103708e-05, + "loss": 0.2988, + "step": 6160 + }, + { + "epoch": 0.48, + "learning_rate": 1.5196200560573032e-05, + "loss": 0.2726, + "step": 6170 + }, + { + "epoch": 0.48, + "learning_rate": 1.5188414824042354e-05, + "loss": 0.3212, + "step": 6180 + }, + { + "epoch": 0.48, + "learning_rate": 1.5180629087511681e-05, + "loss": 0.2562, + "step": 6190 + }, + { + "epoch": 0.48, + "learning_rate": 1.5172843350981003e-05, + "loss": 0.2075, + "step": 6200 + }, + { + "epoch": 0.48, + "learning_rate": 1.516505761445033e-05, + "loss": 0.3107, + "step": 6210 + }, + { + "epoch": 0.48, + "learning_rate": 1.5157271877919653e-05, + "loss": 0.267, + "step": 6220 + }, + { + "epoch": 0.49, + "learning_rate": 1.5149486141388975e-05, + "loss": 0.2532, + "step": 6230 + }, + { + "epoch": 0.49, + "learning_rate": 1.5141700404858302e-05, + "loss": 0.2778, + "step": 6240 + }, + { + "epoch": 0.49, + "learning_rate": 1.5133914668327624e-05, + "loss": 0.3576, + "step": 6250 + }, + { + "epoch": 0.49, + "learning_rate": 1.5126128931796948e-05, + "loss": 0.2662, + "step": 6260 + }, + { + "epoch": 0.49, + "learning_rate": 1.5118343195266275e-05, + "loss": 0.2563, + "step": 6270 + }, + { + "epoch": 0.49, + "learning_rate": 1.5110557458735597e-05, + "loss": 0.293, + "step": 6280 + }, + { + "epoch": 0.49, + "learning_rate": 1.510277172220492e-05, + "loss": 0.279, + "step": 6290 + }, + { + "epoch": 0.49, + "learning_rate": 1.5094985985674246e-05, + "loss": 0.2837, + "step": 6300 + }, + { + "epoch": 0.49, + "learning_rate": 1.508720024914357e-05, + "loss": 0.2893, + "step": 6310 + }, + { + "epoch": 0.49, + "learning_rate": 1.5079414512612896e-05, + "loss": 0.3511, + "step": 6320 + }, + { + "epoch": 0.49, + "learning_rate": 1.5071628776082218e-05, + "loss": 0.378, + "step": 6330 + }, + { + "epoch": 0.49, + "learning_rate": 1.5063843039551542e-05, + "loss": 0.3159, + "step": 6340 + }, + { + "epoch": 0.49, + "learning_rate": 1.5056057303020867e-05, + "loss": 0.2108, + "step": 6350 + }, + { + "epoch": 0.5, + "learning_rate": 1.504827156649019e-05, + "loss": 0.2183, + "step": 6360 + }, + { + "epoch": 0.5, + "learning_rate": 1.5040485829959514e-05, + "loss": 0.2395, + "step": 6370 + }, + { + "epoch": 0.5, + "learning_rate": 1.503270009342884e-05, + "loss": 0.3082, + "step": 6380 + }, + { + "epoch": 0.5, + "learning_rate": 1.5024914356898163e-05, + "loss": 0.2743, + "step": 6390 + }, + { + "epoch": 0.5, + "learning_rate": 1.5017128620367487e-05, + "loss": 0.3109, + "step": 6400 + }, + { + "epoch": 0.5, + "learning_rate": 1.5009342883836812e-05, + "loss": 0.2441, + "step": 6410 + }, + { + "epoch": 0.5, + "learning_rate": 1.5001557147306136e-05, + "loss": 0.2637, + "step": 6420 + }, + { + "epoch": 0.5, + "learning_rate": 1.499377141077546e-05, + "loss": 0.2905, + "step": 6430 + }, + { + "epoch": 0.5, + "learning_rate": 1.4985985674244785e-05, + "loss": 0.2632, + "step": 6440 + }, + { + "epoch": 0.5, + "learning_rate": 1.4978199937714108e-05, + "loss": 0.217, + "step": 6450 + }, + { + "epoch": 0.5, + "learning_rate": 1.4970414201183433e-05, + "loss": 0.2873, + "step": 6460 + }, + { + "epoch": 0.5, + "learning_rate": 1.4962628464652757e-05, + "loss": 0.279, + "step": 6470 + }, + { + "epoch": 0.5, + "learning_rate": 1.495484272812208e-05, + "loss": 0.2589, + "step": 6480 + }, + { + "epoch": 0.51, + "learning_rate": 1.4947056991591406e-05, + "loss": 0.3188, + "step": 6490 + }, + { + "epoch": 0.51, + "learning_rate": 1.493927125506073e-05, + "loss": 0.2329, + "step": 6500 + }, + { + "epoch": 0.51, + "learning_rate": 1.4931485518530053e-05, + "loss": 0.2875, + "step": 6510 + }, + { + "epoch": 0.51, + "learning_rate": 1.4923699781999378e-05, + "loss": 0.2437, + "step": 6520 + }, + { + "epoch": 0.51, + "learning_rate": 1.4915914045468702e-05, + "loss": 0.2798, + "step": 6530 + }, + { + "epoch": 0.51, + "learning_rate": 1.4908128308938026e-05, + "loss": 0.2659, + "step": 6540 + }, + { + "epoch": 0.51, + "learning_rate": 1.4900342572407351e-05, + "loss": 0.2605, + "step": 6550 + }, + { + "epoch": 0.51, + "learning_rate": 1.4892556835876675e-05, + "loss": 0.3059, + "step": 6560 + }, + { + "epoch": 0.51, + "learning_rate": 1.4884771099346e-05, + "loss": 0.1881, + "step": 6570 + }, + { + "epoch": 0.51, + "learning_rate": 1.4876985362815324e-05, + "loss": 0.268, + "step": 6580 + }, + { + "epoch": 0.51, + "learning_rate": 1.4869199626284647e-05, + "loss": 0.2027, + "step": 6590 + }, + { + "epoch": 0.51, + "learning_rate": 1.4861413889753972e-05, + "loss": 0.2267, + "step": 6600 + }, + { + "epoch": 0.51, + "learning_rate": 1.4853628153223296e-05, + "loss": 0.2685, + "step": 6610 + }, + { + "epoch": 0.52, + "learning_rate": 1.484584241669262e-05, + "loss": 0.2352, + "step": 6620 + }, + { + "epoch": 0.52, + "learning_rate": 1.4838056680161945e-05, + "loss": 0.2105, + "step": 6630 + }, + { + "epoch": 0.52, + "learning_rate": 1.4830270943631269e-05, + "loss": 0.2554, + "step": 6640 + }, + { + "epoch": 0.52, + "learning_rate": 1.4822485207100592e-05, + "loss": 0.3014, + "step": 6650 + }, + { + "epoch": 0.52, + "learning_rate": 1.4814699470569917e-05, + "loss": 0.2456, + "step": 6660 + }, + { + "epoch": 0.52, + "learning_rate": 1.4806913734039241e-05, + "loss": 0.2588, + "step": 6670 + }, + { + "epoch": 0.52, + "learning_rate": 1.4799127997508565e-05, + "loss": 0.233, + "step": 6680 + }, + { + "epoch": 0.52, + "learning_rate": 1.479134226097789e-05, + "loss": 0.2118, + "step": 6690 + }, + { + "epoch": 0.52, + "learning_rate": 1.4783556524447214e-05, + "loss": 0.3248, + "step": 6700 + }, + { + "epoch": 0.52, + "learning_rate": 1.4775770787916539e-05, + "loss": 0.2562, + "step": 6710 + }, + { + "epoch": 0.52, + "learning_rate": 1.4767985051385862e-05, + "loss": 0.3039, + "step": 6720 + }, + { + "epoch": 0.52, + "learning_rate": 1.4760199314855186e-05, + "loss": 0.2707, + "step": 6730 + }, + { + "epoch": 0.52, + "learning_rate": 1.4752413578324511e-05, + "loss": 0.2062, + "step": 6740 + }, + { + "epoch": 0.53, + "learning_rate": 1.4744627841793835e-05, + "loss": 0.2664, + "step": 6750 + }, + { + "epoch": 0.53, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.2372, + "step": 6760 + }, + { + "epoch": 0.53, + "learning_rate": 1.4729056368732484e-05, + "loss": 0.3098, + "step": 6770 + }, + { + "epoch": 0.53, + "learning_rate": 1.4721270632201807e-05, + "loss": 0.2494, + "step": 6780 + }, + { + "epoch": 0.53, + "learning_rate": 1.4713484895671131e-05, + "loss": 0.3575, + "step": 6790 + }, + { + "epoch": 0.53, + "learning_rate": 1.4705699159140456e-05, + "loss": 0.2152, + "step": 6800 + }, + { + "epoch": 0.53, + "learning_rate": 1.469791342260978e-05, + "loss": 0.2933, + "step": 6810 + }, + { + "epoch": 0.53, + "learning_rate": 1.4690127686079105e-05, + "loss": 0.2765, + "step": 6820 + }, + { + "epoch": 0.53, + "learning_rate": 1.4682341949548429e-05, + "loss": 0.2998, + "step": 6830 + }, + { + "epoch": 0.53, + "learning_rate": 1.4674556213017752e-05, + "loss": 0.2421, + "step": 6840 + }, + { + "epoch": 0.53, + "learning_rate": 1.4666770476487078e-05, + "loss": 0.2567, + "step": 6850 + }, + { + "epoch": 0.53, + "learning_rate": 1.4658984739956401e-05, + "loss": 0.2217, + "step": 6860 + }, + { + "epoch": 0.53, + "learning_rate": 1.4651199003425725e-05, + "loss": 0.2064, + "step": 6870 + }, + { + "epoch": 0.54, + "learning_rate": 1.464341326689505e-05, + "loss": 0.2982, + "step": 6880 + }, + { + "epoch": 0.54, + "learning_rate": 1.4635627530364374e-05, + "loss": 0.2313, + "step": 6890 + }, + { + "epoch": 0.54, + "learning_rate": 1.4627841793833698e-05, + "loss": 0.3445, + "step": 6900 + }, + { + "epoch": 0.54, + "learning_rate": 1.4620056057303023e-05, + "loss": 0.3044, + "step": 6910 + }, + { + "epoch": 0.54, + "learning_rate": 1.4612270320772346e-05, + "loss": 0.2654, + "step": 6920 + }, + { + "epoch": 0.54, + "learning_rate": 1.460448458424167e-05, + "loss": 0.2996, + "step": 6930 + }, + { + "epoch": 0.54, + "learning_rate": 1.4596698847710995e-05, + "loss": 0.2435, + "step": 6940 + }, + { + "epoch": 0.54, + "learning_rate": 1.4588913111180319e-05, + "loss": 0.348, + "step": 6950 + }, + { + "epoch": 0.54, + "learning_rate": 1.4581127374649644e-05, + "loss": 0.2329, + "step": 6960 + }, + { + "epoch": 0.54, + "learning_rate": 1.4573341638118968e-05, + "loss": 0.2526, + "step": 6970 + }, + { + "epoch": 0.54, + "learning_rate": 1.4565555901588291e-05, + "loss": 0.1841, + "step": 6980 + }, + { + "epoch": 0.54, + "learning_rate": 1.4557770165057617e-05, + "loss": 0.2716, + "step": 6990 + }, + { + "epoch": 0.55, + "learning_rate": 1.454998442852694e-05, + "loss": 0.2528, + "step": 7000 + }, + { + "epoch": 0.55, + "learning_rate": 1.4542198691996264e-05, + "loss": 0.2268, + "step": 7010 + }, + { + "epoch": 0.55, + "learning_rate": 1.453441295546559e-05, + "loss": 0.2281, + "step": 7020 + }, + { + "epoch": 0.55, + "learning_rate": 1.4526627218934913e-05, + "loss": 0.2526, + "step": 7030 + }, + { + "epoch": 0.55, + "learning_rate": 1.4518841482404236e-05, + "loss": 0.3006, + "step": 7040 + }, + { + "epoch": 0.55, + "learning_rate": 1.4511055745873562e-05, + "loss": 0.3228, + "step": 7050 + }, + { + "epoch": 0.55, + "learning_rate": 1.4503270009342885e-05, + "loss": 0.2666, + "step": 7060 + }, + { + "epoch": 0.55, + "learning_rate": 1.449548427281221e-05, + "loss": 0.2406, + "step": 7070 + }, + { + "epoch": 0.55, + "learning_rate": 1.4487698536281534e-05, + "loss": 0.2733, + "step": 7080 + }, + { + "epoch": 0.55, + "learning_rate": 1.4479912799750858e-05, + "loss": 0.2883, + "step": 7090 + }, + { + "epoch": 0.55, + "learning_rate": 1.4472127063220183e-05, + "loss": 0.2873, + "step": 7100 + }, + { + "epoch": 0.55, + "learning_rate": 1.4464341326689507e-05, + "loss": 0.2031, + "step": 7110 + }, + { + "epoch": 0.55, + "learning_rate": 1.4456555590158829e-05, + "loss": 0.2494, + "step": 7120 + }, + { + "epoch": 0.56, + "learning_rate": 1.4448769853628156e-05, + "loss": 0.2951, + "step": 7130 + }, + { + "epoch": 0.56, + "learning_rate": 1.444098411709748e-05, + "loss": 0.3056, + "step": 7140 + }, + { + "epoch": 0.56, + "learning_rate": 1.4433198380566801e-05, + "loss": 0.3215, + "step": 7150 + }, + { + "epoch": 0.56, + "learning_rate": 1.4425412644036128e-05, + "loss": 0.2782, + "step": 7160 + }, + { + "epoch": 0.56, + "learning_rate": 1.441762690750545e-05, + "loss": 0.28, + "step": 7170 + }, + { + "epoch": 0.56, + "learning_rate": 1.4409841170974774e-05, + "loss": 0.2594, + "step": 7180 + }, + { + "epoch": 0.56, + "learning_rate": 1.44020554344441e-05, + "loss": 0.2574, + "step": 7190 + }, + { + "epoch": 0.56, + "learning_rate": 1.4394269697913423e-05, + "loss": 0.2778, + "step": 7200 + }, + { + "epoch": 0.56, + "learning_rate": 1.438648396138275e-05, + "loss": 0.3185, + "step": 7210 + }, + { + "epoch": 0.56, + "learning_rate": 1.4378698224852072e-05, + "loss": 0.2715, + "step": 7220 + }, + { + "epoch": 0.56, + "learning_rate": 1.4370912488321395e-05, + "loss": 0.2555, + "step": 7230 + }, + { + "epoch": 0.56, + "learning_rate": 1.4363126751790722e-05, + "loss": 0.2159, + "step": 7240 + }, + { + "epoch": 0.56, + "learning_rate": 1.4355341015260044e-05, + "loss": 0.3302, + "step": 7250 + }, + { + "epoch": 0.57, + "learning_rate": 1.4347555278729368e-05, + "loss": 0.1962, + "step": 7260 + }, + { + "epoch": 0.57, + "learning_rate": 1.4339769542198693e-05, + "loss": 0.223, + "step": 7270 + }, + { + "epoch": 0.57, + "learning_rate": 1.4331983805668017e-05, + "loss": 0.2492, + "step": 7280 + }, + { + "epoch": 0.57, + "learning_rate": 1.432419806913734e-05, + "loss": 0.2742, + "step": 7290 + }, + { + "epoch": 0.57, + "learning_rate": 1.4316412332606665e-05, + "loss": 0.2691, + "step": 7300 + }, + { + "epoch": 0.57, + "learning_rate": 1.4308626596075989e-05, + "loss": 0.2728, + "step": 7310 + }, + { + "epoch": 0.57, + "learning_rate": 1.4300840859545316e-05, + "loss": 0.2205, + "step": 7320 + }, + { + "epoch": 0.57, + "learning_rate": 1.4293055123014638e-05, + "loss": 0.2697, + "step": 7330 + }, + { + "epoch": 0.57, + "learning_rate": 1.4285269386483962e-05, + "loss": 0.2413, + "step": 7340 + }, + { + "epoch": 0.57, + "learning_rate": 1.4277483649953287e-05, + "loss": 0.2632, + "step": 7350 + }, + { + "epoch": 0.57, + "learning_rate": 1.426969791342261e-05, + "loss": 0.2231, + "step": 7360 + }, + { + "epoch": 0.57, + "learning_rate": 1.4261912176891934e-05, + "loss": 0.2651, + "step": 7370 + }, + { + "epoch": 0.57, + "learning_rate": 1.425412644036126e-05, + "loss": 0.3421, + "step": 7380 + }, + { + "epoch": 0.58, + "learning_rate": 1.4246340703830583e-05, + "loss": 0.2889, + "step": 7390 + }, + { + "epoch": 0.58, + "learning_rate": 1.4238554967299907e-05, + "loss": 0.2374, + "step": 7400 + }, + { + "epoch": 0.58, + "learning_rate": 1.4230769230769232e-05, + "loss": 0.3111, + "step": 7410 + }, + { + "epoch": 0.58, + "learning_rate": 1.4222983494238555e-05, + "loss": 0.2267, + "step": 7420 + }, + { + "epoch": 0.58, + "learning_rate": 1.4215197757707879e-05, + "loss": 0.3142, + "step": 7430 + }, + { + "epoch": 0.58, + "learning_rate": 1.4207412021177204e-05, + "loss": 0.2473, + "step": 7440 + }, + { + "epoch": 0.58, + "learning_rate": 1.4199626284646528e-05, + "loss": 0.3039, + "step": 7450 + }, + { + "epoch": 0.58, + "learning_rate": 1.4191840548115853e-05, + "loss": 0.2859, + "step": 7460 + }, + { + "epoch": 0.58, + "learning_rate": 1.4184054811585177e-05, + "loss": 0.1975, + "step": 7470 + }, + { + "epoch": 0.58, + "learning_rate": 1.41762690750545e-05, + "loss": 0.247, + "step": 7480 + }, + { + "epoch": 0.58, + "learning_rate": 1.4168483338523826e-05, + "loss": 0.1781, + "step": 7490 + }, + { + "epoch": 0.58, + "learning_rate": 1.416069760199315e-05, + "loss": 0.2485, + "step": 7500 + }, + { + "epoch": 0.58, + "learning_rate": 1.4152911865462473e-05, + "loss": 0.3146, + "step": 7510 + }, + { + "epoch": 0.59, + "learning_rate": 1.4145126128931798e-05, + "loss": 0.2644, + "step": 7520 + }, + { + "epoch": 0.59, + "learning_rate": 1.4137340392401122e-05, + "loss": 0.2583, + "step": 7530 + }, + { + "epoch": 0.59, + "learning_rate": 1.4129554655870446e-05, + "loss": 0.2273, + "step": 7540 + }, + { + "epoch": 0.59, + "learning_rate": 1.412176891933977e-05, + "loss": 0.2436, + "step": 7550 + }, + { + "epoch": 0.59, + "learning_rate": 1.4113983182809094e-05, + "loss": 0.2849, + "step": 7560 + }, + { + "epoch": 0.59, + "learning_rate": 1.410619744627842e-05, + "loss": 0.2887, + "step": 7570 + }, + { + "epoch": 0.59, + "learning_rate": 1.4098411709747743e-05, + "loss": 0.2867, + "step": 7580 + }, + { + "epoch": 0.59, + "learning_rate": 1.4090625973217067e-05, + "loss": 0.2833, + "step": 7590 + }, + { + "epoch": 0.59, + "learning_rate": 1.4082840236686392e-05, + "loss": 0.2558, + "step": 7600 + }, + { + "epoch": 0.59, + "learning_rate": 1.4075054500155716e-05, + "loss": 0.2488, + "step": 7610 + }, + { + "epoch": 0.59, + "learning_rate": 1.406726876362504e-05, + "loss": 0.2157, + "step": 7620 + }, + { + "epoch": 0.59, + "learning_rate": 1.4059483027094365e-05, + "loss": 0.2722, + "step": 7630 + }, + { + "epoch": 0.59, + "learning_rate": 1.4051697290563688e-05, + "loss": 0.2399, + "step": 7640 + }, + { + "epoch": 0.6, + "learning_rate": 1.4043911554033012e-05, + "loss": 0.254, + "step": 7650 + }, + { + "epoch": 0.6, + "learning_rate": 1.4036125817502337e-05, + "loss": 0.2834, + "step": 7660 + }, + { + "epoch": 0.6, + "learning_rate": 1.4028340080971661e-05, + "loss": 0.2653, + "step": 7670 + }, + { + "epoch": 0.6, + "learning_rate": 1.4020554344440984e-05, + "loss": 0.3492, + "step": 7680 + }, + { + "epoch": 0.6, + "learning_rate": 1.401276860791031e-05, + "loss": 0.2323, + "step": 7690 + }, + { + "epoch": 0.6, + "learning_rate": 1.4004982871379633e-05, + "loss": 0.2731, + "step": 7700 + }, + { + "epoch": 0.6, + "learning_rate": 1.3997197134848959e-05, + "loss": 0.2051, + "step": 7710 + }, + { + "epoch": 0.6, + "learning_rate": 1.3989411398318282e-05, + "loss": 0.2671, + "step": 7720 + }, + { + "epoch": 0.6, + "learning_rate": 1.3981625661787606e-05, + "loss": 0.2759, + "step": 7730 + }, + { + "epoch": 0.6, + "learning_rate": 1.3973839925256931e-05, + "loss": 0.2552, + "step": 7740 + }, + { + "epoch": 0.6, + "learning_rate": 1.3966054188726255e-05, + "loss": 0.2265, + "step": 7750 + }, + { + "epoch": 0.6, + "learning_rate": 1.3958268452195578e-05, + "loss": 0.2216, + "step": 7760 + }, + { + "epoch": 0.6, + "learning_rate": 1.3950482715664904e-05, + "loss": 0.2281, + "step": 7770 + }, + { + "epoch": 0.61, + "learning_rate": 1.3942696979134227e-05, + "loss": 0.1875, + "step": 7780 + }, + { + "epoch": 0.61, + "learning_rate": 1.3934911242603551e-05, + "loss": 0.2821, + "step": 7790 + }, + { + "epoch": 0.61, + "learning_rate": 1.3927125506072876e-05, + "loss": 0.2133, + "step": 7800 + }, + { + "epoch": 0.61, + "learning_rate": 1.39193397695422e-05, + "loss": 0.2294, + "step": 7810 + }, + { + "epoch": 0.61, + "learning_rate": 1.3911554033011525e-05, + "loss": 0.2857, + "step": 7820 + }, + { + "epoch": 0.61, + "learning_rate": 1.3903768296480849e-05, + "loss": 0.2717, + "step": 7830 + }, + { + "epoch": 0.61, + "learning_rate": 1.3895982559950172e-05, + "loss": 0.2796, + "step": 7840 + }, + { + "epoch": 0.61, + "learning_rate": 1.3888196823419498e-05, + "loss": 0.268, + "step": 7850 + }, + { + "epoch": 0.61, + "learning_rate": 1.3880411086888821e-05, + "loss": 0.2628, + "step": 7860 + }, + { + "epoch": 0.61, + "learning_rate": 1.3872625350358145e-05, + "loss": 0.2389, + "step": 7870 + }, + { + "epoch": 0.61, + "learning_rate": 1.386483961382747e-05, + "loss": 0.2932, + "step": 7880 + }, + { + "epoch": 0.61, + "learning_rate": 1.3857053877296794e-05, + "loss": 0.2538, + "step": 7890 + }, + { + "epoch": 0.62, + "learning_rate": 1.3849268140766117e-05, + "loss": 0.2351, + "step": 7900 + }, + { + "epoch": 0.62, + "learning_rate": 1.3841482404235443e-05, + "loss": 0.2099, + "step": 7910 + }, + { + "epoch": 0.62, + "learning_rate": 1.3833696667704766e-05, + "loss": 0.1891, + "step": 7920 + }, + { + "epoch": 0.62, + "learning_rate": 1.382591093117409e-05, + "loss": 0.2529, + "step": 7930 + }, + { + "epoch": 0.62, + "learning_rate": 1.3818125194643415e-05, + "loss": 0.2546, + "step": 7940 + }, + { + "epoch": 0.62, + "learning_rate": 1.3810339458112739e-05, + "loss": 0.2655, + "step": 7950 + }, + { + "epoch": 0.62, + "learning_rate": 1.3802553721582064e-05, + "loss": 0.243, + "step": 7960 + }, + { + "epoch": 0.62, + "learning_rate": 1.3794767985051388e-05, + "loss": 0.2579, + "step": 7970 + }, + { + "epoch": 0.62, + "learning_rate": 1.3786982248520711e-05, + "loss": 0.2958, + "step": 7980 + }, + { + "epoch": 0.62, + "learning_rate": 1.3779196511990037e-05, + "loss": 0.2424, + "step": 7990 + }, + { + "epoch": 0.62, + "learning_rate": 1.377141077545936e-05, + "loss": 0.2509, + "step": 8000 + }, + { + "epoch": 0.62, + "learning_rate": 1.3763625038928684e-05, + "loss": 0.2683, + "step": 8010 + }, + { + "epoch": 0.62, + "learning_rate": 1.3755839302398009e-05, + "loss": 0.1821, + "step": 8020 + }, + { + "epoch": 0.63, + "learning_rate": 1.3748053565867333e-05, + "loss": 0.2214, + "step": 8030 + }, + { + "epoch": 0.63, + "learning_rate": 1.3740267829336655e-05, + "loss": 0.258, + "step": 8040 + }, + { + "epoch": 0.63, + "learning_rate": 1.3732482092805982e-05, + "loss": 0.2458, + "step": 8050 + }, + { + "epoch": 0.63, + "learning_rate": 1.3724696356275305e-05, + "loss": 0.2039, + "step": 8060 + }, + { + "epoch": 0.63, + "learning_rate": 1.371691061974463e-05, + "loss": 0.2078, + "step": 8070 + }, + { + "epoch": 0.63, + "learning_rate": 1.3709124883213954e-05, + "loss": 0.2607, + "step": 8080 + }, + { + "epoch": 0.63, + "learning_rate": 1.3701339146683276e-05, + "loss": 0.2988, + "step": 8090 + }, + { + "epoch": 0.63, + "learning_rate": 1.3693553410152603e-05, + "loss": 0.2406, + "step": 8100 + }, + { + "epoch": 0.63, + "learning_rate": 1.3685767673621927e-05, + "loss": 0.2635, + "step": 8110 + }, + { + "epoch": 0.63, + "learning_rate": 1.3677981937091248e-05, + "loss": 0.2343, + "step": 8120 + }, + { + "epoch": 0.63, + "learning_rate": 1.3670196200560575e-05, + "loss": 0.2251, + "step": 8130 + }, + { + "epoch": 0.63, + "learning_rate": 1.3662410464029897e-05, + "loss": 0.2247, + "step": 8140 + }, + { + "epoch": 0.63, + "learning_rate": 1.3654624727499221e-05, + "loss": 0.2332, + "step": 8150 + }, + { + "epoch": 0.64, + "learning_rate": 1.3646838990968548e-05, + "loss": 0.2405, + "step": 8160 + }, + { + "epoch": 0.64, + "learning_rate": 1.363905325443787e-05, + "loss": 0.3167, + "step": 8170 + }, + { + "epoch": 0.64, + "learning_rate": 1.3631267517907194e-05, + "loss": 0.293, + "step": 8180 + }, + { + "epoch": 0.64, + "learning_rate": 1.362348178137652e-05, + "loss": 0.2745, + "step": 8190 + }, + { + "epoch": 0.64, + "learning_rate": 1.3615696044845842e-05, + "loss": 0.201, + "step": 8200 + }, + { + "epoch": 0.64, + "learning_rate": 1.360791030831517e-05, + "loss": 0.2546, + "step": 8210 + }, + { + "epoch": 0.64, + "learning_rate": 1.3600124571784491e-05, + "loss": 0.219, + "step": 8220 + }, + { + "epoch": 0.64, + "learning_rate": 1.3592338835253815e-05, + "loss": 0.2208, + "step": 8230 + }, + { + "epoch": 0.64, + "learning_rate": 1.3584553098723142e-05, + "loss": 0.2707, + "step": 8240 + }, + { + "epoch": 0.64, + "learning_rate": 1.3576767362192464e-05, + "loss": 0.3258, + "step": 8250 + }, + { + "epoch": 0.64, + "learning_rate": 1.3568981625661787e-05, + "loss": 0.2519, + "step": 8260 + }, + { + "epoch": 0.64, + "learning_rate": 1.3561195889131113e-05, + "loss": 0.236, + "step": 8270 + }, + { + "epoch": 0.64, + "learning_rate": 1.3553410152600436e-05, + "loss": 0.2893, + "step": 8280 + }, + { + "epoch": 0.65, + "learning_rate": 1.354562441606976e-05, + "loss": 0.2164, + "step": 8290 + }, + { + "epoch": 0.65, + "learning_rate": 1.3537838679539085e-05, + "loss": 0.2337, + "step": 8300 + }, + { + "epoch": 0.65, + "learning_rate": 1.3530052943008409e-05, + "loss": 0.2531, + "step": 8310 + }, + { + "epoch": 0.65, + "learning_rate": 1.3522267206477734e-05, + "loss": 0.2735, + "step": 8320 + }, + { + "epoch": 0.65, + "learning_rate": 1.3514481469947058e-05, + "loss": 0.2704, + "step": 8330 + }, + { + "epoch": 0.65, + "learning_rate": 1.3506695733416381e-05, + "loss": 0.2442, + "step": 8340 + }, + { + "epoch": 0.65, + "learning_rate": 1.3498909996885707e-05, + "loss": 0.2818, + "step": 8350 + }, + { + "epoch": 0.65, + "learning_rate": 1.349112426035503e-05, + "loss": 0.2394, + "step": 8360 + }, + { + "epoch": 0.65, + "learning_rate": 1.3483338523824354e-05, + "loss": 0.2207, + "step": 8370 + }, + { + "epoch": 0.65, + "learning_rate": 1.347555278729368e-05, + "loss": 0.2092, + "step": 8380 + }, + { + "epoch": 0.65, + "learning_rate": 1.3467767050763003e-05, + "loss": 0.2016, + "step": 8390 + }, + { + "epoch": 0.65, + "learning_rate": 1.3459981314232326e-05, + "loss": 0.2382, + "step": 8400 + }, + { + "epoch": 0.65, + "learning_rate": 1.3452195577701652e-05, + "loss": 0.2259, + "step": 8410 + }, + { + "epoch": 0.66, + "learning_rate": 1.3444409841170975e-05, + "loss": 0.2597, + "step": 8420 + }, + { + "epoch": 0.66, + "learning_rate": 1.3436624104640299e-05, + "loss": 0.1858, + "step": 8430 + }, + { + "epoch": 0.66, + "learning_rate": 1.3428838368109624e-05, + "loss": 0.2511, + "step": 8440 + }, + { + "epoch": 0.66, + "learning_rate": 1.3421052631578948e-05, + "loss": 0.2191, + "step": 8450 + }, + { + "epoch": 0.66, + "learning_rate": 1.3413266895048273e-05, + "loss": 0.259, + "step": 8460 + }, + { + "epoch": 0.66, + "learning_rate": 1.3405481158517597e-05, + "loss": 0.2004, + "step": 8470 + }, + { + "epoch": 0.66, + "learning_rate": 1.339769542198692e-05, + "loss": 0.2483, + "step": 8480 + }, + { + "epoch": 0.66, + "learning_rate": 1.3389909685456246e-05, + "loss": 0.214, + "step": 8490 + }, + { + "epoch": 0.66, + "learning_rate": 1.338212394892557e-05, + "loss": 0.2958, + "step": 8500 + }, + { + "epoch": 0.66, + "learning_rate": 1.3374338212394893e-05, + "loss": 0.3244, + "step": 8510 + }, + { + "epoch": 0.66, + "learning_rate": 1.3366552475864218e-05, + "loss": 0.2065, + "step": 8520 + }, + { + "epoch": 0.66, + "learning_rate": 1.3358766739333542e-05, + "loss": 0.249, + "step": 8530 + }, + { + "epoch": 0.66, + "learning_rate": 1.3350981002802865e-05, + "loss": 0.2386, + "step": 8540 + }, + { + "epoch": 0.67, + "learning_rate": 1.334319526627219e-05, + "loss": 0.1867, + "step": 8550 + }, + { + "epoch": 0.67, + "learning_rate": 1.3335409529741514e-05, + "loss": 0.2001, + "step": 8560 + }, + { + "epoch": 0.67, + "learning_rate": 1.332762379321084e-05, + "loss": 0.2559, + "step": 8570 + }, + { + "epoch": 0.67, + "learning_rate": 1.3319838056680163e-05, + "loss": 0.2514, + "step": 8580 + }, + { + "epoch": 0.67, + "learning_rate": 1.3312052320149487e-05, + "loss": 0.2386, + "step": 8590 + }, + { + "epoch": 0.67, + "learning_rate": 1.3304266583618812e-05, + "loss": 0.3099, + "step": 8600 + }, + { + "epoch": 0.67, + "learning_rate": 1.3296480847088136e-05, + "loss": 0.1941, + "step": 8610 + }, + { + "epoch": 0.67, + "learning_rate": 1.328869511055746e-05, + "loss": 0.1931, + "step": 8620 + }, + { + "epoch": 0.67, + "learning_rate": 1.3280909374026785e-05, + "loss": 0.2676, + "step": 8630 + }, + { + "epoch": 0.67, + "learning_rate": 1.3273123637496108e-05, + "loss": 0.2689, + "step": 8640 + }, + { + "epoch": 0.67, + "learning_rate": 1.3265337900965432e-05, + "loss": 0.2499, + "step": 8650 + }, + { + "epoch": 0.67, + "learning_rate": 1.3257552164434757e-05, + "loss": 0.1597, + "step": 8660 + }, + { + "epoch": 0.68, + "learning_rate": 1.324976642790408e-05, + "loss": 0.2385, + "step": 8670 + }, + { + "epoch": 0.68, + "learning_rate": 1.3241980691373404e-05, + "loss": 0.3206, + "step": 8680 + }, + { + "epoch": 0.68, + "learning_rate": 1.323419495484273e-05, + "loss": 0.2182, + "step": 8690 + }, + { + "epoch": 0.68, + "learning_rate": 1.3226409218312053e-05, + "loss": 0.2932, + "step": 8700 + }, + { + "epoch": 0.68, + "learning_rate": 1.3218623481781378e-05, + "loss": 0.1684, + "step": 8710 + }, + { + "epoch": 0.68, + "learning_rate": 1.3210837745250702e-05, + "loss": 0.2822, + "step": 8720 + }, + { + "epoch": 0.68, + "learning_rate": 1.3203052008720026e-05, + "loss": 0.2572, + "step": 8730 + }, + { + "epoch": 0.68, + "learning_rate": 1.3195266272189351e-05, + "loss": 0.2549, + "step": 8740 + }, + { + "epoch": 0.68, + "learning_rate": 1.3187480535658675e-05, + "loss": 0.2183, + "step": 8750 + }, + { + "epoch": 0.68, + "learning_rate": 1.3179694799127998e-05, + "loss": 0.3112, + "step": 8760 + }, + { + "epoch": 0.68, + "learning_rate": 1.3171909062597323e-05, + "loss": 0.2417, + "step": 8770 + }, + { + "epoch": 0.68, + "learning_rate": 1.3164123326066647e-05, + "loss": 0.2393, + "step": 8780 + }, + { + "epoch": 0.68, + "learning_rate": 1.315633758953597e-05, + "loss": 0.2481, + "step": 8790 + }, + { + "epoch": 0.69, + "learning_rate": 1.3148551853005296e-05, + "loss": 0.2245, + "step": 8800 + }, + { + "epoch": 0.69, + "learning_rate": 1.314076611647462e-05, + "loss": 0.2221, + "step": 8810 + }, + { + "epoch": 0.69, + "learning_rate": 1.3132980379943945e-05, + "loss": 0.2406, + "step": 8820 + }, + { + "epoch": 0.69, + "learning_rate": 1.3125194643413269e-05, + "loss": 0.3024, + "step": 8830 + }, + { + "epoch": 0.69, + "learning_rate": 1.3117408906882592e-05, + "loss": 0.2108, + "step": 8840 + }, + { + "epoch": 0.69, + "learning_rate": 1.3109623170351917e-05, + "loss": 0.281, + "step": 8850 + }, + { + "epoch": 0.69, + "learning_rate": 1.3101837433821241e-05, + "loss": 0.2789, + "step": 8860 + }, + { + "epoch": 0.69, + "learning_rate": 1.3094051697290565e-05, + "loss": 0.1919, + "step": 8870 + }, + { + "epoch": 0.69, + "learning_rate": 1.308626596075989e-05, + "loss": 0.3249, + "step": 8880 + }, + { + "epoch": 0.69, + "learning_rate": 1.3078480224229214e-05, + "loss": 0.2109, + "step": 8890 + }, + { + "epoch": 0.69, + "learning_rate": 1.3070694487698537e-05, + "loss": 0.3476, + "step": 8900 + }, + { + "epoch": 0.69, + "learning_rate": 1.3062908751167862e-05, + "loss": 0.2504, + "step": 8910 + }, + { + "epoch": 0.69, + "learning_rate": 1.3055123014637186e-05, + "loss": 0.2318, + "step": 8920 + }, + { + "epoch": 0.7, + "learning_rate": 1.304733727810651e-05, + "loss": 0.196, + "step": 8930 + }, + { + "epoch": 0.7, + "learning_rate": 1.3039551541575835e-05, + "loss": 0.2634, + "step": 8940 + }, + { + "epoch": 0.7, + "learning_rate": 1.3031765805045159e-05, + "loss": 0.2615, + "step": 8950 + }, + { + "epoch": 0.7, + "learning_rate": 1.3023980068514484e-05, + "loss": 0.1575, + "step": 8960 + }, + { + "epoch": 0.7, + "learning_rate": 1.3016194331983807e-05, + "loss": 0.2671, + "step": 8970 + }, + { + "epoch": 0.7, + "learning_rate": 1.3008408595453131e-05, + "loss": 0.2605, + "step": 8980 + }, + { + "epoch": 0.7, + "learning_rate": 1.3000622858922456e-05, + "loss": 0.2248, + "step": 8990 + }, + { + "epoch": 0.7, + "learning_rate": 1.299283712239178e-05, + "loss": 0.3365, + "step": 9000 + }, + { + "epoch": 0.7, + "learning_rate": 1.2985051385861102e-05, + "loss": 0.1897, + "step": 9010 + }, + { + "epoch": 0.7, + "learning_rate": 1.2977265649330429e-05, + "loss": 0.1951, + "step": 9020 + }, + { + "epoch": 0.7, + "learning_rate": 1.2969479912799752e-05, + "loss": 0.2807, + "step": 9030 + }, + { + "epoch": 0.7, + "learning_rate": 1.2961694176269074e-05, + "loss": 0.2551, + "step": 9040 + }, + { + "epoch": 0.7, + "learning_rate": 1.2953908439738401e-05, + "loss": 0.1983, + "step": 9050 + }, + { + "epoch": 0.71, + "learning_rate": 1.2946122703207723e-05, + "loss": 0.2754, + "step": 9060 + }, + { + "epoch": 0.71, + "learning_rate": 1.293833696667705e-05, + "loss": 0.2593, + "step": 9070 + }, + { + "epoch": 0.71, + "learning_rate": 1.2930551230146374e-05, + "loss": 0.1994, + "step": 9080 + }, + { + "epoch": 0.71, + "learning_rate": 1.2922765493615696e-05, + "loss": 0.2162, + "step": 9090 + }, + { + "epoch": 0.71, + "learning_rate": 1.2914979757085023e-05, + "loss": 0.2505, + "step": 9100 + }, + { + "epoch": 0.71, + "learning_rate": 1.2907194020554346e-05, + "loss": 0.2122, + "step": 9110 + }, + { + "epoch": 0.71, + "learning_rate": 1.2899408284023668e-05, + "loss": 0.2839, + "step": 9120 + }, + { + "epoch": 0.71, + "learning_rate": 1.2891622547492995e-05, + "loss": 0.2256, + "step": 9130 + }, + { + "epoch": 0.71, + "learning_rate": 1.2883836810962317e-05, + "loss": 0.2733, + "step": 9140 + }, + { + "epoch": 0.71, + "learning_rate": 1.287605107443164e-05, + "loss": 0.2434, + "step": 9150 + }, + { + "epoch": 0.71, + "learning_rate": 1.2868265337900968e-05, + "loss": 0.2805, + "step": 9160 + }, + { + "epoch": 0.71, + "learning_rate": 1.286047960137029e-05, + "loss": 0.235, + "step": 9170 + }, + { + "epoch": 0.71, + "learning_rate": 1.2852693864839613e-05, + "loss": 0.3376, + "step": 9180 + }, + { + "epoch": 0.72, + "learning_rate": 1.2844908128308939e-05, + "loss": 0.2328, + "step": 9190 + }, + { + "epoch": 0.72, + "learning_rate": 1.2837122391778262e-05, + "loss": 0.2354, + "step": 9200 + }, + { + "epoch": 0.72, + "learning_rate": 1.282933665524759e-05, + "loss": 0.1856, + "step": 9210 + }, + { + "epoch": 0.72, + "learning_rate": 1.2821550918716911e-05, + "loss": 0.2262, + "step": 9220 + }, + { + "epoch": 0.72, + "learning_rate": 1.2813765182186235e-05, + "loss": 0.2725, + "step": 9230 + }, + { + "epoch": 0.72, + "learning_rate": 1.280597944565556e-05, + "loss": 0.2022, + "step": 9240 + }, + { + "epoch": 0.72, + "learning_rate": 1.2798193709124884e-05, + "loss": 0.2515, + "step": 9250 + }, + { + "epoch": 0.72, + "learning_rate": 1.2790407972594207e-05, + "loss": 0.2786, + "step": 9260 + }, + { + "epoch": 0.72, + "learning_rate": 1.2782622236063533e-05, + "loss": 0.1973, + "step": 9270 + }, + { + "epoch": 0.72, + "learning_rate": 1.2774836499532856e-05, + "loss": 0.275, + "step": 9280 + }, + { + "epoch": 0.72, + "learning_rate": 1.276705076300218e-05, + "loss": 0.1834, + "step": 9290 + }, + { + "epoch": 0.72, + "learning_rate": 1.2759265026471505e-05, + "loss": 0.2149, + "step": 9300 + }, + { + "epoch": 0.72, + "learning_rate": 1.2751479289940829e-05, + "loss": 0.2387, + "step": 9310 + }, + { + "epoch": 0.73, + "learning_rate": 1.2743693553410154e-05, + "loss": 0.195, + "step": 9320 + }, + { + "epoch": 0.73, + "learning_rate": 1.2735907816879478e-05, + "loss": 0.2405, + "step": 9330 + }, + { + "epoch": 0.73, + "learning_rate": 1.2728122080348801e-05, + "loss": 0.2592, + "step": 9340 + }, + { + "epoch": 0.73, + "learning_rate": 1.2720336343818126e-05, + "loss": 0.3315, + "step": 9350 + }, + { + "epoch": 0.73, + "learning_rate": 1.271255060728745e-05, + "loss": 0.1888, + "step": 9360 + }, + { + "epoch": 0.73, + "learning_rate": 1.2704764870756774e-05, + "loss": 0.2336, + "step": 9370 + }, + { + "epoch": 0.73, + "learning_rate": 1.2696979134226099e-05, + "loss": 0.1942, + "step": 9380 + }, + { + "epoch": 0.73, + "learning_rate": 1.2689193397695423e-05, + "loss": 0.1785, + "step": 9390 + }, + { + "epoch": 0.73, + "learning_rate": 1.2681407661164746e-05, + "loss": 0.3373, + "step": 9400 + }, + { + "epoch": 0.73, + "learning_rate": 1.2673621924634071e-05, + "loss": 0.2567, + "step": 9410 + }, + { + "epoch": 0.73, + "learning_rate": 1.2665836188103395e-05, + "loss": 0.1999, + "step": 9420 + }, + { + "epoch": 0.73, + "learning_rate": 1.2658050451572719e-05, + "loss": 0.2082, + "step": 9430 + }, + { + "epoch": 0.73, + "learning_rate": 1.2650264715042044e-05, + "loss": 0.2549, + "step": 9440 + }, + { + "epoch": 0.74, + "learning_rate": 1.2642478978511368e-05, + "loss": 0.2606, + "step": 9450 + }, + { + "epoch": 0.74, + "learning_rate": 1.2634693241980693e-05, + "loss": 0.2784, + "step": 9460 + }, + { + "epoch": 0.74, + "learning_rate": 1.2626907505450017e-05, + "loss": 0.2298, + "step": 9470 + }, + { + "epoch": 0.74, + "learning_rate": 1.261912176891934e-05, + "loss": 0.2609, + "step": 9480 + }, + { + "epoch": 0.74, + "learning_rate": 1.2611336032388665e-05, + "loss": 0.229, + "step": 9490 + }, + { + "epoch": 0.74, + "learning_rate": 1.2603550295857989e-05, + "loss": 0.1955, + "step": 9500 + }, + { + "epoch": 0.74, + "learning_rate": 1.2595764559327313e-05, + "loss": 0.2197, + "step": 9510 + }, + { + "epoch": 0.74, + "learning_rate": 1.2587978822796638e-05, + "loss": 0.2211, + "step": 9520 + }, + { + "epoch": 0.74, + "learning_rate": 1.2580193086265962e-05, + "loss": 0.2508, + "step": 9530 + }, + { + "epoch": 0.74, + "learning_rate": 1.2572407349735285e-05, + "loss": 0.2029, + "step": 9540 + }, + { + "epoch": 0.74, + "learning_rate": 1.256462161320461e-05, + "loss": 0.2052, + "step": 9550 + }, + { + "epoch": 0.74, + "learning_rate": 1.2556835876673934e-05, + "loss": 0.1875, + "step": 9560 + }, + { + "epoch": 0.75, + "learning_rate": 1.254905014014326e-05, + "loss": 0.2552, + "step": 9570 + }, + { + "epoch": 0.75, + "learning_rate": 1.2541264403612583e-05, + "loss": 0.2349, + "step": 9580 + }, + { + "epoch": 0.75, + "learning_rate": 1.2533478667081907e-05, + "loss": 0.2103, + "step": 9590 + }, + { + "epoch": 0.75, + "learning_rate": 1.2525692930551232e-05, + "loss": 0.2292, + "step": 9600 + }, + { + "epoch": 0.75, + "learning_rate": 1.2517907194020555e-05, + "loss": 0.2763, + "step": 9610 + }, + { + "epoch": 0.75, + "learning_rate": 1.2510121457489879e-05, + "loss": 0.186, + "step": 9620 + }, + { + "epoch": 0.75, + "learning_rate": 1.2502335720959204e-05, + "loss": 0.1946, + "step": 9630 + }, + { + "epoch": 0.75, + "learning_rate": 1.2494549984428528e-05, + "loss": 0.2341, + "step": 9640 + }, + { + "epoch": 0.75, + "learning_rate": 1.2486764247897852e-05, + "loss": 0.2072, + "step": 9650 + }, + { + "epoch": 0.75, + "learning_rate": 1.2478978511367177e-05, + "loss": 0.2128, + "step": 9660 + }, + { + "epoch": 0.75, + "learning_rate": 1.24711927748365e-05, + "loss": 0.2115, + "step": 9670 + }, + { + "epoch": 0.75, + "learning_rate": 1.2463407038305824e-05, + "loss": 0.2096, + "step": 9680 + }, + { + "epoch": 0.75, + "learning_rate": 1.245562130177515e-05, + "loss": 0.2418, + "step": 9690 + }, + { + "epoch": 0.76, + "learning_rate": 1.2447835565244473e-05, + "loss": 0.2884, + "step": 9700 + }, + { + "epoch": 0.76, + "learning_rate": 1.2440049828713798e-05, + "loss": 0.3362, + "step": 9710 + }, + { + "epoch": 0.76, + "learning_rate": 1.2432264092183122e-05, + "loss": 0.1847, + "step": 9720 + }, + { + "epoch": 0.76, + "learning_rate": 1.2424478355652445e-05, + "loss": 0.2206, + "step": 9730 + }, + { + "epoch": 0.76, + "learning_rate": 1.241669261912177e-05, + "loss": 0.2059, + "step": 9740 + }, + { + "epoch": 0.76, + "learning_rate": 1.2408906882591094e-05, + "loss": 0.2209, + "step": 9750 + }, + { + "epoch": 0.76, + "learning_rate": 1.2401121146060418e-05, + "loss": 0.2191, + "step": 9760 + }, + { + "epoch": 0.76, + "learning_rate": 1.2393335409529743e-05, + "loss": 0.2439, + "step": 9770 + }, + { + "epoch": 0.76, + "learning_rate": 1.2385549672999067e-05, + "loss": 0.2058, + "step": 9780 + }, + { + "epoch": 0.76, + "learning_rate": 1.237776393646839e-05, + "loss": 0.1416, + "step": 9790 + }, + { + "epoch": 0.76, + "learning_rate": 1.2369978199937716e-05, + "loss": 0.2964, + "step": 9800 + }, + { + "epoch": 0.76, + "learning_rate": 1.236219246340704e-05, + "loss": 0.2656, + "step": 9810 + }, + { + "epoch": 0.76, + "learning_rate": 1.2354406726876365e-05, + "loss": 0.2732, + "step": 9820 + }, + { + "epoch": 0.77, + "learning_rate": 1.2346620990345688e-05, + "loss": 0.2273, + "step": 9830 + }, + { + "epoch": 0.77, + "learning_rate": 1.2338835253815012e-05, + "loss": 0.245, + "step": 9840 + }, + { + "epoch": 0.77, + "learning_rate": 1.2331049517284337e-05, + "loss": 0.1903, + "step": 9850 + }, + { + "epoch": 0.77, + "learning_rate": 1.232326378075366e-05, + "loss": 0.219, + "step": 9860 + }, + { + "epoch": 0.77, + "learning_rate": 1.2315478044222984e-05, + "loss": 0.2203, + "step": 9870 + }, + { + "epoch": 0.77, + "learning_rate": 1.230769230769231e-05, + "loss": 0.3253, + "step": 9880 + }, + { + "epoch": 0.77, + "learning_rate": 1.2299906571161633e-05, + "loss": 0.1897, + "step": 9890 + }, + { + "epoch": 0.77, + "learning_rate": 1.2292120834630957e-05, + "loss": 0.3111, + "step": 9900 + }, + { + "epoch": 0.77, + "learning_rate": 1.2284335098100282e-05, + "loss": 0.2446, + "step": 9910 + }, + { + "epoch": 0.77, + "learning_rate": 1.2276549361569606e-05, + "loss": 0.2078, + "step": 9920 + }, + { + "epoch": 0.77, + "learning_rate": 1.2268763625038928e-05, + "loss": 0.1927, + "step": 9930 + }, + { + "epoch": 0.77, + "learning_rate": 1.2260977888508255e-05, + "loss": 0.2107, + "step": 9940 + }, + { + "epoch": 0.77, + "learning_rate": 1.2253192151977578e-05, + "loss": 0.1688, + "step": 9950 + }, + { + "epoch": 0.78, + "learning_rate": 1.2245406415446904e-05, + "loss": 0.2085, + "step": 9960 + }, + { + "epoch": 0.78, + "learning_rate": 1.2237620678916227e-05, + "loss": 0.1623, + "step": 9970 + }, + { + "epoch": 0.78, + "learning_rate": 1.222983494238555e-05, + "loss": 0.2193, + "step": 9980 + }, + { + "epoch": 0.78, + "learning_rate": 1.2222049205854876e-05, + "loss": 0.2748, + "step": 9990 + }, + { + "epoch": 0.78, + "learning_rate": 1.22142634693242e-05, + "loss": 0.2054, + "step": 10000 + }, + { + "epoch": 0.78, + "learning_rate": 1.2206477732793522e-05, + "loss": 0.1815, + "step": 10010 + }, + { + "epoch": 0.78, + "learning_rate": 1.2198691996262849e-05, + "loss": 0.2605, + "step": 10020 + }, + { + "epoch": 0.78, + "learning_rate": 1.2190906259732172e-05, + "loss": 0.2719, + "step": 10030 + }, + { + "epoch": 0.78, + "learning_rate": 1.2183120523201494e-05, + "loss": 0.2284, + "step": 10040 + }, + { + "epoch": 0.78, + "learning_rate": 1.2175334786670821e-05, + "loss": 0.2895, + "step": 10050 + }, + { + "epoch": 0.78, + "learning_rate": 1.2167549050140143e-05, + "loss": 0.2278, + "step": 10060 + }, + { + "epoch": 0.78, + "learning_rate": 1.215976331360947e-05, + "loss": 0.2383, + "step": 10070 + }, + { + "epoch": 0.78, + "learning_rate": 1.2151977577078794e-05, + "loss": 0.2191, + "step": 10080 + }, + { + "epoch": 0.79, + "learning_rate": 1.2144191840548116e-05, + "loss": 0.1865, + "step": 10090 + }, + { + "epoch": 0.79, + "learning_rate": 1.2136406104017443e-05, + "loss": 0.3054, + "step": 10100 + }, + { + "epoch": 0.79, + "learning_rate": 1.2128620367486765e-05, + "loss": 0.245, + "step": 10110 + }, + { + "epoch": 0.79, + "learning_rate": 1.2120834630956088e-05, + "loss": 0.2497, + "step": 10120 + }, + { + "epoch": 0.79, + "learning_rate": 1.2113048894425415e-05, + "loss": 0.2181, + "step": 10130 + }, + { + "epoch": 0.79, + "learning_rate": 1.2105263157894737e-05, + "loss": 0.1896, + "step": 10140 + }, + { + "epoch": 0.79, + "learning_rate": 1.209747742136406e-05, + "loss": 0.2177, + "step": 10150 + }, + { + "epoch": 0.79, + "learning_rate": 1.2089691684833386e-05, + "loss": 0.2672, + "step": 10160 + }, + { + "epoch": 0.79, + "learning_rate": 1.208190594830271e-05, + "loss": 0.2449, + "step": 10170 + }, + { + "epoch": 0.79, + "learning_rate": 1.2074120211772033e-05, + "loss": 0.2086, + "step": 10180 + }, + { + "epoch": 0.79, + "learning_rate": 1.2066334475241358e-05, + "loss": 0.2144, + "step": 10190 + }, + { + "epoch": 0.79, + "learning_rate": 1.2058548738710682e-05, + "loss": 0.212, + "step": 10200 + }, + { + "epoch": 0.79, + "learning_rate": 1.2050763002180007e-05, + "loss": 0.3234, + "step": 10210 + }, + { + "epoch": 0.8, + "learning_rate": 1.2042977265649331e-05, + "loss": 0.23, + "step": 10220 + }, + { + "epoch": 0.8, + "learning_rate": 1.2035191529118655e-05, + "loss": 0.2608, + "step": 10230 + }, + { + "epoch": 0.8, + "learning_rate": 1.202740579258798e-05, + "loss": 0.2053, + "step": 10240 + }, + { + "epoch": 0.8, + "learning_rate": 1.2019620056057303e-05, + "loss": 0.1606, + "step": 10250 + }, + { + "epoch": 0.8, + "learning_rate": 1.2011834319526627e-05, + "loss": 0.2766, + "step": 10260 + }, + { + "epoch": 0.8, + "learning_rate": 1.2004048582995952e-05, + "loss": 0.2145, + "step": 10270 + }, + { + "epoch": 0.8, + "learning_rate": 1.1996262846465276e-05, + "loss": 0.2709, + "step": 10280 + }, + { + "epoch": 0.8, + "learning_rate": 1.19884771099346e-05, + "loss": 0.2038, + "step": 10290 + }, + { + "epoch": 0.8, + "learning_rate": 1.1980691373403925e-05, + "loss": 0.2132, + "step": 10300 + }, + { + "epoch": 0.8, + "learning_rate": 1.1972905636873248e-05, + "loss": 0.2122, + "step": 10310 + }, + { + "epoch": 0.8, + "learning_rate": 1.1965119900342574e-05, + "loss": 0.2436, + "step": 10320 + }, + { + "epoch": 0.8, + "learning_rate": 1.1957334163811897e-05, + "loss": 0.2474, + "step": 10330 + }, + { + "epoch": 0.81, + "learning_rate": 1.1949548427281221e-05, + "loss": 0.2912, + "step": 10340 + }, + { + "epoch": 0.81, + "learning_rate": 1.1941762690750546e-05, + "loss": 0.1962, + "step": 10350 + }, + { + "epoch": 0.81, + "learning_rate": 1.193397695421987e-05, + "loss": 0.1368, + "step": 10360 + }, + { + "epoch": 0.81, + "learning_rate": 1.1926191217689194e-05, + "loss": 0.2864, + "step": 10370 + }, + { + "epoch": 0.81, + "learning_rate": 1.1918405481158519e-05, + "loss": 0.152, + "step": 10380 + }, + { + "epoch": 0.81, + "learning_rate": 1.1910619744627842e-05, + "loss": 0.1998, + "step": 10390 + }, + { + "epoch": 0.81, + "learning_rate": 1.1902834008097166e-05, + "loss": 0.256, + "step": 10400 + }, + { + "epoch": 0.81, + "learning_rate": 1.1895048271566491e-05, + "loss": 0.1882, + "step": 10410 + }, + { + "epoch": 0.81, + "learning_rate": 1.1887262535035815e-05, + "loss": 0.1613, + "step": 10420 + }, + { + "epoch": 0.81, + "learning_rate": 1.1879476798505139e-05, + "loss": 0.257, + "step": 10430 + }, + { + "epoch": 0.81, + "learning_rate": 1.1871691061974464e-05, + "loss": 0.253, + "step": 10440 + }, + { + "epoch": 0.81, + "learning_rate": 1.1863905325443787e-05, + "loss": 0.2488, + "step": 10450 + }, + { + "epoch": 0.81, + "learning_rate": 1.1856119588913113e-05, + "loss": 0.2032, + "step": 10460 + }, + { + "epoch": 0.82, + "learning_rate": 1.1848333852382436e-05, + "loss": 0.1911, + "step": 10470 + }, + { + "epoch": 0.82, + "learning_rate": 1.184054811585176e-05, + "loss": 0.1919, + "step": 10480 + }, + { + "epoch": 0.82, + "learning_rate": 1.1832762379321085e-05, + "loss": 0.2179, + "step": 10490 + }, + { + "epoch": 0.82, + "learning_rate": 1.1824976642790409e-05, + "loss": 0.1749, + "step": 10500 + }, + { + "epoch": 0.82, + "learning_rate": 1.1817190906259732e-05, + "loss": 0.2601, + "step": 10510 + }, + { + "epoch": 0.82, + "learning_rate": 1.1809405169729058e-05, + "loss": 0.1821, + "step": 10520 + }, + { + "epoch": 0.82, + "learning_rate": 1.1801619433198381e-05, + "loss": 0.1973, + "step": 10530 + }, + { + "epoch": 0.82, + "learning_rate": 1.1793833696667705e-05, + "loss": 0.2062, + "step": 10540 + }, + { + "epoch": 0.82, + "learning_rate": 1.178604796013703e-05, + "loss": 0.2057, + "step": 10550 + }, + { + "epoch": 0.82, + "learning_rate": 1.1778262223606354e-05, + "loss": 0.1592, + "step": 10560 + }, + { + "epoch": 0.82, + "learning_rate": 1.177047648707568e-05, + "loss": 0.2175, + "step": 10570 + }, + { + "epoch": 0.82, + "learning_rate": 1.1762690750545003e-05, + "loss": 0.1738, + "step": 10580 + }, + { + "epoch": 0.82, + "learning_rate": 1.1754905014014326e-05, + "loss": 0.2378, + "step": 10590 + }, + { + "epoch": 0.83, + "learning_rate": 1.1747119277483652e-05, + "loss": 0.1743, + "step": 10600 + }, + { + "epoch": 0.83, + "learning_rate": 1.1739333540952975e-05, + "loss": 0.2354, + "step": 10610 + }, + { + "epoch": 0.83, + "learning_rate": 1.1731547804422299e-05, + "loss": 0.1741, + "step": 10620 + }, + { + "epoch": 0.83, + "learning_rate": 1.1723762067891624e-05, + "loss": 0.1684, + "step": 10630 + }, + { + "epoch": 0.83, + "learning_rate": 1.1715976331360948e-05, + "loss": 0.2199, + "step": 10640 + }, + { + "epoch": 0.83, + "learning_rate": 1.1708190594830271e-05, + "loss": 0.2124, + "step": 10650 + }, + { + "epoch": 0.83, + "learning_rate": 1.1700404858299597e-05, + "loss": 0.2659, + "step": 10660 + }, + { + "epoch": 0.83, + "learning_rate": 1.169261912176892e-05, + "loss": 0.2294, + "step": 10670 + }, + { + "epoch": 0.83, + "learning_rate": 1.1684833385238244e-05, + "loss": 0.1339, + "step": 10680 + }, + { + "epoch": 0.83, + "learning_rate": 1.167704764870757e-05, + "loss": 0.2392, + "step": 10690 + }, + { + "epoch": 0.83, + "learning_rate": 1.1669261912176893e-05, + "loss": 0.1526, + "step": 10700 + }, + { + "epoch": 0.83, + "learning_rate": 1.1661476175646218e-05, + "loss": 0.1836, + "step": 10710 + }, + { + "epoch": 0.83, + "learning_rate": 1.1653690439115542e-05, + "loss": 0.2258, + "step": 10720 + }, + { + "epoch": 0.84, + "learning_rate": 1.1645904702584865e-05, + "loss": 0.1856, + "step": 10730 + }, + { + "epoch": 0.84, + "learning_rate": 1.163811896605419e-05, + "loss": 0.2287, + "step": 10740 + }, + { + "epoch": 0.84, + "learning_rate": 1.1630333229523514e-05, + "loss": 0.2017, + "step": 10750 + }, + { + "epoch": 0.84, + "learning_rate": 1.1622547492992838e-05, + "loss": 0.1682, + "step": 10760 + }, + { + "epoch": 0.84, + "learning_rate": 1.1614761756462163e-05, + "loss": 0.2564, + "step": 10770 + }, + { + "epoch": 0.84, + "learning_rate": 1.1606976019931487e-05, + "loss": 0.1868, + "step": 10780 + }, + { + "epoch": 0.84, + "learning_rate": 1.159919028340081e-05, + "loss": 0.1721, + "step": 10790 + }, + { + "epoch": 0.84, + "learning_rate": 1.1591404546870136e-05, + "loss": 0.2094, + "step": 10800 + }, + { + "epoch": 0.84, + "learning_rate": 1.158361881033946e-05, + "loss": 0.2318, + "step": 10810 + }, + { + "epoch": 0.84, + "learning_rate": 1.1575833073808785e-05, + "loss": 0.2198, + "step": 10820 + }, + { + "epoch": 0.84, + "learning_rate": 1.1568047337278108e-05, + "loss": 0.2162, + "step": 10830 + }, + { + "epoch": 0.84, + "learning_rate": 1.1560261600747432e-05, + "loss": 0.2424, + "step": 10840 + }, + { + "epoch": 0.84, + "learning_rate": 1.1552475864216757e-05, + "loss": 0.2163, + "step": 10850 + }, + { + "epoch": 0.85, + "learning_rate": 1.154469012768608e-05, + "loss": 0.1945, + "step": 10860 + }, + { + "epoch": 0.85, + "learning_rate": 1.1536904391155404e-05, + "loss": 0.1453, + "step": 10870 + }, + { + "epoch": 0.85, + "learning_rate": 1.152911865462473e-05, + "loss": 0.2658, + "step": 10880 + }, + { + "epoch": 0.85, + "learning_rate": 1.1521332918094053e-05, + "loss": 0.1744, + "step": 10890 + }, + { + "epoch": 0.85, + "learning_rate": 1.1513547181563375e-05, + "loss": 0.1922, + "step": 10900 + }, + { + "epoch": 0.85, + "learning_rate": 1.1505761445032702e-05, + "loss": 0.2594, + "step": 10910 + }, + { + "epoch": 0.85, + "learning_rate": 1.1497975708502026e-05, + "loss": 0.2348, + "step": 10920 + }, + { + "epoch": 0.85, + "learning_rate": 1.1490189971971348e-05, + "loss": 0.2265, + "step": 10930 + }, + { + "epoch": 0.85, + "learning_rate": 1.1482404235440675e-05, + "loss": 0.231, + "step": 10940 + }, + { + "epoch": 0.85, + "learning_rate": 1.1474618498909998e-05, + "loss": 0.2046, + "step": 10950 + }, + { + "epoch": 0.85, + "learning_rate": 1.1466832762379323e-05, + "loss": 0.2101, + "step": 10960 + }, + { + "epoch": 0.85, + "learning_rate": 1.1459047025848647e-05, + "loss": 0.224, + "step": 10970 + }, + { + "epoch": 0.85, + "learning_rate": 1.1451261289317969e-05, + "loss": 0.1564, + "step": 10980 + }, + { + "epoch": 0.86, + "learning_rate": 1.1443475552787296e-05, + "loss": 0.2457, + "step": 10990 + }, + { + "epoch": 0.86, + "learning_rate": 1.143568981625662e-05, + "loss": 0.2481, + "step": 11000 + }, + { + "epoch": 0.86, + "learning_rate": 1.1427904079725942e-05, + "loss": 0.2572, + "step": 11010 + }, + { + "epoch": 0.86, + "learning_rate": 1.1420118343195268e-05, + "loss": 0.1696, + "step": 11020 + }, + { + "epoch": 0.86, + "learning_rate": 1.141233260666459e-05, + "loss": 0.247, + "step": 11030 + }, + { + "epoch": 0.86, + "learning_rate": 1.1404546870133914e-05, + "loss": 0.2109, + "step": 11040 + }, + { + "epoch": 0.86, + "learning_rate": 1.1396761133603241e-05, + "loss": 0.1912, + "step": 11050 + }, + { + "epoch": 0.86, + "learning_rate": 1.1388975397072563e-05, + "loss": 0.2224, + "step": 11060 + }, + { + "epoch": 0.86, + "learning_rate": 1.138118966054189e-05, + "loss": 0.1924, + "step": 11070 + }, + { + "epoch": 0.86, + "learning_rate": 1.1373403924011212e-05, + "loss": 0.2314, + "step": 11080 + }, + { + "epoch": 0.86, + "learning_rate": 1.1365618187480535e-05, + "loss": 0.256, + "step": 11090 + }, + { + "epoch": 0.86, + "learning_rate": 1.1357832450949862e-05, + "loss": 0.2141, + "step": 11100 + }, + { + "epoch": 0.86, + "learning_rate": 1.1350046714419184e-05, + "loss": 0.1559, + "step": 11110 + }, + { + "epoch": 0.87, + "learning_rate": 1.1342260977888508e-05, + "loss": 0.1879, + "step": 11120 + }, + { + "epoch": 0.87, + "learning_rate": 1.1334475241357833e-05, + "loss": 0.1849, + "step": 11130 + }, + { + "epoch": 0.87, + "learning_rate": 1.1326689504827157e-05, + "loss": 0.1812, + "step": 11140 + }, + { + "epoch": 0.87, + "learning_rate": 1.131890376829648e-05, + "loss": 0.1766, + "step": 11150 + }, + { + "epoch": 0.87, + "learning_rate": 1.1311118031765806e-05, + "loss": 0.1857, + "step": 11160 + }, + { + "epoch": 0.87, + "learning_rate": 1.130333229523513e-05, + "loss": 0.1891, + "step": 11170 + }, + { + "epoch": 0.87, + "learning_rate": 1.1295546558704453e-05, + "loss": 0.2172, + "step": 11180 + }, + { + "epoch": 0.87, + "learning_rate": 1.1287760822173778e-05, + "loss": 0.2251, + "step": 11190 + }, + { + "epoch": 0.87, + "learning_rate": 1.1279975085643102e-05, + "loss": 0.2082, + "step": 11200 + }, + { + "epoch": 0.87, + "learning_rate": 1.1272189349112427e-05, + "loss": 0.1885, + "step": 11210 + }, + { + "epoch": 0.87, + "learning_rate": 1.126440361258175e-05, + "loss": 0.2178, + "step": 11220 + }, + { + "epoch": 0.87, + "learning_rate": 1.1256617876051074e-05, + "loss": 0.1842, + "step": 11230 + }, + { + "epoch": 0.88, + "learning_rate": 1.12488321395204e-05, + "loss": 0.221, + "step": 11240 + }, + { + "epoch": 0.88, + "learning_rate": 1.1241046402989723e-05, + "loss": 0.2308, + "step": 11250 + }, + { + "epoch": 0.88, + "learning_rate": 1.1233260666459047e-05, + "loss": 0.178, + "step": 11260 + }, + { + "epoch": 0.88, + "learning_rate": 1.1225474929928372e-05, + "loss": 0.266, + "step": 11270 + }, + { + "epoch": 0.88, + "learning_rate": 1.1217689193397696e-05, + "loss": 0.2185, + "step": 11280 + }, + { + "epoch": 0.88, + "learning_rate": 1.120990345686702e-05, + "loss": 0.1754, + "step": 11290 + }, + { + "epoch": 0.88, + "learning_rate": 1.1202117720336345e-05, + "loss": 0.181, + "step": 11300 + }, + { + "epoch": 0.88, + "learning_rate": 1.1194331983805668e-05, + "loss": 0.2556, + "step": 11310 + }, + { + "epoch": 0.88, + "learning_rate": 1.1186546247274994e-05, + "loss": 0.1735, + "step": 11320 + }, + { + "epoch": 0.88, + "learning_rate": 1.1178760510744317e-05, + "loss": 0.1829, + "step": 11330 + }, + { + "epoch": 0.88, + "learning_rate": 1.117097477421364e-05, + "loss": 0.2738, + "step": 11340 + }, + { + "epoch": 0.88, + "learning_rate": 1.1163189037682966e-05, + "loss": 0.2314, + "step": 11350 + }, + { + "epoch": 0.88, + "learning_rate": 1.115540330115229e-05, + "loss": 0.2023, + "step": 11360 + }, + { + "epoch": 0.89, + "learning_rate": 1.1147617564621613e-05, + "loss": 0.2612, + "step": 11370 + }, + { + "epoch": 0.89, + "learning_rate": 1.1139831828090939e-05, + "loss": 0.298, + "step": 11380 + }, + { + "epoch": 0.89, + "learning_rate": 1.1132046091560262e-05, + "loss": 0.2385, + "step": 11390 + }, + { + "epoch": 0.89, + "learning_rate": 1.1124260355029586e-05, + "loss": 0.2713, + "step": 11400 + }, + { + "epoch": 0.89, + "learning_rate": 1.1116474618498911e-05, + "loss": 0.1887, + "step": 11410 + }, + { + "epoch": 0.89, + "learning_rate": 1.1108688881968235e-05, + "loss": 0.2056, + "step": 11420 + }, + { + "epoch": 0.89, + "learning_rate": 1.1100903145437558e-05, + "loss": 0.2475, + "step": 11430 + }, + { + "epoch": 0.89, + "learning_rate": 1.1093117408906884e-05, + "loss": 0.1947, + "step": 11440 + }, + { + "epoch": 0.89, + "learning_rate": 1.1085331672376207e-05, + "loss": 0.2345, + "step": 11450 + }, + { + "epoch": 0.89, + "learning_rate": 1.1077545935845533e-05, + "loss": 0.2265, + "step": 11460 + }, + { + "epoch": 0.89, + "learning_rate": 1.1069760199314856e-05, + "loss": 0.2264, + "step": 11470 + }, + { + "epoch": 0.89, + "learning_rate": 1.106197446278418e-05, + "loss": 0.1302, + "step": 11480 + }, + { + "epoch": 0.89, + "learning_rate": 1.1054188726253505e-05, + "loss": 0.2255, + "step": 11490 + }, + { + "epoch": 0.9, + "learning_rate": 1.1046402989722829e-05, + "loss": 0.1996, + "step": 11500 + }, + { + "epoch": 0.9, + "learning_rate": 1.1038617253192152e-05, + "loss": 0.1856, + "step": 11510 + }, + { + "epoch": 0.9, + "learning_rate": 1.1030831516661478e-05, + "loss": 0.2494, + "step": 11520 + }, + { + "epoch": 0.9, + "learning_rate": 1.1023045780130801e-05, + "loss": 0.1817, + "step": 11530 + }, + { + "epoch": 0.9, + "learning_rate": 1.1015260043600125e-05, + "loss": 0.2428, + "step": 11540 + }, + { + "epoch": 0.9, + "learning_rate": 1.100747430706945e-05, + "loss": 0.1772, + "step": 11550 + }, + { + "epoch": 0.9, + "learning_rate": 1.0999688570538774e-05, + "loss": 0.1547, + "step": 11560 + }, + { + "epoch": 0.9, + "learning_rate": 1.0991902834008099e-05, + "loss": 0.1982, + "step": 11570 + }, + { + "epoch": 0.9, + "learning_rate": 1.0984117097477423e-05, + "loss": 0.1974, + "step": 11580 + }, + { + "epoch": 0.9, + "learning_rate": 1.0976331360946746e-05, + "loss": 0.2009, + "step": 11590 + }, + { + "epoch": 0.9, + "learning_rate": 1.0968545624416071e-05, + "loss": 0.1843, + "step": 11600 + }, + { + "epoch": 0.9, + "learning_rate": 1.0960759887885395e-05, + "loss": 0.2159, + "step": 11610 + }, + { + "epoch": 0.9, + "learning_rate": 1.0952974151354719e-05, + "loss": 0.2529, + "step": 11620 + }, + { + "epoch": 0.91, + "learning_rate": 1.0945188414824044e-05, + "loss": 0.1819, + "step": 11630 + }, + { + "epoch": 0.91, + "learning_rate": 1.0937402678293368e-05, + "loss": 0.1801, + "step": 11640 + }, + { + "epoch": 0.91, + "learning_rate": 1.0929616941762691e-05, + "loss": 0.1975, + "step": 11650 + }, + { + "epoch": 0.91, + "learning_rate": 1.0921831205232017e-05, + "loss": 0.2526, + "step": 11660 + }, + { + "epoch": 0.91, + "learning_rate": 1.091404546870134e-05, + "loss": 0.1886, + "step": 11670 + }, + { + "epoch": 0.91, + "learning_rate": 1.0906259732170664e-05, + "loss": 0.2286, + "step": 11680 + }, + { + "epoch": 0.91, + "learning_rate": 1.0898473995639989e-05, + "loss": 0.1451, + "step": 11690 + }, + { + "epoch": 0.91, + "learning_rate": 1.0890688259109313e-05, + "loss": 0.2952, + "step": 11700 + }, + { + "epoch": 0.91, + "learning_rate": 1.0882902522578638e-05, + "loss": 0.2533, + "step": 11710 + }, + { + "epoch": 0.91, + "learning_rate": 1.0875116786047962e-05, + "loss": 0.2076, + "step": 11720 + }, + { + "epoch": 0.91, + "learning_rate": 1.0867331049517285e-05, + "loss": 0.1807, + "step": 11730 + }, + { + "epoch": 0.91, + "learning_rate": 1.085954531298661e-05, + "loss": 0.2167, + "step": 11740 + }, + { + "epoch": 0.91, + "learning_rate": 1.0851759576455934e-05, + "loss": 0.2054, + "step": 11750 + }, + { + "epoch": 0.92, + "learning_rate": 1.0843973839925258e-05, + "loss": 0.2179, + "step": 11760 + }, + { + "epoch": 0.92, + "learning_rate": 1.0836188103394583e-05, + "loss": 0.2269, + "step": 11770 + }, + { + "epoch": 0.92, + "learning_rate": 1.0828402366863907e-05, + "loss": 0.166, + "step": 11780 + }, + { + "epoch": 0.92, + "learning_rate": 1.082061663033323e-05, + "loss": 0.2171, + "step": 11790 + }, + { + "epoch": 0.92, + "learning_rate": 1.0812830893802555e-05, + "loss": 0.1731, + "step": 11800 + }, + { + "epoch": 0.92, + "learning_rate": 1.0805045157271879e-05, + "loss": 0.2032, + "step": 11810 + }, + { + "epoch": 0.92, + "learning_rate": 1.0797259420741204e-05, + "loss": 0.1816, + "step": 11820 + }, + { + "epoch": 0.92, + "learning_rate": 1.0789473684210528e-05, + "loss": 0.1449, + "step": 11830 + }, + { + "epoch": 0.92, + "learning_rate": 1.0781687947679852e-05, + "loss": 0.275, + "step": 11840 + }, + { + "epoch": 0.92, + "learning_rate": 1.0773902211149177e-05, + "loss": 0.2051, + "step": 11850 + }, + { + "epoch": 0.92, + "learning_rate": 1.07661164746185e-05, + "loss": 0.2313, + "step": 11860 + }, + { + "epoch": 0.92, + "learning_rate": 1.0758330738087824e-05, + "loss": 0.218, + "step": 11870 + }, + { + "epoch": 0.92, + "learning_rate": 1.075054500155715e-05, + "loss": 0.2417, + "step": 11880 + }, + { + "epoch": 0.93, + "learning_rate": 1.0742759265026473e-05, + "loss": 0.2445, + "step": 11890 + }, + { + "epoch": 0.93, + "learning_rate": 1.0734973528495795e-05, + "loss": 0.1577, + "step": 11900 + }, + { + "epoch": 0.93, + "learning_rate": 1.0727187791965122e-05, + "loss": 0.1642, + "step": 11910 + }, + { + "epoch": 0.93, + "learning_rate": 1.0719402055434445e-05, + "loss": 0.1732, + "step": 11920 + }, + { + "epoch": 0.93, + "learning_rate": 1.0711616318903767e-05, + "loss": 0.2135, + "step": 11930 + }, + { + "epoch": 0.93, + "learning_rate": 1.0703830582373094e-05, + "loss": 0.2901, + "step": 11940 + }, + { + "epoch": 0.93, + "learning_rate": 1.0696044845842416e-05, + "loss": 0.205, + "step": 11950 + }, + { + "epoch": 0.93, + "learning_rate": 1.0688259109311743e-05, + "loss": 0.1833, + "step": 11960 + }, + { + "epoch": 0.93, + "learning_rate": 1.0680473372781067e-05, + "loss": 0.1664, + "step": 11970 + }, + { + "epoch": 0.93, + "learning_rate": 1.0672687636250389e-05, + "loss": 0.1935, + "step": 11980 + }, + { + "epoch": 0.93, + "learning_rate": 1.0664901899719716e-05, + "loss": 0.2205, + "step": 11990 + }, + { + "epoch": 0.93, + "learning_rate": 1.0657116163189038e-05, + "loss": 0.1953, + "step": 12000 + }, + { + "epoch": 0.94, + "learning_rate": 1.0649330426658361e-05, + "loss": 0.1638, + "step": 12010 + }, + { + "epoch": 0.94, + "learning_rate": 1.0641544690127688e-05, + "loss": 0.2432, + "step": 12020 + }, + { + "epoch": 0.94, + "learning_rate": 1.063375895359701e-05, + "loss": 0.1405, + "step": 12030 + }, + { + "epoch": 0.94, + "learning_rate": 1.0625973217066334e-05, + "loss": 0.1838, + "step": 12040 + }, + { + "epoch": 0.94, + "learning_rate": 1.0618187480535659e-05, + "loss": 0.2223, + "step": 12050 + }, + { + "epoch": 0.94, + "learning_rate": 1.0610401744004983e-05, + "loss": 0.1746, + "step": 12060 + }, + { + "epoch": 0.94, + "learning_rate": 1.060261600747431e-05, + "loss": 0.2268, + "step": 12070 + }, + { + "epoch": 0.94, + "learning_rate": 1.0594830270943632e-05, + "loss": 0.1739, + "step": 12080 + }, + { + "epoch": 0.94, + "learning_rate": 1.0587044534412955e-05, + "loss": 0.1762, + "step": 12090 + }, + { + "epoch": 0.94, + "learning_rate": 1.057925879788228e-05, + "loss": 0.2067, + "step": 12100 + }, + { + "epoch": 0.94, + "learning_rate": 1.0571473061351604e-05, + "loss": 0.1499, + "step": 12110 + }, + { + "epoch": 0.94, + "learning_rate": 1.0563687324820928e-05, + "loss": 0.1828, + "step": 12120 + }, + { + "epoch": 0.94, + "learning_rate": 1.0555901588290253e-05, + "loss": 0.1995, + "step": 12130 + }, + { + "epoch": 0.95, + "learning_rate": 1.0548115851759577e-05, + "loss": 0.2062, + "step": 12140 + }, + { + "epoch": 0.95, + "learning_rate": 1.05403301152289e-05, + "loss": 0.2472, + "step": 12150 + }, + { + "epoch": 0.95, + "learning_rate": 1.0532544378698226e-05, + "loss": 0.2155, + "step": 12160 + }, + { + "epoch": 0.95, + "learning_rate": 1.052475864216755e-05, + "loss": 0.2805, + "step": 12170 + }, + { + "epoch": 0.95, + "learning_rate": 1.0516972905636873e-05, + "loss": 0.1677, + "step": 12180 + }, + { + "epoch": 0.95, + "learning_rate": 1.0509187169106198e-05, + "loss": 0.1805, + "step": 12190 + }, + { + "epoch": 0.95, + "learning_rate": 1.0501401432575522e-05, + "loss": 0.2232, + "step": 12200 + }, + { + "epoch": 0.95, + "learning_rate": 1.0493615696044847e-05, + "loss": 0.2086, + "step": 12210 + }, + { + "epoch": 0.95, + "learning_rate": 1.048582995951417e-05, + "loss": 0.1905, + "step": 12220 + }, + { + "epoch": 0.95, + "learning_rate": 1.0478044222983494e-05, + "loss": 0.2523, + "step": 12230 + }, + { + "epoch": 0.95, + "learning_rate": 1.047025848645282e-05, + "loss": 0.1813, + "step": 12240 + }, + { + "epoch": 0.95, + "learning_rate": 1.0462472749922143e-05, + "loss": 0.1759, + "step": 12250 + }, + { + "epoch": 0.95, + "learning_rate": 1.0454687013391467e-05, + "loss": 0.1958, + "step": 12260 + }, + { + "epoch": 0.96, + "learning_rate": 1.0446901276860792e-05, + "loss": 0.143, + "step": 12270 + }, + { + "epoch": 0.96, + "learning_rate": 1.0439115540330116e-05, + "loss": 0.2086, + "step": 12280 + }, + { + "epoch": 0.96, + "learning_rate": 1.043132980379944e-05, + "loss": 0.2147, + "step": 12290 + }, + { + "epoch": 0.96, + "learning_rate": 1.0423544067268765e-05, + "loss": 0.2334, + "step": 12300 + }, + { + "epoch": 0.96, + "learning_rate": 1.0415758330738088e-05, + "loss": 0.1718, + "step": 12310 + }, + { + "epoch": 0.96, + "learning_rate": 1.0407972594207413e-05, + "loss": 0.2122, + "step": 12320 + }, + { + "epoch": 0.96, + "learning_rate": 1.0400186857676737e-05, + "loss": 0.2273, + "step": 12330 + }, + { + "epoch": 0.96, + "learning_rate": 1.039240112114606e-05, + "loss": 0.1885, + "step": 12340 + }, + { + "epoch": 0.96, + "learning_rate": 1.0384615384615386e-05, + "loss": 0.217, + "step": 12350 + }, + { + "epoch": 0.96, + "learning_rate": 1.037682964808471e-05, + "loss": 0.2304, + "step": 12360 + }, + { + "epoch": 0.96, + "learning_rate": 1.0369043911554033e-05, + "loss": 0.178, + "step": 12370 + }, + { + "epoch": 0.96, + "learning_rate": 1.0361258175023358e-05, + "loss": 0.1575, + "step": 12380 + }, + { + "epoch": 0.96, + "learning_rate": 1.0353472438492682e-05, + "loss": 0.227, + "step": 12390 + }, + { + "epoch": 0.97, + "learning_rate": 1.0345686701962006e-05, + "loss": 0.2079, + "step": 12400 + }, + { + "epoch": 0.97, + "learning_rate": 1.0337900965431331e-05, + "loss": 0.1606, + "step": 12410 + }, + { + "epoch": 0.97, + "learning_rate": 1.0330115228900655e-05, + "loss": 0.2617, + "step": 12420 + }, + { + "epoch": 0.97, + "learning_rate": 1.0322329492369978e-05, + "loss": 0.1823, + "step": 12430 + }, + { + "epoch": 0.97, + "learning_rate": 1.0314543755839303e-05, + "loss": 0.1926, + "step": 12440 + }, + { + "epoch": 0.97, + "learning_rate": 1.0306758019308627e-05, + "loss": 0.183, + "step": 12450 + }, + { + "epoch": 0.97, + "learning_rate": 1.0298972282777952e-05, + "loss": 0.1285, + "step": 12460 + }, + { + "epoch": 0.97, + "learning_rate": 1.0291186546247276e-05, + "loss": 0.1676, + "step": 12470 + }, + { + "epoch": 0.97, + "learning_rate": 1.02834008097166e-05, + "loss": 0.2193, + "step": 12480 + }, + { + "epoch": 0.97, + "learning_rate": 1.0275615073185925e-05, + "loss": 0.1855, + "step": 12490 + }, + { + "epoch": 0.97, + "learning_rate": 1.0267829336655248e-05, + "loss": 0.1543, + "step": 12500 + }, + { + "epoch": 0.97, + "learning_rate": 1.0260043600124572e-05, + "loss": 0.1837, + "step": 12510 + }, + { + "epoch": 0.97, + "learning_rate": 1.0252257863593897e-05, + "loss": 0.2156, + "step": 12520 + }, + { + "epoch": 0.98, + "learning_rate": 1.0244472127063221e-05, + "loss": 0.2071, + "step": 12530 + }, + { + "epoch": 0.98, + "learning_rate": 1.0236686390532545e-05, + "loss": 0.2326, + "step": 12540 + }, + { + "epoch": 0.98, + "learning_rate": 1.022890065400187e-05, + "loss": 0.1803, + "step": 12550 + }, + { + "epoch": 0.98, + "learning_rate": 1.0221114917471193e-05, + "loss": 0.206, + "step": 12560 + }, + { + "epoch": 0.98, + "learning_rate": 1.0213329180940519e-05, + "loss": 0.2131, + "step": 12570 + }, + { + "epoch": 0.98, + "learning_rate": 1.0205543444409842e-05, + "loss": 0.2522, + "step": 12580 + }, + { + "epoch": 0.98, + "learning_rate": 1.0197757707879166e-05, + "loss": 0.1994, + "step": 12590 + }, + { + "epoch": 0.98, + "learning_rate": 1.0189971971348491e-05, + "loss": 0.1764, + "step": 12600 + }, + { + "epoch": 0.98, + "learning_rate": 1.0182186234817815e-05, + "loss": 0.1961, + "step": 12610 + }, + { + "epoch": 0.98, + "learning_rate": 1.0174400498287139e-05, + "loss": 0.2062, + "step": 12620 + }, + { + "epoch": 0.98, + "learning_rate": 1.0166614761756464e-05, + "loss": 0.1866, + "step": 12630 + }, + { + "epoch": 0.98, + "learning_rate": 1.0158829025225787e-05, + "loss": 0.2196, + "step": 12640 + }, + { + "epoch": 0.98, + "learning_rate": 1.0151043288695111e-05, + "loss": 0.1873, + "step": 12650 + }, + { + "epoch": 0.99, + "learning_rate": 1.0143257552164436e-05, + "loss": 0.1491, + "step": 12660 + }, + { + "epoch": 0.99, + "learning_rate": 1.013547181563376e-05, + "loss": 0.2145, + "step": 12670 + }, + { + "epoch": 0.99, + "learning_rate": 1.0127686079103084e-05, + "loss": 0.2338, + "step": 12680 + }, + { + "epoch": 0.99, + "learning_rate": 1.0119900342572409e-05, + "loss": 0.1597, + "step": 12690 + }, + { + "epoch": 0.99, + "learning_rate": 1.0112114606041732e-05, + "loss": 0.1908, + "step": 12700 + }, + { + "epoch": 0.99, + "learning_rate": 1.0104328869511058e-05, + "loss": 0.1368, + "step": 12710 + }, + { + "epoch": 0.99, + "learning_rate": 1.0096543132980381e-05, + "loss": 0.2238, + "step": 12720 + }, + { + "epoch": 0.99, + "learning_rate": 1.0088757396449705e-05, + "loss": 0.1706, + "step": 12730 + }, + { + "epoch": 0.99, + "learning_rate": 1.008097165991903e-05, + "loss": 0.1616, + "step": 12740 + }, + { + "epoch": 0.99, + "learning_rate": 1.0073185923388354e-05, + "loss": 0.1904, + "step": 12750 + }, + { + "epoch": 0.99, + "learning_rate": 1.0065400186857677e-05, + "loss": 0.2075, + "step": 12760 + }, + { + "epoch": 0.99, + "learning_rate": 1.0057614450327003e-05, + "loss": 0.1753, + "step": 12770 + }, + { + "epoch": 1.0, + "learning_rate": 1.0049828713796326e-05, + "loss": 0.2173, + "step": 12780 + }, + { + "epoch": 1.0, + "learning_rate": 1.004204297726565e-05, + "loss": 0.1809, + "step": 12790 + }, + { + "epoch": 1.0, + "learning_rate": 1.0034257240734975e-05, + "loss": 0.1903, + "step": 12800 + }, + { + "epoch": 1.0, + "learning_rate": 1.0026471504204299e-05, + "loss": 0.1887, + "step": 12810 + }, + { + "epoch": 1.0, + "learning_rate": 1.0018685767673624e-05, + "loss": 0.203, + "step": 12820 + }, + { + "epoch": 1.0, + "learning_rate": 1.0010900031142948e-05, + "loss": 0.2308, + "step": 12830 + }, + { + "epoch": 1.0, + "learning_rate": 1.0003114294612271e-05, + "loss": 0.1726, + "step": 12840 + }, + { + "epoch": 1.0, + "learning_rate": 9.995328558081595e-06, + "loss": 0.1402, + "step": 12850 + }, + { + "epoch": 1.0, + "learning_rate": 9.98754282155092e-06, + "loss": 0.0868, + "step": 12860 + }, + { + "epoch": 1.0, + "learning_rate": 9.979757085020244e-06, + "loss": 0.1362, + "step": 12870 + }, + { + "epoch": 1.0, + "learning_rate": 9.971971348489567e-06, + "loss": 0.0793, + "step": 12880 + }, + { + "epoch": 1.0, + "learning_rate": 9.964185611958893e-06, + "loss": 0.1042, + "step": 12890 + }, + { + "epoch": 1.0, + "learning_rate": 9.956399875428216e-06, + "loss": 0.0904, + "step": 12900 + }, + { + "epoch": 1.01, + "learning_rate": 9.948614138897542e-06, + "loss": 0.1634, + "step": 12910 + }, + { + "epoch": 1.01, + "learning_rate": 9.940828402366864e-06, + "loss": 0.1025, + "step": 12920 + }, + { + "epoch": 1.01, + "learning_rate": 9.933042665836189e-06, + "loss": 0.1222, + "step": 12930 + }, + { + "epoch": 1.01, + "learning_rate": 9.925256929305514e-06, + "loss": 0.1175, + "step": 12940 + }, + { + "epoch": 1.01, + "learning_rate": 9.917471192774836e-06, + "loss": 0.084, + "step": 12950 + }, + { + "epoch": 1.01, + "learning_rate": 9.909685456244161e-06, + "loss": 0.0964, + "step": 12960 + }, + { + "epoch": 1.01, + "learning_rate": 9.901899719713485e-06, + "loss": 0.0858, + "step": 12970 + }, + { + "epoch": 1.01, + "learning_rate": 9.89411398318281e-06, + "loss": 0.117, + "step": 12980 + }, + { + "epoch": 1.01, + "learning_rate": 9.886328246652134e-06, + "loss": 0.089, + "step": 12990 + }, + { + "epoch": 1.01, + "learning_rate": 9.878542510121458e-06, + "loss": 0.0906, + "step": 13000 + }, + { + "epoch": 1.01, + "learning_rate": 9.870756773590783e-06, + "loss": 0.0955, + "step": 13010 + }, + { + "epoch": 1.01, + "learning_rate": 9.862971037060106e-06, + "loss": 0.0976, + "step": 13020 + }, + { + "epoch": 1.01, + "learning_rate": 9.85518530052943e-06, + "loss": 0.1087, + "step": 13030 + }, + { + "epoch": 1.02, + "learning_rate": 9.847399563998755e-06, + "loss": 0.1139, + "step": 13040 + }, + { + "epoch": 1.02, + "learning_rate": 9.839613827468079e-06, + "loss": 0.1174, + "step": 13050 + }, + { + "epoch": 1.02, + "learning_rate": 9.831828090937403e-06, + "loss": 0.1103, + "step": 13060 + }, + { + "epoch": 1.02, + "learning_rate": 9.824042354406728e-06, + "loss": 0.148, + "step": 13070 + }, + { + "epoch": 1.02, + "learning_rate": 9.816256617876051e-06, + "loss": 0.0997, + "step": 13080 + }, + { + "epoch": 1.02, + "learning_rate": 9.808470881345377e-06, + "loss": 0.0874, + "step": 13090 + }, + { + "epoch": 1.02, + "learning_rate": 9.8006851448147e-06, + "loss": 0.1347, + "step": 13100 + }, + { + "epoch": 1.02, + "learning_rate": 9.792899408284024e-06, + "loss": 0.1064, + "step": 13110 + }, + { + "epoch": 1.02, + "learning_rate": 9.78511367175335e-06, + "loss": 0.09, + "step": 13120 + }, + { + "epoch": 1.02, + "learning_rate": 9.777327935222673e-06, + "loss": 0.1006, + "step": 13130 + }, + { + "epoch": 1.02, + "learning_rate": 9.769542198691996e-06, + "loss": 0.0901, + "step": 13140 + }, + { + "epoch": 1.02, + "learning_rate": 9.761756462161322e-06, + "loss": 0.0818, + "step": 13150 + }, + { + "epoch": 1.02, + "learning_rate": 9.753970725630645e-06, + "loss": 0.1102, + "step": 13160 + }, + { + "epoch": 1.03, + "learning_rate": 9.746184989099969e-06, + "loss": 0.1036, + "step": 13170 + }, + { + "epoch": 1.03, + "learning_rate": 9.738399252569294e-06, + "loss": 0.0712, + "step": 13180 + }, + { + "epoch": 1.03, + "learning_rate": 9.730613516038618e-06, + "loss": 0.1214, + "step": 13190 + }, + { + "epoch": 1.03, + "learning_rate": 9.722827779507941e-06, + "loss": 0.0889, + "step": 13200 + }, + { + "epoch": 1.03, + "learning_rate": 9.715042042977267e-06, + "loss": 0.1578, + "step": 13210 + }, + { + "epoch": 1.03, + "learning_rate": 9.70725630644659e-06, + "loss": 0.1033, + "step": 13220 + }, + { + "epoch": 1.03, + "learning_rate": 9.699470569915916e-06, + "loss": 0.0833, + "step": 13230 + }, + { + "epoch": 1.03, + "learning_rate": 9.69168483338524e-06, + "loss": 0.0876, + "step": 13240 + }, + { + "epoch": 1.03, + "learning_rate": 9.683899096854563e-06, + "loss": 0.0916, + "step": 13250 + }, + { + "epoch": 1.03, + "learning_rate": 9.676113360323888e-06, + "loss": 0.0962, + "step": 13260 + }, + { + "epoch": 1.03, + "learning_rate": 9.668327623793212e-06, + "loss": 0.075, + "step": 13270 + }, + { + "epoch": 1.03, + "learning_rate": 9.660541887262535e-06, + "loss": 0.1243, + "step": 13280 + }, + { + "epoch": 1.03, + "learning_rate": 9.65275615073186e-06, + "loss": 0.1184, + "step": 13290 + }, + { + "epoch": 1.04, + "learning_rate": 9.644970414201184e-06, + "loss": 0.0915, + "step": 13300 + }, + { + "epoch": 1.04, + "learning_rate": 9.637184677670508e-06, + "loss": 0.0818, + "step": 13310 + }, + { + "epoch": 1.04, + "learning_rate": 9.629398941139833e-06, + "loss": 0.0907, + "step": 13320 + }, + { + "epoch": 1.04, + "learning_rate": 9.621613204609157e-06, + "loss": 0.1174, + "step": 13330 + }, + { + "epoch": 1.04, + "learning_rate": 9.613827468078482e-06, + "loss": 0.0875, + "step": 13340 + }, + { + "epoch": 1.04, + "learning_rate": 9.606041731547806e-06, + "loss": 0.1012, + "step": 13350 + }, + { + "epoch": 1.04, + "learning_rate": 9.59825599501713e-06, + "loss": 0.1012, + "step": 13360 + }, + { + "epoch": 1.04, + "learning_rate": 9.590470258486455e-06, + "loss": 0.1122, + "step": 13370 + }, + { + "epoch": 1.04, + "learning_rate": 9.582684521955777e-06, + "loss": 0.1086, + "step": 13380 + }, + { + "epoch": 1.04, + "learning_rate": 9.574898785425102e-06, + "loss": 0.1006, + "step": 13390 + }, + { + "epoch": 1.04, + "learning_rate": 9.567113048894427e-06, + "loss": 0.0776, + "step": 13400 + }, + { + "epoch": 1.04, + "learning_rate": 9.55932731236375e-06, + "loss": 0.0899, + "step": 13410 + }, + { + "epoch": 1.04, + "learning_rate": 9.551541575833074e-06, + "loss": 0.1003, + "step": 13420 + }, + { + "epoch": 1.05, + "learning_rate": 9.543755839302398e-06, + "loss": 0.0925, + "step": 13430 + }, + { + "epoch": 1.05, + "learning_rate": 9.535970102771723e-06, + "loss": 0.1014, + "step": 13440 + }, + { + "epoch": 1.05, + "learning_rate": 9.528184366241047e-06, + "loss": 0.098, + "step": 13450 + }, + { + "epoch": 1.05, + "learning_rate": 9.52039862971037e-06, + "loss": 0.1273, + "step": 13460 + }, + { + "epoch": 1.05, + "learning_rate": 9.512612893179696e-06, + "loss": 0.101, + "step": 13470 + }, + { + "epoch": 1.05, + "learning_rate": 9.50482715664902e-06, + "loss": 0.0918, + "step": 13480 + }, + { + "epoch": 1.05, + "learning_rate": 9.497041420118343e-06, + "loss": 0.1491, + "step": 13490 + }, + { + "epoch": 1.05, + "learning_rate": 9.489255683587668e-06, + "loss": 0.096, + "step": 13500 + }, + { + "epoch": 1.05, + "learning_rate": 9.481469947056992e-06, + "loss": 0.1107, + "step": 13510 + }, + { + "epoch": 1.05, + "learning_rate": 9.473684210526315e-06, + "loss": 0.0973, + "step": 13520 + }, + { + "epoch": 1.05, + "learning_rate": 9.46589847399564e-06, + "loss": 0.1056, + "step": 13530 + }, + { + "epoch": 1.05, + "learning_rate": 9.458112737464964e-06, + "loss": 0.1128, + "step": 13540 + }, + { + "epoch": 1.05, + "learning_rate": 9.45032700093429e-06, + "loss": 0.1049, + "step": 13550 + }, + { + "epoch": 1.06, + "learning_rate": 9.442541264403613e-06, + "loss": 0.1248, + "step": 13560 + }, + { + "epoch": 1.06, + "learning_rate": 9.434755527872937e-06, + "loss": 0.1062, + "step": 13570 + }, + { + "epoch": 1.06, + "learning_rate": 9.426969791342262e-06, + "loss": 0.1301, + "step": 13580 + }, + { + "epoch": 1.06, + "learning_rate": 9.419184054811586e-06, + "loss": 0.0943, + "step": 13590 + }, + { + "epoch": 1.06, + "learning_rate": 9.41139831828091e-06, + "loss": 0.099, + "step": 13600 + }, + { + "epoch": 1.06, + "learning_rate": 9.403612581750235e-06, + "loss": 0.0961, + "step": 13610 + }, + { + "epoch": 1.06, + "learning_rate": 9.395826845219558e-06, + "loss": 0.0944, + "step": 13620 + }, + { + "epoch": 1.06, + "learning_rate": 9.388041108688882e-06, + "loss": 0.0837, + "step": 13630 + }, + { + "epoch": 1.06, + "learning_rate": 9.380255372158207e-06, + "loss": 0.117, + "step": 13640 + }, + { + "epoch": 1.06, + "learning_rate": 9.37246963562753e-06, + "loss": 0.1119, + "step": 13650 + }, + { + "epoch": 1.06, + "learning_rate": 9.364683899096856e-06, + "loss": 0.1023, + "step": 13660 + }, + { + "epoch": 1.06, + "learning_rate": 9.35689816256618e-06, + "loss": 0.1201, + "step": 13670 + }, + { + "epoch": 1.07, + "learning_rate": 9.349112426035503e-06, + "loss": 0.0881, + "step": 13680 + }, + { + "epoch": 1.07, + "learning_rate": 9.341326689504829e-06, + "loss": 0.1116, + "step": 13690 + }, + { + "epoch": 1.07, + "learning_rate": 9.333540952974152e-06, + "loss": 0.1148, + "step": 13700 + }, + { + "epoch": 1.07, + "learning_rate": 9.325755216443476e-06, + "loss": 0.1727, + "step": 13710 + }, + { + "epoch": 1.07, + "learning_rate": 9.317969479912801e-06, + "loss": 0.0856, + "step": 13720 + }, + { + "epoch": 1.07, + "learning_rate": 9.310183743382125e-06, + "loss": 0.109, + "step": 13730 + }, + { + "epoch": 1.07, + "learning_rate": 9.302398006851448e-06, + "loss": 0.1113, + "step": 13740 + }, + { + "epoch": 1.07, + "learning_rate": 9.294612270320774e-06, + "loss": 0.1035, + "step": 13750 + }, + { + "epoch": 1.07, + "learning_rate": 9.286826533790097e-06, + "loss": 0.1176, + "step": 13760 + }, + { + "epoch": 1.07, + "learning_rate": 9.279040797259421e-06, + "loss": 0.1036, + "step": 13770 + }, + { + "epoch": 1.07, + "learning_rate": 9.271255060728746e-06, + "loss": 0.0757, + "step": 13780 + }, + { + "epoch": 1.07, + "learning_rate": 9.26346932419807e-06, + "loss": 0.0792, + "step": 13790 + }, + { + "epoch": 1.07, + "learning_rate": 9.255683587667395e-06, + "loss": 0.0927, + "step": 13800 + }, + { + "epoch": 1.08, + "learning_rate": 9.247897851136719e-06, + "loss": 0.0981, + "step": 13810 + }, + { + "epoch": 1.08, + "learning_rate": 9.240112114606042e-06, + "loss": 0.0994, + "step": 13820 + }, + { + "epoch": 1.08, + "learning_rate": 9.232326378075368e-06, + "loss": 0.1099, + "step": 13830 + }, + { + "epoch": 1.08, + "learning_rate": 9.224540641544691e-06, + "loss": 0.0999, + "step": 13840 + }, + { + "epoch": 1.08, + "learning_rate": 9.216754905014015e-06, + "loss": 0.0755, + "step": 13850 + }, + { + "epoch": 1.08, + "learning_rate": 9.20896916848334e-06, + "loss": 0.1006, + "step": 13860 + }, + { + "epoch": 1.08, + "learning_rate": 9.201183431952664e-06, + "loss": 0.0706, + "step": 13870 + }, + { + "epoch": 1.08, + "learning_rate": 9.193397695421987e-06, + "loss": 0.0903, + "step": 13880 + }, + { + "epoch": 1.08, + "learning_rate": 9.185611958891311e-06, + "loss": 0.1197, + "step": 13890 + }, + { + "epoch": 1.08, + "learning_rate": 9.177826222360636e-06, + "loss": 0.1052, + "step": 13900 + }, + { + "epoch": 1.08, + "learning_rate": 9.170040485829962e-06, + "loss": 0.0887, + "step": 13910 + }, + { + "epoch": 1.08, + "learning_rate": 9.162254749299283e-06, + "loss": 0.1081, + "step": 13920 + }, + { + "epoch": 1.08, + "learning_rate": 9.154469012768609e-06, + "loss": 0.1074, + "step": 13930 + }, + { + "epoch": 1.09, + "learning_rate": 9.146683276237934e-06, + "loss": 0.072, + "step": 13940 + }, + { + "epoch": 1.09, + "learning_rate": 9.138897539707256e-06, + "loss": 0.1217, + "step": 13950 + }, + { + "epoch": 1.09, + "learning_rate": 9.131111803176581e-06, + "loss": 0.0872, + "step": 13960 + }, + { + "epoch": 1.09, + "learning_rate": 9.123326066645905e-06, + "loss": 0.0789, + "step": 13970 + }, + { + "epoch": 1.09, + "learning_rate": 9.11554033011523e-06, + "loss": 0.0576, + "step": 13980 + }, + { + "epoch": 1.09, + "learning_rate": 9.107754593584554e-06, + "loss": 0.0967, + "step": 13990 + }, + { + "epoch": 1.09, + "learning_rate": 9.099968857053877e-06, + "loss": 0.0982, + "step": 14000 + }, + { + "epoch": 1.09, + "learning_rate": 9.092183120523203e-06, + "loss": 0.1194, + "step": 14010 + }, + { + "epoch": 1.09, + "learning_rate": 9.084397383992526e-06, + "loss": 0.0902, + "step": 14020 + }, + { + "epoch": 1.09, + "learning_rate": 9.07661164746185e-06, + "loss": 0.0964, + "step": 14030 + }, + { + "epoch": 1.09, + "learning_rate": 9.068825910931175e-06, + "loss": 0.1205, + "step": 14040 + }, + { + "epoch": 1.09, + "learning_rate": 9.061040174400499e-06, + "loss": 0.1087, + "step": 14050 + }, + { + "epoch": 1.09, + "learning_rate": 9.053254437869822e-06, + "loss": 0.1053, + "step": 14060 + }, + { + "epoch": 1.1, + "learning_rate": 9.045468701339148e-06, + "loss": 0.1332, + "step": 14070 + }, + { + "epoch": 1.1, + "learning_rate": 9.037682964808471e-06, + "loss": 0.1147, + "step": 14080 + }, + { + "epoch": 1.1, + "learning_rate": 9.029897228277797e-06, + "loss": 0.1198, + "step": 14090 + }, + { + "epoch": 1.1, + "learning_rate": 9.02211149174712e-06, + "loss": 0.0964, + "step": 14100 + }, + { + "epoch": 1.1, + "learning_rate": 9.014325755216444e-06, + "loss": 0.0887, + "step": 14110 + }, + { + "epoch": 1.1, + "learning_rate": 9.006540018685769e-06, + "loss": 0.0665, + "step": 14120 + }, + { + "epoch": 1.1, + "learning_rate": 8.998754282155093e-06, + "loss": 0.1118, + "step": 14130 + }, + { + "epoch": 1.1, + "learning_rate": 8.990968545624416e-06, + "loss": 0.1073, + "step": 14140 + }, + { + "epoch": 1.1, + "learning_rate": 8.983182809093742e-06, + "loss": 0.1199, + "step": 14150 + }, + { + "epoch": 1.1, + "learning_rate": 8.975397072563065e-06, + "loss": 0.1028, + "step": 14160 + }, + { + "epoch": 1.1, + "learning_rate": 8.967611336032389e-06, + "loss": 0.0827, + "step": 14170 + }, + { + "epoch": 1.1, + "learning_rate": 8.959825599501714e-06, + "loss": 0.0956, + "step": 14180 + }, + { + "epoch": 1.1, + "learning_rate": 8.952039862971038e-06, + "loss": 0.0765, + "step": 14190 + }, + { + "epoch": 1.11, + "learning_rate": 8.944254126440361e-06, + "loss": 0.0938, + "step": 14200 + }, + { + "epoch": 1.11, + "learning_rate": 8.936468389909687e-06, + "loss": 0.0877, + "step": 14210 + }, + { + "epoch": 1.11, + "learning_rate": 8.92868265337901e-06, + "loss": 0.0644, + "step": 14220 + }, + { + "epoch": 1.11, + "learning_rate": 8.920896916848336e-06, + "loss": 0.1061, + "step": 14230 + }, + { + "epoch": 1.11, + "learning_rate": 8.913111180317659e-06, + "loss": 0.0974, + "step": 14240 + }, + { + "epoch": 1.11, + "learning_rate": 8.905325443786983e-06, + "loss": 0.096, + "step": 14250 + }, + { + "epoch": 1.11, + "learning_rate": 8.897539707256308e-06, + "loss": 0.0791, + "step": 14260 + }, + { + "epoch": 1.11, + "learning_rate": 8.889753970725632e-06, + "loss": 0.0857, + "step": 14270 + }, + { + "epoch": 1.11, + "learning_rate": 8.881968234194955e-06, + "loss": 0.1289, + "step": 14280 + }, + { + "epoch": 1.11, + "learning_rate": 8.87418249766428e-06, + "loss": 0.0815, + "step": 14290 + }, + { + "epoch": 1.11, + "learning_rate": 8.866396761133604e-06, + "loss": 0.0872, + "step": 14300 + }, + { + "epoch": 1.11, + "learning_rate": 8.858611024602928e-06, + "loss": 0.1081, + "step": 14310 + }, + { + "epoch": 1.11, + "learning_rate": 8.850825288072253e-06, + "loss": 0.0825, + "step": 14320 + }, + { + "epoch": 1.12, + "learning_rate": 8.843039551541577e-06, + "loss": 0.0949, + "step": 14330 + }, + { + "epoch": 1.12, + "learning_rate": 8.835253815010902e-06, + "loss": 0.1213, + "step": 14340 + }, + { + "epoch": 1.12, + "learning_rate": 8.827468078480224e-06, + "loss": 0.0725, + "step": 14350 + }, + { + "epoch": 1.12, + "learning_rate": 8.81968234194955e-06, + "loss": 0.0984, + "step": 14360 + }, + { + "epoch": 1.12, + "learning_rate": 8.811896605418874e-06, + "loss": 0.1204, + "step": 14370 + }, + { + "epoch": 1.12, + "learning_rate": 8.804110868888196e-06, + "loss": 0.1016, + "step": 14380 + }, + { + "epoch": 1.12, + "learning_rate": 8.796325132357522e-06, + "loss": 0.0758, + "step": 14390 + }, + { + "epoch": 1.12, + "learning_rate": 8.788539395826847e-06, + "loss": 0.0979, + "step": 14400 + }, + { + "epoch": 1.12, + "learning_rate": 8.78075365929617e-06, + "loss": 0.0934, + "step": 14410 + }, + { + "epoch": 1.12, + "learning_rate": 8.772967922765494e-06, + "loss": 0.1046, + "step": 14420 + }, + { + "epoch": 1.12, + "learning_rate": 8.765182186234818e-06, + "loss": 0.0962, + "step": 14430 + }, + { + "epoch": 1.12, + "learning_rate": 8.757396449704143e-06, + "loss": 0.1194, + "step": 14440 + }, + { + "epoch": 1.13, + "learning_rate": 8.749610713173467e-06, + "loss": 0.1216, + "step": 14450 + }, + { + "epoch": 1.13, + "learning_rate": 8.74182497664279e-06, + "loss": 0.1329, + "step": 14460 + }, + { + "epoch": 1.13, + "learning_rate": 8.734039240112116e-06, + "loss": 0.0969, + "step": 14470 + }, + { + "epoch": 1.13, + "learning_rate": 8.72625350358144e-06, + "loss": 0.0835, + "step": 14480 + }, + { + "epoch": 1.13, + "learning_rate": 8.718467767050763e-06, + "loss": 0.1152, + "step": 14490 + }, + { + "epoch": 1.13, + "learning_rate": 8.710682030520088e-06, + "loss": 0.0784, + "step": 14500 + }, + { + "epoch": 1.13, + "learning_rate": 8.702896293989412e-06, + "loss": 0.1241, + "step": 14510 + }, + { + "epoch": 1.13, + "learning_rate": 8.695110557458735e-06, + "loss": 0.1151, + "step": 14520 + }, + { + "epoch": 1.13, + "learning_rate": 8.68732482092806e-06, + "loss": 0.0946, + "step": 14530 + }, + { + "epoch": 1.13, + "learning_rate": 8.679539084397384e-06, + "loss": 0.1031, + "step": 14540 + }, + { + "epoch": 1.13, + "learning_rate": 8.67175334786671e-06, + "loss": 0.1101, + "step": 14550 + }, + { + "epoch": 1.13, + "learning_rate": 8.663967611336033e-06, + "loss": 0.0964, + "step": 14560 + }, + { + "epoch": 1.13, + "learning_rate": 8.656181874805357e-06, + "loss": 0.106, + "step": 14570 + }, + { + "epoch": 1.14, + "learning_rate": 8.648396138274682e-06, + "loss": 0.0775, + "step": 14580 + }, + { + "epoch": 1.14, + "learning_rate": 8.640610401744006e-06, + "loss": 0.0869, + "step": 14590 + }, + { + "epoch": 1.14, + "learning_rate": 8.63282466521333e-06, + "loss": 0.096, + "step": 14600 + }, + { + "epoch": 1.14, + "learning_rate": 8.625038928682655e-06, + "loss": 0.0728, + "step": 14610 + }, + { + "epoch": 1.14, + "learning_rate": 8.617253192151978e-06, + "loss": 0.0959, + "step": 14620 + }, + { + "epoch": 1.14, + "learning_rate": 8.609467455621302e-06, + "loss": 0.0824, + "step": 14630 + }, + { + "epoch": 1.14, + "learning_rate": 8.601681719090627e-06, + "loss": 0.0838, + "step": 14640 + }, + { + "epoch": 1.14, + "learning_rate": 8.59389598255995e-06, + "loss": 0.1209, + "step": 14650 + }, + { + "epoch": 1.14, + "learning_rate": 8.586110246029276e-06, + "loss": 0.1263, + "step": 14660 + }, + { + "epoch": 1.14, + "learning_rate": 8.5783245094986e-06, + "loss": 0.0975, + "step": 14670 + }, + { + "epoch": 1.14, + "learning_rate": 8.570538772967923e-06, + "loss": 0.0879, + "step": 14680 + }, + { + "epoch": 1.14, + "learning_rate": 8.562753036437248e-06, + "loss": 0.0806, + "step": 14690 + }, + { + "epoch": 1.14, + "learning_rate": 8.554967299906572e-06, + "loss": 0.0818, + "step": 14700 + }, + { + "epoch": 1.15, + "learning_rate": 8.547181563375896e-06, + "loss": 0.1075, + "step": 14710 + }, + { + "epoch": 1.15, + "learning_rate": 8.539395826845221e-06, + "loss": 0.1059, + "step": 14720 + }, + { + "epoch": 1.15, + "learning_rate": 8.531610090314545e-06, + "loss": 0.0742, + "step": 14730 + }, + { + "epoch": 1.15, + "learning_rate": 8.523824353783868e-06, + "loss": 0.1018, + "step": 14740 + }, + { + "epoch": 1.15, + "learning_rate": 8.516038617253193e-06, + "loss": 0.1284, + "step": 14750 + }, + { + "epoch": 1.15, + "learning_rate": 8.508252880722517e-06, + "loss": 0.0703, + "step": 14760 + }, + { + "epoch": 1.15, + "learning_rate": 8.50046714419184e-06, + "loss": 0.1002, + "step": 14770 + }, + { + "epoch": 1.15, + "learning_rate": 8.492681407661166e-06, + "loss": 0.0786, + "step": 14780 + }, + { + "epoch": 1.15, + "learning_rate": 8.48489567113049e-06, + "loss": 0.0997, + "step": 14790 + }, + { + "epoch": 1.15, + "learning_rate": 8.477109934599815e-06, + "loss": 0.0951, + "step": 14800 + }, + { + "epoch": 1.15, + "learning_rate": 8.469324198069137e-06, + "loss": 0.0839, + "step": 14810 + }, + { + "epoch": 1.15, + "learning_rate": 8.461538461538462e-06, + "loss": 0.0955, + "step": 14820 + }, + { + "epoch": 1.15, + "learning_rate": 8.453752725007787e-06, + "loss": 0.11, + "step": 14830 + }, + { + "epoch": 1.16, + "learning_rate": 8.445966988477111e-06, + "loss": 0.137, + "step": 14840 + }, + { + "epoch": 1.16, + "learning_rate": 8.438181251946435e-06, + "loss": 0.0971, + "step": 14850 + }, + { + "epoch": 1.16, + "learning_rate": 8.43039551541576e-06, + "loss": 0.0825, + "step": 14860 + }, + { + "epoch": 1.16, + "learning_rate": 8.422609778885084e-06, + "loss": 0.0767, + "step": 14870 + }, + { + "epoch": 1.16, + "learning_rate": 8.414824042354407e-06, + "loss": 0.0661, + "step": 14880 + }, + { + "epoch": 1.16, + "learning_rate": 8.40703830582373e-06, + "loss": 0.1102, + "step": 14890 + }, + { + "epoch": 1.16, + "learning_rate": 8.399252569293056e-06, + "loss": 0.1012, + "step": 14900 + }, + { + "epoch": 1.16, + "learning_rate": 8.391466832762381e-06, + "loss": 0.1119, + "step": 14910 + }, + { + "epoch": 1.16, + "learning_rate": 8.383681096231703e-06, + "loss": 0.1084, + "step": 14920 + }, + { + "epoch": 1.16, + "learning_rate": 8.375895359701029e-06, + "loss": 0.1127, + "step": 14930 + }, + { + "epoch": 1.16, + "learning_rate": 8.368109623170352e-06, + "loss": 0.0896, + "step": 14940 + }, + { + "epoch": 1.16, + "learning_rate": 8.360323886639676e-06, + "loss": 0.132, + "step": 14950 + }, + { + "epoch": 1.16, + "learning_rate": 8.352538150109001e-06, + "loss": 0.1157, + "step": 14960 + }, + { + "epoch": 1.17, + "learning_rate": 8.344752413578325e-06, + "loss": 0.079, + "step": 14970 + }, + { + "epoch": 1.17, + "learning_rate": 8.33696667704765e-06, + "loss": 0.0762, + "step": 14980 + }, + { + "epoch": 1.17, + "learning_rate": 8.329180940516974e-06, + "loss": 0.116, + "step": 14990 + }, + { + "epoch": 1.17, + "learning_rate": 8.321395203986297e-06, + "loss": 0.1088, + "step": 15000 + }, + { + "epoch": 1.17, + "learning_rate": 8.313609467455622e-06, + "loss": 0.081, + "step": 15010 + }, + { + "epoch": 1.17, + "learning_rate": 8.305823730924946e-06, + "loss": 0.1092, + "step": 15020 + }, + { + "epoch": 1.17, + "learning_rate": 8.29803799439427e-06, + "loss": 0.0923, + "step": 15030 + }, + { + "epoch": 1.17, + "learning_rate": 8.290252257863595e-06, + "loss": 0.0755, + "step": 15040 + }, + { + "epoch": 1.17, + "learning_rate": 8.282466521332919e-06, + "loss": 0.0827, + "step": 15050 + }, + { + "epoch": 1.17, + "learning_rate": 8.274680784802242e-06, + "loss": 0.0851, + "step": 15060 + }, + { + "epoch": 1.17, + "learning_rate": 8.266895048271567e-06, + "loss": 0.1001, + "step": 15070 + }, + { + "epoch": 1.17, + "learning_rate": 8.259109311740891e-06, + "loss": 0.0813, + "step": 15080 + }, + { + "epoch": 1.17, + "learning_rate": 8.251323575210216e-06, + "loss": 0.0875, + "step": 15090 + }, + { + "epoch": 1.18, + "learning_rate": 8.24353783867954e-06, + "loss": 0.0938, + "step": 15100 + }, + { + "epoch": 1.18, + "learning_rate": 8.235752102148864e-06, + "loss": 0.0932, + "step": 15110 + }, + { + "epoch": 1.18, + "learning_rate": 8.227966365618189e-06, + "loss": 0.1039, + "step": 15120 + }, + { + "epoch": 1.18, + "learning_rate": 8.220180629087512e-06, + "loss": 0.1193, + "step": 15130 + }, + { + "epoch": 1.18, + "learning_rate": 8.212394892556836e-06, + "loss": 0.0989, + "step": 15140 + }, + { + "epoch": 1.18, + "learning_rate": 8.204609156026161e-06, + "loss": 0.1035, + "step": 15150 + }, + { + "epoch": 1.18, + "learning_rate": 8.196823419495485e-06, + "loss": 0.1034, + "step": 15160 + }, + { + "epoch": 1.18, + "learning_rate": 8.189037682964809e-06, + "loss": 0.0728, + "step": 15170 + }, + { + "epoch": 1.18, + "learning_rate": 8.181251946434134e-06, + "loss": 0.0792, + "step": 15180 + }, + { + "epoch": 1.18, + "learning_rate": 8.173466209903458e-06, + "loss": 0.0954, + "step": 15190 + }, + { + "epoch": 1.18, + "learning_rate": 8.165680473372781e-06, + "loss": 0.0944, + "step": 15200 + }, + { + "epoch": 1.18, + "learning_rate": 8.157894736842106e-06, + "loss": 0.1044, + "step": 15210 + }, + { + "epoch": 1.18, + "learning_rate": 8.15010900031143e-06, + "loss": 0.0921, + "step": 15220 + }, + { + "epoch": 1.19, + "learning_rate": 8.142323263780755e-06, + "loss": 0.107, + "step": 15230 + }, + { + "epoch": 1.19, + "learning_rate": 8.134537527250079e-06, + "loss": 0.0823, + "step": 15240 + }, + { + "epoch": 1.19, + "learning_rate": 8.126751790719403e-06, + "loss": 0.0961, + "step": 15250 + }, + { + "epoch": 1.19, + "learning_rate": 8.118966054188728e-06, + "loss": 0.0944, + "step": 15260 + }, + { + "epoch": 1.19, + "learning_rate": 8.11118031765805e-06, + "loss": 0.1194, + "step": 15270 + }, + { + "epoch": 1.19, + "learning_rate": 8.103394581127375e-06, + "loss": 0.0987, + "step": 15280 + }, + { + "epoch": 1.19, + "learning_rate": 8.0956088445967e-06, + "loss": 0.1077, + "step": 15290 + }, + { + "epoch": 1.19, + "learning_rate": 8.087823108066024e-06, + "loss": 0.0814, + "step": 15300 + }, + { + "epoch": 1.19, + "learning_rate": 8.080037371535348e-06, + "loss": 0.0841, + "step": 15310 + }, + { + "epoch": 1.19, + "learning_rate": 8.072251635004673e-06, + "loss": 0.0788, + "step": 15320 + }, + { + "epoch": 1.19, + "learning_rate": 8.064465898473996e-06, + "loss": 0.099, + "step": 15330 + }, + { + "epoch": 1.19, + "learning_rate": 8.056680161943322e-06, + "loss": 0.0757, + "step": 15340 + }, + { + "epoch": 1.2, + "learning_rate": 8.048894425412644e-06, + "loss": 0.0975, + "step": 15350 + }, + { + "epoch": 1.2, + "learning_rate": 8.041108688881969e-06, + "loss": 0.1129, + "step": 15360 + }, + { + "epoch": 1.2, + "learning_rate": 8.033322952351294e-06, + "loss": 0.0953, + "step": 15370 + }, + { + "epoch": 1.2, + "learning_rate": 8.025537215820616e-06, + "loss": 0.0825, + "step": 15380 + }, + { + "epoch": 1.2, + "learning_rate": 8.017751479289941e-06, + "loss": 0.1078, + "step": 15390 + }, + { + "epoch": 1.2, + "learning_rate": 8.009965742759265e-06, + "loss": 0.1187, + "step": 15400 + }, + { + "epoch": 1.2, + "learning_rate": 8.00218000622859e-06, + "loss": 0.0976, + "step": 15410 + }, + { + "epoch": 1.2, + "learning_rate": 7.994394269697914e-06, + "loss": 0.0899, + "step": 15420 + }, + { + "epoch": 1.2, + "learning_rate": 7.986608533167238e-06, + "loss": 0.0967, + "step": 15430 + }, + { + "epoch": 1.2, + "learning_rate": 7.978822796636563e-06, + "loss": 0.0962, + "step": 15440 + }, + { + "epoch": 1.2, + "learning_rate": 7.971037060105886e-06, + "loss": 0.0989, + "step": 15450 + }, + { + "epoch": 1.2, + "learning_rate": 7.96325132357521e-06, + "loss": 0.1049, + "step": 15460 + }, + { + "epoch": 1.2, + "learning_rate": 7.955465587044535e-06, + "loss": 0.0862, + "step": 15470 + }, + { + "epoch": 1.21, + "learning_rate": 7.947679850513859e-06, + "loss": 0.0736, + "step": 15480 + }, + { + "epoch": 1.21, + "learning_rate": 7.939894113983183e-06, + "loss": 0.0788, + "step": 15490 + }, + { + "epoch": 1.21, + "learning_rate": 7.932108377452508e-06, + "loss": 0.1044, + "step": 15500 + }, + { + "epoch": 1.21, + "learning_rate": 7.924322640921832e-06, + "loss": 0.0755, + "step": 15510 + }, + { + "epoch": 1.21, + "learning_rate": 7.916536904391155e-06, + "loss": 0.102, + "step": 15520 + }, + { + "epoch": 1.21, + "learning_rate": 7.90875116786048e-06, + "loss": 0.0904, + "step": 15530 + }, + { + "epoch": 1.21, + "learning_rate": 7.900965431329804e-06, + "loss": 0.0756, + "step": 15540 + }, + { + "epoch": 1.21, + "learning_rate": 7.89317969479913e-06, + "loss": 0.133, + "step": 15550 + }, + { + "epoch": 1.21, + "learning_rate": 7.885393958268453e-06, + "loss": 0.0754, + "step": 15560 + }, + { + "epoch": 1.21, + "learning_rate": 7.877608221737777e-06, + "loss": 0.0811, + "step": 15570 + }, + { + "epoch": 1.21, + "learning_rate": 7.869822485207102e-06, + "loss": 0.0692, + "step": 15580 + }, + { + "epoch": 1.21, + "learning_rate": 7.862036748676425e-06, + "loss": 0.0918, + "step": 15590 + }, + { + "epoch": 1.21, + "learning_rate": 7.854251012145749e-06, + "loss": 0.0908, + "step": 15600 + }, + { + "epoch": 1.22, + "learning_rate": 7.846465275615074e-06, + "loss": 0.1203, + "step": 15610 + }, + { + "epoch": 1.22, + "learning_rate": 7.838679539084398e-06, + "loss": 0.1036, + "step": 15620 + }, + { + "epoch": 1.22, + "learning_rate": 7.830893802553722e-06, + "loss": 0.089, + "step": 15630 + }, + { + "epoch": 1.22, + "learning_rate": 7.823108066023047e-06, + "loss": 0.0854, + "step": 15640 + }, + { + "epoch": 1.22, + "learning_rate": 7.81532232949237e-06, + "loss": 0.0985, + "step": 15650 + }, + { + "epoch": 1.22, + "learning_rate": 7.807536592961696e-06, + "loss": 0.1006, + "step": 15660 + }, + { + "epoch": 1.22, + "learning_rate": 7.79975085643102e-06, + "loss": 0.0798, + "step": 15670 + }, + { + "epoch": 1.22, + "learning_rate": 7.791965119900343e-06, + "loss": 0.0875, + "step": 15680 + }, + { + "epoch": 1.22, + "learning_rate": 7.784179383369668e-06, + "loss": 0.0901, + "step": 15690 + }, + { + "epoch": 1.22, + "learning_rate": 7.776393646838992e-06, + "loss": 0.0961, + "step": 15700 + }, + { + "epoch": 1.22, + "learning_rate": 7.768607910308315e-06, + "loss": 0.0895, + "step": 15710 + }, + { + "epoch": 1.22, + "learning_rate": 7.76082217377764e-06, + "loss": 0.1057, + "step": 15720 + }, + { + "epoch": 1.22, + "learning_rate": 7.753036437246964e-06, + "loss": 0.088, + "step": 15730 + }, + { + "epoch": 1.23, + "learning_rate": 7.745250700716288e-06, + "loss": 0.0826, + "step": 15740 + }, + { + "epoch": 1.23, + "learning_rate": 7.737464964185613e-06, + "loss": 0.0609, + "step": 15750 + }, + { + "epoch": 1.23, + "learning_rate": 7.729679227654937e-06, + "loss": 0.1069, + "step": 15760 + }, + { + "epoch": 1.23, + "learning_rate": 7.72189349112426e-06, + "loss": 0.0939, + "step": 15770 + }, + { + "epoch": 1.23, + "learning_rate": 7.714107754593586e-06, + "loss": 0.0867, + "step": 15780 + }, + { + "epoch": 1.23, + "learning_rate": 7.70632201806291e-06, + "loss": 0.0757, + "step": 15790 + }, + { + "epoch": 1.23, + "learning_rate": 7.698536281532235e-06, + "loss": 0.1131, + "step": 15800 + }, + { + "epoch": 1.23, + "learning_rate": 7.690750545001557e-06, + "loss": 0.086, + "step": 15810 + }, + { + "epoch": 1.23, + "learning_rate": 7.682964808470882e-06, + "loss": 0.0866, + "step": 15820 + }, + { + "epoch": 1.23, + "learning_rate": 7.675179071940207e-06, + "loss": 0.0794, + "step": 15830 + }, + { + "epoch": 1.23, + "learning_rate": 7.66739333540953e-06, + "loss": 0.064, + "step": 15840 + }, + { + "epoch": 1.23, + "learning_rate": 7.659607598878854e-06, + "loss": 0.1027, + "step": 15850 + }, + { + "epoch": 1.23, + "learning_rate": 7.651821862348178e-06, + "loss": 0.09, + "step": 15860 + }, + { + "epoch": 1.24, + "learning_rate": 7.644036125817503e-06, + "loss": 0.0792, + "step": 15870 + }, + { + "epoch": 1.24, + "learning_rate": 7.636250389286827e-06, + "loss": 0.1208, + "step": 15880 + }, + { + "epoch": 1.24, + "learning_rate": 7.628464652756151e-06, + "loss": 0.0938, + "step": 15890 + }, + { + "epoch": 1.24, + "learning_rate": 7.620678916225476e-06, + "loss": 0.0949, + "step": 15900 + }, + { + "epoch": 1.24, + "learning_rate": 7.6128931796948e-06, + "loss": 0.094, + "step": 15910 + }, + { + "epoch": 1.24, + "learning_rate": 7.605107443164124e-06, + "loss": 0.1024, + "step": 15920 + }, + { + "epoch": 1.24, + "learning_rate": 7.597321706633448e-06, + "loss": 0.1052, + "step": 15930 + }, + { + "epoch": 1.24, + "learning_rate": 7.589535970102773e-06, + "loss": 0.1041, + "step": 15940 + }, + { + "epoch": 1.24, + "learning_rate": 7.581750233572096e-06, + "loss": 0.0907, + "step": 15950 + }, + { + "epoch": 1.24, + "learning_rate": 7.573964497041421e-06, + "loss": 0.1208, + "step": 15960 + }, + { + "epoch": 1.24, + "learning_rate": 7.566178760510745e-06, + "loss": 0.0868, + "step": 15970 + }, + { + "epoch": 1.24, + "learning_rate": 7.55839302398007e-06, + "loss": 0.0865, + "step": 15980 + }, + { + "epoch": 1.24, + "learning_rate": 7.550607287449393e-06, + "loss": 0.0884, + "step": 15990 + }, + { + "epoch": 1.25, + "learning_rate": 7.542821550918718e-06, + "loss": 0.066, + "step": 16000 + }, + { + "epoch": 1.25, + "learning_rate": 7.535035814388042e-06, + "loss": 0.0754, + "step": 16010 + }, + { + "epoch": 1.25, + "learning_rate": 7.527250077857365e-06, + "loss": 0.0815, + "step": 16020 + }, + { + "epoch": 1.25, + "learning_rate": 7.5194643413266895e-06, + "loss": 0.0702, + "step": 16030 + }, + { + "epoch": 1.25, + "learning_rate": 7.511678604796015e-06, + "loss": 0.1028, + "step": 16040 + }, + { + "epoch": 1.25, + "learning_rate": 7.503892868265339e-06, + "loss": 0.0928, + "step": 16050 + }, + { + "epoch": 1.25, + "learning_rate": 7.496107131734662e-06, + "loss": 0.1022, + "step": 16060 + }, + { + "epoch": 1.25, + "learning_rate": 7.4883213952039864e-06, + "loss": 0.0959, + "step": 16070 + }, + { + "epoch": 1.25, + "learning_rate": 7.480535658673312e-06, + "loss": 0.0732, + "step": 16080 + }, + { + "epoch": 1.25, + "learning_rate": 7.472749922142636e-06, + "loss": 0.0803, + "step": 16090 + }, + { + "epoch": 1.25, + "learning_rate": 7.464964185611959e-06, + "loss": 0.0852, + "step": 16100 + }, + { + "epoch": 1.25, + "learning_rate": 7.457178449081283e-06, + "loss": 0.1012, + "step": 16110 + }, + { + "epoch": 1.26, + "learning_rate": 7.449392712550608e-06, + "loss": 0.0867, + "step": 16120 + }, + { + "epoch": 1.26, + "learning_rate": 7.4416069760199315e-06, + "loss": 0.075, + "step": 16130 + }, + { + "epoch": 1.26, + "learning_rate": 7.433821239489256e-06, + "loss": 0.0927, + "step": 16140 + }, + { + "epoch": 1.26, + "learning_rate": 7.42603550295858e-06, + "loss": 0.111, + "step": 16150 + }, + { + "epoch": 1.26, + "learning_rate": 7.418249766427905e-06, + "loss": 0.0784, + "step": 16160 + }, + { + "epoch": 1.26, + "learning_rate": 7.4104640298972284e-06, + "loss": 0.0985, + "step": 16170 + }, + { + "epoch": 1.26, + "learning_rate": 7.402678293366553e-06, + "loss": 0.0681, + "step": 16180 + }, + { + "epoch": 1.26, + "learning_rate": 7.394892556835877e-06, + "loss": 0.1078, + "step": 16190 + }, + { + "epoch": 1.26, + "learning_rate": 7.387106820305201e-06, + "loss": 0.0726, + "step": 16200 + }, + { + "epoch": 1.26, + "learning_rate": 7.379321083774525e-06, + "loss": 0.1075, + "step": 16210 + }, + { + "epoch": 1.26, + "learning_rate": 7.37153534724385e-06, + "loss": 0.0938, + "step": 16220 + }, + { + "epoch": 1.26, + "learning_rate": 7.363749610713174e-06, + "loss": 0.1013, + "step": 16230 + }, + { + "epoch": 1.26, + "learning_rate": 7.355963874182498e-06, + "loss": 0.1037, + "step": 16240 + }, + { + "epoch": 1.27, + "learning_rate": 7.348178137651822e-06, + "loss": 0.0574, + "step": 16250 + }, + { + "epoch": 1.27, + "learning_rate": 7.340392401121147e-06, + "loss": 0.0783, + "step": 16260 + }, + { + "epoch": 1.27, + "learning_rate": 7.33260666459047e-06, + "loss": 0.0768, + "step": 16270 + }, + { + "epoch": 1.27, + "learning_rate": 7.324820928059795e-06, + "loss": 0.0784, + "step": 16280 + }, + { + "epoch": 1.27, + "learning_rate": 7.317035191529119e-06, + "loss": 0.1078, + "step": 16290 + }, + { + "epoch": 1.27, + "learning_rate": 7.309249454998444e-06, + "loss": 0.0976, + "step": 16300 + }, + { + "epoch": 1.27, + "learning_rate": 7.301463718467767e-06, + "loss": 0.0803, + "step": 16310 + }, + { + "epoch": 1.27, + "learning_rate": 7.293677981937092e-06, + "loss": 0.0937, + "step": 16320 + }, + { + "epoch": 1.27, + "learning_rate": 7.285892245406416e-06, + "loss": 0.1217, + "step": 16330 + }, + { + "epoch": 1.27, + "learning_rate": 7.278106508875741e-06, + "loss": 0.0702, + "step": 16340 + }, + { + "epoch": 1.27, + "learning_rate": 7.270320772345064e-06, + "loss": 0.0911, + "step": 16350 + }, + { + "epoch": 1.27, + "learning_rate": 7.262535035814389e-06, + "loss": 0.097, + "step": 16360 + }, + { + "epoch": 1.27, + "learning_rate": 7.254749299283713e-06, + "loss": 0.0741, + "step": 16370 + }, + { + "epoch": 1.28, + "learning_rate": 7.246963562753037e-06, + "loss": 0.1093, + "step": 16380 + }, + { + "epoch": 1.28, + "learning_rate": 7.239177826222361e-06, + "loss": 0.0761, + "step": 16390 + }, + { + "epoch": 1.28, + "learning_rate": 7.231392089691686e-06, + "loss": 0.0818, + "step": 16400 + }, + { + "epoch": 1.28, + "learning_rate": 7.22360635316101e-06, + "loss": 0.102, + "step": 16410 + }, + { + "epoch": 1.28, + "learning_rate": 7.215820616630334e-06, + "loss": 0.0706, + "step": 16420 + }, + { + "epoch": 1.28, + "learning_rate": 7.208034880099658e-06, + "loss": 0.0957, + "step": 16430 + }, + { + "epoch": 1.28, + "learning_rate": 7.200249143568983e-06, + "loss": 0.1091, + "step": 16440 + }, + { + "epoch": 1.28, + "learning_rate": 7.192463407038306e-06, + "loss": 0.0743, + "step": 16450 + }, + { + "epoch": 1.28, + "learning_rate": 7.184677670507631e-06, + "loss": 0.0681, + "step": 16460 + }, + { + "epoch": 1.28, + "learning_rate": 7.176891933976955e-06, + "loss": 0.0797, + "step": 16470 + }, + { + "epoch": 1.28, + "learning_rate": 7.16910619744628e-06, + "loss": 0.0789, + "step": 16480 + }, + { + "epoch": 1.28, + "learning_rate": 7.1613204609156024e-06, + "loss": 0.0784, + "step": 16490 + }, + { + "epoch": 1.28, + "learning_rate": 7.153534724384928e-06, + "loss": 0.1042, + "step": 16500 + }, + { + "epoch": 1.29, + "learning_rate": 7.145748987854252e-06, + "loss": 0.0847, + "step": 16510 + }, + { + "epoch": 1.29, + "learning_rate": 7.137963251323575e-06, + "loss": 0.062, + "step": 16520 + }, + { + "epoch": 1.29, + "learning_rate": 7.130177514792899e-06, + "loss": 0.1022, + "step": 16530 + }, + { + "epoch": 1.29, + "learning_rate": 7.122391778262225e-06, + "loss": 0.0677, + "step": 16540 + }, + { + "epoch": 1.29, + "learning_rate": 7.114606041731549e-06, + "loss": 0.0943, + "step": 16550 + }, + { + "epoch": 1.29, + "learning_rate": 7.106820305200872e-06, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 1.29, + "learning_rate": 7.099034568670196e-06, + "loss": 0.0905, + "step": 16570 + }, + { + "epoch": 1.29, + "learning_rate": 7.091248832139521e-06, + "loss": 0.1186, + "step": 16580 + }, + { + "epoch": 1.29, + "learning_rate": 7.083463095608846e-06, + "loss": 0.0707, + "step": 16590 + }, + { + "epoch": 1.29, + "learning_rate": 7.075677359078169e-06, + "loss": 0.1026, + "step": 16600 + }, + { + "epoch": 1.29, + "learning_rate": 7.067891622547493e-06, + "loss": 0.0956, + "step": 16610 + }, + { + "epoch": 1.29, + "learning_rate": 7.060105886016818e-06, + "loss": 0.0849, + "step": 16620 + }, + { + "epoch": 1.29, + "learning_rate": 7.052320149486141e-06, + "loss": 0.1157, + "step": 16630 + }, + { + "epoch": 1.3, + "learning_rate": 7.044534412955466e-06, + "loss": 0.1055, + "step": 16640 + }, + { + "epoch": 1.3, + "learning_rate": 7.03674867642479e-06, + "loss": 0.0805, + "step": 16650 + }, + { + "epoch": 1.3, + "learning_rate": 7.028962939894115e-06, + "loss": 0.0785, + "step": 16660 + }, + { + "epoch": 1.3, + "learning_rate": 7.021177203363438e-06, + "loss": 0.0964, + "step": 16670 + }, + { + "epoch": 1.3, + "learning_rate": 7.013391466832763e-06, + "loss": 0.0818, + "step": 16680 + }, + { + "epoch": 1.3, + "learning_rate": 7.005605730302087e-06, + "loss": 0.0897, + "step": 16690 + }, + { + "epoch": 1.3, + "learning_rate": 6.997819993771411e-06, + "loss": 0.0734, + "step": 16700 + }, + { + "epoch": 1.3, + "learning_rate": 6.990034257240735e-06, + "loss": 0.0798, + "step": 16710 + }, + { + "epoch": 1.3, + "learning_rate": 6.98224852071006e-06, + "loss": 0.1029, + "step": 16720 + }, + { + "epoch": 1.3, + "learning_rate": 6.974462784179384e-06, + "loss": 0.0951, + "step": 16730 + }, + { + "epoch": 1.3, + "learning_rate": 6.966677047648708e-06, + "loss": 0.0863, + "step": 16740 + }, + { + "epoch": 1.3, + "learning_rate": 6.958891311118032e-06, + "loss": 0.1014, + "step": 16750 + }, + { + "epoch": 1.3, + "learning_rate": 6.951105574587357e-06, + "loss": 0.0684, + "step": 16760 + }, + { + "epoch": 1.31, + "learning_rate": 6.94331983805668e-06, + "loss": 0.0848, + "step": 16770 + }, + { + "epoch": 1.31, + "learning_rate": 6.935534101526005e-06, + "loss": 0.1228, + "step": 16780 + }, + { + "epoch": 1.31, + "learning_rate": 6.927748364995329e-06, + "loss": 0.0871, + "step": 16790 + }, + { + "epoch": 1.31, + "learning_rate": 6.919962628464654e-06, + "loss": 0.0718, + "step": 16800 + }, + { + "epoch": 1.31, + "learning_rate": 6.912176891933977e-06, + "loss": 0.0939, + "step": 16810 + }, + { + "epoch": 1.31, + "learning_rate": 6.904391155403302e-06, + "loss": 0.0972, + "step": 16820 + }, + { + "epoch": 1.31, + "learning_rate": 6.896605418872626e-06, + "loss": 0.0572, + "step": 16830 + }, + { + "epoch": 1.31, + "learning_rate": 6.888819682341951e-06, + "loss": 0.0753, + "step": 16840 + }, + { + "epoch": 1.31, + "learning_rate": 6.881033945811274e-06, + "loss": 0.0941, + "step": 16850 + }, + { + "epoch": 1.31, + "learning_rate": 6.873248209280599e-06, + "loss": 0.1028, + "step": 16860 + }, + { + "epoch": 1.31, + "learning_rate": 6.865462472749923e-06, + "loss": 0.0869, + "step": 16870 + }, + { + "epoch": 1.31, + "learning_rate": 6.857676736219247e-06, + "loss": 0.0681, + "step": 16880 + }, + { + "epoch": 1.32, + "learning_rate": 6.849890999688571e-06, + "loss": 0.0623, + "step": 16890 + }, + { + "epoch": 1.32, + "learning_rate": 6.842105263157896e-06, + "loss": 0.0851, + "step": 16900 + }, + { + "epoch": 1.32, + "learning_rate": 6.83431952662722e-06, + "loss": 0.0821, + "step": 16910 + }, + { + "epoch": 1.32, + "learning_rate": 6.826533790096544e-06, + "loss": 0.0962, + "step": 16920 + }, + { + "epoch": 1.32, + "learning_rate": 6.818748053565868e-06, + "loss": 0.0576, + "step": 16930 + }, + { + "epoch": 1.32, + "learning_rate": 6.810962317035193e-06, + "loss": 0.0677, + "step": 16940 + }, + { + "epoch": 1.32, + "learning_rate": 6.803176580504515e-06, + "loss": 0.0845, + "step": 16950 + }, + { + "epoch": 1.32, + "learning_rate": 6.795390843973841e-06, + "loss": 0.0849, + "step": 16960 + }, + { + "epoch": 1.32, + "learning_rate": 6.787605107443165e-06, + "loss": 0.0757, + "step": 16970 + }, + { + "epoch": 1.32, + "learning_rate": 6.77981937091249e-06, + "loss": 0.0836, + "step": 16980 + }, + { + "epoch": 1.32, + "learning_rate": 6.772033634381812e-06, + "loss": 0.0912, + "step": 16990 + }, + { + "epoch": 1.32, + "learning_rate": 6.764247897851138e-06, + "loss": 0.1112, + "step": 17000 + }, + { + "epoch": 1.32, + "learning_rate": 6.756462161320462e-06, + "loss": 0.0829, + "step": 17010 + }, + { + "epoch": 1.33, + "learning_rate": 6.748676424789785e-06, + "loss": 0.0699, + "step": 17020 + }, + { + "epoch": 1.33, + "learning_rate": 6.740890688259109e-06, + "loss": 0.0592, + "step": 17030 + }, + { + "epoch": 1.33, + "learning_rate": 6.733104951728434e-06, + "loss": 0.091, + "step": 17040 + }, + { + "epoch": 1.33, + "learning_rate": 6.725319215197759e-06, + "loss": 0.0726, + "step": 17050 + }, + { + "epoch": 1.33, + "learning_rate": 6.717533478667082e-06, + "loss": 0.0809, + "step": 17060 + }, + { + "epoch": 1.33, + "learning_rate": 6.709747742136406e-06, + "loss": 0.1029, + "step": 17070 + }, + { + "epoch": 1.33, + "learning_rate": 6.701962005605731e-06, + "loss": 0.0697, + "step": 17080 + }, + { + "epoch": 1.33, + "learning_rate": 6.694176269075055e-06, + "loss": 0.1216, + "step": 17090 + }, + { + "epoch": 1.33, + "learning_rate": 6.686390532544379e-06, + "loss": 0.0974, + "step": 17100 + }, + { + "epoch": 1.33, + "learning_rate": 6.678604796013703e-06, + "loss": 0.063, + "step": 17110 + }, + { + "epoch": 1.33, + "learning_rate": 6.670819059483028e-06, + "loss": 0.0926, + "step": 17120 + }, + { + "epoch": 1.33, + "learning_rate": 6.663033322952351e-06, + "loss": 0.0712, + "step": 17130 + }, + { + "epoch": 1.33, + "learning_rate": 6.655247586421676e-06, + "loss": 0.076, + "step": 17140 + }, + { + "epoch": 1.34, + "learning_rate": 6.647461849891e-06, + "loss": 0.0821, + "step": 17150 + }, + { + "epoch": 1.34, + "learning_rate": 6.639676113360325e-06, + "loss": 0.0927, + "step": 17160 + }, + { + "epoch": 1.34, + "learning_rate": 6.631890376829648e-06, + "loss": 0.0978, + "step": 17170 + }, + { + "epoch": 1.34, + "learning_rate": 6.624104640298973e-06, + "loss": 0.0637, + "step": 17180 + }, + { + "epoch": 1.34, + "learning_rate": 6.616318903768297e-06, + "loss": 0.1203, + "step": 17190 + }, + { + "epoch": 1.34, + "learning_rate": 6.608533167237621e-06, + "loss": 0.0763, + "step": 17200 + }, + { + "epoch": 1.34, + "learning_rate": 6.600747430706945e-06, + "loss": 0.0763, + "step": 17210 + }, + { + "epoch": 1.34, + "learning_rate": 6.59296169417627e-06, + "loss": 0.0827, + "step": 17220 + }, + { + "epoch": 1.34, + "learning_rate": 6.585175957645594e-06, + "loss": 0.1012, + "step": 17230 + }, + { + "epoch": 1.34, + "learning_rate": 6.577390221114918e-06, + "loss": 0.0608, + "step": 17240 + }, + { + "epoch": 1.34, + "learning_rate": 6.569604484584242e-06, + "loss": 0.0909, + "step": 17250 + }, + { + "epoch": 1.34, + "learning_rate": 6.561818748053567e-06, + "loss": 0.0718, + "step": 17260 + }, + { + "epoch": 1.34, + "learning_rate": 6.55403301152289e-06, + "loss": 0.0828, + "step": 17270 + }, + { + "epoch": 1.35, + "learning_rate": 6.546247274992215e-06, + "loss": 0.0703, + "step": 17280 + }, + { + "epoch": 1.35, + "learning_rate": 6.538461538461539e-06, + "loss": 0.112, + "step": 17290 + }, + { + "epoch": 1.35, + "learning_rate": 6.530675801930864e-06, + "loss": 0.0572, + "step": 17300 + }, + { + "epoch": 1.35, + "learning_rate": 6.522890065400187e-06, + "loss": 0.0934, + "step": 17310 + }, + { + "epoch": 1.35, + "learning_rate": 6.515104328869512e-06, + "loss": 0.0591, + "step": 17320 + }, + { + "epoch": 1.35, + "learning_rate": 6.507318592338836e-06, + "loss": 0.0934, + "step": 17330 + }, + { + "epoch": 1.35, + "learning_rate": 6.4995328558081605e-06, + "loss": 0.092, + "step": 17340 + }, + { + "epoch": 1.35, + "learning_rate": 6.491747119277484e-06, + "loss": 0.0971, + "step": 17350 + }, + { + "epoch": 1.35, + "learning_rate": 6.483961382746809e-06, + "loss": 0.0846, + "step": 17360 + }, + { + "epoch": 1.35, + "learning_rate": 6.476175646216133e-06, + "loss": 0.0944, + "step": 17370 + }, + { + "epoch": 1.35, + "learning_rate": 6.468389909685457e-06, + "loss": 0.0879, + "step": 17380 + }, + { + "epoch": 1.35, + "learning_rate": 6.460604173154781e-06, + "loss": 0.0787, + "step": 17390 + }, + { + "epoch": 1.35, + "learning_rate": 6.4528184366241056e-06, + "loss": 0.0859, + "step": 17400 + }, + { + "epoch": 1.36, + "learning_rate": 6.44503270009343e-06, + "loss": 0.1081, + "step": 17410 + }, + { + "epoch": 1.36, + "learning_rate": 6.437246963562754e-06, + "loss": 0.0907, + "step": 17420 + }, + { + "epoch": 1.36, + "learning_rate": 6.429461227032078e-06, + "loss": 0.0704, + "step": 17430 + }, + { + "epoch": 1.36, + "learning_rate": 6.4216754905014025e-06, + "loss": 0.0748, + "step": 17440 + }, + { + "epoch": 1.36, + "learning_rate": 6.413889753970725e-06, + "loss": 0.0854, + "step": 17450 + }, + { + "epoch": 1.36, + "learning_rate": 6.406104017440051e-06, + "loss": 0.1008, + "step": 17460 + }, + { + "epoch": 1.36, + "learning_rate": 6.398318280909375e-06, + "loss": 0.0992, + "step": 17470 + }, + { + "epoch": 1.36, + "learning_rate": 6.3905325443786995e-06, + "loss": 0.1136, + "step": 17480 + }, + { + "epoch": 1.36, + "learning_rate": 6.382746807848022e-06, + "loss": 0.0753, + "step": 17490 + }, + { + "epoch": 1.36, + "learning_rate": 6.374961071317347e-06, + "loss": 0.0752, + "step": 17500 + }, + { + "epoch": 1.36, + "learning_rate": 6.367175334786672e-06, + "loss": 0.1051, + "step": 17510 + }, + { + "epoch": 1.36, + "learning_rate": 6.359389598255995e-06, + "loss": 0.0922, + "step": 17520 + }, + { + "epoch": 1.36, + "learning_rate": 6.351603861725319e-06, + "loss": 0.1027, + "step": 17530 + }, + { + "epoch": 1.37, + "learning_rate": 6.343818125194644e-06, + "loss": 0.1113, + "step": 17540 + }, + { + "epoch": 1.37, + "learning_rate": 6.336032388663968e-06, + "loss": 0.1094, + "step": 17550 + }, + { + "epoch": 1.37, + "learning_rate": 6.328246652133292e-06, + "loss": 0.0837, + "step": 17560 + }, + { + "epoch": 1.37, + "learning_rate": 6.320460915602616e-06, + "loss": 0.0749, + "step": 17570 + }, + { + "epoch": 1.37, + "learning_rate": 6.312675179071941e-06, + "loss": 0.1025, + "step": 17580 + }, + { + "epoch": 1.37, + "learning_rate": 6.304889442541265e-06, + "loss": 0.0718, + "step": 17590 + }, + { + "epoch": 1.37, + "learning_rate": 6.297103706010589e-06, + "loss": 0.0668, + "step": 17600 + }, + { + "epoch": 1.37, + "learning_rate": 6.289317969479913e-06, + "loss": 0.0659, + "step": 17610 + }, + { + "epoch": 1.37, + "learning_rate": 6.281532232949238e-06, + "loss": 0.096, + "step": 17620 + }, + { + "epoch": 1.37, + "learning_rate": 6.273746496418561e-06, + "loss": 0.0612, + "step": 17630 + }, + { + "epoch": 1.37, + "learning_rate": 6.265960759887886e-06, + "loss": 0.0721, + "step": 17640 + }, + { + "epoch": 1.37, + "learning_rate": 6.25817502335721e-06, + "loss": 0.0804, + "step": 17650 + }, + { + "epoch": 1.37, + "learning_rate": 6.2503892868265345e-06, + "loss": 0.1048, + "step": 17660 + }, + { + "epoch": 1.38, + "learning_rate": 6.242603550295858e-06, + "loss": 0.0688, + "step": 17670 + }, + { + "epoch": 1.38, + "learning_rate": 6.234817813765183e-06, + "loss": 0.072, + "step": 17680 + }, + { + "epoch": 1.38, + "learning_rate": 6.227032077234507e-06, + "loss": 0.0769, + "step": 17690 + }, + { + "epoch": 1.38, + "learning_rate": 6.219246340703831e-06, + "loss": 0.0929, + "step": 17700 + }, + { + "epoch": 1.38, + "learning_rate": 6.211460604173155e-06, + "loss": 0.0908, + "step": 17710 + }, + { + "epoch": 1.38, + "learning_rate": 6.2036748676424796e-06, + "loss": 0.0712, + "step": 17720 + }, + { + "epoch": 1.38, + "learning_rate": 6.195889131111804e-06, + "loss": 0.0786, + "step": 17730 + }, + { + "epoch": 1.38, + "learning_rate": 6.188103394581128e-06, + "loss": 0.1326, + "step": 17740 + }, + { + "epoch": 1.38, + "learning_rate": 6.180317658050452e-06, + "loss": 0.0685, + "step": 17750 + }, + { + "epoch": 1.38, + "learning_rate": 6.1725319215197765e-06, + "loss": 0.0841, + "step": 17760 + }, + { + "epoch": 1.38, + "learning_rate": 6.1647461849891e-06, + "loss": 0.096, + "step": 17770 + }, + { + "epoch": 1.38, + "learning_rate": 6.156960448458425e-06, + "loss": 0.0683, + "step": 17780 + }, + { + "epoch": 1.39, + "learning_rate": 6.149174711927749e-06, + "loss": 0.0958, + "step": 17790 + }, + { + "epoch": 1.39, + "learning_rate": 6.1413889753970735e-06, + "loss": 0.0853, + "step": 17800 + }, + { + "epoch": 1.39, + "learning_rate": 6.133603238866397e-06, + "loss": 0.0882, + "step": 17810 + }, + { + "epoch": 1.39, + "learning_rate": 6.1258175023357215e-06, + "loss": 0.0825, + "step": 17820 + }, + { + "epoch": 1.39, + "learning_rate": 6.118031765805046e-06, + "loss": 0.0723, + "step": 17830 + }, + { + "epoch": 1.39, + "learning_rate": 6.1102460292743704e-06, + "loss": 0.1124, + "step": 17840 + }, + { + "epoch": 1.39, + "learning_rate": 6.102460292743694e-06, + "loss": 0.0929, + "step": 17850 + }, + { + "epoch": 1.39, + "learning_rate": 6.0946745562130185e-06, + "loss": 0.0874, + "step": 17860 + }, + { + "epoch": 1.39, + "learning_rate": 6.086888819682343e-06, + "loss": 0.073, + "step": 17870 + }, + { + "epoch": 1.39, + "learning_rate": 6.0791030831516666e-06, + "loss": 0.079, + "step": 17880 + }, + { + "epoch": 1.39, + "learning_rate": 6.071317346620991e-06, + "loss": 0.0863, + "step": 17890 + }, + { + "epoch": 1.39, + "learning_rate": 6.0635316100903155e-06, + "loss": 0.1066, + "step": 17900 + }, + { + "epoch": 1.39, + "learning_rate": 6.05574587355964e-06, + "loss": 0.1092, + "step": 17910 + }, + { + "epoch": 1.4, + "learning_rate": 6.0479601370289635e-06, + "loss": 0.0614, + "step": 17920 + }, + { + "epoch": 1.4, + "learning_rate": 6.040174400498288e-06, + "loss": 0.0825, + "step": 17930 + }, + { + "epoch": 1.4, + "learning_rate": 6.0323886639676124e-06, + "loss": 0.0989, + "step": 17940 + }, + { + "epoch": 1.4, + "learning_rate": 6.024602927436935e-06, + "loss": 0.0559, + "step": 17950 + }, + { + "epoch": 1.4, + "learning_rate": 6.01681719090626e-06, + "loss": 0.0648, + "step": 17960 + }, + { + "epoch": 1.4, + "learning_rate": 6.009031454375585e-06, + "loss": 0.0726, + "step": 17970 + }, + { + "epoch": 1.4, + "learning_rate": 6.001245717844909e-06, + "loss": 0.0872, + "step": 17980 + }, + { + "epoch": 1.4, + "learning_rate": 5.993459981314232e-06, + "loss": 0.0937, + "step": 17990 + }, + { + "epoch": 1.4, + "learning_rate": 5.985674244783557e-06, + "loss": 0.0985, + "step": 18000 + }, + { + "epoch": 1.4, + "learning_rate": 5.977888508252881e-06, + "loss": 0.0808, + "step": 18010 + }, + { + "epoch": 1.4, + "learning_rate": 5.970102771722205e-06, + "loss": 0.0798, + "step": 18020 + }, + { + "epoch": 1.4, + "learning_rate": 5.962317035191529e-06, + "loss": 0.0668, + "step": 18030 + }, + { + "epoch": 1.4, + "learning_rate": 5.9545312986608536e-06, + "loss": 0.075, + "step": 18040 + }, + { + "epoch": 1.41, + "learning_rate": 5.946745562130178e-06, + "loss": 0.0632, + "step": 18050 + }, + { + "epoch": 1.41, + "learning_rate": 5.938959825599502e-06, + "loss": 0.0637, + "step": 18060 + }, + { + "epoch": 1.41, + "learning_rate": 5.931174089068826e-06, + "loss": 0.0821, + "step": 18070 + }, + { + "epoch": 1.41, + "learning_rate": 5.9233883525381505e-06, + "loss": 0.0734, + "step": 18080 + }, + { + "epoch": 1.41, + "learning_rate": 5.915602616007475e-06, + "loss": 0.0938, + "step": 18090 + }, + { + "epoch": 1.41, + "learning_rate": 5.907816879476799e-06, + "loss": 0.0668, + "step": 18100 + }, + { + "epoch": 1.41, + "learning_rate": 5.900031142946123e-06, + "loss": 0.0949, + "step": 18110 + }, + { + "epoch": 1.41, + "learning_rate": 5.8922454064154475e-06, + "loss": 0.0734, + "step": 18120 + }, + { + "epoch": 1.41, + "learning_rate": 5.884459669884771e-06, + "loss": 0.1135, + "step": 18130 + }, + { + "epoch": 1.41, + "learning_rate": 5.8766739333540955e-06, + "loss": 0.0805, + "step": 18140 + }, + { + "epoch": 1.41, + "learning_rate": 5.86888819682342e-06, + "loss": 0.0736, + "step": 18150 + }, + { + "epoch": 1.41, + "learning_rate": 5.8611024602927444e-06, + "loss": 0.0609, + "step": 18160 + }, + { + "epoch": 1.41, + "learning_rate": 5.853316723762068e-06, + "loss": 0.0842, + "step": 18170 + }, + { + "epoch": 1.42, + "learning_rate": 5.8455309872313925e-06, + "loss": 0.0629, + "step": 18180 + }, + { + "epoch": 1.42, + "learning_rate": 5.837745250700717e-06, + "loss": 0.0642, + "step": 18190 + }, + { + "epoch": 1.42, + "learning_rate": 5.8299595141700406e-06, + "loss": 0.077, + "step": 18200 + }, + { + "epoch": 1.42, + "learning_rate": 5.822173777639365e-06, + "loss": 0.069, + "step": 18210 + }, + { + "epoch": 1.42, + "learning_rate": 5.8143880411086895e-06, + "loss": 0.0733, + "step": 18220 + }, + { + "epoch": 1.42, + "learning_rate": 5.806602304578014e-06, + "loss": 0.0902, + "step": 18230 + }, + { + "epoch": 1.42, + "learning_rate": 5.7988165680473375e-06, + "loss": 0.0662, + "step": 18240 + }, + { + "epoch": 1.42, + "learning_rate": 5.791030831516662e-06, + "loss": 0.0762, + "step": 18250 + }, + { + "epoch": 1.42, + "learning_rate": 5.7832450949859864e-06, + "loss": 0.0728, + "step": 18260 + }, + { + "epoch": 1.42, + "learning_rate": 5.77545935845531e-06, + "loss": 0.0933, + "step": 18270 + }, + { + "epoch": 1.42, + "learning_rate": 5.7676736219246345e-06, + "loss": 0.0765, + "step": 18280 + }, + { + "epoch": 1.42, + "learning_rate": 5.759887885393959e-06, + "loss": 0.0787, + "step": 18290 + }, + { + "epoch": 1.42, + "learning_rate": 5.752102148863283e-06, + "loss": 0.0859, + "step": 18300 + }, + { + "epoch": 1.43, + "learning_rate": 5.744316412332607e-06, + "loss": 0.0879, + "step": 18310 + }, + { + "epoch": 1.43, + "learning_rate": 5.7365306758019315e-06, + "loss": 0.0838, + "step": 18320 + }, + { + "epoch": 1.43, + "learning_rate": 5.728744939271256e-06, + "loss": 0.0876, + "step": 18330 + }, + { + "epoch": 1.43, + "learning_rate": 5.72095920274058e-06, + "loss": 0.0734, + "step": 18340 + }, + { + "epoch": 1.43, + "learning_rate": 5.713173466209904e-06, + "loss": 0.0803, + "step": 18350 + }, + { + "epoch": 1.43, + "learning_rate": 5.705387729679228e-06, + "loss": 0.076, + "step": 18360 + }, + { + "epoch": 1.43, + "learning_rate": 5.697601993148553e-06, + "loss": 0.0562, + "step": 18370 + }, + { + "epoch": 1.43, + "learning_rate": 5.6898162566178765e-06, + "loss": 0.0943, + "step": 18380 + }, + { + "epoch": 1.43, + "learning_rate": 5.682030520087201e-06, + "loss": 0.0991, + "step": 18390 + }, + { + "epoch": 1.43, + "learning_rate": 5.674244783556525e-06, + "loss": 0.0848, + "step": 18400 + }, + { + "epoch": 1.43, + "learning_rate": 5.66645904702585e-06, + "loss": 0.0821, + "step": 18410 + }, + { + "epoch": 1.43, + "learning_rate": 5.658673310495173e-06, + "loss": 0.0894, + "step": 18420 + }, + { + "epoch": 1.43, + "learning_rate": 5.650887573964498e-06, + "loss": 0.0679, + "step": 18430 + }, + { + "epoch": 1.44, + "learning_rate": 5.643101837433822e-06, + "loss": 0.1185, + "step": 18440 + }, + { + "epoch": 1.44, + "learning_rate": 5.635316100903145e-06, + "loss": 0.0814, + "step": 18450 + }, + { + "epoch": 1.44, + "learning_rate": 5.6275303643724695e-06, + "loss": 0.1106, + "step": 18460 + }, + { + "epoch": 1.44, + "learning_rate": 5.619744627841794e-06, + "loss": 0.1136, + "step": 18470 + }, + { + "epoch": 1.44, + "learning_rate": 5.611958891311119e-06, + "loss": 0.0627, + "step": 18480 + }, + { + "epoch": 1.44, + "learning_rate": 5.604173154780442e-06, + "loss": 0.0728, + "step": 18490 + }, + { + "epoch": 1.44, + "learning_rate": 5.5963874182497665e-06, + "loss": 0.0866, + "step": 18500 + }, + { + "epoch": 1.44, + "learning_rate": 5.588601681719091e-06, + "loss": 0.0613, + "step": 18510 + }, + { + "epoch": 1.44, + "learning_rate": 5.5808159451884146e-06, + "loss": 0.0974, + "step": 18520 + }, + { + "epoch": 1.44, + "learning_rate": 5.573030208657739e-06, + "loss": 0.0717, + "step": 18530 + }, + { + "epoch": 1.44, + "learning_rate": 5.5652444721270635e-06, + "loss": 0.0602, + "step": 18540 + }, + { + "epoch": 1.44, + "learning_rate": 5.557458735596388e-06, + "loss": 0.071, + "step": 18550 + }, + { + "epoch": 1.45, + "learning_rate": 5.5496729990657115e-06, + "loss": 0.0881, + "step": 18560 + }, + { + "epoch": 1.45, + "learning_rate": 5.541887262535036e-06, + "loss": 0.0925, + "step": 18570 + }, + { + "epoch": 1.45, + "learning_rate": 5.5341015260043604e-06, + "loss": 0.0794, + "step": 18580 + }, + { + "epoch": 1.45, + "learning_rate": 5.526315789473685e-06, + "loss": 0.06, + "step": 18590 + }, + { + "epoch": 1.45, + "learning_rate": 5.5185300529430085e-06, + "loss": 0.0724, + "step": 18600 + }, + { + "epoch": 1.45, + "learning_rate": 5.510744316412333e-06, + "loss": 0.083, + "step": 18610 + }, + { + "epoch": 1.45, + "learning_rate": 5.502958579881657e-06, + "loss": 0.0701, + "step": 18620 + }, + { + "epoch": 1.45, + "learning_rate": 5.495172843350981e-06, + "loss": 0.0612, + "step": 18630 + }, + { + "epoch": 1.45, + "learning_rate": 5.4873871068203055e-06, + "loss": 0.0599, + "step": 18640 + }, + { + "epoch": 1.45, + "learning_rate": 5.47960137028963e-06, + "loss": 0.108, + "step": 18650 + }, + { + "epoch": 1.45, + "learning_rate": 5.471815633758954e-06, + "loss": 0.0808, + "step": 18660 + }, + { + "epoch": 1.45, + "learning_rate": 5.464029897228278e-06, + "loss": 0.0859, + "step": 18670 + }, + { + "epoch": 1.45, + "learning_rate": 5.456244160697602e-06, + "loss": 0.075, + "step": 18680 + }, + { + "epoch": 1.46, + "learning_rate": 5.448458424166927e-06, + "loss": 0.0868, + "step": 18690 + }, + { + "epoch": 1.46, + "learning_rate": 5.4406726876362505e-06, + "loss": 0.0535, + "step": 18700 + }, + { + "epoch": 1.46, + "learning_rate": 5.432886951105575e-06, + "loss": 0.0705, + "step": 18710 + }, + { + "epoch": 1.46, + "learning_rate": 5.425101214574899e-06, + "loss": 0.0977, + "step": 18720 + }, + { + "epoch": 1.46, + "learning_rate": 5.417315478044224e-06, + "loss": 0.0805, + "step": 18730 + }, + { + "epoch": 1.46, + "learning_rate": 5.4095297415135474e-06, + "loss": 0.0824, + "step": 18740 + }, + { + "epoch": 1.46, + "learning_rate": 5.401744004982872e-06, + "loss": 0.0734, + "step": 18750 + }, + { + "epoch": 1.46, + "learning_rate": 5.393958268452196e-06, + "loss": 0.0822, + "step": 18760 + }, + { + "epoch": 1.46, + "learning_rate": 5.38617253192152e-06, + "loss": 0.0888, + "step": 18770 + }, + { + "epoch": 1.46, + "learning_rate": 5.378386795390844e-06, + "loss": 0.0985, + "step": 18780 + }, + { + "epoch": 1.46, + "learning_rate": 5.370601058860169e-06, + "loss": 0.0934, + "step": 18790 + }, + { + "epoch": 1.46, + "learning_rate": 5.362815322329493e-06, + "loss": 0.0651, + "step": 18800 + }, + { + "epoch": 1.46, + "learning_rate": 5.355029585798817e-06, + "loss": 0.0787, + "step": 18810 + }, + { + "epoch": 1.47, + "learning_rate": 5.347243849268141e-06, + "loss": 0.0637, + "step": 18820 + }, + { + "epoch": 1.47, + "learning_rate": 5.339458112737466e-06, + "loss": 0.0994, + "step": 18830 + }, + { + "epoch": 1.47, + "learning_rate": 5.33167237620679e-06, + "loss": 0.1053, + "step": 18840 + }, + { + "epoch": 1.47, + "learning_rate": 5.323886639676114e-06, + "loss": 0.0963, + "step": 18850 + }, + { + "epoch": 1.47, + "learning_rate": 5.316100903145438e-06, + "loss": 0.0792, + "step": 18860 + }, + { + "epoch": 1.47, + "learning_rate": 5.308315166614763e-06, + "loss": 0.0894, + "step": 18870 + }, + { + "epoch": 1.47, + "learning_rate": 5.3005294300840855e-06, + "loss": 0.0799, + "step": 18880 + }, + { + "epoch": 1.47, + "learning_rate": 5.292743693553411e-06, + "loss": 0.0885, + "step": 18890 + }, + { + "epoch": 1.47, + "learning_rate": 5.284957957022735e-06, + "loss": 0.0689, + "step": 18900 + }, + { + "epoch": 1.47, + "learning_rate": 5.27717222049206e-06, + "loss": 0.0646, + "step": 18910 + }, + { + "epoch": 1.47, + "learning_rate": 5.2693864839613825e-06, + "loss": 0.0984, + "step": 18920 + }, + { + "epoch": 1.47, + "learning_rate": 5.261600747430707e-06, + "loss": 0.0806, + "step": 18930 + }, + { + "epoch": 1.47, + "learning_rate": 5.253815010900032e-06, + "loss": 0.0835, + "step": 18940 + }, + { + "epoch": 1.48, + "learning_rate": 5.246029274369355e-06, + "loss": 0.0663, + "step": 18950 + }, + { + "epoch": 1.48, + "learning_rate": 5.2382435378386795e-06, + "loss": 0.0779, + "step": 18960 + }, + { + "epoch": 1.48, + "learning_rate": 5.230457801308004e-06, + "loss": 0.0765, + "step": 18970 + }, + { + "epoch": 1.48, + "learning_rate": 5.222672064777329e-06, + "loss": 0.0733, + "step": 18980 + }, + { + "epoch": 1.48, + "learning_rate": 5.214886328246652e-06, + "loss": 0.0609, + "step": 18990 + }, + { + "epoch": 1.48, + "learning_rate": 5.207100591715976e-06, + "loss": 0.0666, + "step": 19000 + }, + { + "epoch": 1.48, + "learning_rate": 5.199314855185301e-06, + "loss": 0.0554, + "step": 19010 + }, + { + "epoch": 1.48, + "learning_rate": 5.1915291186546245e-06, + "loss": 0.1113, + "step": 19020 + }, + { + "epoch": 1.48, + "learning_rate": 5.183743382123949e-06, + "loss": 0.0761, + "step": 19030 + }, + { + "epoch": 1.48, + "learning_rate": 5.175957645593273e-06, + "loss": 0.0847, + "step": 19040 + }, + { + "epoch": 1.48, + "learning_rate": 5.168171909062598e-06, + "loss": 0.0709, + "step": 19050 + }, + { + "epoch": 1.48, + "learning_rate": 5.1603861725319214e-06, + "loss": 0.09, + "step": 19060 + }, + { + "epoch": 1.48, + "learning_rate": 5.152600436001246e-06, + "loss": 0.0829, + "step": 19070 + }, + { + "epoch": 1.49, + "learning_rate": 5.14481469947057e-06, + "loss": 0.0785, + "step": 19080 + }, + { + "epoch": 1.49, + "learning_rate": 5.137028962939895e-06, + "loss": 0.083, + "step": 19090 + }, + { + "epoch": 1.49, + "learning_rate": 5.129243226409218e-06, + "loss": 0.0889, + "step": 19100 + }, + { + "epoch": 1.49, + "learning_rate": 5.121457489878543e-06, + "loss": 0.0777, + "step": 19110 + }, + { + "epoch": 1.49, + "learning_rate": 5.113671753347867e-06, + "loss": 0.0712, + "step": 19120 + }, + { + "epoch": 1.49, + "learning_rate": 5.105886016817191e-06, + "loss": 0.0776, + "step": 19130 + }, + { + "epoch": 1.49, + "learning_rate": 5.098100280286515e-06, + "loss": 0.082, + "step": 19140 + }, + { + "epoch": 1.49, + "learning_rate": 5.09031454375584e-06, + "loss": 0.0936, + "step": 19150 + }, + { + "epoch": 1.49, + "learning_rate": 5.082528807225164e-06, + "loss": 0.0829, + "step": 19160 + }, + { + "epoch": 1.49, + "learning_rate": 5.074743070694488e-06, + "loss": 0.0602, + "step": 19170 + }, + { + "epoch": 1.49, + "learning_rate": 5.066957334163812e-06, + "loss": 0.0754, + "step": 19180 + }, + { + "epoch": 1.49, + "learning_rate": 5.059171597633137e-06, + "loss": 0.0742, + "step": 19190 + }, + { + "epoch": 1.49, + "learning_rate": 5.05138586110246e-06, + "loss": 0.0701, + "step": 19200 + }, + { + "epoch": 1.5, + "learning_rate": 5.043600124571785e-06, + "loss": 0.0626, + "step": 19210 + }, + { + "epoch": 1.5, + "learning_rate": 5.035814388041109e-06, + "loss": 0.0862, + "step": 19220 + }, + { + "epoch": 1.5, + "learning_rate": 5.028028651510434e-06, + "loss": 0.0889, + "step": 19230 + }, + { + "epoch": 1.5, + "learning_rate": 5.020242914979757e-06, + "loss": 0.0613, + "step": 19240 + }, + { + "epoch": 1.5, + "learning_rate": 5.012457178449082e-06, + "loss": 0.0709, + "step": 19250 + }, + { + "epoch": 1.5, + "learning_rate": 5.004671441918406e-06, + "loss": 0.0782, + "step": 19260 + }, + { + "epoch": 1.5, + "learning_rate": 4.99688570538773e-06, + "loss": 0.0766, + "step": 19270 + }, + { + "epoch": 1.5, + "learning_rate": 4.989099968857054e-06, + "loss": 0.0827, + "step": 19280 + }, + { + "epoch": 1.5, + "learning_rate": 4.981314232326379e-06, + "loss": 0.0721, + "step": 19290 + }, + { + "epoch": 1.5, + "learning_rate": 4.973528495795702e-06, + "loss": 0.0705, + "step": 19300 + }, + { + "epoch": 1.5, + "learning_rate": 4.965742759265027e-06, + "loss": 0.0664, + "step": 19310 + }, + { + "epoch": 1.5, + "learning_rate": 4.957957022734351e-06, + "loss": 0.0729, + "step": 19320 + }, + { + "epoch": 1.5, + "learning_rate": 4.950171286203676e-06, + "loss": 0.0542, + "step": 19330 + }, + { + "epoch": 1.51, + "learning_rate": 4.942385549672999e-06, + "loss": 0.0768, + "step": 19340 + }, + { + "epoch": 1.51, + "learning_rate": 4.934599813142324e-06, + "loss": 0.0725, + "step": 19350 + }, + { + "epoch": 1.51, + "learning_rate": 4.926814076611648e-06, + "loss": 0.0694, + "step": 19360 + }, + { + "epoch": 1.51, + "learning_rate": 4.919028340080972e-06, + "loss": 0.0858, + "step": 19370 + }, + { + "epoch": 1.51, + "learning_rate": 4.911242603550296e-06, + "loss": 0.072, + "step": 19380 + }, + { + "epoch": 1.51, + "learning_rate": 4.903456867019621e-06, + "loss": 0.0589, + "step": 19390 + }, + { + "epoch": 1.51, + "learning_rate": 4.895671130488945e-06, + "loss": 0.0718, + "step": 19400 + }, + { + "epoch": 1.51, + "learning_rate": 4.887885393958269e-06, + "loss": 0.06, + "step": 19410 + }, + { + "epoch": 1.51, + "learning_rate": 4.880099657427593e-06, + "loss": 0.0718, + "step": 19420 + }, + { + "epoch": 1.51, + "learning_rate": 4.872313920896917e-06, + "loss": 0.0861, + "step": 19430 + }, + { + "epoch": 1.51, + "learning_rate": 4.864528184366241e-06, + "loss": 0.1108, + "step": 19440 + }, + { + "epoch": 1.51, + "learning_rate": 4.856742447835566e-06, + "loss": 0.093, + "step": 19450 + }, + { + "epoch": 1.52, + "learning_rate": 4.848956711304889e-06, + "loss": 0.0642, + "step": 19460 + }, + { + "epoch": 1.52, + "learning_rate": 4.841170974774214e-06, + "loss": 0.06, + "step": 19470 + }, + { + "epoch": 1.52, + "learning_rate": 4.833385238243538e-06, + "loss": 0.0794, + "step": 19480 + }, + { + "epoch": 1.52, + "learning_rate": 4.825599501712863e-06, + "loss": 0.0722, + "step": 19490 + }, + { + "epoch": 1.52, + "learning_rate": 4.817813765182186e-06, + "loss": 0.0821, + "step": 19500 + }, + { + "epoch": 1.52, + "learning_rate": 4.810028028651511e-06, + "loss": 0.0646, + "step": 19510 + }, + { + "epoch": 1.52, + "learning_rate": 4.802242292120835e-06, + "loss": 0.0491, + "step": 19520 + }, + { + "epoch": 1.52, + "learning_rate": 4.794456555590159e-06, + "loss": 0.0593, + "step": 19530 + }, + { + "epoch": 1.52, + "learning_rate": 4.786670819059483e-06, + "loss": 0.0873, + "step": 19540 + }, + { + "epoch": 1.52, + "learning_rate": 4.778885082528808e-06, + "loss": 0.0623, + "step": 19550 + }, + { + "epoch": 1.52, + "learning_rate": 4.771099345998132e-06, + "loss": 0.0547, + "step": 19560 + }, + { + "epoch": 1.52, + "learning_rate": 4.763313609467456e-06, + "loss": 0.0675, + "step": 19570 + }, + { + "epoch": 1.52, + "learning_rate": 4.75552787293678e-06, + "loss": 0.0766, + "step": 19580 + }, + { + "epoch": 1.53, + "learning_rate": 4.747742136406105e-06, + "loss": 0.0863, + "step": 19590 + }, + { + "epoch": 1.53, + "learning_rate": 4.739956399875428e-06, + "loss": 0.0744, + "step": 19600 + }, + { + "epoch": 1.53, + "learning_rate": 4.732170663344753e-06, + "loss": 0.07, + "step": 19610 + }, + { + "epoch": 1.53, + "learning_rate": 4.724384926814077e-06, + "loss": 0.0729, + "step": 19620 + }, + { + "epoch": 1.53, + "learning_rate": 4.716599190283402e-06, + "loss": 0.0503, + "step": 19630 + }, + { + "epoch": 1.53, + "learning_rate": 4.708813453752725e-06, + "loss": 0.1092, + "step": 19640 + }, + { + "epoch": 1.53, + "learning_rate": 4.70102771722205e-06, + "loss": 0.0741, + "step": 19650 + }, + { + "epoch": 1.53, + "learning_rate": 4.693241980691373e-06, + "loss": 0.0693, + "step": 19660 + }, + { + "epoch": 1.53, + "learning_rate": 4.685456244160699e-06, + "loss": 0.0544, + "step": 19670 + }, + { + "epoch": 1.53, + "learning_rate": 4.677670507630022e-06, + "loss": 0.097, + "step": 19680 + }, + { + "epoch": 1.53, + "learning_rate": 4.669884771099346e-06, + "loss": 0.0594, + "step": 19690 + }, + { + "epoch": 1.53, + "learning_rate": 4.66209903456867e-06, + "loss": 0.0591, + "step": 19700 + }, + { + "epoch": 1.53, + "learning_rate": 4.654313298037995e-06, + "loss": 0.0565, + "step": 19710 + }, + { + "epoch": 1.54, + "learning_rate": 4.646527561507319e-06, + "loss": 0.0606, + "step": 19720 + }, + { + "epoch": 1.54, + "learning_rate": 4.638741824976643e-06, + "loss": 0.108, + "step": 19730 + }, + { + "epoch": 1.54, + "learning_rate": 4.630956088445967e-06, + "loss": 0.0692, + "step": 19740 + }, + { + "epoch": 1.54, + "learning_rate": 4.623170351915292e-06, + "loss": 0.065, + "step": 19750 + }, + { + "epoch": 1.54, + "learning_rate": 4.615384615384616e-06, + "loss": 0.0743, + "step": 19760 + }, + { + "epoch": 1.54, + "learning_rate": 4.60759887885394e-06, + "loss": 0.0921, + "step": 19770 + }, + { + "epoch": 1.54, + "learning_rate": 4.599813142323264e-06, + "loss": 0.085, + "step": 19780 + }, + { + "epoch": 1.54, + "learning_rate": 4.592027405792589e-06, + "loss": 0.0714, + "step": 19790 + }, + { + "epoch": 1.54, + "learning_rate": 4.584241669261912e-06, + "loss": 0.0638, + "step": 19800 + }, + { + "epoch": 1.54, + "learning_rate": 4.576455932731237e-06, + "loss": 0.0683, + "step": 19810 + }, + { + "epoch": 1.54, + "learning_rate": 4.568670196200561e-06, + "loss": 0.062, + "step": 19820 + }, + { + "epoch": 1.54, + "learning_rate": 4.560884459669886e-06, + "loss": 0.0723, + "step": 19830 + }, + { + "epoch": 1.54, + "learning_rate": 4.553098723139209e-06, + "loss": 0.0721, + "step": 19840 + }, + { + "epoch": 1.55, + "learning_rate": 4.545312986608534e-06, + "loss": 0.0679, + "step": 19850 + }, + { + "epoch": 1.55, + "learning_rate": 4.537527250077858e-06, + "loss": 0.104, + "step": 19860 + }, + { + "epoch": 1.55, + "learning_rate": 4.529741513547182e-06, + "loss": 0.0858, + "step": 19870 + }, + { + "epoch": 1.55, + "learning_rate": 4.521955777016506e-06, + "loss": 0.1106, + "step": 19880 + }, + { + "epoch": 1.55, + "learning_rate": 4.51417004048583e-06, + "loss": 0.0747, + "step": 19890 + }, + { + "epoch": 1.55, + "learning_rate": 4.506384303955155e-06, + "loss": 0.0624, + "step": 19900 + }, + { + "epoch": 1.55, + "learning_rate": 4.498598567424479e-06, + "loss": 0.0819, + "step": 19910 + }, + { + "epoch": 1.55, + "learning_rate": 4.490812830893803e-06, + "loss": 0.0693, + "step": 19920 + }, + { + "epoch": 1.55, + "learning_rate": 4.483027094363127e-06, + "loss": 0.0673, + "step": 19930 + }, + { + "epoch": 1.55, + "learning_rate": 4.475241357832451e-06, + "loss": 0.0689, + "step": 19940 + }, + { + "epoch": 1.55, + "learning_rate": 4.467455621301776e-06, + "loss": 0.0653, + "step": 19950 + }, + { + "epoch": 1.55, + "learning_rate": 4.459669884771099e-06, + "loss": 0.08, + "step": 19960 + }, + { + "epoch": 1.55, + "learning_rate": 4.451884148240424e-06, + "loss": 0.0639, + "step": 19970 + }, + { + "epoch": 1.56, + "learning_rate": 4.444098411709748e-06, + "loss": 0.0538, + "step": 19980 + }, + { + "epoch": 1.56, + "learning_rate": 4.436312675179073e-06, + "loss": 0.0622, + "step": 19990 + }, + { + "epoch": 1.56, + "learning_rate": 4.428526938648396e-06, + "loss": 0.0897, + "step": 20000 + }, + { + "epoch": 1.56, + "learning_rate": 4.420741202117721e-06, + "loss": 0.0705, + "step": 20010 + }, + { + "epoch": 1.56, + "learning_rate": 4.412955465587045e-06, + "loss": 0.0757, + "step": 20020 + }, + { + "epoch": 1.56, + "learning_rate": 4.405169729056369e-06, + "loss": 0.0572, + "step": 20030 + }, + { + "epoch": 1.56, + "learning_rate": 4.397383992525693e-06, + "loss": 0.0789, + "step": 20040 + }, + { + "epoch": 1.56, + "learning_rate": 4.389598255995018e-06, + "loss": 0.059, + "step": 20050 + }, + { + "epoch": 1.56, + "learning_rate": 4.381812519464342e-06, + "loss": 0.0831, + "step": 20060 + }, + { + "epoch": 1.56, + "learning_rate": 4.374026782933666e-06, + "loss": 0.0875, + "step": 20070 + }, + { + "epoch": 1.56, + "learning_rate": 4.36624104640299e-06, + "loss": 0.0861, + "step": 20080 + }, + { + "epoch": 1.56, + "learning_rate": 4.358455309872315e-06, + "loss": 0.0651, + "step": 20090 + }, + { + "epoch": 1.56, + "learning_rate": 4.350669573341639e-06, + "loss": 0.0564, + "step": 20100 + }, + { + "epoch": 1.57, + "learning_rate": 4.342883836810963e-06, + "loss": 0.0737, + "step": 20110 + }, + { + "epoch": 1.57, + "learning_rate": 4.335098100280286e-06, + "loss": 0.076, + "step": 20120 + }, + { + "epoch": 1.57, + "learning_rate": 4.3273123637496116e-06, + "loss": 0.0597, + "step": 20130 + }, + { + "epoch": 1.57, + "learning_rate": 4.319526627218935e-06, + "loss": 0.1011, + "step": 20140 + }, + { + "epoch": 1.57, + "learning_rate": 4.31174089068826e-06, + "loss": 0.0547, + "step": 20150 + }, + { + "epoch": 1.57, + "learning_rate": 4.303955154157583e-06, + "loss": 0.0597, + "step": 20160 + }, + { + "epoch": 1.57, + "learning_rate": 4.296169417626908e-06, + "loss": 0.0533, + "step": 20170 + }, + { + "epoch": 1.57, + "learning_rate": 4.288383681096232e-06, + "loss": 0.0823, + "step": 20180 + }, + { + "epoch": 1.57, + "learning_rate": 4.280597944565556e-06, + "loss": 0.0749, + "step": 20190 + }, + { + "epoch": 1.57, + "learning_rate": 4.27281220803488e-06, + "loss": 0.0603, + "step": 20200 + }, + { + "epoch": 1.57, + "learning_rate": 4.265026471504205e-06, + "loss": 0.0547, + "step": 20210 + }, + { + "epoch": 1.57, + "learning_rate": 4.257240734973529e-06, + "loss": 0.0781, + "step": 20220 + }, + { + "epoch": 1.58, + "learning_rate": 4.249454998442853e-06, + "loss": 0.0612, + "step": 20230 + }, + { + "epoch": 1.58, + "learning_rate": 4.241669261912177e-06, + "loss": 0.0592, + "step": 20240 + }, + { + "epoch": 1.58, + "learning_rate": 4.233883525381502e-06, + "loss": 0.0739, + "step": 20250 + }, + { + "epoch": 1.58, + "learning_rate": 4.226097788850826e-06, + "loss": 0.074, + "step": 20260 + }, + { + "epoch": 1.58, + "learning_rate": 4.21831205232015e-06, + "loss": 0.0622, + "step": 20270 + }, + { + "epoch": 1.58, + "learning_rate": 4.210526315789474e-06, + "loss": 0.0701, + "step": 20280 + }, + { + "epoch": 1.58, + "learning_rate": 4.2027405792587986e-06, + "loss": 0.0865, + "step": 20290 + }, + { + "epoch": 1.58, + "learning_rate": 4.194954842728122e-06, + "loss": 0.073, + "step": 20300 + }, + { + "epoch": 1.58, + "learning_rate": 4.187169106197447e-06, + "loss": 0.07, + "step": 20310 + }, + { + "epoch": 1.58, + "learning_rate": 4.179383369666771e-06, + "loss": 0.0903, + "step": 20320 + }, + { + "epoch": 1.58, + "learning_rate": 4.1715976331360955e-06, + "loss": 0.0775, + "step": 20330 + }, + { + "epoch": 1.58, + "learning_rate": 4.163811896605419e-06, + "loss": 0.0615, + "step": 20340 + }, + { + "epoch": 1.58, + "learning_rate": 4.156026160074744e-06, + "loss": 0.0602, + "step": 20350 + }, + { + "epoch": 1.59, + "learning_rate": 4.148240423544068e-06, + "loss": 0.0716, + "step": 20360 + }, + { + "epoch": 1.59, + "learning_rate": 4.140454687013392e-06, + "loss": 0.0571, + "step": 20370 + }, + { + "epoch": 1.59, + "learning_rate": 4.132668950482716e-06, + "loss": 0.0653, + "step": 20380 + }, + { + "epoch": 1.59, + "learning_rate": 4.12488321395204e-06, + "loss": 0.0731, + "step": 20390 + }, + { + "epoch": 1.59, + "learning_rate": 4.117097477421364e-06, + "loss": 0.0984, + "step": 20400 + }, + { + "epoch": 1.59, + "learning_rate": 4.109311740890689e-06, + "loss": 0.0659, + "step": 20410 + }, + { + "epoch": 1.59, + "learning_rate": 4.101526004360013e-06, + "loss": 0.0587, + "step": 20420 + }, + { + "epoch": 1.59, + "learning_rate": 4.093740267829337e-06, + "loss": 0.0577, + "step": 20430 + }, + { + "epoch": 1.59, + "learning_rate": 4.085954531298661e-06, + "loss": 0.0841, + "step": 20440 + }, + { + "epoch": 1.59, + "learning_rate": 4.0781687947679856e-06, + "loss": 0.0606, + "step": 20450 + }, + { + "epoch": 1.59, + "learning_rate": 4.070383058237309e-06, + "loss": 0.0598, + "step": 20460 + }, + { + "epoch": 1.59, + "learning_rate": 4.062597321706634e-06, + "loss": 0.066, + "step": 20470 + }, + { + "epoch": 1.59, + "learning_rate": 4.054811585175958e-06, + "loss": 0.0769, + "step": 20480 + }, + { + "epoch": 1.6, + "learning_rate": 4.0470258486452825e-06, + "loss": 0.1022, + "step": 20490 + }, + { + "epoch": 1.6, + "learning_rate": 4.039240112114606e-06, + "loss": 0.0523, + "step": 20500 + }, + { + "epoch": 1.6, + "learning_rate": 4.031454375583931e-06, + "loss": 0.057, + "step": 20510 + }, + { + "epoch": 1.6, + "learning_rate": 4.023668639053255e-06, + "loss": 0.0895, + "step": 20520 + }, + { + "epoch": 1.6, + "learning_rate": 4.015882902522579e-06, + "loss": 0.0652, + "step": 20530 + }, + { + "epoch": 1.6, + "learning_rate": 4.008097165991903e-06, + "loss": 0.0703, + "step": 20540 + }, + { + "epoch": 1.6, + "learning_rate": 4.0003114294612276e-06, + "loss": 0.0669, + "step": 20550 + }, + { + "epoch": 1.6, + "learning_rate": 3.992525692930552e-06, + "loss": 0.0819, + "step": 20560 + }, + { + "epoch": 1.6, + "learning_rate": 3.984739956399876e-06, + "loss": 0.0562, + "step": 20570 + }, + { + "epoch": 1.6, + "learning_rate": 3.9769542198692e-06, + "loss": 0.0699, + "step": 20580 + }, + { + "epoch": 1.6, + "learning_rate": 3.9691684833385245e-06, + "loss": 0.0727, + "step": 20590 + }, + { + "epoch": 1.6, + "learning_rate": 3.961382746807849e-06, + "loss": 0.0525, + "step": 20600 + }, + { + "epoch": 1.6, + "learning_rate": 3.9535970102771726e-06, + "loss": 0.1218, + "step": 20610 + }, + { + "epoch": 1.61, + "learning_rate": 3.945811273746496e-06, + "loss": 0.0535, + "step": 20620 + }, + { + "epoch": 1.61, + "learning_rate": 3.938025537215821e-06, + "loss": 0.0886, + "step": 20630 + }, + { + "epoch": 1.61, + "learning_rate": 3.930239800685145e-06, + "loss": 0.0605, + "step": 20640 + }, + { + "epoch": 1.61, + "learning_rate": 3.9224540641544695e-06, + "loss": 0.0641, + "step": 20650 + }, + { + "epoch": 1.61, + "learning_rate": 3.914668327623793e-06, + "loss": 0.0815, + "step": 20660 + }, + { + "epoch": 1.61, + "learning_rate": 3.906882591093118e-06, + "loss": 0.0974, + "step": 20670 + }, + { + "epoch": 1.61, + "learning_rate": 3.899096854562442e-06, + "loss": 0.0513, + "step": 20680 + }, + { + "epoch": 1.61, + "learning_rate": 3.891311118031766e-06, + "loss": 0.0919, + "step": 20690 + }, + { + "epoch": 1.61, + "learning_rate": 3.88352538150109e-06, + "loss": 0.0689, + "step": 20700 + }, + { + "epoch": 1.61, + "learning_rate": 3.8757396449704146e-06, + "loss": 0.0691, + "step": 20710 + }, + { + "epoch": 1.61, + "learning_rate": 3.867953908439739e-06, + "loss": 0.0676, + "step": 20720 + }, + { + "epoch": 1.61, + "learning_rate": 3.860168171909063e-06, + "loss": 0.0675, + "step": 20730 + }, + { + "epoch": 1.61, + "learning_rate": 3.852382435378387e-06, + "loss": 0.0807, + "step": 20740 + }, + { + "epoch": 1.62, + "learning_rate": 3.8445966988477115e-06, + "loss": 0.0686, + "step": 20750 + }, + { + "epoch": 1.62, + "learning_rate": 3.836810962317036e-06, + "loss": 0.0761, + "step": 20760 + }, + { + "epoch": 1.62, + "learning_rate": 3.8290252257863596e-06, + "loss": 0.0639, + "step": 20770 + }, + { + "epoch": 1.62, + "learning_rate": 3.821239489255684e-06, + "loss": 0.0606, + "step": 20780 + }, + { + "epoch": 1.62, + "learning_rate": 3.813453752725008e-06, + "loss": 0.0722, + "step": 20790 + }, + { + "epoch": 1.62, + "learning_rate": 3.805668016194332e-06, + "loss": 0.0785, + "step": 20800 + }, + { + "epoch": 1.62, + "learning_rate": 3.7978822796636565e-06, + "loss": 0.0789, + "step": 20810 + }, + { + "epoch": 1.62, + "learning_rate": 3.7900965431329806e-06, + "loss": 0.0773, + "step": 20820 + }, + { + "epoch": 1.62, + "learning_rate": 3.782310806602305e-06, + "loss": 0.0637, + "step": 20830 + }, + { + "epoch": 1.62, + "learning_rate": 3.774525070071629e-06, + "loss": 0.0512, + "step": 20840 + }, + { + "epoch": 1.62, + "learning_rate": 3.7667393335409535e-06, + "loss": 0.0735, + "step": 20850 + }, + { + "epoch": 1.62, + "learning_rate": 3.7589535970102775e-06, + "loss": 0.0617, + "step": 20860 + }, + { + "epoch": 1.62, + "learning_rate": 3.7511678604796016e-06, + "loss": 0.0548, + "step": 20870 + }, + { + "epoch": 1.63, + "learning_rate": 3.743382123948926e-06, + "loss": 0.0762, + "step": 20880 + }, + { + "epoch": 1.63, + "learning_rate": 3.73559638741825e-06, + "loss": 0.0502, + "step": 20890 + }, + { + "epoch": 1.63, + "learning_rate": 3.7278106508875745e-06, + "loss": 0.0757, + "step": 20900 + }, + { + "epoch": 1.63, + "learning_rate": 3.7200249143568985e-06, + "loss": 0.0631, + "step": 20910 + }, + { + "epoch": 1.63, + "learning_rate": 3.712239177826223e-06, + "loss": 0.0561, + "step": 20920 + }, + { + "epoch": 1.63, + "learning_rate": 3.704453441295547e-06, + "loss": 0.0517, + "step": 20930 + }, + { + "epoch": 1.63, + "learning_rate": 3.6966677047648706e-06, + "loss": 0.048, + "step": 20940 + }, + { + "epoch": 1.63, + "learning_rate": 3.6888819682341955e-06, + "loss": 0.0684, + "step": 20950 + }, + { + "epoch": 1.63, + "learning_rate": 3.681096231703519e-06, + "loss": 0.0685, + "step": 20960 + }, + { + "epoch": 1.63, + "learning_rate": 3.673310495172844e-06, + "loss": 0.0593, + "step": 20970 + }, + { + "epoch": 1.63, + "learning_rate": 3.6655247586421676e-06, + "loss": 0.0667, + "step": 20980 + }, + { + "epoch": 1.63, + "learning_rate": 3.6577390221114924e-06, + "loss": 0.068, + "step": 20990 + }, + { + "epoch": 1.64, + "learning_rate": 3.649953285580816e-06, + "loss": 0.059, + "step": 21000 + }, + { + "epoch": 1.64, + "learning_rate": 3.6421675490501405e-06, + "loss": 0.0627, + "step": 21010 + }, + { + "epoch": 1.64, + "learning_rate": 3.6343818125194645e-06, + "loss": 0.0708, + "step": 21020 + }, + { + "epoch": 1.64, + "learning_rate": 3.6265960759887886e-06, + "loss": 0.0581, + "step": 21030 + }, + { + "epoch": 1.64, + "learning_rate": 3.618810339458113e-06, + "loss": 0.0885, + "step": 21040 + }, + { + "epoch": 1.64, + "learning_rate": 3.611024602927437e-06, + "loss": 0.0644, + "step": 21050 + }, + { + "epoch": 1.64, + "learning_rate": 3.6032388663967615e-06, + "loss": 0.0666, + "step": 21060 + }, + { + "epoch": 1.64, + "learning_rate": 3.5954531298660855e-06, + "loss": 0.0569, + "step": 21070 + }, + { + "epoch": 1.64, + "learning_rate": 3.58766739333541e-06, + "loss": 0.0582, + "step": 21080 + }, + { + "epoch": 1.64, + "learning_rate": 3.579881656804734e-06, + "loss": 0.0694, + "step": 21090 + }, + { + "epoch": 1.64, + "learning_rate": 3.5720959202740585e-06, + "loss": 0.075, + "step": 21100 + }, + { + "epoch": 1.64, + "learning_rate": 3.5643101837433825e-06, + "loss": 0.0667, + "step": 21110 + }, + { + "epoch": 1.64, + "learning_rate": 3.5565244472127065e-06, + "loss": 0.047, + "step": 21120 + }, + { + "epoch": 1.65, + "learning_rate": 3.548738710682031e-06, + "loss": 0.0575, + "step": 21130 + }, + { + "epoch": 1.65, + "learning_rate": 3.540952974151355e-06, + "loss": 0.0633, + "step": 21140 + }, + { + "epoch": 1.65, + "learning_rate": 3.5331672376206794e-06, + "loss": 0.0762, + "step": 21150 + }, + { + "epoch": 1.65, + "learning_rate": 3.5253815010900035e-06, + "loss": 0.0584, + "step": 21160 + }, + { + "epoch": 1.65, + "learning_rate": 3.517595764559328e-06, + "loss": 0.0767, + "step": 21170 + }, + { + "epoch": 1.65, + "learning_rate": 3.509810028028652e-06, + "loss": 0.0565, + "step": 21180 + }, + { + "epoch": 1.65, + "learning_rate": 3.5020242914979756e-06, + "loss": 0.0597, + "step": 21190 + }, + { + "epoch": 1.65, + "learning_rate": 3.4942385549673004e-06, + "loss": 0.0615, + "step": 21200 + }, + { + "epoch": 1.65, + "learning_rate": 3.486452818436624e-06, + "loss": 0.0759, + "step": 21210 + }, + { + "epoch": 1.65, + "learning_rate": 3.478667081905949e-06, + "loss": 0.0709, + "step": 21220 + }, + { + "epoch": 1.65, + "learning_rate": 3.4708813453752725e-06, + "loss": 0.0482, + "step": 21230 + }, + { + "epoch": 1.65, + "learning_rate": 3.463095608844597e-06, + "loss": 0.0536, + "step": 21240 + }, + { + "epoch": 1.65, + "learning_rate": 3.455309872313921e-06, + "loss": 0.0625, + "step": 21250 + }, + { + "epoch": 1.66, + "learning_rate": 3.4475241357832455e-06, + "loss": 0.0656, + "step": 21260 + }, + { + "epoch": 1.66, + "learning_rate": 3.4397383992525695e-06, + "loss": 0.0585, + "step": 21270 + }, + { + "epoch": 1.66, + "learning_rate": 3.4319526627218935e-06, + "loss": 0.0724, + "step": 21280 + }, + { + "epoch": 1.66, + "learning_rate": 3.424166926191218e-06, + "loss": 0.0817, + "step": 21290 + }, + { + "epoch": 1.66, + "learning_rate": 3.416381189660542e-06, + "loss": 0.0846, + "step": 21300 + }, + { + "epoch": 1.66, + "learning_rate": 3.4085954531298664e-06, + "loss": 0.0793, + "step": 21310 + }, + { + "epoch": 1.66, + "learning_rate": 3.4008097165991905e-06, + "loss": 0.0558, + "step": 21320 + }, + { + "epoch": 1.66, + "learning_rate": 3.393023980068515e-06, + "loss": 0.0817, + "step": 21330 + }, + { + "epoch": 1.66, + "learning_rate": 3.385238243537839e-06, + "loss": 0.0643, + "step": 21340 + }, + { + "epoch": 1.66, + "learning_rate": 3.3774525070071634e-06, + "loss": 0.0625, + "step": 21350 + }, + { + "epoch": 1.66, + "learning_rate": 3.3696667704764874e-06, + "loss": 0.052, + "step": 21360 + }, + { + "epoch": 1.66, + "learning_rate": 3.3618810339458115e-06, + "loss": 0.0811, + "step": 21370 + }, + { + "epoch": 1.66, + "learning_rate": 3.354095297415136e-06, + "loss": 0.0507, + "step": 21380 + }, + { + "epoch": 1.67, + "learning_rate": 3.34630956088446e-06, + "loss": 0.0736, + "step": 21390 + }, + { + "epoch": 1.67, + "learning_rate": 3.3385238243537844e-06, + "loss": 0.0615, + "step": 21400 + }, + { + "epoch": 1.67, + "learning_rate": 3.3307380878231084e-06, + "loss": 0.0487, + "step": 21410 + }, + { + "epoch": 1.67, + "learning_rate": 3.322952351292433e-06, + "loss": 0.0606, + "step": 21420 + }, + { + "epoch": 1.67, + "learning_rate": 3.315166614761757e-06, + "loss": 0.06, + "step": 21430 + }, + { + "epoch": 1.67, + "learning_rate": 3.3073808782310805e-06, + "loss": 0.0644, + "step": 21440 + }, + { + "epoch": 1.67, + "learning_rate": 3.2995951417004054e-06, + "loss": 0.0583, + "step": 21450 + }, + { + "epoch": 1.67, + "learning_rate": 3.291809405169729e-06, + "loss": 0.0763, + "step": 21460 + }, + { + "epoch": 1.67, + "learning_rate": 3.2840236686390534e-06, + "loss": 0.059, + "step": 21470 + }, + { + "epoch": 1.67, + "learning_rate": 3.2762379321083775e-06, + "loss": 0.0718, + "step": 21480 + }, + { + "epoch": 1.67, + "learning_rate": 3.268452195577702e-06, + "loss": 0.0708, + "step": 21490 + }, + { + "epoch": 1.67, + "learning_rate": 3.260666459047026e-06, + "loss": 0.08, + "step": 21500 + }, + { + "epoch": 1.67, + "learning_rate": 3.2528807225163504e-06, + "loss": 0.069, + "step": 21510 + }, + { + "epoch": 1.68, + "learning_rate": 3.2450949859856744e-06, + "loss": 0.0592, + "step": 21520 + }, + { + "epoch": 1.68, + "learning_rate": 3.2373092494549985e-06, + "loss": 0.0589, + "step": 21530 + }, + { + "epoch": 1.68, + "learning_rate": 3.229523512924323e-06, + "loss": 0.0703, + "step": 21540 + }, + { + "epoch": 1.68, + "learning_rate": 3.221737776393647e-06, + "loss": 0.0549, + "step": 21550 + }, + { + "epoch": 1.68, + "learning_rate": 3.2139520398629714e-06, + "loss": 0.0663, + "step": 21560 + }, + { + "epoch": 1.68, + "learning_rate": 3.2061663033322954e-06, + "loss": 0.0913, + "step": 21570 + }, + { + "epoch": 1.68, + "learning_rate": 3.19838056680162e-06, + "loss": 0.0677, + "step": 21580 + }, + { + "epoch": 1.68, + "learning_rate": 3.190594830270944e-06, + "loss": 0.0821, + "step": 21590 + }, + { + "epoch": 1.68, + "learning_rate": 3.1828090937402684e-06, + "loss": 0.0569, + "step": 21600 + }, + { + "epoch": 1.68, + "learning_rate": 3.1750233572095924e-06, + "loss": 0.0686, + "step": 21610 + }, + { + "epoch": 1.68, + "learning_rate": 3.1672376206789164e-06, + "loss": 0.0636, + "step": 21620 + }, + { + "epoch": 1.68, + "learning_rate": 3.159451884148241e-06, + "loss": 0.0526, + "step": 21630 + }, + { + "epoch": 1.68, + "learning_rate": 3.151666147617565e-06, + "loss": 0.0579, + "step": 21640 + }, + { + "epoch": 1.69, + "learning_rate": 3.1438804110868893e-06, + "loss": 0.0526, + "step": 21650 + }, + { + "epoch": 1.69, + "learning_rate": 3.1360946745562134e-06, + "loss": 0.0538, + "step": 21660 + }, + { + "epoch": 1.69, + "learning_rate": 3.128308938025538e-06, + "loss": 0.0636, + "step": 21670 + }, + { + "epoch": 1.69, + "learning_rate": 3.120523201494862e-06, + "loss": 0.0528, + "step": 21680 + }, + { + "epoch": 1.69, + "learning_rate": 3.1127374649641855e-06, + "loss": 0.0524, + "step": 21690 + }, + { + "epoch": 1.69, + "learning_rate": 3.10495172843351e-06, + "loss": 0.0692, + "step": 21700 + }, + { + "epoch": 1.69, + "learning_rate": 3.097165991902834e-06, + "loss": 0.059, + "step": 21710 + }, + { + "epoch": 1.69, + "learning_rate": 3.0893802553721584e-06, + "loss": 0.0491, + "step": 21720 + }, + { + "epoch": 1.69, + "learning_rate": 3.0815945188414824e-06, + "loss": 0.0622, + "step": 21730 + }, + { + "epoch": 1.69, + "learning_rate": 3.073808782310807e-06, + "loss": 0.0707, + "step": 21740 + }, + { + "epoch": 1.69, + "learning_rate": 3.066023045780131e-06, + "loss": 0.0717, + "step": 21750 + }, + { + "epoch": 1.69, + "learning_rate": 3.0582373092494554e-06, + "loss": 0.0597, + "step": 21760 + }, + { + "epoch": 1.69, + "learning_rate": 3.0504515727187794e-06, + "loss": 0.0566, + "step": 21770 + }, + { + "epoch": 1.7, + "learning_rate": 3.0426658361881034e-06, + "loss": 0.0611, + "step": 21780 + }, + { + "epoch": 1.7, + "learning_rate": 3.034880099657428e-06, + "loss": 0.0599, + "step": 21790 + }, + { + "epoch": 1.7, + "learning_rate": 3.027094363126752e-06, + "loss": 0.0624, + "step": 21800 + }, + { + "epoch": 1.7, + "learning_rate": 3.0193086265960763e-06, + "loss": 0.0705, + "step": 21810 + }, + { + "epoch": 1.7, + "learning_rate": 3.0115228900654004e-06, + "loss": 0.0663, + "step": 21820 + }, + { + "epoch": 1.7, + "learning_rate": 3.003737153534725e-06, + "loss": 0.0571, + "step": 21830 + }, + { + "epoch": 1.7, + "learning_rate": 2.995951417004049e-06, + "loss": 0.0521, + "step": 21840 + }, + { + "epoch": 1.7, + "learning_rate": 2.9881656804733733e-06, + "loss": 0.072, + "step": 21850 + }, + { + "epoch": 1.7, + "learning_rate": 2.9803799439426973e-06, + "loss": 0.0653, + "step": 21860 + }, + { + "epoch": 1.7, + "learning_rate": 2.9725942074120214e-06, + "loss": 0.0642, + "step": 21870 + }, + { + "epoch": 1.7, + "learning_rate": 2.964808470881346e-06, + "loss": 0.0493, + "step": 21880 + }, + { + "epoch": 1.7, + "learning_rate": 2.95702273435067e-06, + "loss": 0.0645, + "step": 21890 + }, + { + "epoch": 1.71, + "learning_rate": 2.9492369978199943e-06, + "loss": 0.0505, + "step": 21900 + }, + { + "epoch": 1.71, + "learning_rate": 2.9414512612893183e-06, + "loss": 0.0873, + "step": 21910 + }, + { + "epoch": 1.71, + "learning_rate": 2.9336655247586428e-06, + "loss": 0.0579, + "step": 21920 + }, + { + "epoch": 1.71, + "learning_rate": 2.9258797882279664e-06, + "loss": 0.088, + "step": 21930 + }, + { + "epoch": 1.71, + "learning_rate": 2.9180940516972904e-06, + "loss": 0.0714, + "step": 21940 + }, + { + "epoch": 1.71, + "learning_rate": 2.910308315166615e-06, + "loss": 0.0774, + "step": 21950 + }, + { + "epoch": 1.71, + "learning_rate": 2.902522578635939e-06, + "loss": 0.0496, + "step": 21960 + }, + { + "epoch": 1.71, + "learning_rate": 2.8947368421052634e-06, + "loss": 0.0519, + "step": 21970 + }, + { + "epoch": 1.71, + "learning_rate": 2.8869511055745874e-06, + "loss": 0.0596, + "step": 21980 + }, + { + "epoch": 1.71, + "learning_rate": 2.879165369043912e-06, + "loss": 0.0627, + "step": 21990 + }, + { + "epoch": 1.71, + "learning_rate": 2.871379632513236e-06, + "loss": 0.0714, + "step": 22000 + }, + { + "epoch": 1.71, + "learning_rate": 2.8635938959825603e-06, + "loss": 0.0571, + "step": 22010 + }, + { + "epoch": 1.71, + "learning_rate": 2.8558081594518843e-06, + "loss": 0.0609, + "step": 22020 + }, + { + "epoch": 1.72, + "learning_rate": 2.8480224229212084e-06, + "loss": 0.0658, + "step": 22030 + }, + { + "epoch": 1.72, + "learning_rate": 2.840236686390533e-06, + "loss": 0.0533, + "step": 22040 + }, + { + "epoch": 1.72, + "learning_rate": 2.832450949859857e-06, + "loss": 0.0456, + "step": 22050 + }, + { + "epoch": 1.72, + "learning_rate": 2.8246652133291813e-06, + "loss": 0.06, + "step": 22060 + }, + { + "epoch": 1.72, + "learning_rate": 2.8168794767985053e-06, + "loss": 0.0509, + "step": 22070 + }, + { + "epoch": 1.72, + "learning_rate": 2.8090937402678298e-06, + "loss": 0.0561, + "step": 22080 + }, + { + "epoch": 1.72, + "learning_rate": 2.801308003737154e-06, + "loss": 0.0552, + "step": 22090 + }, + { + "epoch": 1.72, + "learning_rate": 2.7935222672064783e-06, + "loss": 0.0636, + "step": 22100 + }, + { + "epoch": 1.72, + "learning_rate": 2.7857365306758023e-06, + "loss": 0.0457, + "step": 22110 + }, + { + "epoch": 1.72, + "learning_rate": 2.7779507941451263e-06, + "loss": 0.0634, + "step": 22120 + }, + { + "epoch": 1.72, + "learning_rate": 2.7701650576144508e-06, + "loss": 0.0594, + "step": 22130 + }, + { + "epoch": 1.72, + "learning_rate": 2.762379321083775e-06, + "loss": 0.0687, + "step": 22140 + }, + { + "epoch": 1.72, + "learning_rate": 2.7545935845530993e-06, + "loss": 0.0634, + "step": 22150 + }, + { + "epoch": 1.73, + "learning_rate": 2.746807848022423e-06, + "loss": 0.0563, + "step": 22160 + }, + { + "epoch": 1.73, + "learning_rate": 2.7390221114917477e-06, + "loss": 0.0745, + "step": 22170 + }, + { + "epoch": 1.73, + "learning_rate": 2.7312363749610713e-06, + "loss": 0.0735, + "step": 22180 + }, + { + "epoch": 1.73, + "learning_rate": 2.7234506384303954e-06, + "loss": 0.0593, + "step": 22190 + }, + { + "epoch": 1.73, + "learning_rate": 2.71566490189972e-06, + "loss": 0.0509, + "step": 22200 + }, + { + "epoch": 1.73, + "learning_rate": 2.707879165369044e-06, + "loss": 0.0707, + "step": 22210 + }, + { + "epoch": 1.73, + "learning_rate": 2.7000934288383683e-06, + "loss": 0.0653, + "step": 22220 + }, + { + "epoch": 1.73, + "learning_rate": 2.6923076923076923e-06, + "loss": 0.0514, + "step": 22230 + }, + { + "epoch": 1.73, + "learning_rate": 2.6845219557770168e-06, + "loss": 0.0593, + "step": 22240 + }, + { + "epoch": 1.73, + "learning_rate": 2.676736219246341e-06, + "loss": 0.043, + "step": 22250 + }, + { + "epoch": 1.73, + "learning_rate": 2.6689504827156653e-06, + "loss": 0.0839, + "step": 22260 + }, + { + "epoch": 1.73, + "learning_rate": 2.6611647461849893e-06, + "loss": 0.0555, + "step": 22270 + }, + { + "epoch": 1.73, + "learning_rate": 2.6533790096543133e-06, + "loss": 0.0646, + "step": 22280 + }, + { + "epoch": 1.74, + "learning_rate": 2.6455932731236378e-06, + "loss": 0.063, + "step": 22290 + }, + { + "epoch": 1.74, + "learning_rate": 2.637807536592962e-06, + "loss": 0.0461, + "step": 22300 + }, + { + "epoch": 1.74, + "learning_rate": 2.6300218000622863e-06, + "loss": 0.0611, + "step": 22310 + }, + { + "epoch": 1.74, + "learning_rate": 2.6222360635316103e-06, + "loss": 0.0546, + "step": 22320 + }, + { + "epoch": 1.74, + "learning_rate": 2.6144503270009347e-06, + "loss": 0.0607, + "step": 22330 + }, + { + "epoch": 1.74, + "learning_rate": 2.6066645904702588e-06, + "loss": 0.0552, + "step": 22340 + }, + { + "epoch": 1.74, + "learning_rate": 2.5988788539395832e-06, + "loss": 0.0628, + "step": 22350 + }, + { + "epoch": 1.74, + "learning_rate": 2.5910931174089072e-06, + "loss": 0.0504, + "step": 22360 + }, + { + "epoch": 1.74, + "learning_rate": 2.5833073808782313e-06, + "loss": 0.0537, + "step": 22370 + }, + { + "epoch": 1.74, + "learning_rate": 2.5755216443475557e-06, + "loss": 0.0548, + "step": 22380 + }, + { + "epoch": 1.74, + "learning_rate": 2.5677359078168793e-06, + "loss": 0.0621, + "step": 22390 + }, + { + "epoch": 1.74, + "learning_rate": 2.559950171286204e-06, + "loss": 0.0773, + "step": 22400 + }, + { + "epoch": 1.74, + "learning_rate": 2.552164434755528e-06, + "loss": 0.0585, + "step": 22410 + }, + { + "epoch": 1.75, + "learning_rate": 2.5443786982248527e-06, + "loss": 0.0711, + "step": 22420 + }, + { + "epoch": 1.75, + "learning_rate": 2.5365929616941763e-06, + "loss": 0.0469, + "step": 22430 + }, + { + "epoch": 1.75, + "learning_rate": 2.5288072251635003e-06, + "loss": 0.0724, + "step": 22440 + }, + { + "epoch": 1.75, + "learning_rate": 2.5210214886328248e-06, + "loss": 0.0616, + "step": 22450 + }, + { + "epoch": 1.75, + "learning_rate": 2.513235752102149e-06, + "loss": 0.0506, + "step": 22460 + }, + { + "epoch": 1.75, + "learning_rate": 2.5054500155714733e-06, + "loss": 0.0653, + "step": 22470 + }, + { + "epoch": 1.75, + "learning_rate": 2.4976642790407973e-06, + "loss": 0.0543, + "step": 22480 + }, + { + "epoch": 1.75, + "learning_rate": 2.4898785425101217e-06, + "loss": 0.0484, + "step": 22490 + }, + { + "epoch": 1.75, + "learning_rate": 2.4820928059794458e-06, + "loss": 0.0549, + "step": 22500 + }, + { + "epoch": 1.75, + "learning_rate": 2.4743070694487702e-06, + "loss": 0.0575, + "step": 22510 + }, + { + "epoch": 1.75, + "learning_rate": 2.4665213329180942e-06, + "loss": 0.0619, + "step": 22520 + }, + { + "epoch": 1.75, + "learning_rate": 2.4587355963874183e-06, + "loss": 0.0482, + "step": 22530 + }, + { + "epoch": 1.75, + "learning_rate": 2.4509498598567427e-06, + "loss": 0.0511, + "step": 22540 + }, + { + "epoch": 1.76, + "learning_rate": 2.4431641233260668e-06, + "loss": 0.0605, + "step": 22550 + }, + { + "epoch": 1.76, + "learning_rate": 2.435378386795391e-06, + "loss": 0.0646, + "step": 22560 + }, + { + "epoch": 1.76, + "learning_rate": 2.4275926502647152e-06, + "loss": 0.0593, + "step": 22570 + }, + { + "epoch": 1.76, + "learning_rate": 2.4198069137340393e-06, + "loss": 0.0577, + "step": 22580 + }, + { + "epoch": 1.76, + "learning_rate": 2.4120211772033637e-06, + "loss": 0.0608, + "step": 22590 + }, + { + "epoch": 1.76, + "learning_rate": 2.4042354406726877e-06, + "loss": 0.0519, + "step": 22600 + }, + { + "epoch": 1.76, + "learning_rate": 2.396449704142012e-06, + "loss": 0.049, + "step": 22610 + }, + { + "epoch": 1.76, + "learning_rate": 2.3886639676113362e-06, + "loss": 0.0477, + "step": 22620 + }, + { + "epoch": 1.76, + "learning_rate": 2.3808782310806607e-06, + "loss": 0.0564, + "step": 22630 + }, + { + "epoch": 1.76, + "learning_rate": 2.3730924945499847e-06, + "loss": 0.05, + "step": 22640 + }, + { + "epoch": 1.76, + "learning_rate": 2.3653067580193087e-06, + "loss": 0.06, + "step": 22650 + }, + { + "epoch": 1.76, + "learning_rate": 2.3575210214886328e-06, + "loss": 0.0576, + "step": 22660 + }, + { + "epoch": 1.77, + "learning_rate": 2.3497352849579572e-06, + "loss": 0.072, + "step": 22670 + }, + { + "epoch": 1.77, + "learning_rate": 2.3419495484272812e-06, + "loss": 0.0507, + "step": 22680 + }, + { + "epoch": 1.77, + "learning_rate": 2.3341638118966057e-06, + "loss": 0.0557, + "step": 22690 + }, + { + "epoch": 1.77, + "learning_rate": 2.3263780753659297e-06, + "loss": 0.0553, + "step": 22700 + }, + { + "epoch": 1.77, + "learning_rate": 2.318592338835254e-06, + "loss": 0.0493, + "step": 22710 + }, + { + "epoch": 1.77, + "learning_rate": 2.310806602304578e-06, + "loss": 0.0688, + "step": 22720 + }, + { + "epoch": 1.77, + "learning_rate": 2.3030208657739022e-06, + "loss": 0.0801, + "step": 22730 + }, + { + "epoch": 1.77, + "learning_rate": 2.2952351292432267e-06, + "loss": 0.0608, + "step": 22740 + }, + { + "epoch": 1.77, + "learning_rate": 2.2874493927125507e-06, + "loss": 0.0542, + "step": 22750 + }, + { + "epoch": 1.77, + "learning_rate": 2.279663656181875e-06, + "loss": 0.0521, + "step": 22760 + }, + { + "epoch": 1.77, + "learning_rate": 2.271877919651199e-06, + "loss": 0.059, + "step": 22770 + }, + { + "epoch": 1.77, + "learning_rate": 2.2640921831205232e-06, + "loss": 0.0754, + "step": 22780 + }, + { + "epoch": 1.77, + "learning_rate": 2.2563064465898477e-06, + "loss": 0.0577, + "step": 22790 + }, + { + "epoch": 1.78, + "learning_rate": 2.2485207100591717e-06, + "loss": 0.0455, + "step": 22800 + }, + { + "epoch": 1.78, + "learning_rate": 2.240734973528496e-06, + "loss": 0.046, + "step": 22810 + }, + { + "epoch": 1.78, + "learning_rate": 2.23294923699782e-06, + "loss": 0.0575, + "step": 22820 + }, + { + "epoch": 1.78, + "learning_rate": 2.2251635004671442e-06, + "loss": 0.0499, + "step": 22830 + }, + { + "epoch": 1.78, + "learning_rate": 2.2173777639364687e-06, + "loss": 0.0864, + "step": 22840 + }, + { + "epoch": 1.78, + "learning_rate": 2.2095920274057927e-06, + "loss": 0.056, + "step": 22850 + }, + { + "epoch": 1.78, + "learning_rate": 2.201806290875117e-06, + "loss": 0.0756, + "step": 22860 + }, + { + "epoch": 1.78, + "learning_rate": 2.194020554344441e-06, + "loss": 0.049, + "step": 22870 + }, + { + "epoch": 1.78, + "learning_rate": 2.1862348178137656e-06, + "loss": 0.0531, + "step": 22880 + }, + { + "epoch": 1.78, + "learning_rate": 2.1784490812830897e-06, + "loss": 0.0495, + "step": 22890 + }, + { + "epoch": 1.78, + "learning_rate": 2.1706633447524137e-06, + "loss": 0.0526, + "step": 22900 + }, + { + "epoch": 1.78, + "learning_rate": 2.1628776082217377e-06, + "loss": 0.0588, + "step": 22910 + }, + { + "epoch": 1.78, + "learning_rate": 2.155091871691062e-06, + "loss": 0.0513, + "step": 22920 + }, + { + "epoch": 1.79, + "learning_rate": 2.147306135160386e-06, + "loss": 0.0512, + "step": 22930 + }, + { + "epoch": 1.79, + "learning_rate": 2.1395203986297107e-06, + "loss": 0.0594, + "step": 22940 + }, + { + "epoch": 1.79, + "learning_rate": 2.1317346620990347e-06, + "loss": 0.0626, + "step": 22950 + }, + { + "epoch": 1.79, + "learning_rate": 2.123948925568359e-06, + "loss": 0.0845, + "step": 22960 + }, + { + "epoch": 1.79, + "learning_rate": 2.116163189037683e-06, + "loss": 0.0489, + "step": 22970 + }, + { + "epoch": 1.79, + "learning_rate": 2.108377452507007e-06, + "loss": 0.0525, + "step": 22980 + }, + { + "epoch": 1.79, + "learning_rate": 2.1005917159763316e-06, + "loss": 0.0653, + "step": 22990 + }, + { + "epoch": 1.79, + "learning_rate": 2.0928059794456557e-06, + "loss": 0.0467, + "step": 23000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0850202429149797e-06, + "loss": 0.0558, + "step": 23010 + }, + { + "epoch": 1.79, + "learning_rate": 2.077234506384304e-06, + "loss": 0.0544, + "step": 23020 + }, + { + "epoch": 1.79, + "learning_rate": 2.069448769853628e-06, + "loss": 0.0781, + "step": 23030 + }, + { + "epoch": 1.79, + "learning_rate": 2.0616630333229526e-06, + "loss": 0.0646, + "step": 23040 + }, + { + "epoch": 1.79, + "learning_rate": 2.0538772967922767e-06, + "loss": 0.0495, + "step": 23050 + }, + { + "epoch": 1.8, + "learning_rate": 2.046091560261601e-06, + "loss": 0.0606, + "step": 23060 + }, + { + "epoch": 1.8, + "learning_rate": 2.038305823730925e-06, + "loss": 0.0693, + "step": 23070 + }, + { + "epoch": 1.8, + "learning_rate": 2.030520087200249e-06, + "loss": 0.0549, + "step": 23080 + }, + { + "epoch": 1.8, + "learning_rate": 2.0227343506695736e-06, + "loss": 0.0592, + "step": 23090 + }, + { + "epoch": 1.8, + "learning_rate": 2.0149486141388977e-06, + "loss": 0.0567, + "step": 23100 + }, + { + "epoch": 1.8, + "learning_rate": 2.007162877608222e-06, + "loss": 0.0647, + "step": 23110 + }, + { + "epoch": 1.8, + "learning_rate": 1.999377141077546e-06, + "loss": 0.0504, + "step": 23120 + }, + { + "epoch": 1.8, + "learning_rate": 1.9915914045468706e-06, + "loss": 0.0566, + "step": 23130 + }, + { + "epoch": 1.8, + "learning_rate": 1.9838056680161946e-06, + "loss": 0.0621, + "step": 23140 + }, + { + "epoch": 1.8, + "learning_rate": 1.9760199314855186e-06, + "loss": 0.0541, + "step": 23150 + }, + { + "epoch": 1.8, + "learning_rate": 1.9682341949548427e-06, + "loss": 0.0555, + "step": 23160 + }, + { + "epoch": 1.8, + "learning_rate": 1.960448458424167e-06, + "loss": 0.0583, + "step": 23170 + }, + { + "epoch": 1.8, + "learning_rate": 1.952662721893491e-06, + "loss": 0.047, + "step": 23180 + }, + { + "epoch": 1.81, + "learning_rate": 1.9448769853628156e-06, + "loss": 0.0502, + "step": 23190 + }, + { + "epoch": 1.81, + "learning_rate": 1.9370912488321396e-06, + "loss": 0.0535, + "step": 23200 + }, + { + "epoch": 1.81, + "learning_rate": 1.929305512301464e-06, + "loss": 0.0561, + "step": 23210 + }, + { + "epoch": 1.81, + "learning_rate": 1.921519775770788e-06, + "loss": 0.0583, + "step": 23220 + }, + { + "epoch": 1.81, + "learning_rate": 1.913734039240112e-06, + "loss": 0.0512, + "step": 23230 + }, + { + "epoch": 1.81, + "learning_rate": 1.9059483027094364e-06, + "loss": 0.0499, + "step": 23240 + }, + { + "epoch": 1.81, + "learning_rate": 1.8981625661787606e-06, + "loss": 0.0464, + "step": 23250 + }, + { + "epoch": 1.81, + "learning_rate": 1.8903768296480849e-06, + "loss": 0.057, + "step": 23260 + }, + { + "epoch": 1.81, + "learning_rate": 1.8825910931174091e-06, + "loss": 0.0488, + "step": 23270 + }, + { + "epoch": 1.81, + "learning_rate": 1.8748053565867333e-06, + "loss": 0.0562, + "step": 23280 + }, + { + "epoch": 1.81, + "learning_rate": 1.8670196200560576e-06, + "loss": 0.0447, + "step": 23290 + }, + { + "epoch": 1.81, + "learning_rate": 1.8592338835253818e-06, + "loss": 0.0522, + "step": 23300 + }, + { + "epoch": 1.81, + "learning_rate": 1.851448146994706e-06, + "loss": 0.0425, + "step": 23310 + }, + { + "epoch": 1.82, + "learning_rate": 1.8436624104640299e-06, + "loss": 0.0546, + "step": 23320 + }, + { + "epoch": 1.82, + "learning_rate": 1.8358766739333541e-06, + "loss": 0.0551, + "step": 23330 + }, + { + "epoch": 1.82, + "learning_rate": 1.8280909374026784e-06, + "loss": 0.058, + "step": 23340 + }, + { + "epoch": 1.82, + "learning_rate": 1.8203052008720026e-06, + "loss": 0.0714, + "step": 23350 + }, + { + "epoch": 1.82, + "learning_rate": 1.8125194643413268e-06, + "loss": 0.0617, + "step": 23360 + }, + { + "epoch": 1.82, + "learning_rate": 1.804733727810651e-06, + "loss": 0.0618, + "step": 23370 + }, + { + "epoch": 1.82, + "learning_rate": 1.7969479912799753e-06, + "loss": 0.0504, + "step": 23380 + }, + { + "epoch": 1.82, + "learning_rate": 1.7891622547492996e-06, + "loss": 0.0538, + "step": 23390 + }, + { + "epoch": 1.82, + "learning_rate": 1.7813765182186236e-06, + "loss": 0.0561, + "step": 23400 + }, + { + "epoch": 1.82, + "learning_rate": 1.7735907816879478e-06, + "loss": 0.0585, + "step": 23410 + }, + { + "epoch": 1.82, + "learning_rate": 1.7658050451572719e-06, + "loss": 0.0659, + "step": 23420 + }, + { + "epoch": 1.82, + "learning_rate": 1.7580193086265961e-06, + "loss": 0.0485, + "step": 23430 + }, + { + "epoch": 1.82, + "learning_rate": 1.7502335720959204e-06, + "loss": 0.055, + "step": 23440 + }, + { + "epoch": 1.83, + "learning_rate": 1.7424478355652446e-06, + "loss": 0.0522, + "step": 23450 + }, + { + "epoch": 1.83, + "learning_rate": 1.7346620990345688e-06, + "loss": 0.0438, + "step": 23460 + }, + { + "epoch": 1.83, + "learning_rate": 1.726876362503893e-06, + "loss": 0.0501, + "step": 23470 + }, + { + "epoch": 1.83, + "learning_rate": 1.719090625973217e-06, + "loss": 0.0645, + "step": 23480 + }, + { + "epoch": 1.83, + "learning_rate": 1.7113048894425413e-06, + "loss": 0.0577, + "step": 23490 + }, + { + "epoch": 1.83, + "learning_rate": 1.7035191529118656e-06, + "loss": 0.0557, + "step": 23500 + }, + { + "epoch": 1.83, + "learning_rate": 1.6957334163811898e-06, + "loss": 0.0464, + "step": 23510 + }, + { + "epoch": 1.83, + "learning_rate": 1.687947679850514e-06, + "loss": 0.0498, + "step": 23520 + }, + { + "epoch": 1.83, + "learning_rate": 1.6801619433198383e-06, + "loss": 0.0553, + "step": 23530 + }, + { + "epoch": 1.83, + "learning_rate": 1.6723762067891625e-06, + "loss": 0.0639, + "step": 23540 + }, + { + "epoch": 1.83, + "learning_rate": 1.6645904702584868e-06, + "loss": 0.0564, + "step": 23550 + }, + { + "epoch": 1.83, + "learning_rate": 1.656804733727811e-06, + "loss": 0.0635, + "step": 23560 + }, + { + "epoch": 1.84, + "learning_rate": 1.6490189971971348e-06, + "loss": 0.0623, + "step": 23570 + }, + { + "epoch": 1.84, + "learning_rate": 1.641233260666459e-06, + "loss": 0.0545, + "step": 23580 + }, + { + "epoch": 1.84, + "learning_rate": 1.6334475241357833e-06, + "loss": 0.0678, + "step": 23590 + }, + { + "epoch": 1.84, + "learning_rate": 1.6256617876051076e-06, + "loss": 0.0484, + "step": 23600 + }, + { + "epoch": 1.84, + "learning_rate": 1.6178760510744318e-06, + "loss": 0.0545, + "step": 23610 + }, + { + "epoch": 1.84, + "learning_rate": 1.610090314543756e-06, + "loss": 0.0485, + "step": 23620 + }, + { + "epoch": 1.84, + "learning_rate": 1.6023045780130803e-06, + "loss": 0.0558, + "step": 23630 + }, + { + "epoch": 1.84, + "learning_rate": 1.5945188414824045e-06, + "loss": 0.0579, + "step": 23640 + }, + { + "epoch": 1.84, + "learning_rate": 1.5867331049517283e-06, + "loss": 0.0686, + "step": 23650 + }, + { + "epoch": 1.84, + "learning_rate": 1.5789473684210526e-06, + "loss": 0.045, + "step": 23660 + }, + { + "epoch": 1.84, + "learning_rate": 1.5711616318903768e-06, + "loss": 0.0663, + "step": 23670 + }, + { + "epoch": 1.84, + "learning_rate": 1.563375895359701e-06, + "loss": 0.056, + "step": 23680 + }, + { + "epoch": 1.84, + "learning_rate": 1.5555901588290253e-06, + "loss": 0.0638, + "step": 23690 + }, + { + "epoch": 1.85, + "learning_rate": 1.5478044222983495e-06, + "loss": 0.056, + "step": 23700 + }, + { + "epoch": 1.85, + "learning_rate": 1.5400186857676738e-06, + "loss": 0.0558, + "step": 23710 + }, + { + "epoch": 1.85, + "learning_rate": 1.532232949236998e-06, + "loss": 0.0473, + "step": 23720 + }, + { + "epoch": 1.85, + "learning_rate": 1.524447212706322e-06, + "loss": 0.0602, + "step": 23730 + }, + { + "epoch": 1.85, + "learning_rate": 1.5166614761756463e-06, + "loss": 0.0457, + "step": 23740 + }, + { + "epoch": 1.85, + "learning_rate": 1.5088757396449705e-06, + "loss": 0.0562, + "step": 23750 + }, + { + "epoch": 1.85, + "learning_rate": 1.5010900031142948e-06, + "loss": 0.0479, + "step": 23760 + }, + { + "epoch": 1.85, + "learning_rate": 1.493304266583619e-06, + "loss": 0.0469, + "step": 23770 + }, + { + "epoch": 1.85, + "learning_rate": 1.4855185300529433e-06, + "loss": 0.0498, + "step": 23780 + }, + { + "epoch": 1.85, + "learning_rate": 1.4777327935222675e-06, + "loss": 0.0484, + "step": 23790 + }, + { + "epoch": 1.85, + "learning_rate": 1.4699470569915915e-06, + "loss": 0.0557, + "step": 23800 + }, + { + "epoch": 1.85, + "learning_rate": 1.4621613204609158e-06, + "loss": 0.0488, + "step": 23810 + }, + { + "epoch": 1.85, + "learning_rate": 1.4543755839302398e-06, + "loss": 0.0634, + "step": 23820 + }, + { + "epoch": 1.86, + "learning_rate": 1.446589847399564e-06, + "loss": 0.0568, + "step": 23830 + }, + { + "epoch": 1.86, + "learning_rate": 1.4388041108688883e-06, + "loss": 0.0454, + "step": 23840 + }, + { + "epoch": 1.86, + "learning_rate": 1.4310183743382125e-06, + "loss": 0.0615, + "step": 23850 + }, + { + "epoch": 1.86, + "learning_rate": 1.4232326378075368e-06, + "loss": 0.0548, + "step": 23860 + }, + { + "epoch": 1.86, + "learning_rate": 1.415446901276861e-06, + "loss": 0.052, + "step": 23870 + }, + { + "epoch": 1.86, + "learning_rate": 1.4076611647461852e-06, + "loss": 0.0526, + "step": 23880 + }, + { + "epoch": 1.86, + "learning_rate": 1.3998754282155095e-06, + "loss": 0.0594, + "step": 23890 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920896916848333e-06, + "loss": 0.0643, + "step": 23900 + }, + { + "epoch": 1.86, + "learning_rate": 1.3843039551541575e-06, + "loss": 0.0797, + "step": 23910 + }, + { + "epoch": 1.86, + "learning_rate": 1.3765182186234818e-06, + "loss": 0.0487, + "step": 23920 + }, + { + "epoch": 1.86, + "learning_rate": 1.368732482092806e-06, + "loss": 0.0526, + "step": 23930 + }, + { + "epoch": 1.86, + "learning_rate": 1.3609467455621303e-06, + "loss": 0.0528, + "step": 23940 + }, + { + "epoch": 1.86, + "learning_rate": 1.3531610090314545e-06, + "loss": 0.0491, + "step": 23950 + }, + { + "epoch": 1.87, + "learning_rate": 1.3453752725007787e-06, + "loss": 0.0495, + "step": 23960 + }, + { + "epoch": 1.87, + "learning_rate": 1.337589535970103e-06, + "loss": 0.0685, + "step": 23970 + }, + { + "epoch": 1.87, + "learning_rate": 1.329803799439427e-06, + "loss": 0.0624, + "step": 23980 + }, + { + "epoch": 1.87, + "learning_rate": 1.3220180629087512e-06, + "loss": 0.0419, + "step": 23990 + }, + { + "epoch": 1.87, + "learning_rate": 1.3142323263780755e-06, + "loss": 0.0822, + "step": 24000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3064465898473997e-06, + "loss": 0.0649, + "step": 24010 + }, + { + "epoch": 1.87, + "learning_rate": 1.298660853316724e-06, + "loss": 0.0521, + "step": 24020 + }, + { + "epoch": 1.87, + "learning_rate": 1.290875116786048e-06, + "loss": 0.0432, + "step": 24030 + }, + { + "epoch": 1.87, + "learning_rate": 1.2830893802553722e-06, + "loss": 0.0511, + "step": 24040 + }, + { + "epoch": 1.87, + "learning_rate": 1.2753036437246965e-06, + "loss": 0.0467, + "step": 24050 + }, + { + "epoch": 1.87, + "learning_rate": 1.2675179071940207e-06, + "loss": 0.0554, + "step": 24060 + }, + { + "epoch": 1.87, + "learning_rate": 1.2597321706633447e-06, + "loss": 0.0483, + "step": 24070 + }, + { + "epoch": 1.87, + "learning_rate": 1.251946434132669e-06, + "loss": 0.0533, + "step": 24080 + }, + { + "epoch": 1.88, + "learning_rate": 1.2441606976019932e-06, + "loss": 0.0481, + "step": 24090 + }, + { + "epoch": 1.88, + "learning_rate": 1.2363749610713175e-06, + "loss": 0.054, + "step": 24100 + }, + { + "epoch": 1.88, + "learning_rate": 1.2285892245406417e-06, + "loss": 0.0536, + "step": 24110 + }, + { + "epoch": 1.88, + "learning_rate": 1.220803488009966e-06, + "loss": 0.0509, + "step": 24120 + }, + { + "epoch": 1.88, + "learning_rate": 1.21301775147929e-06, + "loss": 0.0462, + "step": 24130 + }, + { + "epoch": 1.88, + "learning_rate": 1.2052320149486142e-06, + "loss": 0.0478, + "step": 24140 + }, + { + "epoch": 1.88, + "learning_rate": 1.1974462784179385e-06, + "loss": 0.0488, + "step": 24150 + }, + { + "epoch": 1.88, + "learning_rate": 1.1896605418872627e-06, + "loss": 0.057, + "step": 24160 + }, + { + "epoch": 1.88, + "learning_rate": 1.1818748053565867e-06, + "loss": 0.0469, + "step": 24170 + }, + { + "epoch": 1.88, + "learning_rate": 1.174089068825911e-06, + "loss": 0.054, + "step": 24180 + }, + { + "epoch": 1.88, + "learning_rate": 1.1663033322952352e-06, + "loss": 0.0527, + "step": 24190 + }, + { + "epoch": 1.88, + "learning_rate": 1.1585175957645595e-06, + "loss": 0.057, + "step": 24200 + }, + { + "epoch": 1.88, + "learning_rate": 1.1507318592338835e-06, + "loss": 0.0508, + "step": 24210 + }, + { + "epoch": 1.89, + "learning_rate": 1.1429461227032077e-06, + "loss": 0.0526, + "step": 24220 + }, + { + "epoch": 1.89, + "learning_rate": 1.135160386172532e-06, + "loss": 0.0578, + "step": 24230 + }, + { + "epoch": 1.89, + "learning_rate": 1.1273746496418562e-06, + "loss": 0.0585, + "step": 24240 + }, + { + "epoch": 1.89, + "learning_rate": 1.1195889131111804e-06, + "loss": 0.0459, + "step": 24250 + }, + { + "epoch": 1.89, + "learning_rate": 1.1118031765805045e-06, + "loss": 0.05, + "step": 24260 + }, + { + "epoch": 1.89, + "learning_rate": 1.1040174400498287e-06, + "loss": 0.0441, + "step": 24270 + }, + { + "epoch": 1.89, + "learning_rate": 1.096231703519153e-06, + "loss": 0.0597, + "step": 24280 + }, + { + "epoch": 1.89, + "learning_rate": 1.0884459669884772e-06, + "loss": 0.0609, + "step": 24290 + }, + { + "epoch": 1.89, + "learning_rate": 1.0806602304578014e-06, + "loss": 0.0493, + "step": 24300 + }, + { + "epoch": 1.89, + "learning_rate": 1.0728744939271257e-06, + "loss": 0.0494, + "step": 24310 + }, + { + "epoch": 1.89, + "learning_rate": 1.06508875739645e-06, + "loss": 0.0537, + "step": 24320 + }, + { + "epoch": 1.89, + "learning_rate": 1.0573030208657742e-06, + "loss": 0.0433, + "step": 24330 + }, + { + "epoch": 1.9, + "learning_rate": 1.0495172843350982e-06, + "loss": 0.052, + "step": 24340 + }, + { + "epoch": 1.9, + "learning_rate": 1.0417315478044224e-06, + "loss": 0.0463, + "step": 24350 + }, + { + "epoch": 1.9, + "learning_rate": 1.0339458112737467e-06, + "loss": 0.0498, + "step": 24360 + }, + { + "epoch": 1.9, + "learning_rate": 1.026160074743071e-06, + "loss": 0.0515, + "step": 24370 + }, + { + "epoch": 1.9, + "learning_rate": 1.018374338212395e-06, + "loss": 0.0493, + "step": 24380 + }, + { + "epoch": 1.9, + "learning_rate": 1.0105886016817192e-06, + "loss": 0.0601, + "step": 24390 + }, + { + "epoch": 1.9, + "learning_rate": 1.0028028651510434e-06, + "loss": 0.0566, + "step": 24400 + }, + { + "epoch": 1.9, + "learning_rate": 9.950171286203677e-07, + "loss": 0.0496, + "step": 24410 + }, + { + "epoch": 1.9, + "learning_rate": 9.872313920896917e-07, + "loss": 0.0473, + "step": 24420 + }, + { + "epoch": 1.9, + "learning_rate": 9.79445655559016e-07, + "loss": 0.0538, + "step": 24430 + }, + { + "epoch": 1.9, + "learning_rate": 9.716599190283402e-07, + "loss": 0.046, + "step": 24440 + }, + { + "epoch": 1.9, + "learning_rate": 9.638741824976644e-07, + "loss": 0.05, + "step": 24450 + }, + { + "epoch": 1.9, + "learning_rate": 9.560884459669884e-07, + "loss": 0.0656, + "step": 24460 + }, + { + "epoch": 1.91, + "learning_rate": 9.483027094363127e-07, + "loss": 0.0433, + "step": 24470 + }, + { + "epoch": 1.91, + "learning_rate": 9.405169729056369e-07, + "loss": 0.0466, + "step": 24480 + }, + { + "epoch": 1.91, + "learning_rate": 9.327312363749612e-07, + "loss": 0.0493, + "step": 24490 + }, + { + "epoch": 1.91, + "learning_rate": 9.249454998442853e-07, + "loss": 0.0453, + "step": 24500 + }, + { + "epoch": 1.91, + "learning_rate": 9.171597633136095e-07, + "loss": 0.0536, + "step": 24510 + }, + { + "epoch": 1.91, + "learning_rate": 9.093740267829338e-07, + "loss": 0.0484, + "step": 24520 + }, + { + "epoch": 1.91, + "learning_rate": 9.01588290252258e-07, + "loss": 0.0514, + "step": 24530 + }, + { + "epoch": 1.91, + "learning_rate": 8.938025537215823e-07, + "loss": 0.0488, + "step": 24540 + }, + { + "epoch": 1.91, + "learning_rate": 8.860168171909063e-07, + "loss": 0.0458, + "step": 24550 + }, + { + "epoch": 1.91, + "learning_rate": 8.782310806602305e-07, + "loss": 0.0555, + "step": 24560 + }, + { + "epoch": 1.91, + "learning_rate": 8.704453441295548e-07, + "loss": 0.0469, + "step": 24570 + }, + { + "epoch": 1.91, + "learning_rate": 8.62659607598879e-07, + "loss": 0.0444, + "step": 24580 + }, + { + "epoch": 1.91, + "learning_rate": 8.54873871068203e-07, + "loss": 0.0475, + "step": 24590 + }, + { + "epoch": 1.92, + "learning_rate": 8.470881345375273e-07, + "loss": 0.0501, + "step": 24600 + }, + { + "epoch": 1.92, + "learning_rate": 8.393023980068515e-07, + "loss": 0.0474, + "step": 24610 + }, + { + "epoch": 1.92, + "learning_rate": 8.315166614761758e-07, + "loss": 0.0438, + "step": 24620 + }, + { + "epoch": 1.92, + "learning_rate": 8.237309249454999e-07, + "loss": 0.0552, + "step": 24630 + }, + { + "epoch": 1.92, + "learning_rate": 8.159451884148241e-07, + "loss": 0.046, + "step": 24640 + }, + { + "epoch": 1.92, + "learning_rate": 8.081594518841484e-07, + "loss": 0.05, + "step": 24650 + }, + { + "epoch": 1.92, + "learning_rate": 8.003737153534725e-07, + "loss": 0.0518, + "step": 24660 + }, + { + "epoch": 1.92, + "learning_rate": 7.925879788227966e-07, + "loss": 0.0492, + "step": 24670 + }, + { + "epoch": 1.92, + "learning_rate": 7.848022422921209e-07, + "loss": 0.0509, + "step": 24680 + }, + { + "epoch": 1.92, + "learning_rate": 7.770165057614451e-07, + "loss": 0.0511, + "step": 24690 + }, + { + "epoch": 1.92, + "learning_rate": 7.692307692307694e-07, + "loss": 0.0505, + "step": 24700 + }, + { + "epoch": 1.92, + "learning_rate": 7.614450327000934e-07, + "loss": 0.043, + "step": 24710 + }, + { + "epoch": 1.92, + "learning_rate": 7.536592961694176e-07, + "loss": 0.0521, + "step": 24720 + }, + { + "epoch": 1.93, + "learning_rate": 7.458735596387419e-07, + "loss": 0.0427, + "step": 24730 + }, + { + "epoch": 1.93, + "learning_rate": 7.380878231080661e-07, + "loss": 0.0721, + "step": 24740 + }, + { + "epoch": 1.93, + "learning_rate": 7.303020865773902e-07, + "loss": 0.0533, + "step": 24750 + }, + { + "epoch": 1.93, + "learning_rate": 7.225163500467145e-07, + "loss": 0.052, + "step": 24760 + }, + { + "epoch": 1.93, + "learning_rate": 7.147306135160387e-07, + "loss": 0.0488, + "step": 24770 + }, + { + "epoch": 1.93, + "learning_rate": 7.069448769853629e-07, + "loss": 0.0465, + "step": 24780 + }, + { + "epoch": 1.93, + "learning_rate": 6.991591404546871e-07, + "loss": 0.0504, + "step": 24790 + }, + { + "epoch": 1.93, + "learning_rate": 6.913734039240112e-07, + "loss": 0.0436, + "step": 24800 + }, + { + "epoch": 1.93, + "learning_rate": 6.835876673933355e-07, + "loss": 0.0696, + "step": 24810 + }, + { + "epoch": 1.93, + "learning_rate": 6.758019308626597e-07, + "loss": 0.0497, + "step": 24820 + }, + { + "epoch": 1.93, + "learning_rate": 6.68016194331984e-07, + "loss": 0.0512, + "step": 24830 + }, + { + "epoch": 1.93, + "learning_rate": 6.60230457801308e-07, + "loss": 0.0539, + "step": 24840 + }, + { + "epoch": 1.93, + "learning_rate": 6.524447212706322e-07, + "loss": 0.0436, + "step": 24850 + }, + { + "epoch": 1.94, + "learning_rate": 6.446589847399565e-07, + "loss": 0.0455, + "step": 24860 + }, + { + "epoch": 1.94, + "learning_rate": 6.368732482092807e-07, + "loss": 0.0483, + "step": 24870 + }, + { + "epoch": 1.94, + "learning_rate": 6.290875116786048e-07, + "loss": 0.0462, + "step": 24880 + }, + { + "epoch": 1.94, + "learning_rate": 6.21301775147929e-07, + "loss": 0.0469, + "step": 24890 + }, + { + "epoch": 1.94, + "learning_rate": 6.135160386172532e-07, + "loss": 0.0565, + "step": 24900 + }, + { + "epoch": 1.94, + "learning_rate": 6.057303020865775e-07, + "loss": 0.0503, + "step": 24910 + }, + { + "epoch": 1.94, + "learning_rate": 5.979445655559017e-07, + "loss": 0.0479, + "step": 24920 + }, + { + "epoch": 1.94, + "learning_rate": 5.901588290252258e-07, + "loss": 0.0553, + "step": 24930 + }, + { + "epoch": 1.94, + "learning_rate": 5.823730924945501e-07, + "loss": 0.05, + "step": 24940 + }, + { + "epoch": 1.94, + "learning_rate": 5.745873559638742e-07, + "loss": 0.0501, + "step": 24950 + }, + { + "epoch": 1.94, + "learning_rate": 5.668016194331984e-07, + "loss": 0.0461, + "step": 24960 + }, + { + "epoch": 1.94, + "learning_rate": 5.590158829025226e-07, + "loss": 0.0465, + "step": 24970 + }, + { + "epoch": 1.94, + "learning_rate": 5.512301463718468e-07, + "loss": 0.0527, + "step": 24980 + }, + { + "epoch": 1.95, + "learning_rate": 5.43444409841171e-07, + "loss": 0.049, + "step": 24990 + }, + { + "epoch": 1.95, + "learning_rate": 5.356586733104952e-07, + "loss": 0.0606, + "step": 25000 + }, + { + "epoch": 1.95, + "learning_rate": 5.278729367798194e-07, + "loss": 0.0497, + "step": 25010 + }, + { + "epoch": 1.95, + "learning_rate": 5.200872002491436e-07, + "loss": 0.046, + "step": 25020 + }, + { + "epoch": 1.95, + "learning_rate": 5.123014637184678e-07, + "loss": 0.0531, + "step": 25030 + }, + { + "epoch": 1.95, + "learning_rate": 5.045157271877921e-07, + "loss": 0.0456, + "step": 25040 + }, + { + "epoch": 1.95, + "learning_rate": 4.967299906571162e-07, + "loss": 0.0539, + "step": 25050 + }, + { + "epoch": 1.95, + "learning_rate": 4.889442541264404e-07, + "loss": 0.0565, + "step": 25060 + }, + { + "epoch": 1.95, + "learning_rate": 4.811585175957647e-07, + "loss": 0.0444, + "step": 25070 + }, + { + "epoch": 1.95, + "learning_rate": 4.733727810650888e-07, + "loss": 0.0467, + "step": 25080 + }, + { + "epoch": 1.95, + "learning_rate": 4.65587044534413e-07, + "loss": 0.0465, + "step": 25090 + }, + { + "epoch": 1.95, + "learning_rate": 4.578013080037372e-07, + "loss": 0.0434, + "step": 25100 + }, + { + "epoch": 1.95, + "learning_rate": 4.500155714730614e-07, + "loss": 0.046, + "step": 25110 + }, + { + "epoch": 1.96, + "learning_rate": 4.4222983494238555e-07, + "loss": 0.0437, + "step": 25120 + }, + { + "epoch": 1.96, + "learning_rate": 4.344440984117098e-07, + "loss": 0.0507, + "step": 25130 + }, + { + "epoch": 1.96, + "learning_rate": 4.26658361881034e-07, + "loss": 0.046, + "step": 25140 + }, + { + "epoch": 1.96, + "learning_rate": 4.1887262535035817e-07, + "loss": 0.0529, + "step": 25150 + }, + { + "epoch": 1.96, + "learning_rate": 4.1108688881968236e-07, + "loss": 0.0452, + "step": 25160 + }, + { + "epoch": 1.96, + "learning_rate": 4.033011522890066e-07, + "loss": 0.0502, + "step": 25170 + }, + { + "epoch": 1.96, + "learning_rate": 3.9551541575833073e-07, + "loss": 0.0535, + "step": 25180 + }, + { + "epoch": 1.96, + "learning_rate": 3.8772967922765497e-07, + "loss": 0.0493, + "step": 25190 + }, + { + "epoch": 1.96, + "learning_rate": 3.7994394269697916e-07, + "loss": 0.0482, + "step": 25200 + }, + { + "epoch": 1.96, + "learning_rate": 3.7215820616630335e-07, + "loss": 0.0464, + "step": 25210 + }, + { + "epoch": 1.96, + "learning_rate": 3.6437246963562754e-07, + "loss": 0.0455, + "step": 25220 + }, + { + "epoch": 1.96, + "learning_rate": 3.565867331049518e-07, + "loss": 0.0492, + "step": 25230 + }, + { + "epoch": 1.97, + "learning_rate": 3.488009965742759e-07, + "loss": 0.0479, + "step": 25240 + }, + { + "epoch": 1.97, + "learning_rate": 3.4101526004360015e-07, + "loss": 0.0455, + "step": 25250 + }, + { + "epoch": 1.97, + "learning_rate": 3.332295235129244e-07, + "loss": 0.0465, + "step": 25260 + }, + { + "epoch": 1.97, + "learning_rate": 3.254437869822485e-07, + "loss": 0.0505, + "step": 25270 + }, + { + "epoch": 1.97, + "learning_rate": 3.1765805045157277e-07, + "loss": 0.0576, + "step": 25280 + }, + { + "epoch": 1.97, + "learning_rate": 3.0987231392089695e-07, + "loss": 0.0439, + "step": 25290 + }, + { + "epoch": 1.97, + "learning_rate": 3.0208657739022114e-07, + "loss": 0.0429, + "step": 25300 + }, + { + "epoch": 1.97, + "learning_rate": 2.9430084085954533e-07, + "loss": 0.0464, + "step": 25310 + }, + { + "epoch": 1.97, + "learning_rate": 2.865151043288695e-07, + "loss": 0.0443, + "step": 25320 + }, + { + "epoch": 1.97, + "learning_rate": 2.787293677981937e-07, + "loss": 0.0475, + "step": 25330 + }, + { + "epoch": 1.97, + "learning_rate": 2.709436312675179e-07, + "loss": 0.0553, + "step": 25340 + }, + { + "epoch": 1.97, + "learning_rate": 2.6315789473684213e-07, + "loss": 0.0485, + "step": 25350 + }, + { + "epoch": 1.97, + "learning_rate": 2.553721582061663e-07, + "loss": 0.0471, + "step": 25360 + }, + { + "epoch": 1.98, + "learning_rate": 2.4758642167549056e-07, + "loss": 0.0511, + "step": 25370 + }, + { + "epoch": 1.98, + "learning_rate": 2.3980068514481475e-07, + "loss": 0.0455, + "step": 25380 + }, + { + "epoch": 1.98, + "learning_rate": 2.3201494861413894e-07, + "loss": 0.0517, + "step": 25390 + }, + { + "epoch": 1.98, + "learning_rate": 2.2422921208346312e-07, + "loss": 0.0553, + "step": 25400 + }, + { + "epoch": 1.98, + "learning_rate": 2.164434755527873e-07, + "loss": 0.0453, + "step": 25410 + }, + { + "epoch": 1.98, + "learning_rate": 2.0865773902211152e-07, + "loss": 0.0576, + "step": 25420 + }, + { + "epoch": 1.98, + "learning_rate": 2.008720024914357e-07, + "loss": 0.0471, + "step": 25430 + }, + { + "epoch": 1.98, + "learning_rate": 1.930862659607599e-07, + "loss": 0.0471, + "step": 25440 + }, + { + "epoch": 1.98, + "learning_rate": 1.8530052943008411e-07, + "loss": 0.05, + "step": 25450 + }, + { + "epoch": 1.98, + "learning_rate": 1.775147928994083e-07, + "loss": 0.0434, + "step": 25460 + }, + { + "epoch": 1.98, + "learning_rate": 1.697290563687325e-07, + "loss": 0.0439, + "step": 25470 + }, + { + "epoch": 1.98, + "learning_rate": 1.619433198380567e-07, + "loss": 0.0468, + "step": 25480 + }, + { + "epoch": 1.98, + "learning_rate": 1.541575833073809e-07, + "loss": 0.0457, + "step": 25490 + }, + { + "epoch": 1.99, + "learning_rate": 1.4637184677670508e-07, + "loss": 0.0457, + "step": 25500 + }, + { + "epoch": 1.99, + "learning_rate": 1.385861102460293e-07, + "loss": 0.0485, + "step": 25510 + }, + { + "epoch": 1.99, + "learning_rate": 1.3080037371535348e-07, + "loss": 0.0418, + "step": 25520 + }, + { + "epoch": 1.99, + "learning_rate": 1.2301463718467767e-07, + "loss": 0.0558, + "step": 25530 + }, + { + "epoch": 1.99, + "learning_rate": 1.1522890065400187e-07, + "loss": 0.0544, + "step": 25540 + }, + { + "epoch": 1.99, + "learning_rate": 1.0744316412332607e-07, + "loss": 0.048, + "step": 25550 + }, + { + "epoch": 1.99, + "learning_rate": 9.965742759265028e-08, + "loss": 0.0439, + "step": 25560 + }, + { + "epoch": 1.99, + "learning_rate": 9.187169106197447e-08, + "loss": 0.0493, + "step": 25570 + }, + { + "epoch": 1.99, + "learning_rate": 8.408595453129867e-08, + "loss": 0.0447, + "step": 25580 + }, + { + "epoch": 1.99, + "learning_rate": 7.630021800062287e-08, + "loss": 0.041, + "step": 25590 + }, + { + "epoch": 1.99, + "learning_rate": 6.851448146994706e-08, + "loss": 0.047, + "step": 25600 + }, + { + "epoch": 1.99, + "learning_rate": 6.072874493927126e-08, + "loss": 0.0484, + "step": 25610 + }, + { + "epoch": 1.99, + "learning_rate": 5.2943008408595454e-08, + "loss": 0.0442, + "step": 25620 + }, + { + "epoch": 2.0, + "learning_rate": 4.5157271877919655e-08, + "loss": 0.0501, + "step": 25630 + }, + { + "epoch": 2.0, + "learning_rate": 3.7371535347243856e-08, + "loss": 0.0449, + "step": 25640 + }, + { + "epoch": 2.0, + "learning_rate": 2.958579881656805e-08, + "loss": 0.0474, + "step": 25650 + }, + { + "epoch": 2.0, + "learning_rate": 2.1800062285892244e-08, + "loss": 0.0452, + "step": 25660 + }, + { + "epoch": 2.0, + "learning_rate": 1.4014325755216445e-08, + "loss": 0.0536, + "step": 25670 + }, + { + "epoch": 2.0, + "learning_rate": 6.228589224540643e-09, + "loss": 0.0451, + "step": 25680 + } + ], + "max_steps": 25688, + "num_train_epochs": 2, + "total_flos": 1.9075085981712384e+17, + "trial_name": null, + "trial_params": null +}