|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 3000, |
|
"global_step": 5400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019962962962962963, |
|
"loss": 2.1858, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019925925925925927, |
|
"loss": 2.0674, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001988888888888889, |
|
"loss": 1.8298, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019851851851851853, |
|
"loss": 1.6136, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019814814814814814, |
|
"loss": 1.5421, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019777777777777778, |
|
"loss": 1.3979, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019740740740740743, |
|
"loss": 1.3041, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019703703703703704, |
|
"loss": 1.2235, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019666666666666666, |
|
"loss": 1.3809, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001962962962962963, |
|
"loss": 1.2073, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019592592592592594, |
|
"loss": 1.114, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019555555555555556, |
|
"loss": 1.1675, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001951851851851852, |
|
"loss": 1.1497, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019481481481481482, |
|
"loss": 1.2117, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019444444444444446, |
|
"loss": 0.9834, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00019407407407407408, |
|
"loss": 1.014, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00019370370370370372, |
|
"loss": 0.9551, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00019333333333333333, |
|
"loss": 0.99, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00019296296296296298, |
|
"loss": 0.9756, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001925925925925926, |
|
"loss": 1.0463, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00019222222222222224, |
|
"loss": 1.0912, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00019185185185185185, |
|
"loss": 0.9984, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001914814814814815, |
|
"loss": 0.8911, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00019111111111111114, |
|
"loss": 0.7515, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00019074074074074075, |
|
"loss": 1.1003, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00019037037037037037, |
|
"loss": 0.9923, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00019, |
|
"loss": 0.8772, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00018962962962962965, |
|
"loss": 0.6926, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00018925925925925927, |
|
"loss": 0.7489, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018888888888888888, |
|
"loss": 0.6285, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018851851851851853, |
|
"loss": 0.6881, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001881851851851852, |
|
"loss": 0.6729, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001878148148148148, |
|
"loss": 0.642, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00018744444444444445, |
|
"loss": 0.6423, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001870740740740741, |
|
"loss": 0.72, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001867037037037037, |
|
"loss": 0.7628, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00018633333333333333, |
|
"loss": 0.7165, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00018596296296296297, |
|
"loss": 0.6817, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001855925925925926, |
|
"loss": 0.6094, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00018522222222222223, |
|
"loss": 0.662, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00018485185185185184, |
|
"loss": 0.594, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00018448148148148149, |
|
"loss": 0.7857, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00018411111111111113, |
|
"loss": 0.4482, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00018374074074074074, |
|
"loss": 0.5306, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00018337037037037036, |
|
"loss": 0.5849, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.000183, |
|
"loss": 0.848, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00018262962962962965, |
|
"loss": 0.4931, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00018225925925925926, |
|
"loss": 0.4542, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0001818888888888889, |
|
"loss": 0.5918, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00018151851851851852, |
|
"loss": 0.5543, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00018114814814814816, |
|
"loss": 0.6295, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00018077777777777778, |
|
"loss": 0.6341, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00018040740740740742, |
|
"loss": 0.5688, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00018003703703703704, |
|
"loss": 0.646, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00017966666666666668, |
|
"loss": 0.3895, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0001792962962962963, |
|
"loss": 0.4572, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00017892592592592594, |
|
"loss": 0.3104, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00017855555555555555, |
|
"loss": 0.366, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0001781851851851852, |
|
"loss": 0.3954, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00017781481481481484, |
|
"loss": 0.3588, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00017744444444444445, |
|
"loss": 0.3124, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00017707407407407407, |
|
"loss": 0.4456, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001767037037037037, |
|
"loss": 0.3196, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00017633333333333335, |
|
"loss": 0.3449, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00017596296296296297, |
|
"loss": 0.4789, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00017559259259259259, |
|
"loss": 0.3636, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00017522222222222223, |
|
"loss": 0.4037, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00017485185185185187, |
|
"loss": 0.3514, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0001744814814814815, |
|
"loss": 0.3531, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0001741111111111111, |
|
"loss": 0.3864, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00017374074074074077, |
|
"loss": 0.3057, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0001733703703703704, |
|
"loss": 0.3689, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.000173, |
|
"loss": 0.3429, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00017262962962962962, |
|
"loss": 0.4835, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0001722592592592593, |
|
"loss": 0.584, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.0001718888888888889, |
|
"loss": 0.4278, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.00017151851851851852, |
|
"loss": 0.3338, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00017114814814814814, |
|
"loss": 0.3246, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00017077777777777778, |
|
"loss": 0.4246, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00017040740740740742, |
|
"loss": 0.5964, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00017003703703703704, |
|
"loss": 0.5198, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00016966666666666668, |
|
"loss": 0.4489, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.0001692962962962963, |
|
"loss": 0.204, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.00016892592592592594, |
|
"loss": 0.2281, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.00016855555555555555, |
|
"loss": 0.2293, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0001681851851851852, |
|
"loss": 0.1439, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0001678148148148148, |
|
"loss": 0.1872, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00016744444444444445, |
|
"loss": 0.2705, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00016707407407407407, |
|
"loss": 0.2329, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0001667037037037037, |
|
"loss": 0.349, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00016633333333333333, |
|
"loss": 0.2587, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.00016596296296296297, |
|
"loss": 0.1563, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00016559259259259261, |
|
"loss": 0.1608, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.00016522222222222223, |
|
"loss": 0.264, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00016485185185185185, |
|
"loss": 0.2586, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0001644814814814815, |
|
"loss": 0.2584, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00016411111111111113, |
|
"loss": 0.1729, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00016374074074074075, |
|
"loss": 0.3161, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00016337037037037036, |
|
"loss": 0.1579, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.000163, |
|
"loss": 0.159, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.00016262962962962965, |
|
"loss": 0.2138, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00016225925925925926, |
|
"loss": 0.2743, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.00016188888888888888, |
|
"loss": 0.2057, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.00016151851851851855, |
|
"loss": 0.2297, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.00016114814814814816, |
|
"loss": 0.3396, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.00016077777777777778, |
|
"loss": 0.2853, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0001604074074074074, |
|
"loss": 0.2184, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00016003703703703707, |
|
"loss": 0.2157, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.00015966666666666668, |
|
"loss": 0.1986, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0001592962962962963, |
|
"loss": 0.1425, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0001589259259259259, |
|
"loss": 0.1571, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00015855555555555558, |
|
"loss": 0.1702, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0001581851851851852, |
|
"loss": 0.0456, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0001578148148148148, |
|
"loss": 0.1112, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00015744444444444446, |
|
"loss": 0.0581, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00015707407407407407, |
|
"loss": 0.161, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00015670370370370371, |
|
"loss": 0.2081, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00015633333333333333, |
|
"loss": 0.1118, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00015596296296296297, |
|
"loss": 0.1484, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0001555925925925926, |
|
"loss": 0.1166, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00015522222222222223, |
|
"loss": 0.162, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.00015485185185185187, |
|
"loss": 0.1473, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.0001544814814814815, |
|
"loss": 0.1251, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0001541111111111111, |
|
"loss": 0.1109, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00015374074074074075, |
|
"loss": 0.1666, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.0001533703703703704, |
|
"loss": 0.1386, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.000153, |
|
"loss": 0.0548, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.00015262962962962962, |
|
"loss": 0.1446, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00015225925925925926, |
|
"loss": 0.3583, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0001518888888888889, |
|
"loss": 0.3026, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00015151851851851852, |
|
"loss": 0.2623, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.00015114814814814814, |
|
"loss": 0.2197, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.0001507777777777778, |
|
"loss": 0.1991, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00015040740740740742, |
|
"loss": 0.1439, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00015003703703703704, |
|
"loss": 0.1856, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00014966666666666665, |
|
"loss": 0.0815, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00014929629629629632, |
|
"loss": 0.1795, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.00014892592592592594, |
|
"loss": 0.0757, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.00014855555555555556, |
|
"loss": 0.037, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00014818518518518517, |
|
"loss": 0.0464, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00014781481481481484, |
|
"loss": 0.0151, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00014744444444444446, |
|
"loss": 0.0441, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00014707407407407407, |
|
"loss": 0.0703, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00014670370370370371, |
|
"loss": 0.1126, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.00014633333333333336, |
|
"loss": 0.066, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00014596296296296297, |
|
"loss": 0.1397, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0001455925925925926, |
|
"loss": 0.0641, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00014522222222222223, |
|
"loss": 0.1555, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.00014485185185185187, |
|
"loss": 0.164, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001444814814814815, |
|
"loss": 0.073, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0001441111111111111, |
|
"loss": 0.1236, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00014374074074074075, |
|
"loss": 0.0639, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00014337037037037036, |
|
"loss": 0.1367, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.000143, |
|
"loss": 0.0772, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00014262962962962965, |
|
"loss": 0.0283, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00014225925925925926, |
|
"loss": 0.0523, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.00014188888888888888, |
|
"loss": 0.0502, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.00014151851851851852, |
|
"loss": 0.1211, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00014114814814814817, |
|
"loss": 0.0252, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00014077777777777778, |
|
"loss": 0.1756, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.0001404074074074074, |
|
"loss": 0.1343, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00014003703703703704, |
|
"loss": 0.111, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00013966666666666668, |
|
"loss": 0.0292, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.0001392962962962963, |
|
"loss": 0.033, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00013892592592592591, |
|
"loss": 0.0446, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00013855555555555558, |
|
"loss": 0.0272, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.0001381851851851852, |
|
"loss": 0.0359, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00013781481481481481, |
|
"loss": 0.0102, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00013744444444444443, |
|
"loss": 0.0641, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0001370740740740741, |
|
"loss": 0.0454, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00013670370370370372, |
|
"loss": 0.0134, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00013633333333333333, |
|
"loss": 0.142, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00013596296296296295, |
|
"loss": 0.0604, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00013559259259259262, |
|
"loss": 0.0709, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00013522222222222223, |
|
"loss": 0.0063, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00013485185185185185, |
|
"loss": 0.0101, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0001344814814814815, |
|
"loss": 0.0382, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.00013411111111111113, |
|
"loss": 0.0049, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00013374074074074075, |
|
"loss": 0.0952, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00013337037037037036, |
|
"loss": 0.049, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.000133, |
|
"loss": 0.1004, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.00013262962962962965, |
|
"loss": 0.0295, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00013225925925925927, |
|
"loss": 0.0158, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.0001318888888888889, |
|
"loss": 0.0157, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 0.00013151851851851852, |
|
"loss": 0.1002, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.00013114814814814817, |
|
"loss": 0.1301, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.00013077777777777778, |
|
"loss": 0.0851, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00013040740740740742, |
|
"loss": 0.0822, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00013003703703703704, |
|
"loss": 0.0388, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00012966666666666666, |
|
"loss": 0.0098, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0001292962962962963, |
|
"loss": 0.0563, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00012892592592592594, |
|
"loss": 0.0362, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00012855555555555556, |
|
"loss": 0.0491, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.00012818518518518517, |
|
"loss": 0.0435, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00012781481481481484, |
|
"loss": 0.0735, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00012744444444444446, |
|
"loss": 0.0654, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00012707407407407407, |
|
"loss": 0.0175, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0001267037037037037, |
|
"loss": 0.1319, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00012633333333333336, |
|
"loss": 0.0109, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.00012596296296296297, |
|
"loss": 0.018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.0001255925925925926, |
|
"loss": 0.0495, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.0001252222222222222, |
|
"loss": 0.0214, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00012485185185185188, |
|
"loss": 0.0412, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0001244814814814815, |
|
"loss": 0.0253, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.0001241111111111111, |
|
"loss": 0.0485, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.00012374074074074075, |
|
"loss": 0.0292, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.0001233703703703704, |
|
"loss": 0.0554, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.000123, |
|
"loss": 0.0476, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00012262962962962962, |
|
"loss": 0.0079, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00012225925925925927, |
|
"loss": 0.0417, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0001218888888888889, |
|
"loss": 0.0397, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.00012151851851851852, |
|
"loss": 0.08, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.00012114814814814814, |
|
"loss": 0.0065, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.00012077777777777778, |
|
"loss": 0.0511, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00012040740740740741, |
|
"loss": 0.1212, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00012003703703703704, |
|
"loss": 0.0828, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.00011966666666666668, |
|
"loss": 0.0674, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.0001192962962962963, |
|
"loss": 0.0968, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.00011892592592592593, |
|
"loss": 0.0061, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.00011855555555555556, |
|
"loss": 0.0225, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.0001181851851851852, |
|
"loss": 0.0875, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.00011781481481481482, |
|
"loss": 0.0385, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.00011744444444444445, |
|
"loss": 0.0058, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.00011707407407407407, |
|
"loss": 0.011, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00011670370370370372, |
|
"loss": 0.0127, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00011633333333333333, |
|
"loss": 0.0293, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.00011596296296296296, |
|
"loss": 0.0221, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.0001155925925925926, |
|
"loss": 0.0031, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.00011522222222222223, |
|
"loss": 0.0393, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 0.00011485185185185185, |
|
"loss": 0.0041, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 0.00011448148148148148, |
|
"loss": 0.0314, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.00011411111111111112, |
|
"loss": 0.0586, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.00011374074074074075, |
|
"loss": 0.0168, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.00011337037037037037, |
|
"loss": 0.0052, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.000113, |
|
"loss": 0.0057, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.00011262962962962964, |
|
"loss": 0.1525, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.00011225925925925927, |
|
"loss": 0.0104, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.00011188888888888888, |
|
"loss": 0.0588, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.00011151851851851854, |
|
"loss": 0.0024, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.00011114814814814815, |
|
"loss": 0.0084, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 0.00011077777777777778, |
|
"loss": 0.0052, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.0001104074074074074, |
|
"loss": 0.0303, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.00011003703703703706, |
|
"loss": 0.013, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.00010966666666666667, |
|
"loss": 0.0294, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 0.0001092962962962963, |
|
"loss": 0.0257, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.00010892592592592592, |
|
"loss": 0.002, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.00010855555555555557, |
|
"loss": 0.0087, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.00010818518518518519, |
|
"loss": 0.0082, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.00010781481481481482, |
|
"loss": 0.0066, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.00010744444444444446, |
|
"loss": 0.0604, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.00010707407407407408, |
|
"loss": 0.0146, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.0001067037037037037, |
|
"loss": 0.0829, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.00010633333333333333, |
|
"loss": 0.0169, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.00010596296296296298, |
|
"loss": 0.0252, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.00010559259259259259, |
|
"loss": 0.0368, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 0.00010522222222222222, |
|
"loss": 0.0042, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00010485185185185186, |
|
"loss": 0.0369, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.0001044814814814815, |
|
"loss": 0.0191, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 0.00010411111111111111, |
|
"loss": 0.0035, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.00010374074074074074, |
|
"loss": 0.0121, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.00010337037037037038, |
|
"loss": 0.0019, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.00010300000000000001, |
|
"loss": 0.0376, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.00010262962962962963, |
|
"loss": 0.0098, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.00010225925925925925, |
|
"loss": 0.0348, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 0.0001018888888888889, |
|
"loss": 0.0035, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.00010151851851851853, |
|
"loss": 0.0016, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.00010114814814814814, |
|
"loss": 0.002, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 0.0001007777777777778, |
|
"loss": 0.0015, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.00010040740740740741, |
|
"loss": 0.0017, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00010003703703703704, |
|
"loss": 0.0491, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 9.966666666666667e-05, |
|
"loss": 0.0025, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 9.92962962962963e-05, |
|
"loss": 0.0024, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 9.892592592592593e-05, |
|
"loss": 0.0198, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 9.855555555555556e-05, |
|
"loss": 0.0036, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 9.818518518518519e-05, |
|
"loss": 0.0018, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 9.781481481481482e-05, |
|
"loss": 0.002, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 9.744444444444445e-05, |
|
"loss": 0.0051, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 9.707407407407409e-05, |
|
"loss": 0.0015, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 9.67037037037037e-05, |
|
"loss": 0.0082, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 9.633333333333335e-05, |
|
"loss": 0.0134, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 9.596296296296296e-05, |
|
"loss": 0.0016, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 9.55925925925926e-05, |
|
"loss": 0.0025, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 9.522222222222222e-05, |
|
"loss": 0.0018, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 9.485185185185187e-05, |
|
"loss": 0.0017, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 9.448148148148148e-05, |
|
"loss": 0.0014, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 9.411111111111111e-05, |
|
"loss": 0.0013, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 9.374074074074074e-05, |
|
"loss": 0.003, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 9.34074074074074e-05, |
|
"loss": 0.015, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 9.303703703703705e-05, |
|
"loss": 0.0303, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 9.266666666666666e-05, |
|
"loss": 0.0267, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 9.229629629629631e-05, |
|
"loss": 0.0078, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 9.192592592592592e-05, |
|
"loss": 0.0054, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 9.155555555555557e-05, |
|
"loss": 0.0102, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 9.118518518518518e-05, |
|
"loss": 0.0014, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 9.081481481481482e-05, |
|
"loss": 0.0039, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 9.044444444444445e-05, |
|
"loss": 0.0015, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.007407407407408e-05, |
|
"loss": 0.1549, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 8.970370370370371e-05, |
|
"loss": 0.0015, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 8.933333333333334e-05, |
|
"loss": 0.0504, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 8.896296296296297e-05, |
|
"loss": 0.0087, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"eval_accuracy": 0.8972222222222223, |
|
"eval_loss": 0.5567966103553772, |
|
"eval_runtime": 12.8044, |
|
"eval_samples_per_second": 84.346, |
|
"eval_steps_per_second": 10.543, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 8.85925925925926e-05, |
|
"loss": 0.0011, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 8.822222222222223e-05, |
|
"loss": 0.0015, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 8.785185185185186e-05, |
|
"loss": 0.0012, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 8.748148148148149e-05, |
|
"loss": 0.0011, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 8.711111111111112e-05, |
|
"loss": 0.0129, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 8.674074074074074e-05, |
|
"loss": 0.0011, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 8.637037037037037e-05, |
|
"loss": 0.0048, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 8.6e-05, |
|
"loss": 0.0012, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 8.562962962962963e-05, |
|
"loss": 0.0103, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 8.525925925925926e-05, |
|
"loss": 0.0457, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 8.488888888888889e-05, |
|
"loss": 0.0012, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 8.451851851851852e-05, |
|
"loss": 0.0079, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 8.414814814814815e-05, |
|
"loss": 0.0326, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 8.377777777777778e-05, |
|
"loss": 0.0017, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 8.340740740740741e-05, |
|
"loss": 0.001, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 8.303703703703705e-05, |
|
"loss": 0.0184, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 8.266666666666667e-05, |
|
"loss": 0.002, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 8.229629629629631e-05, |
|
"loss": 0.0011, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 8.192592592592592e-05, |
|
"loss": 0.0026, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 8.155555555555557e-05, |
|
"loss": 0.0038, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 8.118518518518518e-05, |
|
"loss": 0.0238, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 8.081481481481483e-05, |
|
"loss": 0.0394, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 8.044444444444444e-05, |
|
"loss": 0.0328, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 8.007407407407408e-05, |
|
"loss": 0.0078, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 7.97037037037037e-05, |
|
"loss": 0.0012, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 7.933333333333334e-05, |
|
"loss": 0.0517, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 7.896296296296297e-05, |
|
"loss": 0.0011, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 7.85925925925926e-05, |
|
"loss": 0.0011, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 7.822222222222223e-05, |
|
"loss": 0.0066, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 7.785185185185186e-05, |
|
"loss": 0.0793, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 7.748148148148149e-05, |
|
"loss": 0.0305, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 7.711111111111112e-05, |
|
"loss": 0.0012, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 7.674074074074075e-05, |
|
"loss": 0.001, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 7.637037037037038e-05, |
|
"loss": 0.0017, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 7.6e-05, |
|
"loss": 0.0029, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 7.562962962962963e-05, |
|
"loss": 0.0114, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 7.525925925925926e-05, |
|
"loss": 0.001, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 7.488888888888889e-05, |
|
"loss": 0.0009, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 7.451851851851852e-05, |
|
"loss": 0.0039, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 7.414814814814815e-05, |
|
"loss": 0.008, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 7.377777777777778e-05, |
|
"loss": 0.0038, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 7.340740740740741e-05, |
|
"loss": 0.029, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 7.303703703703704e-05, |
|
"loss": 0.0019, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 7.266666666666667e-05, |
|
"loss": 0.0018, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 7.22962962962963e-05, |
|
"loss": 0.0211, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 7.192592592592592e-05, |
|
"loss": 0.0192, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 7.155555555555555e-05, |
|
"loss": 0.0181, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 7.118518518518518e-05, |
|
"loss": 0.0226, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 7.081481481481483e-05, |
|
"loss": 0.0083, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 7.044444444444444e-05, |
|
"loss": 0.013, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.007407407407408e-05, |
|
"loss": 0.0008, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 6.97037037037037e-05, |
|
"loss": 0.0701, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 6.933333333333334e-05, |
|
"loss": 0.0032, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 6.896296296296296e-05, |
|
"loss": 0.0021, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 6.85925925925926e-05, |
|
"loss": 0.001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 6.822222222222222e-05, |
|
"loss": 0.0289, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 6.785185185185186e-05, |
|
"loss": 0.001, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 6.748148148148149e-05, |
|
"loss": 0.0358, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 6.711111111111112e-05, |
|
"loss": 0.001, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 6.674074074074075e-05, |
|
"loss": 0.008, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 6.637037037037038e-05, |
|
"loss": 0.0375, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 6.6e-05, |
|
"loss": 0.0009, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 6.562962962962963e-05, |
|
"loss": 0.0039, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 6.525925925925926e-05, |
|
"loss": 0.0033, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 6.488888888888889e-05, |
|
"loss": 0.0008, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 6.451851851851852e-05, |
|
"loss": 0.0111, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 6.414814814814815e-05, |
|
"loss": 0.0189, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 6.377777777777778e-05, |
|
"loss": 0.0238, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 6.340740740740741e-05, |
|
"loss": 0.0315, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 6.303703703703704e-05, |
|
"loss": 0.0007, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 6.266666666666667e-05, |
|
"loss": 0.0013, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 6.22962962962963e-05, |
|
"loss": 0.0079, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 6.192592592592593e-05, |
|
"loss": 0.0008, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 6.155555555555555e-05, |
|
"loss": 0.0008, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 6.118518518518518e-05, |
|
"loss": 0.0007, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 6.081481481481481e-05, |
|
"loss": 0.0046, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 6.044444444444445e-05, |
|
"loss": 0.0009, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.007407407407407e-05, |
|
"loss": 0.0007, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 5.970370370370371e-05, |
|
"loss": 0.0007, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 5.9333333333333343e-05, |
|
"loss": 0.0008, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 5.8962962962962966e-05, |
|
"loss": 0.0007, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 5.85925925925926e-05, |
|
"loss": 0.0008, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 5.8222222222222224e-05, |
|
"loss": 0.0028, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 5.785185185185186e-05, |
|
"loss": 0.0112, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 5.748148148148148e-05, |
|
"loss": 0.0007, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 5.711111111111112e-05, |
|
"loss": 0.0076, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 5.674074074074074e-05, |
|
"loss": 0.0007, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 5.637037037037037e-05, |
|
"loss": 0.0038, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 0.0007, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 5.562962962962963e-05, |
|
"loss": 0.0006, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 5.5259259259259264e-05, |
|
"loss": 0.0094, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 5.488888888888889e-05, |
|
"loss": 0.0068, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 5.451851851851852e-05, |
|
"loss": 0.0007, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 5.4148148148148145e-05, |
|
"loss": 0.0007, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 5.377777777777778e-05, |
|
"loss": 0.0006, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 5.34074074074074e-05, |
|
"loss": 0.0006, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 5.303703703703704e-05, |
|
"loss": 0.0229, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 5.266666666666666e-05, |
|
"loss": 0.0292, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 5.22962962962963e-05, |
|
"loss": 0.0006, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 5.1925925925925933e-05, |
|
"loss": 0.0006, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 5.1555555555555556e-05, |
|
"loss": 0.0097, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 5.118518518518519e-05, |
|
"loss": 0.0006, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 5.0814814814814814e-05, |
|
"loss": 0.0007, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 5.044444444444445e-05, |
|
"loss": 0.0006, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 5.007407407407407e-05, |
|
"loss": 0.0006, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 4.970370370370371e-05, |
|
"loss": 0.0006, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 4.933333333333334e-05, |
|
"loss": 0.0006, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 4.896296296296297e-05, |
|
"loss": 0.0006, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 4.8592592592592596e-05, |
|
"loss": 0.0055, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 4.8222222222222225e-05, |
|
"loss": 0.0006, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 4.7851851851851854e-05, |
|
"loss": 0.0006, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 4.7481481481481483e-05, |
|
"loss": 0.0006, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 4.711111111111111e-05, |
|
"loss": 0.0008, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 4.674074074074074e-05, |
|
"loss": 0.0044, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 4.637037037037038e-05, |
|
"loss": 0.0006, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"learning_rate": 4.5629629629629636e-05, |
|
"loss": 0.0076, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 4.5259259259259265e-05, |
|
"loss": 0.0006, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 4.4888888888888894e-05, |
|
"loss": 0.0006, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 4.4518518518518523e-05, |
|
"loss": 0.0053, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 4.414814814814815e-05, |
|
"loss": 0.0006, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 4.377777777777778e-05, |
|
"loss": 0.0006, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 4.340740740740741e-05, |
|
"loss": 0.0032, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 4.303703703703704e-05, |
|
"loss": 0.015, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 4.266666666666667e-05, |
|
"loss": 0.0006, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 4.22962962962963e-05, |
|
"loss": 0.0046, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 4.192592592592593e-05, |
|
"loss": 0.0006, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 4.155555555555556e-05, |
|
"loss": 0.0006, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 4.1185185185185186e-05, |
|
"loss": 0.0006, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 4.0814814814814815e-05, |
|
"loss": 0.0006, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 4.0444444444444444e-05, |
|
"loss": 0.0006, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.007407407407407e-05, |
|
"loss": 0.0006, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 3.97037037037037e-05, |
|
"loss": 0.0006, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 3.933333333333333e-05, |
|
"loss": 0.0074, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 3.896296296296296e-05, |
|
"loss": 0.0006, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 3.85925925925926e-05, |
|
"loss": 0.0005, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 3.8222222222222226e-05, |
|
"loss": 0.0006, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 3.7851851851851855e-05, |
|
"loss": 0.006, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 3.7481481481481484e-05, |
|
"loss": 0.0006, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 3.7111111111111113e-05, |
|
"loss": 0.0006, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 3.674074074074074e-05, |
|
"loss": 0.0005, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"learning_rate": 3.637037037037037e-05, |
|
"loss": 0.0104, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.0005, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 3.562962962962963e-05, |
|
"loss": 0.0056, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 3.525925925925926e-05, |
|
"loss": 0.0108, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 3.4888888888888895e-05, |
|
"loss": 0.0005, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 16.56, |
|
"learning_rate": 3.4518518518518524e-05, |
|
"loss": 0.0005, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 3.4148148148148153e-05, |
|
"loss": 0.0005, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 3.377777777777778e-05, |
|
"loss": 0.0005, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 3.340740740740741e-05, |
|
"loss": 0.0005, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 3.303703703703704e-05, |
|
"loss": 0.0005, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 3.266666666666667e-05, |
|
"loss": 0.0005, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 3.22962962962963e-05, |
|
"loss": 0.0005, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 3.192592592592593e-05, |
|
"loss": 0.0005, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 3.155555555555556e-05, |
|
"loss": 0.0005, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 3.118518518518519e-05, |
|
"loss": 0.0043, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 3.0814814814814816e-05, |
|
"loss": 0.0005, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 3.044444444444445e-05, |
|
"loss": 0.0005, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.0074074074074078e-05, |
|
"loss": 0.0005, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"learning_rate": 2.9703703703703707e-05, |
|
"loss": 0.0005, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 2.9333333333333336e-05, |
|
"loss": 0.0068, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 2.8962962962962965e-05, |
|
"loss": 0.0005, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 17.15, |
|
"learning_rate": 2.8592592592592594e-05, |
|
"loss": 0.0048, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 2.8222222222222223e-05, |
|
"loss": 0.0005, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 2.7851851851851853e-05, |
|
"loss": 0.0005, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 2.7481481481481482e-05, |
|
"loss": 0.0065, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 17.3, |
|
"learning_rate": 2.7111111111111114e-05, |
|
"loss": 0.0005, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 2.6740740740740743e-05, |
|
"loss": 0.0048, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 2.6370370370370373e-05, |
|
"loss": 0.0005, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.0005, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 2.562962962962963e-05, |
|
"loss": 0.0005, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 2.525925925925926e-05, |
|
"loss": 0.004, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 2.488888888888889e-05, |
|
"loss": 0.0005, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 2.451851851851852e-05, |
|
"loss": 0.0005, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 2.414814814814815e-05, |
|
"loss": 0.0005, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 2.377777777777778e-05, |
|
"loss": 0.0005, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 2.340740740740741e-05, |
|
"loss": 0.0005, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 2.303703703703704e-05, |
|
"loss": 0.0005, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 2.2666666666666668e-05, |
|
"loss": 0.0032, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 2.2296296296296297e-05, |
|
"loss": 0.0005, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 2.1925925925925926e-05, |
|
"loss": 0.0005, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 2.1555555555555555e-05, |
|
"loss": 0.0005, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 2.1185185185185184e-05, |
|
"loss": 0.0005, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 2.0814814814814813e-05, |
|
"loss": 0.0005, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"learning_rate": 2.0444444444444446e-05, |
|
"loss": 0.0081, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2.0074074074074075e-05, |
|
"loss": 0.0064, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 1.9703703703703704e-05, |
|
"loss": 0.0005, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 1.9333333333333333e-05, |
|
"loss": 0.0005, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 1.8962962962962963e-05, |
|
"loss": 0.0005, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 1.8592592592592595e-05, |
|
"loss": 0.0049, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 1.8222222222222224e-05, |
|
"loss": 0.0005, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 1.7851851851851853e-05, |
|
"loss": 0.0005, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 1.7481481481481483e-05, |
|
"loss": 0.0039, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 1.7111111111111112e-05, |
|
"loss": 0.0034, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 1.674074074074074e-05, |
|
"loss": 0.0033, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 1.6370370370370374e-05, |
|
"loss": 0.0005, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0005, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 1.5629629629629632e-05, |
|
"loss": 0.0005, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 1.5259259259259258e-05, |
|
"loss": 0.0005, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 1.4888888888888888e-05, |
|
"loss": 0.0076, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 1.4518518518518521e-05, |
|
"loss": 0.0076, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 1.4148148148148148e-05, |
|
"loss": 0.0005, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 1.3777777777777778e-05, |
|
"loss": 0.0005, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 1.3407407407407407e-05, |
|
"loss": 0.0005, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 1.3037037037037036e-05, |
|
"loss": 0.0005, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 1.2666666666666668e-05, |
|
"loss": 0.0005, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"learning_rate": 1.2296296296296298e-05, |
|
"loss": 0.0005, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 1.1925925925925927e-05, |
|
"loss": 0.0061, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 1.1555555555555556e-05, |
|
"loss": 0.0005, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 1.1185185185185187e-05, |
|
"loss": 0.0005, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 1.0814814814814814e-05, |
|
"loss": 0.0056, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"learning_rate": 1.0444444444444445e-05, |
|
"loss": 0.0005, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.0074074074074074e-05, |
|
"loss": 0.0004, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 9.703703703703703e-06, |
|
"loss": 0.0004, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.0005, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 8.962962962962963e-06, |
|
"loss": 0.0049, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 8.592592592592593e-06, |
|
"loss": 0.005, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 19.19, |
|
"learning_rate": 8.222222222222223e-06, |
|
"loss": 0.0005, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 7.851851851851853e-06, |
|
"loss": 0.0043, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 7.481481481481483e-06, |
|
"loss": 0.0051, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 19.3, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.0005, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 6.74074074074074e-06, |
|
"loss": 0.0048, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"learning_rate": 6.370370370370371e-06, |
|
"loss": 0.0051, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0048, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 5.62962962962963e-06, |
|
"loss": 0.0055, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 19.48, |
|
"learning_rate": 5.259259259259259e-06, |
|
"loss": 0.0005, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.0005, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 4.5185185185185185e-06, |
|
"loss": 0.0005, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 4.1481481481481485e-06, |
|
"loss": 0.0005, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.0004, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 3.4074074074074077e-06, |
|
"loss": 0.0004, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 3.0370370370370372e-06, |
|
"loss": 0.0005, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 2.2962962962962964e-06, |
|
"loss": 0.0005, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.925925925925926e-06, |
|
"loss": 0.0004, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 1.5555555555555556e-06, |
|
"loss": 0.0004, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 1.1851851851851852e-06, |
|
"loss": 0.0004, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"learning_rate": 8.148148148148147e-07, |
|
"loss": 0.0005, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.0005, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 7.407407407407407e-08, |
|
"loss": 0.0005, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5400, |
|
"total_flos": 6.69572795818967e+18, |
|
"train_loss": 0.14805768244520381, |
|
"train_runtime": 1678.3865, |
|
"train_samples_per_second": 51.478, |
|
"train_steps_per_second": 3.217 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5400, |
|
"num_train_epochs": 20, |
|
"save_steps": 6000, |
|
"total_flos": 6.69572795818967e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|