|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 8899, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.243445692883896e-07, |
|
"loss": 2.2217, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2734082397003748e-06, |
|
"loss": 2.098, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.02247191011236e-06, |
|
"loss": 1.8324, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.771535580524345e-06, |
|
"loss": 1.7504, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.52059925093633e-06, |
|
"loss": 1.5636, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.269662921348315e-06, |
|
"loss": 1.6101, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.0187265917603005e-06, |
|
"loss": 2.1579, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.7677902621722845e-06, |
|
"loss": 1.4598, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.51685393258427e-06, |
|
"loss": 1.6046, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.265917602996255e-06, |
|
"loss": 1.1965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.01498127340824e-06, |
|
"loss": 1.3175, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.764044943820226e-06, |
|
"loss": 1.3043, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.51310861423221e-06, |
|
"loss": 1.0915, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0262172284644197e-05, |
|
"loss": 0.8881, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.101123595505618e-05, |
|
"loss": 0.7195, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1760299625468165e-05, |
|
"loss": 0.6414, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.250936329588015e-05, |
|
"loss": 0.5803, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3258426966292135e-05, |
|
"loss": 0.446, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.4007490636704121e-05, |
|
"loss": 0.4358, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.4756554307116106e-05, |
|
"loss": 0.4409, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5505617977528093e-05, |
|
"loss": 0.3052, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.6254681647940076e-05, |
|
"loss": 0.3371, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7003745318352062e-05, |
|
"loss": 0.3135, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7752808988764045e-05, |
|
"loss": 0.3813, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.8501872659176032e-05, |
|
"loss": 0.3232, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.925093632958802e-05, |
|
"loss": 0.301, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1726, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.999993377127307e-05, |
|
"loss": 0.3646, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.999973508596952e-05, |
|
"loss": 0.2276, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.999940394672109e-05, |
|
"loss": 0.334, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9998940357913964e-05, |
|
"loss": 0.2052, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9998344325688727e-05, |
|
"loss": 0.2715, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.999761585794026e-05, |
|
"loss": 0.2602, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9996754964317668e-05, |
|
"loss": 0.2067, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.999576165622413e-05, |
|
"loss": 0.3932, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9994635946816748e-05, |
|
"loss": 0.2637, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.999337785100638e-05, |
|
"loss": 0.2291, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9991987385457452e-05, |
|
"loss": 0.2584, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9990464568587708e-05, |
|
"loss": 0.2392, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9988809420567998e-05, |
|
"loss": 0.2079, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.998702196332199e-05, |
|
"loss": 0.2343, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.998510222052588e-05, |
|
"loss": 0.211, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9983050217608106e-05, |
|
"loss": 0.2283, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.998086598174896e-05, |
|
"loss": 0.2756, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9978549541880295e-05, |
|
"loss": 0.2038, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9976100928685063e-05, |
|
"loss": 0.2509, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9973520174596983e-05, |
|
"loss": 0.2034, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9970807313800063e-05, |
|
"loss": 0.2689, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9967962382228166e-05, |
|
"loss": 0.1766, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.996498541756453e-05, |
|
"loss": 0.213, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9961876459241274e-05, |
|
"loss": 0.2533, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.995863554843887e-05, |
|
"loss": 0.222, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9955262728085592e-05, |
|
"loss": 0.2103, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9951758042856963e-05, |
|
"loss": 0.2198, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.994812153917515e-05, |
|
"loss": 0.2222, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.994435326520835e-05, |
|
"loss": 0.2416, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9940453270870174e-05, |
|
"loss": 0.2062, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9936421607818942e-05, |
|
"loss": 0.2477, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.993225832945704e-05, |
|
"loss": 0.1833, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9927963490930195e-05, |
|
"loss": 0.2297, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9923537149126738e-05, |
|
"loss": 0.2025, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9918979362676875e-05, |
|
"loss": 0.2207, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9914290191951875e-05, |
|
"loss": 0.1873, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.990946969906331e-05, |
|
"loss": 0.2602, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9904517947862193e-05, |
|
"loss": 0.2692, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.989943500393816e-05, |
|
"loss": 0.1779, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9894220934618598e-05, |
|
"loss": 0.1973, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.988887580896774e-05, |
|
"loss": 0.1873, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9883399697785756e-05, |
|
"loss": 0.2301, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9877792673607823e-05, |
|
"loss": 0.2207, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9872054810703155e-05, |
|
"loss": 0.2069, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.986618618507402e-05, |
|
"loss": 0.2315, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9860186874454746e-05, |
|
"loss": 0.1969, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9854056958310667e-05, |
|
"loss": 0.248, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.984779651783709e-05, |
|
"loss": 0.211, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9841405635958225e-05, |
|
"loss": 0.1988, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.983488439732606e-05, |
|
"loss": 0.1728, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9828232888319263e-05, |
|
"loss": 0.2126, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9821451197042028e-05, |
|
"loss": 0.2348, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.981453941332291e-05, |
|
"loss": 0.2369, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.980749762871364e-05, |
|
"loss": 0.1675, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.98003259364879e-05, |
|
"loss": 0.2121, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9793024431640096e-05, |
|
"loss": 0.2358, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9785593210884112e-05, |
|
"loss": 0.1853, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9778032372652e-05, |
|
"loss": 0.2123, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.97703420170927e-05, |
|
"loss": 0.1991, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9762522246070697e-05, |
|
"loss": 0.1983, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9754573163164697e-05, |
|
"loss": 0.1852, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9746494873666226e-05, |
|
"loss": 0.1656, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9738287484578243e-05, |
|
"loss": 0.1611, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.972995110461374e-05, |
|
"loss": 0.1771, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9721485844194282e-05, |
|
"loss": 0.2002, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9712891815448557e-05, |
|
"loss": 0.1828, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9704169132210874e-05, |
|
"loss": 0.182, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.969531791001968e-05, |
|
"loss": 0.1969, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9686338266116006e-05, |
|
"loss": 0.1736, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9677230319441936e-05, |
|
"loss": 0.1496, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9667994190639007e-05, |
|
"loss": 0.2126, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.965863000204663e-05, |
|
"loss": 0.2366, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9649137877700462e-05, |
|
"loss": 0.2226, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9639517943330768e-05, |
|
"loss": 0.2237, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.962977032636075e-05, |
|
"loss": 0.2526, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9619895155904855e-05, |
|
"loss": 0.2078, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9609892562767082e-05, |
|
"loss": 0.1882, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.959976267943923e-05, |
|
"loss": 0.2113, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9589505640099156e-05, |
|
"loss": 0.2133, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.957912158060899e-05, |
|
"loss": 0.2228, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9568610638513343e-05, |
|
"loss": 0.1903, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9557972953037476e-05, |
|
"loss": 0.1865, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.954720866508546e-05, |
|
"loss": 0.2001, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9536317917238312e-05, |
|
"loss": 0.1557, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.952530085375211e-05, |
|
"loss": 0.2571, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9514157620556072e-05, |
|
"loss": 0.1855, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9502888365250622e-05, |
|
"loss": 0.2161, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.949149323710545e-05, |
|
"loss": 0.1728, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9479972387057523e-05, |
|
"loss": 0.1924, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9468325967709084e-05, |
|
"loss": 0.2001, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9456554133325642e-05, |
|
"loss": 0.1947, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.944465703983392e-05, |
|
"loss": 0.1822, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.943263484481978e-05, |
|
"loss": 0.1425, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9420487707526163e-05, |
|
"loss": 0.1903, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9408215788850958e-05, |
|
"loss": 0.1772, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.939581925134487e-05, |
|
"loss": 0.164, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.938329825920928e-05, |
|
"loss": 0.1882, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9370652978294065e-05, |
|
"loss": 0.2023, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9357883576095395e-05, |
|
"loss": 0.1628, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9344990221753518e-05, |
|
"loss": 0.1713, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9331973086050524e-05, |
|
"loss": 0.2396, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9318832341408078e-05, |
|
"loss": 0.1939, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.930556816188514e-05, |
|
"loss": 0.1432, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9292180723175656e-05, |
|
"loss": 0.169, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9278670202606222e-05, |
|
"loss": 0.1771, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.926503677913376e-05, |
|
"loss": 0.189, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9251280633343125e-05, |
|
"loss": 0.1619, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9237401947444725e-05, |
|
"loss": 0.2056, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9223400905272093e-05, |
|
"loss": 0.1932, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9209277692279475e-05, |
|
"loss": 0.1878, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.919503249553935e-05, |
|
"loss": 0.1561, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.918066550373997e-05, |
|
"loss": 0.1532, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9166176907182845e-05, |
|
"loss": 0.205, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.915156689778024e-05, |
|
"loss": 0.1945, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9136835669052624e-05, |
|
"loss": 0.1793, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9121983416126095e-05, |
|
"loss": 0.1879, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.910701033572982e-05, |
|
"loss": 0.1717, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.90919166261934e-05, |
|
"loss": 0.1615, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9076702487444275e-05, |
|
"loss": 0.1998, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9061368121005053e-05, |
|
"loss": 0.1775, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.904591372999085e-05, |
|
"loss": 0.1824, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9030339519106588e-05, |
|
"loss": 0.1975, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9014645694644302e-05, |
|
"loss": 0.1729, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8998832464480396e-05, |
|
"loss": 0.1931, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8982900038072892e-05, |
|
"loss": 0.1496, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8966848626458647e-05, |
|
"loss": 0.2168, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.895067844225058e-05, |
|
"loss": 0.1656, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.893438969963483e-05, |
|
"loss": 0.1559, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8917982614367933e-05, |
|
"loss": 0.16, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.890145740377397e-05, |
|
"loss": 0.1801, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8884814286741663e-05, |
|
"loss": 0.2013, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8868053483721507e-05, |
|
"loss": 0.2228, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8851175216722834e-05, |
|
"loss": 0.1901, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8834179709310868e-05, |
|
"loss": 0.1944, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8817067186603774e-05, |
|
"loss": 0.1649, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8799837875269672e-05, |
|
"loss": 0.163, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.878249200352363e-05, |
|
"loss": 0.1703, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8765029801124653e-05, |
|
"loss": 0.1878, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8747451499372623e-05, |
|
"loss": 0.1737, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.872975733110525e-05, |
|
"loss": 0.1844, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8711947530694986e-05, |
|
"loss": 0.1845, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.869402233404591e-05, |
|
"loss": 0.1778, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.867598197859061e-05, |
|
"loss": 0.2139, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.865782670328705e-05, |
|
"loss": 0.1975, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8639556748615372e-05, |
|
"loss": 0.1851, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.862117235657475e-05, |
|
"loss": 0.1872, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.860267377068016e-05, |
|
"loss": 0.181, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8584061235959165e-05, |
|
"loss": 0.2022, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8565334998948648e-05, |
|
"loss": 0.1593, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.854649530769159e-05, |
|
"loss": 0.1803, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.852754241173374e-05, |
|
"loss": 0.1676, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8508476562120332e-05, |
|
"loss": 0.19, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.848929801139275e-05, |
|
"loss": 0.1908, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8470007013585206e-05, |
|
"loss": 0.1558, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8450603824221334e-05, |
|
"loss": 0.1709, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8431088700310846e-05, |
|
"loss": 0.1947, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.84114619003461e-05, |
|
"loss": 0.1753, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.83917236842987e-05, |
|
"loss": 0.1932, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8371874313616017e-05, |
|
"loss": 0.1672, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8351914051217773e-05, |
|
"loss": 0.1965, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.833184316149251e-05, |
|
"loss": 0.1906, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8311661910294138e-05, |
|
"loss": 0.1888, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.829137056493836e-05, |
|
"loss": 0.1797, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8270969394199173e-05, |
|
"loss": 0.201, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.825045866830529e-05, |
|
"loss": 0.1914, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8229838658936566e-05, |
|
"loss": 0.1542, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8209109639220393e-05, |
|
"loss": 0.1642, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.818827188372809e-05, |
|
"loss": 0.1685, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.816732566847126e-05, |
|
"loss": 0.158, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8146271270898138e-05, |
|
"loss": 0.1672, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8125108969889908e-05, |
|
"loss": 0.2137, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.810383904575703e-05, |
|
"loss": 0.1766, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8082461780235497e-05, |
|
"loss": 0.196, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8060977456483127e-05, |
|
"loss": 0.1545, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.80393863590758e-05, |
|
"loss": 0.1856, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.80176887740037e-05, |
|
"loss": 0.1539, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7995884988667513e-05, |
|
"loss": 0.1888, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.797397529187462e-05, |
|
"loss": 0.1929, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.79519599738353e-05, |
|
"loss": 0.177, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7929839326158838e-05, |
|
"loss": 0.2152, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7907613641849705e-05, |
|
"loss": 0.1677, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.788528321530366e-05, |
|
"loss": 0.1619, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7862848342303845e-05, |
|
"loss": 0.1745, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7840309320016875e-05, |
|
"loss": 0.1672, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7817666446988896e-05, |
|
"loss": 0.156, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7794920023141648e-05, |
|
"loss": 0.1866, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7772070349768466e-05, |
|
"loss": 0.2002, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7749117729530306e-05, |
|
"loss": 0.1579, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.772606246645173e-05, |
|
"loss": 0.1876, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.770290486591688e-05, |
|
"loss": 0.2025, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7679645234665442e-05, |
|
"loss": 0.1821, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7656283880788565e-05, |
|
"loss": 0.1663, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7632821113724797e-05, |
|
"loss": 0.1802, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7609257244255977e-05, |
|
"loss": 0.1493, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.758559258450312e-05, |
|
"loss": 0.1777, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.756182744792228e-05, |
|
"loss": 0.144, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7537962149300412e-05, |
|
"loss": 0.1593, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7513997004751178e-05, |
|
"loss": 0.1722, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7489932331710785e-05, |
|
"loss": 0.1551, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7465768448933768e-05, |
|
"loss": 0.2002, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7441505676488758e-05, |
|
"loss": 0.1825, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7417144335754265e-05, |
|
"loss": 0.1645, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7392684749414406e-05, |
|
"loss": 0.1378, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7368127241454634e-05, |
|
"loss": 0.1288, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7343472137157444e-05, |
|
"loss": 0.1748, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7318719763098077e-05, |
|
"loss": 0.1554, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.729387044714017e-05, |
|
"loss": 0.1881, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7268924518431437e-05, |
|
"loss": 0.189, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7243882307399302e-05, |
|
"loss": 0.1824, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.721874414574651e-05, |
|
"loss": 0.1751, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.719351036644676e-05, |
|
"loss": 0.1774, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7168181303740256e-05, |
|
"loss": 0.1658, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7142757293129318e-05, |
|
"loss": 0.1587, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.711723867137392e-05, |
|
"loss": 0.148, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.709162577648722e-05, |
|
"loss": 0.1603, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.706591894773112e-05, |
|
"loss": 0.1715, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7040118525611705e-05, |
|
"loss": 0.1763, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7014224851874814e-05, |
|
"loss": 0.1659, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.698823826950145e-05, |
|
"loss": 0.1612, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.696215912270327e-05, |
|
"loss": 0.1431, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.693598775691801e-05, |
|
"loss": 0.1597, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.6909724518804916e-05, |
|
"loss": 0.185, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.6883369756240157e-05, |
|
"loss": 0.1276, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.6856923818312205e-05, |
|
"loss": 0.1796, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.683038705531722e-05, |
|
"loss": 0.1899, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.680375981875441e-05, |
|
"loss": 0.1937, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6777042461321374e-05, |
|
"loss": 0.138, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6750235336909415e-05, |
|
"loss": 0.1661, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6723338800598886e-05, |
|
"loss": 0.1518, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.669635320865446e-05, |
|
"loss": 0.1817, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6669278918520413e-05, |
|
"loss": 0.1413, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.66421162888159e-05, |
|
"loss": 0.1796, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6614865679330195e-05, |
|
"loss": 0.1863, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.658752745101794e-05, |
|
"loss": 0.1749, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.656010196599434e-05, |
|
"loss": 0.1853, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.653258958753039e-05, |
|
"loss": 0.139, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6504990680048047e-05, |
|
"loss": 0.2061, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6477305609115415e-05, |
|
"loss": 0.1795, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6449534741441893e-05, |
|
"loss": 0.1514, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6421678444873327e-05, |
|
"loss": 0.1785, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6393737088387126e-05, |
|
"loss": 0.1751, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6365711042087385e-05, |
|
"loss": 0.1904, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6337600677199973e-05, |
|
"loss": 0.173, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6309406366067633e-05, |
|
"loss": 0.1479, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6281128482145027e-05, |
|
"loss": 0.1568, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6252767399993807e-05, |
|
"loss": 0.1535, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6224323495277646e-05, |
|
"loss": 0.1966, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.619579714475726e-05, |
|
"loss": 0.1676, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6167188726285433e-05, |
|
"loss": 0.1514, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6138498618801982e-05, |
|
"loss": 0.1348, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6109727202328778e-05, |
|
"loss": 0.1837, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6080874857964666e-05, |
|
"loss": 0.174, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.605194196788046e-05, |
|
"loss": 0.1558, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.602292891531385e-05, |
|
"loss": 0.1734, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.599383608456435e-05, |
|
"loss": 0.1728, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5964663860988186e-05, |
|
"loss": 0.1475, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.59354126309932e-05, |
|
"loss": 0.1707, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5906082782033744e-05, |
|
"loss": 0.1492, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5876674702605524e-05, |
|
"loss": 0.1918, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5847188782240473e-05, |
|
"loss": 0.1561, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5817625411501583e-05, |
|
"loss": 0.161, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5787984981977745e-05, |
|
"loss": 0.1497, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5758267886278533e-05, |
|
"loss": 0.1905, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.572847451802903e-05, |
|
"loss": 0.135, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5698605271864606e-05, |
|
"loss": 0.1885, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.56686605434257e-05, |
|
"loss": 0.1507, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5638640729352548e-05, |
|
"loss": 0.182, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5608546227279967e-05, |
|
"loss": 0.1501, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.557837743583208e-05, |
|
"loss": 0.1764, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5548134754616998e-05, |
|
"loss": 0.1574, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.551781858422159e-05, |
|
"loss": 0.1835, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5487429326206126e-05, |
|
"loss": 0.1904, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5456967383098983e-05, |
|
"loss": 0.1604, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.54264331583913e-05, |
|
"loss": 0.1509, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5395827056531643e-05, |
|
"loss": 0.1535, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5365149482920646e-05, |
|
"loss": 0.1494, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.533440084390564e-05, |
|
"loss": 0.14, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5303581546775263e-05, |
|
"loss": 0.1741, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5272691999754084e-05, |
|
"loss": 0.2048, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5241732611997174e-05, |
|
"loss": 0.1727, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.52107037935847e-05, |
|
"loss": 0.1552, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.517960595551649e-05, |
|
"loss": 0.1693, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5148439509706596e-05, |
|
"loss": 0.1631, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5117204868977815e-05, |
|
"loss": 0.1547, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5085902447056249e-05, |
|
"loss": 0.1427, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.505453265856581e-05, |
|
"loss": 0.1705, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5023095919022728e-05, |
|
"loss": 0.1711, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.499159264483005e-05, |
|
"loss": 0.151, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4960023253272125e-05, |
|
"loss": 0.1723, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4928388162509078e-05, |
|
"loss": 0.175, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.489668779157126e-05, |
|
"loss": 0.15, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4864922560353722e-05, |
|
"loss": 0.1777, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4833092889610624e-05, |
|
"loss": 0.1419, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4801199200949678e-05, |
|
"loss": 0.1866, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4769241916826571e-05, |
|
"loss": 0.1856, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4737221460539344e-05, |
|
"loss": 0.198, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4705138256222813e-05, |
|
"loss": 0.1478, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.467299272884293e-05, |
|
"loss": 0.1693, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4640785304191169e-05, |
|
"loss": 0.1747, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4608516408878875e-05, |
|
"loss": 0.1747, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.457618647033162e-05, |
|
"loss": 0.1556, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4543795916783536e-05, |
|
"loss": 0.1375, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.451134517727165e-05, |
|
"loss": 0.1662, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4478834681630199e-05, |
|
"loss": 0.1504, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4446264860484924e-05, |
|
"loss": 0.1676, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4413636145247386e-05, |
|
"loss": 0.177, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.438094896810924e-05, |
|
"loss": 0.1665, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.434820376203651e-05, |
|
"loss": 0.1875, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4315400960763861e-05, |
|
"loss": 0.1765, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4282540998788846e-05, |
|
"loss": 0.1535, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4249624311366151e-05, |
|
"loss": 0.1577, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.421665133450184e-05, |
|
"loss": 0.1639, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4183622504947571e-05, |
|
"loss": 0.1859, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4150538260194806e-05, |
|
"loss": 0.1699, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.411739903846903e-05, |
|
"loss": 0.1565, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.4084205278723937e-05, |
|
"loss": 0.1661, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.4050957420635615e-05, |
|
"loss": 0.1627, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.4017655904596727e-05, |
|
"loss": 0.1655, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3984301171710677e-05, |
|
"loss": 0.1704, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3950893663785765e-05, |
|
"loss": 0.1622, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.391743382332933e-05, |
|
"loss": 0.1543, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3883922093541903e-05, |
|
"loss": 0.1531, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.385035891831133e-05, |
|
"loss": 0.1542, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3816744742206868e-05, |
|
"loss": 0.1538, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3783080010473351e-05, |
|
"loss": 0.1758, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.374936516902524e-05, |
|
"loss": 0.148, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3715600664440738e-05, |
|
"loss": 0.1782, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3681786943955876e-05, |
|
"loss": 0.1672, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3647924455458588e-05, |
|
"loss": 0.1565, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3614013647482774e-05, |
|
"loss": 0.1725, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3580054969202362e-05, |
|
"loss": 0.1471, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3546048870425356e-05, |
|
"loss": 0.1697, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3511995801587886e-05, |
|
"loss": 0.1775, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3477896213748232e-05, |
|
"loss": 0.1491, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3443750558580847e-05, |
|
"loss": 0.1715, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.340955928837039e-05, |
|
"loss": 0.1753, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3375322856005719e-05, |
|
"loss": 0.1709, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3341041714973901e-05, |
|
"loss": 0.1866, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3306716319354197e-05, |
|
"loss": 0.1456, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3272347123812063e-05, |
|
"loss": 0.2008, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3237934583593112e-05, |
|
"loss": 0.1523, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.320347915451709e-05, |
|
"loss": 0.1425, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3168981292971832e-05, |
|
"loss": 0.1609, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3134441455907237e-05, |
|
"loss": 0.1571, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3099860100829185e-05, |
|
"loss": 0.1712, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3065237685793503e-05, |
|
"loss": 0.1579, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.303057466939989e-05, |
|
"loss": 0.1641, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2995871510785829e-05, |
|
"loss": 0.1399, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2961128669620528e-05, |
|
"loss": 0.161, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2926346606098807e-05, |
|
"loss": 0.2, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2891525780935035e-05, |
|
"loss": 0.1434, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2856666655356988e-05, |
|
"loss": 0.1918, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.282176969109977e-05, |
|
"loss": 0.1308, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2786835350399682e-05, |
|
"loss": 0.164, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2751864095988112e-05, |
|
"loss": 0.1556, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2716856391085384e-05, |
|
"loss": 0.1826, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2681812699394653e-05, |
|
"loss": 0.154, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2646733485095727e-05, |
|
"loss": 0.1442, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2611619212838954e-05, |
|
"loss": 0.1676, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2576470347739043e-05, |
|
"loss": 0.1775, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2541287355368908e-05, |
|
"loss": 0.1905, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.250607070175351e-05, |
|
"loss": 0.1568, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2470820853363674e-05, |
|
"loss": 0.1633, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2435538277109919e-05, |
|
"loss": 0.1393, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.240022344033627e-05, |
|
"loss": 0.1574, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2364876810814059e-05, |
|
"loss": 0.1361, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2329498856735739e-05, |
|
"loss": 0.1568, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2294090046708684e-05, |
|
"loss": 0.1651, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.225865084974898e-05, |
|
"loss": 0.1414, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2223181735275203e-05, |
|
"loss": 0.1796, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2187683173102212e-05, |
|
"loss": 0.1793, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2152155633434922e-05, |
|
"loss": 0.1746, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2116599586862079e-05, |
|
"loss": 0.1613, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2081015504350025e-05, |
|
"loss": 0.1644, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.204540385723645e-05, |
|
"loss": 0.1629, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2009765117224177e-05, |
|
"loss": 0.1977, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1974099756374874e-05, |
|
"loss": 0.1717, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1938408247102825e-05, |
|
"loss": 0.1414, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1902691062168684e-05, |
|
"loss": 0.1508, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1866948674673182e-05, |
|
"loss": 0.1709, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1831181558050889e-05, |
|
"loss": 0.1599, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1795390186063917e-05, |
|
"loss": 0.1653, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1759575032795674e-05, |
|
"loss": 0.1923, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.172373657264456e-05, |
|
"loss": 0.1584, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1687875280317689e-05, |
|
"loss": 0.1659, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1651991630824608e-05, |
|
"loss": 0.1582, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.161608609947101e-05, |
|
"loss": 0.1412, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1580159161852413e-05, |
|
"loss": 0.1566, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1544211293847886e-05, |
|
"loss": 0.1572, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1508242971613741e-05, |
|
"loss": 0.1667, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.147225467157721e-05, |
|
"loss": 0.1381, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1436246870430157e-05, |
|
"loss": 0.15, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1400220045122746e-05, |
|
"loss": 0.1496, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1364174672857131e-05, |
|
"loss": 0.1633, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.132811123108114e-05, |
|
"loss": 0.1867, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1292030197481935e-05, |
|
"loss": 0.1491, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.12559320499797e-05, |
|
"loss": 0.1597, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1219817266721314e-05, |
|
"loss": 0.1655, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.118368632607399e-05, |
|
"loss": 0.1455, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1147539706618976e-05, |
|
"loss": 0.185, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1111377887145186e-05, |
|
"loss": 0.1555, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1075201346642875e-05, |
|
"loss": 0.1695, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1039010564297288e-05, |
|
"loss": 0.1815, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.100280601948231e-05, |
|
"loss": 0.1552, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0966588191754129e-05, |
|
"loss": 0.1731, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0930357560844862e-05, |
|
"loss": 0.1748, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.089411460665623e-05, |
|
"loss": 0.1517, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0857859809253168e-05, |
|
"loss": 0.1834, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.08215936488575e-05, |
|
"loss": 0.1656, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0785316605841544e-05, |
|
"loss": 0.171, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0749029160721782e-05, |
|
"loss": 0.1728, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0712731794152468e-05, |
|
"loss": 0.1575, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0676424986919282e-05, |
|
"loss": 0.1562, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0640109219932946e-05, |
|
"loss": 0.1281, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0603784974222862e-05, |
|
"loss": 0.1702, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0567452730930743e-05, |
|
"loss": 0.1576, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.053111297130423e-05, |
|
"loss": 0.1551, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0494766176690526e-05, |
|
"loss": 0.1451, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.045841282853002e-05, |
|
"loss": 0.1577, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0422053408349908e-05, |
|
"loss": 0.1581, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0385688397757809e-05, |
|
"loss": 0.154, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0349318278435392e-05, |
|
"loss": 0.1726, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0312943532132003e-05, |
|
"loss": 0.1673, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0276564640658265e-05, |
|
"loss": 0.1842, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0240182085879713e-05, |
|
"loss": 0.1473, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0203796349710406e-05, |
|
"loss": 0.1817, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0167407914106541e-05, |
|
"loss": 0.1414, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0131017261060072e-05, |
|
"loss": 0.1928, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0094624872592318e-05, |
|
"loss": 0.1595, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0058231230747597e-05, |
|
"loss": 0.1352, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0021836817586819e-05, |
|
"loss": 0.13, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.985442115181117e-06, |
|
"loss": 0.1528, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.949047605605446e-06, |
|
"loss": 0.1362, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.91265377093222e-06, |
|
"loss": 0.1558, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.87990029519365e-06, |
|
"loss": 0.1523, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.84714810801154e-06, |
|
"loss": 0.1362, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.810758727589814e-06, |
|
"loss": 0.1536, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.774371853809793e-06, |
|
"loss": 0.1441, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.73798796864275e-06, |
|
"loss": 0.1501, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.701607554020364e-06, |
|
"loss": 0.1641, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.66523109182834e-06, |
|
"loss": 0.1471, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.628859063900038e-06, |
|
"loss": 0.1476, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.592491952010081e-06, |
|
"loss": 0.1355, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.556130237867967e-06, |
|
"loss": 0.1535, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.519774403111711e-06, |
|
"loss": 0.156, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.483424929301436e-06, |
|
"loss": 0.1646, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.44708229791302e-06, |
|
"loss": 0.1458, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.41074699033171e-06, |
|
"loss": 0.1626, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.374419487845729e-06, |
|
"loss": 0.1597, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.338100271639932e-06, |
|
"loss": 0.1531, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.301789822789412e-06, |
|
"loss": 0.1566, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.265488622253122e-06, |
|
"loss": 0.1456, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.229197150867525e-06, |
|
"loss": 0.1549, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.192915889340214e-06, |
|
"loss": 0.1775, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.156645318243534e-06, |
|
"loss": 0.1686, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.120385918008244e-06, |
|
"loss": 0.159, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.084138168917117e-06, |
|
"loss": 0.1473, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.047902551098618e-06, |
|
"loss": 0.185, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.011679544520508e-06, |
|
"loss": 0.1486, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.975469628983511e-06, |
|
"loss": 0.1767, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.93927328411495e-06, |
|
"loss": 0.1531, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.903090989362394e-06, |
|
"loss": 0.1769, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.866923223987303e-06, |
|
"loss": 0.1592, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.830770467058688e-06, |
|
"loss": 0.1336, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.79463319744677e-06, |
|
"loss": 0.1544, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.758511893816614e-06, |
|
"loss": 0.1575, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.722407034621812e-06, |
|
"loss": 0.1521, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.686319098098139e-06, |
|
"loss": 0.1746, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.65024856225721e-06, |
|
"loss": 0.1503, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.614195904880164e-06, |
|
"loss": 0.1857, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.578161603511312e-06, |
|
"loss": 0.171, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.54214613545184e-06, |
|
"loss": 0.1703, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.506149977753474e-06, |
|
"loss": 0.1403, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.470173607212145e-06, |
|
"loss": 0.1654, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.434217500361701e-06, |
|
"loss": 0.1384, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.398282133467579e-06, |
|
"loss": 0.1632, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.362367982520495e-06, |
|
"loss": 0.1271, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.326475523230152e-06, |
|
"loss": 0.1556, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.290605231018931e-06, |
|
"loss": 0.1672, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.25475758101558e-06, |
|
"loss": 0.1725, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.218933048048952e-06, |
|
"loss": 0.1685, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.183132106641684e-06, |
|
"loss": 0.128, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.147355231003931e-06, |
|
"loss": 0.1708, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.111602895027083e-06, |
|
"loss": 0.177, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.075875572277474e-06, |
|
"loss": 0.1462, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.040173735990124e-06, |
|
"loss": 0.1811, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.004497859062475e-06, |
|
"loss": 0.1549, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.968848414048097e-06, |
|
"loss": 0.1433, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.93322587315047e-06, |
|
"loss": 0.1464, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.897630708216701e-06, |
|
"loss": 0.1356, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.862063390731277e-06, |
|
"loss": 0.1733, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.826524391809833e-06, |
|
"loss": 0.1461, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.791014182192898e-06, |
|
"loss": 0.1385, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.755533232239667e-06, |
|
"loss": 0.1591, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.720082011921775e-06, |
|
"loss": 0.1458, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.68466099081705e-06, |
|
"loss": 0.1776, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.649270638103324e-06, |
|
"loss": 0.1583, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.613911422552203e-06, |
|
"loss": 0.1555, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.578583812522844e-06, |
|
"loss": 0.1534, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.5432882759557795e-06, |
|
"loss": 0.1497, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.508025280366703e-06, |
|
"loss": 0.1725, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.4727952928402695e-06, |
|
"loss": 0.1693, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.437598780023924e-06, |
|
"loss": 0.1627, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.402436208121723e-06, |
|
"loss": 0.158, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.367308042888131e-06, |
|
"loss": 0.159, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.332214749621884e-06, |
|
"loss": 0.1501, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.297156793159808e-06, |
|
"loss": 0.1493, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.26213463787067e-06, |
|
"loss": 0.1572, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.227148747649024e-06, |
|
"loss": 0.1575, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.192199585909058e-06, |
|
"loss": 0.1718, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.157287615578472e-06, |
|
"loss": 0.1619, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.122413299092343e-06, |
|
"loss": 0.1491, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.0875770983869774e-06, |
|
"loss": 0.1732, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.0527794748938225e-06, |
|
"loss": 0.1543, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.018020889533348e-06, |
|
"loss": 0.1542, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.9833018027089125e-06, |
|
"loss": 0.1743, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.948622674300712e-06, |
|
"loss": 0.1542, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.913983963659639e-06, |
|
"loss": 0.148, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.879386129601244e-06, |
|
"loss": 0.1892, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.8448296303996295e-06, |
|
"loss": 0.1402, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.8103149237813784e-06, |
|
"loss": 0.1552, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.7758424669195086e-06, |
|
"loss": 0.1439, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.7414127164274115e-06, |
|
"loss": 0.1455, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.7070261283527895e-06, |
|
"loss": 0.1611, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.6726831581716374e-06, |
|
"loss": 0.1201, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.638384260782193e-06, |
|
"loss": 0.1406, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.604129890498915e-06, |
|
"loss": 0.1534, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.569920501046474e-06, |
|
"loss": 0.1635, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.535756545553734e-06, |
|
"loss": 0.1388, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.501638476547745e-06, |
|
"loss": 0.1658, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.467566745947771e-06, |
|
"loss": 0.1699, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.433541805059269e-06, |
|
"loss": 0.1257, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.39956410456795e-06, |
|
"loss": 0.1537, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.365634094533786e-06, |
|
"loss": 0.1527, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.331752224385043e-06, |
|
"loss": 0.1662, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.29791894291235e-06, |
|
"loss": 0.1706, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.264134698262745e-06, |
|
"loss": 0.1533, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.230399937933719e-06, |
|
"loss": 0.1612, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.196715108767325e-06, |
|
"loss": 0.1447, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.163080656944234e-06, |
|
"loss": 0.1746, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.129497027977829e-06, |
|
"loss": 0.1667, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.095964666708312e-06, |
|
"loss": 0.1614, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.062484017296796e-06, |
|
"loss": 0.1614, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.029055523219442e-06, |
|
"loss": 0.1416, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5.995679627261575e-06, |
|
"loss": 0.1376, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5.962356771511808e-06, |
|
"loss": 0.1516, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.929087397356206e-06, |
|
"loss": 0.1646, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.895871945472434e-06, |
|
"loss": 0.1481, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.866024505964063e-06, |
|
"loss": 0.1544, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.832912717900956e-06, |
|
"loss": 0.1629, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.799856126014999e-06, |
|
"loss": 0.1617, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.766855168165374e-06, |
|
"loss": 0.1472, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.733910281474384e-06, |
|
"loss": 0.1214, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.701021902321594e-06, |
|
"loss": 0.1529, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.668190466338111e-06, |
|
"loss": 0.1538, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.635416408400774e-06, |
|
"loss": 0.1485, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.602700162626406e-06, |
|
"loss": 0.1781, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.570042162366076e-06, |
|
"loss": 0.131, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.537442840199337e-06, |
|
"loss": 0.1597, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.504902627928508e-06, |
|
"loss": 0.1589, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.472421956572953e-06, |
|
"loss": 0.1711, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.440001256363386e-06, |
|
"loss": 0.1442, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.407640956736133e-06, |
|
"loss": 0.1515, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.3753414863274985e-06, |
|
"loss": 0.1599, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.343103272968028e-06, |
|
"loss": 0.156, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.310926743676898e-06, |
|
"loss": 0.1526, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.2788123246562206e-06, |
|
"loss": 0.1478, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.249962803412024e-06, |
|
"loss": 0.1541, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.217967565140998e-06, |
|
"loss": 0.1696, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.1860356684540395e-06, |
|
"loss": 0.1359, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.154167536312911e-06, |
|
"loss": 0.1548, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.1223635908347846e-06, |
|
"loss": 0.1113, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.090624253286622e-06, |
|
"loss": 0.1584, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.058949944079607e-06, |
|
"loss": 0.1623, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.027341082763575e-06, |
|
"loss": 0.1646, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.995798088021454e-06, |
|
"loss": 0.1456, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.964321377663718e-06, |
|
"loss": 0.1794, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.93291136862287e-06, |
|
"loss": 0.1703, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.901568476947876e-06, |
|
"loss": 0.1436, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.8702931177987115e-06, |
|
"loss": 0.1436, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.839085705440815e-06, |
|
"loss": 0.1491, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.807946653239621e-06, |
|
"loss": 0.1541, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7768763736550975e-06, |
|
"loss": 0.1484, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7458752782362486e-06, |
|
"loss": 0.1279, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.714943777615693e-06, |
|
"loss": 0.1637, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.684082281504214e-06, |
|
"loss": 0.1632, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.653291198685331e-06, |
|
"loss": 0.1277, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.622570937009879e-06, |
|
"loss": 0.1514, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.5919219033906384e-06, |
|
"loss": 0.1399, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.561344503796887e-06, |
|
"loss": 0.1768, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.530839143249089e-06, |
|
"loss": 0.1449, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.500406225813476e-06, |
|
"loss": 0.1682, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.470046154596725e-06, |
|
"loss": 0.1542, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.439759331740606e-06, |
|
"loss": 0.1674, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.409546158416674e-06, |
|
"loss": 0.1653, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.379407034820915e-06, |
|
"loss": 0.1238, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.349342360168498e-06, |
|
"loss": 0.1399, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.319352532688444e-06, |
|
"loss": 0.1683, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2894379496183725e-06, |
|
"loss": 0.124, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.259599007199233e-06, |
|
"loss": 0.1455, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.229836100670058e-06, |
|
"loss": 0.148, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.200149624262736e-06, |
|
"loss": 0.1838, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.170539971196771e-06, |
|
"loss": 0.1487, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.141007533674087e-06, |
|
"loss": 0.1275, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.11155270287383e-06, |
|
"loss": 0.1682, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.0821758689472e-06, |
|
"loss": 0.1648, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.0528774210122455e-06, |
|
"loss": 0.1476, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.023657747148757e-06, |
|
"loss": 0.1567, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.994517234393093e-06, |
|
"loss": 0.1684, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.965456268733065e-06, |
|
"loss": 0.1408, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.936475235102826e-06, |
|
"loss": 0.1617, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.907574517377766e-06, |
|
"loss": 0.129, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.8787544983694325e-06, |
|
"loss": 0.1448, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.850015559820465e-06, |
|
"loss": 0.1507, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.821358082399522e-06, |
|
"loss": 0.1421, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7927824456962557e-06, |
|
"loss": 0.173, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7642890282162713e-06, |
|
"loss": 0.1682, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7358782073761202e-06, |
|
"loss": 0.1639, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7075503594983064e-06, |
|
"loss": 0.1337, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.6793058598062892e-06, |
|
"loss": 0.1781, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.6511450824195184e-06, |
|
"loss": 0.1655, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.6230684003484785e-06, |
|
"loss": 0.125, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.595076185489761e-06, |
|
"loss": 0.1598, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.567168808621104e-06, |
|
"loss": 0.1549, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.539346639396529e-06, |
|
"loss": 0.1426, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5116100463413926e-06, |
|
"loss": 0.1416, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.483959396847554e-06, |
|
"loss": 0.1745, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.4563950571684725e-06, |
|
"loss": 0.1491, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.428917392414374e-06, |
|
"loss": 0.1853, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.401526766547405e-06, |
|
"loss": 0.167, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.37422354237683e-06, |
|
"loss": 0.1328, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.3470080815542004e-06, |
|
"loss": 0.1339, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.319880744568581e-06, |
|
"loss": 0.1585, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.2928418907417702e-06, |
|
"loss": 0.1458, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.2658918782235383e-06, |
|
"loss": 0.1546, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.2390310639868992e-06, |
|
"loss": 0.1349, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.2122598038233466e-06, |
|
"loss": 0.1536, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.185578452338185e-06, |
|
"loss": 0.1519, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.1589873629458002e-06, |
|
"loss": 0.1364, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.132486887864992e-06, |
|
"loss": 0.1608, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.1060773781143004e-06, |
|
"loss": 0.1512, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.0797591835073804e-06, |
|
"loss": 0.148, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.053532652648323e-06, |
|
"loss": 0.1666, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.0273981329270865e-06, |
|
"loss": 0.1424, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.001355970514863e-06, |
|
"loss": 0.1398, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.9754065103595054e-06, |
|
"loss": 0.1274, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.949550096180954e-06, |
|
"loss": 0.1511, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.923787070466687e-06, |
|
"loss": 0.1946, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.8981177744671875e-06, |
|
"loss": 0.1533, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.8725425481914127e-06, |
|
"loss": 0.1568, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.8470617304022976e-06, |
|
"loss": 0.1292, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.821675658612263e-06, |
|
"loss": 0.1597, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.7963846690787633e-06, |
|
"loss": 0.1396, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7711890967997923e-06, |
|
"loss": 0.1405, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.746089275509496e-06, |
|
"loss": 0.1495, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7210855376737123e-06, |
|
"loss": 0.151, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.6961782144855876e-06, |
|
"loss": 0.1472, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.6713676358611775e-06, |
|
"loss": 0.1271, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.646654130435101e-06, |
|
"loss": 0.1465, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.622038025556145e-06, |
|
"loss": 0.1483, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.597519647282981e-06, |
|
"loss": 0.1416, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.5730993203797906e-06, |
|
"loss": 0.1524, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.5487773683120166e-06, |
|
"loss": 0.1361, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.5245541132420403e-06, |
|
"loss": 0.1649, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.5004298760249267e-06, |
|
"loss": 0.1477, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4764049762041874e-06, |
|
"loss": 0.1514, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4524797320075233e-06, |
|
"loss": 0.1705, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.42865446034263e-06, |
|
"loss": 0.1412, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4049294767929844e-06, |
|
"loss": 0.1283, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.3813050956136876e-06, |
|
"loss": 0.1559, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.357781629727265e-06, |
|
"loss": 0.1437, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.3343593907195692e-06, |
|
"loss": 0.1588, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.311038688835604e-06, |
|
"loss": 0.1702, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2901371267146e-06, |
|
"loss": 0.1615, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.267010195270566e-06, |
|
"loss": 0.1396, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.243985693040561e-06, |
|
"loss": 0.1324, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.221063925001278e-06, |
|
"loss": 0.1731, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.200522422465723e-06, |
|
"loss": 0.1473, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.177796684722696e-06, |
|
"loss": 0.1402, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.155174557893146e-06, |
|
"loss": 0.1556, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1326563416239997e-06, |
|
"loss": 0.1882, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1102423341858235e-06, |
|
"loss": 0.1476, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.0879328324688497e-06, |
|
"loss": 0.1739, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.065728131979058e-06, |
|
"loss": 0.1355, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.0436285268342548e-06, |
|
"loss": 0.162, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.021634309760191e-06, |
|
"loss": 0.1477, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.9997457720866554e-06, |
|
"loss": 0.1639, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.9779632037436513e-06, |
|
"loss": 0.1418, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.9562868932575328e-06, |
|
"loss": 0.1293, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.9347171277471875e-06, |
|
"loss": 0.1529, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.9132541929202384e-06, |
|
"loss": 0.18, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8918983730692563e-06, |
|
"loss": 0.1607, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8706499510679888e-06, |
|
"loss": 0.1394, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8495092083676324e-06, |
|
"loss": 0.1645, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.828476424993071e-06, |
|
"loss": 0.1441, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8075518795392077e-06, |
|
"loss": 0.1432, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.7867358491672394e-06, |
|
"loss": 0.1247, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.7660286096010027e-06, |
|
"loss": 0.1646, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7454304351233253e-06, |
|
"loss": 0.1419, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7249415985723795e-06, |
|
"loss": 0.144, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7045623713380777e-06, |
|
"loss": 0.1487, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.684293023358472e-06, |
|
"loss": 0.1653, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.664133823116193e-06, |
|
"loss": 0.1439, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6440850376348627e-06, |
|
"loss": 0.1314, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.624146932475601e-06, |
|
"loss": 0.138, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6043197717334614e-06, |
|
"loss": 0.1577, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.584603818033975e-06, |
|
"loss": 0.1528, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5649993325296408e-06, |
|
"loss": 0.1479, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5455065748964825e-06, |
|
"loss": 0.1504, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5261258033306027e-06, |
|
"loss": 0.1494, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.506857274544774e-06, |
|
"loss": 0.1493, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.487701243765013e-06, |
|
"loss": 0.1327, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4686579647272337e-06, |
|
"loss": 0.1488, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4497276896738588e-06, |
|
"loss": 0.139, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4309106693504914e-06, |
|
"loss": 0.1554, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4122071530025915e-06, |
|
"loss": 0.1569, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3936173883721726e-06, |
|
"loss": 0.1249, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.375141621694529e-06, |
|
"loss": 0.1652, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3567800976949585e-06, |
|
"loss": 0.1458, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.338533059585534e-06, |
|
"loss": 0.152, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3204007490618742e-06, |
|
"loss": 0.1296, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.302383406299952e-06, |
|
"loss": 0.147, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.2844812699528963e-06, |
|
"loss": 0.1411, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.266694577147851e-06, |
|
"loss": 0.1521, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2490235634828196e-06, |
|
"loss": 0.1333, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2314684630235507e-06, |
|
"loss": 0.1552, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2140295083004306e-06, |
|
"loss": 0.1626, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1967069303054213e-06, |
|
"loss": 0.1583, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1795009584889716e-06, |
|
"loss": 0.1456, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.16241182075701e-06, |
|
"loss": 0.1408, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1454397434679022e-06, |
|
"loss": 0.189, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.12858495142946e-06, |
|
"loss": 0.1043, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.111847667895971e-06, |
|
"loss": 0.1107, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.0952281145652266e-06, |
|
"loss": 0.1579, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.078726511575603e-06, |
|
"loss": 0.1257, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.0623430775031306e-06, |
|
"loss": 0.1452, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.0460780293586059e-06, |
|
"loss": 0.1534, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.0299315825847122e-06, |
|
"loss": 0.1428, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.01390395105318e-06, |
|
"loss": 0.1354, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.979953470619263e-07, |
|
"loss": 0.1499, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.822059813322771e-07, |
|
"loss": 0.1305, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.665360630061438e-07, |
|
"loss": 0.1615, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.509857996432792e-07, |
|
"loss": 0.1585, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.355553972185116e-07, |
|
"loss": 0.1498, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.202450601190227e-07, |
|
"loss": 0.1773, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.050549911416373e-07, |
|
"loss": 0.1499, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.899853914901446e-07, |
|
"loss": 0.1825, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.750364607726247e-07, |
|
"loss": 0.1626, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.602083969988051e-07, |
|
"loss": 0.1558, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.455013965774462e-07, |
|
"loss": 0.1426, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.309156543137265e-07, |
|
"loss": 0.1408, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.164513634066784e-07, |
|
"loss": 0.1302, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.021087154466156e-07, |
|
"loss": 0.1927, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.878879004126005e-07, |
|
"loss": 0.1481, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.737891066699288e-07, |
|
"loss": 0.1517, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.598125209676321e-07, |
|
"loss": 0.1508, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.459583284360039e-07, |
|
"loss": 0.1587, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.322267125841575e-07, |
|
"loss": 0.1511, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.18617855297572e-07, |
|
"loss": 0.1519, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.051319368357124e-07, |
|
"loss": 0.177, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.917691358296185e-07, |
|
"loss": 0.1553, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.785296292795496e-07, |
|
"loss": 0.1329, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.654135925526373e-07, |
|
"loss": 0.1557, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.524211993805684e-07, |
|
"loss": 0.1511, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.395526218572723e-07, |
|
"loss": 0.1566, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.268080304366509e-07, |
|
"loss": 0.1557, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.141875939303176e-07, |
|
"loss": 0.1458, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.016914795053586e-07, |
|
"loss": 0.1598, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.893198526821287e-07, |
|
"loss": 0.1393, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.770728773320411e-07, |
|
"loss": 0.1595, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.649507156754174e-07, |
|
"loss": 0.1369, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.52953528279323e-07, |
|
"loss": 0.1522, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.410814740554471e-07, |
|
"loss": 0.1533, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.293347102579959e-07, |
|
"loss": 0.1322, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.177133924816169e-07, |
|
"loss": 0.1389, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.062176746593195e-07, |
|
"loss": 0.1331, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.94847709060462e-07, |
|
"loss": 0.1579, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.836036462887061e-07, |
|
"loss": 0.1329, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.724856352800511e-07, |
|
"loss": 0.1511, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.614938233008359e-07, |
|
"loss": 0.1725, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.506283559458047e-07, |
|
"loss": 0.1443, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.398893771361723e-07, |
|
"loss": 0.1602, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.292770291177173e-07, |
|
"loss": 0.143, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.187914524588998e-07, |
|
"loss": 0.168, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.0843278604899673e-07, |
|
"loss": 0.1684, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.982011670962682e-07, |
|
"loss": 0.1777, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.880967311261319e-07, |
|
"loss": 0.1695, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.79111590857375e-07, |
|
"loss": 0.1533, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.692491698917511e-07, |
|
"loss": 0.1563, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.595143153995062e-07, |
|
"loss": 0.1674, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4990715632604145e-07, |
|
"loss": 0.1505, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.404278199253397e-07, |
|
"loss": 0.1714, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.3107643175827707e-07, |
|
"loss": 0.1547, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.218531156909621e-07, |
|
"loss": 0.1546, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.127579938930891e-07, |
|
"loss": 0.1729, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.0379118683632635e-07, |
|
"loss": 0.1476, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.949528132927171e-07, |
|
"loss": 0.1444, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.8624299033310767e-07, |
|
"loss": 0.1583, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7766183332559316e-07, |
|
"loss": 0.1489, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.692094559339975e-07, |
|
"loss": 0.1568, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.6088597011635575e-07, |
|
"loss": 0.159, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.526914861234464e-07, |
|
"loss": 0.1495, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.446261124973137e-07, |
|
"loss": 0.1471, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.3668995606984547e-07, |
|
"loss": 0.1536, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.2888312196134855e-07, |
|
"loss": 0.1583, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.2120571357915898e-07, |
|
"loss": 0.1844, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.1365783261627525e-07, |
|
"loss": 0.1399, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.0623957905000603e-07, |
|
"loss": 0.1508, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.9967406351210305e-07, |
|
"loss": 0.1458, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.9250237128636385e-07, |
|
"loss": 0.1851, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.8546058667709088e-07, |
|
"loss": 0.1561, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7854880295797406e-07, |
|
"loss": 0.17, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7176711168073845e-07, |
|
"loss": 0.1376, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6511560267394088e-07, |
|
"loss": 0.1466, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5859436404177532e-07, |
|
"loss": 0.1402, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5220348216290924e-07, |
|
"loss": 0.1232, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4594304168933703e-07, |
|
"loss": 0.1492, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3981312554525728e-07, |
|
"loss": 0.1578, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3381381492598155e-07, |
|
"loss": 0.151, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.279451892968475e-07, |
|
"loss": 0.1267, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2220732639217858e-07, |
|
"loss": 0.162, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1660030221424479e-07, |
|
"loss": 0.1519, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1112419103226136e-07, |
|
"loss": 0.1519, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.057790653814017e-07, |
|
"loss": 0.1342, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0056499606183933e-07, |
|
"loss": 0.1371, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.548205213780859e-08, |
|
"loss": 0.1744, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.053030093669313e-08, |
|
"loss": 0.1813, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.570980804812556e-08, |
|
"loss": 0.1452, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.102063732312925e-08, |
|
"loss": 0.1587, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.646285087326344e-08, |
|
"loss": 0.1961, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.203650906980942e-08, |
|
"loss": 0.1519, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.774167054296233e-08, |
|
"loss": 0.1558, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.357839218106066e-08, |
|
"loss": 0.1698, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.954672912982906e-08, |
|
"loss": 0.1756, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.564673479164895e-08, |
|
"loss": 0.1528, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.187846082485348e-08, |
|
"loss": 0.1431, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.8241957143040365e-08, |
|
"loss": 0.112, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.473727191441124e-08, |
|
"loss": 0.1766, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.136445156113222e-08, |
|
"loss": 0.1372, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.8123540758726596e-08, |
|
"loss": 0.125, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.501458243547085e-08, |
|
"loss": 0.1373, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.203761777183734e-08, |
|
"loss": 0.1584, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.9192686199939204e-08, |
|
"loss": 0.1654, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.6479825403019633e-08, |
|
"loss": 0.1473, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.389907131493785e-08, |
|
"loss": 0.1228, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.145045811970836e-08, |
|
"loss": 0.1324, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.9134018251038e-08, |
|
"loss": 0.1737, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.6949782391897375e-08, |
|
"loss": 0.1492, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.4897779474120078e-08, |
|
"loss": 0.1615, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2978036678014117e-08, |
|
"loss": 0.1339, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1190579432003301e-08, |
|
"loss": 0.1475, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.535431412293073e-09, |
|
"loss": 0.1477, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.012614542549646e-09, |
|
"loss": 0.1467, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.622148993619126e-09, |
|
"loss": 0.1563, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.3640531832543916e-09, |
|
"loss": 0.169, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.2383437758719555e-09, |
|
"loss": 0.1605, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.2450356823321427e-09, |
|
"loss": 0.1632, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.3841420597414677e-09, |
|
"loss": 0.1469, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.655674311276112e-09, |
|
"loss": 0.1529, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0596420860353728e-09, |
|
"loss": 0.1308, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.960532789106577e-10, |
|
"loss": 0.14, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.6491403048112266e-10, |
|
"loss": 0.1611, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 8899, |
|
"total_flos": 1198267632648192.0, |
|
"train_loss": 0.1920002836473257, |
|
"train_runtime": 285448.837, |
|
"train_samples_per_second": 0.125, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 8899, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50000, |
|
"total_flos": 1198267632648192.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|