|
{ |
|
"best_metric": 0.5851995594482614, |
|
"best_model_checkpoint": "vit-base-patch16-224-vit-base-patch16\\checkpoint-16086", |
|
"epoch": 2.9995804391403667, |
|
"eval_steps": 500, |
|
"global_step": 16086, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.107520198881293e-07, |
|
"loss": 7.9302, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.215040397762586e-07, |
|
"loss": 7.9301, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.32256059664388e-07, |
|
"loss": 7.93, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2430080795525172e-06, |
|
"loss": 7.9296, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.5537600994406465e-06, |
|
"loss": 7.9293, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.864512119328776e-06, |
|
"loss": 7.9286, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.175264139216905e-06, |
|
"loss": 7.9281, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4860161591050345e-06, |
|
"loss": 7.9278, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.7967681789931635e-06, |
|
"loss": 7.926, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.107520198881293e-06, |
|
"loss": 7.9253, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.418272218769422e-06, |
|
"loss": 7.9242, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.729024238657552e-06, |
|
"loss": 7.9225, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0397762585456806e-06, |
|
"loss": 7.9207, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.35052827843381e-06, |
|
"loss": 7.9191, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.6612802983219395e-06, |
|
"loss": 7.917, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.972032318210069e-06, |
|
"loss": 7.9149, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.282784338098198e-06, |
|
"loss": 7.9116, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.593536357986327e-06, |
|
"loss": 7.9083, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.9042883778744565e-06, |
|
"loss": 7.9065, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.215040397762586e-06, |
|
"loss": 7.9019, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.5257924176507155e-06, |
|
"loss": 7.8957, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.836544437538844e-06, |
|
"loss": 7.8927, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.1472964574269735e-06, |
|
"loss": 7.8868, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.458048477315104e-06, |
|
"loss": 7.8831, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.768800497203232e-06, |
|
"loss": 7.8775, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.079552517091361e-06, |
|
"loss": 7.8719, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.39030453697949e-06, |
|
"loss": 7.8652, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.70105655686762e-06, |
|
"loss": 7.8573, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.01180857675575e-06, |
|
"loss": 7.8496, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.322560596643879e-06, |
|
"loss": 7.8454, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.633312616532007e-06, |
|
"loss": 7.8381, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.944064636420138e-06, |
|
"loss": 7.8295, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0254816656308266e-05, |
|
"loss": 7.8215, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0565568676196395e-05, |
|
"loss": 7.811, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.0876320696084526e-05, |
|
"loss": 7.8033, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1187072715972654e-05, |
|
"loss": 7.7903, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1497824735860784e-05, |
|
"loss": 7.7835, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1808576755748913e-05, |
|
"loss": 7.7719, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2119328775637043e-05, |
|
"loss": 7.7589, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2430080795525172e-05, |
|
"loss": 7.7576, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.27408328154133e-05, |
|
"loss": 7.7456, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3051584835301431e-05, |
|
"loss": 7.7309, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3362336855189559e-05, |
|
"loss": 7.724, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3673088875077688e-05, |
|
"loss": 7.7139, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.398384089496582e-05, |
|
"loss": 7.7025, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4294592914853947e-05, |
|
"loss": 7.6857, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4605344934742077e-05, |
|
"loss": 7.6762, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4916096954630208e-05, |
|
"loss": 7.6618, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5226848974518334e-05, |
|
"loss": 7.6498, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5537600994406463e-05, |
|
"loss": 7.6415, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.5848353014294593e-05, |
|
"loss": 7.6319, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6159105034182722e-05, |
|
"loss": 7.6112, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.646985705407085e-05, |
|
"loss": 7.6032, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.678060907395898e-05, |
|
"loss": 7.5935, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.709136109384711e-05, |
|
"loss": 7.5846, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.740211311373524e-05, |
|
"loss": 7.5674, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.771286513362337e-05, |
|
"loss": 7.5512, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.80236171535115e-05, |
|
"loss": 7.541, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.833436917339963e-05, |
|
"loss": 7.5262, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8645121193287758e-05, |
|
"loss": 7.5138, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8955873213175887e-05, |
|
"loss": 7.4991, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9266625233064014e-05, |
|
"loss": 7.4734, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9577377252952146e-05, |
|
"loss": 7.4607, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9888129272840276e-05, |
|
"loss": 7.4621, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0198881292728402e-05, |
|
"loss": 7.4499, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.050963331261653e-05, |
|
"loss": 7.4228, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0820385332504664e-05, |
|
"loss": 7.4174, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.113113735239279e-05, |
|
"loss": 7.3955, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.144188937228092e-05, |
|
"loss": 7.387, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1752641392169053e-05, |
|
"loss": 7.3643, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.206339341205718e-05, |
|
"loss": 7.3718, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2374145431945308e-05, |
|
"loss": 7.3485, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.2684897451833438e-05, |
|
"loss": 7.3414, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.2995649471721567e-05, |
|
"loss": 7.3117, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3306401491609697e-05, |
|
"loss": 7.2965, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3617153511497826e-05, |
|
"loss": 7.3053, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3927905531385956e-05, |
|
"loss": 7.2712, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.4238657551274085e-05, |
|
"loss": 7.2515, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.4549409571162214e-05, |
|
"loss": 7.2411, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.4860161591050344e-05, |
|
"loss": 7.2357, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5170913610938473e-05, |
|
"loss": 7.2151, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.54816656308266e-05, |
|
"loss": 7.1943, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.579241765071473e-05, |
|
"loss": 7.1974, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.6103169670602862e-05, |
|
"loss": 7.2105, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.6413921690490988e-05, |
|
"loss": 7.156, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.6724673710379117e-05, |
|
"loss": 7.1484, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.703542573026725e-05, |
|
"loss": 7.1293, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7346177750155376e-05, |
|
"loss": 7.1283, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7656929770043506e-05, |
|
"loss": 7.1159, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.796768178993164e-05, |
|
"loss": 7.0768, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8278433809819765e-05, |
|
"loss": 7.0734, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8589185829707894e-05, |
|
"loss": 7.068, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8899937849596027e-05, |
|
"loss": 7.0497, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9210689869484153e-05, |
|
"loss": 7.0373, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9521441889372283e-05, |
|
"loss": 7.0332, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9832193909260415e-05, |
|
"loss": 7.0304, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.014294592914854e-05, |
|
"loss": 7.0186, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0453697949036668e-05, |
|
"loss": 6.9994, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0764449968924804e-05, |
|
"loss": 6.988, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1075201988812927e-05, |
|
"loss": 6.9575, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1385954008701056e-05, |
|
"loss": 6.9407, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1696706028589185e-05, |
|
"loss": 6.9424, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.2007458048477315e-05, |
|
"loss": 6.9267, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.2318210068365444e-05, |
|
"loss": 6.9348, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2628962088253574e-05, |
|
"loss": 6.9101, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.29397141081417e-05, |
|
"loss": 6.8636, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.325046612802983e-05, |
|
"loss": 6.8813, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.356121814791796e-05, |
|
"loss": 6.8706, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.387197016780609e-05, |
|
"loss": 6.8722, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.418272218769422e-05, |
|
"loss": 6.8492, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.449347420758235e-05, |
|
"loss": 6.8014, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.480422622747048e-05, |
|
"loss": 6.8025, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.511497824735861e-05, |
|
"loss": 6.8108, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.542573026724674e-05, |
|
"loss": 6.8155, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.573648228713487e-05, |
|
"loss": 6.7615, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6047234307023e-05, |
|
"loss": 6.7385, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.635798632691113e-05, |
|
"loss": 6.7358, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.666873834679926e-05, |
|
"loss": 6.7388, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6979490366687386e-05, |
|
"loss": 6.7352, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.7290242386575516e-05, |
|
"loss": 6.7093, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7600994406463645e-05, |
|
"loss": 6.7211, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7911746426351775e-05, |
|
"loss": 6.6963, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8222498446239904e-05, |
|
"loss": 6.6921, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.853325046612803e-05, |
|
"loss": 6.6363, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.884400248601616e-05, |
|
"loss": 6.6501, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.915475450590429e-05, |
|
"loss": 6.6199, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9465506525792415e-05, |
|
"loss": 6.5996, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.977625854568055e-05, |
|
"loss": 6.6274, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.008701056556868e-05, |
|
"loss": 6.5743, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.0397762585456804e-05, |
|
"loss": 6.5633, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.070851460534494e-05, |
|
"loss": 6.5602, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.101926662523306e-05, |
|
"loss": 6.5558, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.133001864512119e-05, |
|
"loss": 6.5785, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.164077066500933e-05, |
|
"loss": 6.5247, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.195152268489745e-05, |
|
"loss": 6.5409, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.226227470478558e-05, |
|
"loss": 6.4857, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.257302672467372e-05, |
|
"loss": 6.5344, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.288377874456184e-05, |
|
"loss": 6.5113, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.319453076444997e-05, |
|
"loss": 6.4928, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.3505282784338105e-05, |
|
"loss": 6.4826, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.381603480422623e-05, |
|
"loss": 6.4621, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.412678682411436e-05, |
|
"loss": 6.4211, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4437538844002494e-05, |
|
"loss": 6.4753, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4748290863890616e-05, |
|
"loss": 6.4343, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5059042883778746e-05, |
|
"loss": 6.4252, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5369794903666875e-05, |
|
"loss": 6.3907, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5680546923555005e-05, |
|
"loss": 6.383, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5991298943443134e-05, |
|
"loss": 6.378, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6302050963331264e-05, |
|
"loss": 6.3726, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.661280298321939e-05, |
|
"loss": 6.3738, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.692355500310752e-05, |
|
"loss": 6.3397, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.723430702299565e-05, |
|
"loss": 6.351, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.754505904288378e-05, |
|
"loss": 6.3207, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.785581106277191e-05, |
|
"loss": 6.3161, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.816656308266004e-05, |
|
"loss": 6.3103, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.847731510254817e-05, |
|
"loss": 6.291, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.87880671224363e-05, |
|
"loss": 6.2851, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.909881914232443e-05, |
|
"loss": 6.2605, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.940957116221256e-05, |
|
"loss": 6.2434, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.972032318210069e-05, |
|
"loss": 6.2739, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.999654624576915e-05, |
|
"loss": 6.2309, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.996200870346067e-05, |
|
"loss": 6.2764, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.992747116115217e-05, |
|
"loss": 6.1911, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.989293361884368e-05, |
|
"loss": 6.193, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.98583960765352e-05, |
|
"loss": 6.1912, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982385853422671e-05, |
|
"loss": 6.1854, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.978932099191822e-05, |
|
"loss": 6.1694, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9754783449609724e-05, |
|
"loss": 6.1337, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.972024590730124e-05, |
|
"loss": 6.1062, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.968570836499275e-05, |
|
"loss": 6.132, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.965117082268426e-05, |
|
"loss": 6.178, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.961663328037577e-05, |
|
"loss": 6.1392, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.958209573806728e-05, |
|
"loss": 6.1331, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.954755819575879e-05, |
|
"loss": 6.0742, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.95130206534503e-05, |
|
"loss": 6.0837, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.947848311114181e-05, |
|
"loss": 6.0774, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944394556883332e-05, |
|
"loss": 6.0711, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9409408026524834e-05, |
|
"loss": 6.0922, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9374870484216344e-05, |
|
"loss": 6.041, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9340332941907854e-05, |
|
"loss": 6.0081, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.930579539959937e-05, |
|
"loss": 5.9962, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9271257857290875e-05, |
|
"loss": 6.0346, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9236720314982385e-05, |
|
"loss": 6.0221, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92021827726739e-05, |
|
"loss": 6.0185, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.916764523036541e-05, |
|
"loss": 5.9659, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.913310768805692e-05, |
|
"loss": 5.9443, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9098570145748426e-05, |
|
"loss": 5.9759, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9064032603439943e-05, |
|
"loss": 5.973, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9029495061131454e-05, |
|
"loss": 5.9397, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.8994957518822964e-05, |
|
"loss": 5.9718, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8960419976514474e-05, |
|
"loss": 5.8958, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8925882434205985e-05, |
|
"loss": 5.9235, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8891344891897495e-05, |
|
"loss": 5.8631, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8856807349589005e-05, |
|
"loss": 5.8879, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8822269807280516e-05, |
|
"loss": 5.9051, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8787732264972026e-05, |
|
"loss": 5.8506, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8753194722663536e-05, |
|
"loss": 5.8902, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8718657180355047e-05, |
|
"loss": 5.8543, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868411963804656e-05, |
|
"loss": 5.8779, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8649582095738074e-05, |
|
"loss": 5.859, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.861504455342958e-05, |
|
"loss": 5.8649, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.858050701112109e-05, |
|
"loss": 5.8307, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8545969468812605e-05, |
|
"loss": 5.7816, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8511431926504115e-05, |
|
"loss": 5.7834, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.847689438419562e-05, |
|
"loss": 5.8585, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.844235684188713e-05, |
|
"loss": 5.758, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8407819299578646e-05, |
|
"loss": 5.7945, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8373281757270157e-05, |
|
"loss": 5.7647, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.833874421496167e-05, |
|
"loss": 5.7845, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.830420667265318e-05, |
|
"loss": 5.7382, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.826966913034469e-05, |
|
"loss": 5.7743, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.82351315880362e-05, |
|
"loss": 5.7199, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.820059404572771e-05, |
|
"loss": 5.7205, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.816605650341922e-05, |
|
"loss": 5.7001, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.813151896111073e-05, |
|
"loss": 5.7475, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.809698141880224e-05, |
|
"loss": 5.735, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.806244387649375e-05, |
|
"loss": 5.7608, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.8027906334185266e-05, |
|
"loss": 5.6663, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.799336879187678e-05, |
|
"loss": 5.6878, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.795883124956828e-05, |
|
"loss": 5.6914, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.792429370725979e-05, |
|
"loss": 5.6306, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.788975616495131e-05, |
|
"loss": 5.6178, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.785521862264282e-05, |
|
"loss": 5.679, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.782068108033432e-05, |
|
"loss": 5.6543, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.778614353802583e-05, |
|
"loss": 5.6911, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.775160599571735e-05, |
|
"loss": 5.6754, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.771706845340886e-05, |
|
"loss": 5.6252, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.768253091110037e-05, |
|
"loss": 5.6094, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.764799336879188e-05, |
|
"loss": 5.599, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.761345582648339e-05, |
|
"loss": 5.6413, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.75789182841749e-05, |
|
"loss": 5.6193, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.754438074186641e-05, |
|
"loss": 5.5898, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.750984319955792e-05, |
|
"loss": 5.572, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.747530565724943e-05, |
|
"loss": 5.6013, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.744076811494094e-05, |
|
"loss": 5.5543, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.740623057263245e-05, |
|
"loss": 5.5415, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.737169303032397e-05, |
|
"loss": 5.5246, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.733715548801548e-05, |
|
"loss": 5.5657, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.730261794570698e-05, |
|
"loss": 5.5453, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7268080403398493e-05, |
|
"loss": 5.5467, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.723354286109001e-05, |
|
"loss": 5.5455, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.719900531878152e-05, |
|
"loss": 5.5253, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7164467776473024e-05, |
|
"loss": 5.4831, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7129930234164535e-05, |
|
"loss": 5.4704, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.709539269185605e-05, |
|
"loss": 5.4801, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.706085514954756e-05, |
|
"loss": 5.48, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.702631760723907e-05, |
|
"loss": 5.5388, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.699178006493058e-05, |
|
"loss": 5.4883, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.695724252262209e-05, |
|
"loss": 5.4321, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.69227049803136e-05, |
|
"loss": 5.4297, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6888167438005114e-05, |
|
"loss": 5.4174, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6853629895696624e-05, |
|
"loss": 5.5185, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6819092353388134e-05, |
|
"loss": 5.4269, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6784554811079645e-05, |
|
"loss": 5.4688, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6750017268771155e-05, |
|
"loss": 5.384, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.671547972646267e-05, |
|
"loss": 5.4662, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.668094218415418e-05, |
|
"loss": 5.4473, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6646404641845686e-05, |
|
"loss": 5.4024, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6611867099537196e-05, |
|
"loss": 5.3608, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.657732955722871e-05, |
|
"loss": 5.4895, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6542792014920224e-05, |
|
"loss": 5.3868, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.650825447261173e-05, |
|
"loss": 5.3681, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.647371693030324e-05, |
|
"loss": 5.4223, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6439179387994755e-05, |
|
"loss": 5.412, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6404641845686265e-05, |
|
"loss": 5.3381, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6370104303377775e-05, |
|
"loss": 5.3195, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6335566761069285e-05, |
|
"loss": 5.3945, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6301029218760796e-05, |
|
"loss": 5.3316, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6266491676452306e-05, |
|
"loss": 5.3232, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6231954134143816e-05, |
|
"loss": 5.3246, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.619741659183533e-05, |
|
"loss": 5.3445, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.616287904952684e-05, |
|
"loss": 5.2847, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.612834150721835e-05, |
|
"loss": 5.2795, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.609380396490986e-05, |
|
"loss": 5.2559, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6059266422601375e-05, |
|
"loss": 5.3091, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6024728880292885e-05, |
|
"loss": 5.2441, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.599019133798439e-05, |
|
"loss": 5.2534, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.59556537956759e-05, |
|
"loss": 5.2869, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5921116253367416e-05, |
|
"loss": 5.2629, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5886578711058926e-05, |
|
"loss": 5.2835, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.585204116875043e-05, |
|
"loss": 5.2437, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.581750362644194e-05, |
|
"loss": 5.2736, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.578296608413346e-05, |
|
"loss": 5.2331, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.574842854182497e-05, |
|
"loss": 5.2059, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.571389099951648e-05, |
|
"loss": 5.2348, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.567935345720799e-05, |
|
"loss": 5.2183, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.56448159148995e-05, |
|
"loss": 5.1723, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.561027837259101e-05, |
|
"loss": 5.206, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.557574083028252e-05, |
|
"loss": 5.276, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.554120328797403e-05, |
|
"loss": 5.1271, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.550666574566554e-05, |
|
"loss": 5.1887, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.547212820335705e-05, |
|
"loss": 5.2678, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543759066104856e-05, |
|
"loss": 5.2341, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.540305311874008e-05, |
|
"loss": 5.2136, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.536851557643158e-05, |
|
"loss": 5.1565, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.533397803412309e-05, |
|
"loss": 5.1883, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.52994404918146e-05, |
|
"loss": 5.187, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.526490294950612e-05, |
|
"loss": 5.145, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.523036540719763e-05, |
|
"loss": 5.1121, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.519582786488913e-05, |
|
"loss": 5.063, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.516129032258064e-05, |
|
"loss": 5.157, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.512675278027216e-05, |
|
"loss": 5.1123, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.509221523796367e-05, |
|
"loss": 5.115, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.505767769565518e-05, |
|
"loss": 5.1296, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.502314015334669e-05, |
|
"loss": 5.1624, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.49886026110382e-05, |
|
"loss": 5.0889, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.495406506872971e-05, |
|
"loss": 5.0914, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.491952752642122e-05, |
|
"loss": 5.1042, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.488498998411273e-05, |
|
"loss": 5.0769, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485045244180424e-05, |
|
"loss": 5.0678, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.481591489949575e-05, |
|
"loss": 5.0928, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.478137735718726e-05, |
|
"loss": 5.0958, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.474683981487878e-05, |
|
"loss": 5.0713, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4712302272570284e-05, |
|
"loss": 5.0995, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4677764730261794e-05, |
|
"loss": 5.0845, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4643227187953304e-05, |
|
"loss": 5.0856, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.460868964564482e-05, |
|
"loss": 5.0638, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.457415210333633e-05, |
|
"loss": 5.0594, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4539614561027835e-05, |
|
"loss": 5.0941, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4505077018719346e-05, |
|
"loss": 5.0457, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.447053947641086e-05, |
|
"loss": 4.9635, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.443600193410237e-05, |
|
"loss": 4.9817, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4401464391793883e-05, |
|
"loss": 4.989, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4366926849485394e-05, |
|
"loss": 4.9672, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4332389307176904e-05, |
|
"loss": 5.008, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4297851764868414e-05, |
|
"loss": 4.9655, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4263314222559925e-05, |
|
"loss": 4.9778, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4228776680251435e-05, |
|
"loss": 4.9593, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4194239137942945e-05, |
|
"loss": 4.9961, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4159701595634456e-05, |
|
"loss": 4.9818, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4125164053325966e-05, |
|
"loss": 4.913, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.409062651101748e-05, |
|
"loss": 4.9754, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4056088968708987e-05, |
|
"loss": 4.9515, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.40215514264005e-05, |
|
"loss": 4.9495, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.398701388409201e-05, |
|
"loss": 4.9268, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3952476341783524e-05, |
|
"loss": 4.9985, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3917938799475035e-05, |
|
"loss": 4.9254, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.388340125716654e-05, |
|
"loss": 4.9554, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.384886371485805e-05, |
|
"loss": 4.9328, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3814326172549566e-05, |
|
"loss": 4.9389, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3779788630241076e-05, |
|
"loss": 4.9226, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3745251087932586e-05, |
|
"loss": 4.9134, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3710713545624097e-05, |
|
"loss": 4.9435, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.367617600331561e-05, |
|
"loss": 4.9202, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.364163846100712e-05, |
|
"loss": 4.9282, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.360710091869863e-05, |
|
"loss": 4.8598, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.357256337639014e-05, |
|
"loss": 4.9474, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.353802583408165e-05, |
|
"loss": 4.8958, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.350348829177316e-05, |
|
"loss": 4.8854, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.346895074946467e-05, |
|
"loss": 4.8184, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3434413207156186e-05, |
|
"loss": 4.9108, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.339987566484769e-05, |
|
"loss": 4.8647, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.33653381225392e-05, |
|
"loss": 4.8807, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.333080058023071e-05, |
|
"loss": 4.8766, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.329626303792223e-05, |
|
"loss": 4.8508, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.326172549561374e-05, |
|
"loss": 4.8532, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.322718795330524e-05, |
|
"loss": 4.8687, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.319265041099675e-05, |
|
"loss": 4.7944, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.315811286868827e-05, |
|
"loss": 4.7731, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.312357532637978e-05, |
|
"loss": 4.8183, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.308903778407129e-05, |
|
"loss": 4.7874, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.30545002417628e-05, |
|
"loss": 4.7327, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.301996269945431e-05, |
|
"loss": 4.8096, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.298542515714582e-05, |
|
"loss": 4.738, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.295088761483733e-05, |
|
"loss": 4.8368, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.291635007252884e-05, |
|
"loss": 4.757, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.288181253022035e-05, |
|
"loss": 4.789, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.284727498791186e-05, |
|
"loss": 4.7926, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.281273744560337e-05, |
|
"loss": 4.7196, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.277819990329489e-05, |
|
"loss": 4.7615, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.274366236098639e-05, |
|
"loss": 4.8161, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.27091248186779e-05, |
|
"loss": 4.7221, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.267458727636941e-05, |
|
"loss": 4.7104, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.264004973406093e-05, |
|
"loss": 4.6939, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.260551219175244e-05, |
|
"loss": 4.7584, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2570974649443944e-05, |
|
"loss": 4.7131, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2536437107135454e-05, |
|
"loss": 4.7359, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.250189956482697e-05, |
|
"loss": 4.7093, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.246736202251848e-05, |
|
"loss": 4.6965, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.243282448020999e-05, |
|
"loss": 4.7546, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.23982869379015e-05, |
|
"loss": 4.6808, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236374939559301e-05, |
|
"loss": 4.7021, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.232921185328452e-05, |
|
"loss": 4.6742, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.229467431097603e-05, |
|
"loss": 4.7074, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.226013676866754e-05, |
|
"loss": 4.719, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2225599226359054e-05, |
|
"loss": 4.6518, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2191061684050564e-05, |
|
"loss": 4.6734, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2156524141742074e-05, |
|
"loss": 4.686, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.212198659943359e-05, |
|
"loss": 4.6587, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2087449057125095e-05, |
|
"loss": 4.6642, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2052911514816605e-05, |
|
"loss": 4.6555, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2018373972508116e-05, |
|
"loss": 4.6315, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198383643019963e-05, |
|
"loss": 4.6435, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.194929888789114e-05, |
|
"loss": 4.6456, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1914761345582646e-05, |
|
"loss": 4.5384, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.188022380327416e-05, |
|
"loss": 4.6354, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1845686260965674e-05, |
|
"loss": 4.5797, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1811148718657184e-05, |
|
"loss": 4.6615, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1776611176348694e-05, |
|
"loss": 4.6493, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1742073634040205e-05, |
|
"loss": 4.5619, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1707536091731715e-05, |
|
"loss": 4.5834, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1672998549423225e-05, |
|
"loss": 4.6102, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1638461007114736e-05, |
|
"loss": 4.6063, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1603923464806246e-05, |
|
"loss": 4.5329, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.1569385922497756e-05, |
|
"loss": 4.6316, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.153484838018927e-05, |
|
"loss": 4.6018, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.150031083788078e-05, |
|
"loss": 4.5185, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.1465773295572294e-05, |
|
"loss": 4.572, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.14312357532638e-05, |
|
"loss": 4.5646, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.139669821095531e-05, |
|
"loss": 4.603, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.136216066864682e-05, |
|
"loss": 4.5372, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1327623126338335e-05, |
|
"loss": 4.5963, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1293085584029846e-05, |
|
"loss": 4.5808, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.125854804172135e-05, |
|
"loss": 4.497, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.122401049941286e-05, |
|
"loss": 4.5251, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.118947295710438e-05, |
|
"loss": 4.6056, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.115493541479589e-05, |
|
"loss": 4.5351, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.11203978724874e-05, |
|
"loss": 4.5328, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.108586033017891e-05, |
|
"loss": 4.5216, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.105132278787042e-05, |
|
"loss": 4.4807, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.101678524556193e-05, |
|
"loss": 4.4105, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.098224770325344e-05, |
|
"loss": 4.5167, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.094771016094495e-05, |
|
"loss": 4.5025, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.091317261863646e-05, |
|
"loss": 4.4726, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.087863507632797e-05, |
|
"loss": 4.5453, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.084409753401948e-05, |
|
"loss": 4.5499, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0809559991711e-05, |
|
"loss": 4.477, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.07750224494025e-05, |
|
"loss": 4.4173, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.074048490709401e-05, |
|
"loss": 4.4168, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.070594736478552e-05, |
|
"loss": 4.4963, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.067140982247704e-05, |
|
"loss": 4.4329, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.063687228016855e-05, |
|
"loss": 4.4016, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.060233473786005e-05, |
|
"loss": 4.4456, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.056779719555156e-05, |
|
"loss": 4.5099, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.053325965324308e-05, |
|
"loss": 4.5314, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.049872211093459e-05, |
|
"loss": 4.3918, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.04641845686261e-05, |
|
"loss": 4.3786, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.042964702631761e-05, |
|
"loss": 4.3641, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.039510948400912e-05, |
|
"loss": 4.3287, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.036057194170063e-05, |
|
"loss": 4.3898, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.032603439939214e-05, |
|
"loss": 4.4319, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.029149685708365e-05, |
|
"loss": 4.4166, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.025695931477516e-05, |
|
"loss": 4.4221, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.022242177246667e-05, |
|
"loss": 4.4518, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.018788423015818e-05, |
|
"loss": 4.433, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.01533466878497e-05, |
|
"loss": 4.4263, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.01188091455412e-05, |
|
"loss": 4.44, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0084271603232713e-05, |
|
"loss": 4.3684, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0049734060924224e-05, |
|
"loss": 4.3712, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.001519651861574e-05, |
|
"loss": 4.3684, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.998065897630725e-05, |
|
"loss": 4.3534, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9946121433998755e-05, |
|
"loss": 4.355, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9911583891690265e-05, |
|
"loss": 4.3857, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.987704634938178e-05, |
|
"loss": 4.3869, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.984250880707329e-05, |
|
"loss": 4.3584, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.98079712647648e-05, |
|
"loss": 4.2255, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.977343372245631e-05, |
|
"loss": 4.3284, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9738896180147823e-05, |
|
"loss": 4.3396, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9704358637839334e-05, |
|
"loss": 4.3761, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9669821095530844e-05, |
|
"loss": 4.3291, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9635283553222354e-05, |
|
"loss": 4.3493, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9600746010913865e-05, |
|
"loss": 4.3123, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9566208468605375e-05, |
|
"loss": 4.3874, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9531670926296885e-05, |
|
"loss": 4.3719, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.94971333839884e-05, |
|
"loss": 4.3051, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9462595841679906e-05, |
|
"loss": 4.3216, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9428058299371416e-05, |
|
"loss": 4.4278, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9393520757062927e-05, |
|
"loss": 4.3334, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9358983214754444e-05, |
|
"loss": 4.2799, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9324445672445954e-05, |
|
"loss": 4.3025, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.928990813013746e-05, |
|
"loss": 4.2286, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9255370587828975e-05, |
|
"loss": 4.286, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9220833045520485e-05, |
|
"loss": 4.2102, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9186295503211995e-05, |
|
"loss": 4.2735, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.91517579609035e-05, |
|
"loss": 4.3194, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9117220418595016e-05, |
|
"loss": 4.2928, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9082682876286526e-05, |
|
"loss": 4.348, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9048145333978037e-05, |
|
"loss": 4.3026, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.901360779166955e-05, |
|
"loss": 4.2144, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.897907024936106e-05, |
|
"loss": 4.2993, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.894453270705257e-05, |
|
"loss": 4.2313, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.890999516474408e-05, |
|
"loss": 4.2205, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.887545762243559e-05, |
|
"loss": 4.2033, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8840920080127105e-05, |
|
"loss": 4.2286, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.880638253781861e-05, |
|
"loss": 4.2882, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.877184499551012e-05, |
|
"loss": 4.2705, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.873730745320163e-05, |
|
"loss": 4.2006, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8702769910893146e-05, |
|
"loss": 4.2344, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.866823236858466e-05, |
|
"loss": 4.2227, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.863369482627616e-05, |
|
"loss": 4.2481, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.859915728396768e-05, |
|
"loss": 4.2321, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.856461974165919e-05, |
|
"loss": 4.2588, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.85300821993507e-05, |
|
"loss": 4.1625, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.84955446570422e-05, |
|
"loss": 4.1685, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.846100711473372e-05, |
|
"loss": 4.2126, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.842646957242523e-05, |
|
"loss": 4.1852, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.839193203011674e-05, |
|
"loss": 4.0762, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.835739448780825e-05, |
|
"loss": 4.1486, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.832285694549976e-05, |
|
"loss": 4.2086, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.828831940319127e-05, |
|
"loss": 4.1741, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.825378186088278e-05, |
|
"loss": 4.1473, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.821924431857429e-05, |
|
"loss": 4.1852, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.818470677626581e-05, |
|
"loss": 4.126, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.815016923395731e-05, |
|
"loss": 4.2465, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.811563169164882e-05, |
|
"loss": 4.0662, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.808109414934033e-05, |
|
"loss": 4.1311, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.804655660703185e-05, |
|
"loss": 4.1458, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.801201906472336e-05, |
|
"loss": 4.1106, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797748152241486e-05, |
|
"loss": 4.1293, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.794294398010638e-05, |
|
"loss": 4.0843, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.790840643779789e-05, |
|
"loss": 4.1247, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.78738688954894e-05, |
|
"loss": 4.1126, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.7839331353180904e-05, |
|
"loss": 4.0693, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.780479381087242e-05, |
|
"loss": 4.0749, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777025626856393e-05, |
|
"loss": 4.1138, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.773571872625544e-05, |
|
"loss": 4.1342, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.770118118394695e-05, |
|
"loss": 4.2315, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.766664364163846e-05, |
|
"loss": 4.0744, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.763210609932997e-05, |
|
"loss": 4.1098, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.759756855702148e-05, |
|
"loss": 4.1641, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7563031014712994e-05, |
|
"loss": 4.1207, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.752849347240451e-05, |
|
"loss": 4.0232, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7493955930096014e-05, |
|
"loss": 4.1027, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7459418387787525e-05, |
|
"loss": 4.1394, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7424880845479035e-05, |
|
"loss": 4.0756, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.739034330317055e-05, |
|
"loss": 4.1204, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.735580576086206e-05, |
|
"loss": 4.1218, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7321268218553566e-05, |
|
"loss": 4.0515, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.728673067624508e-05, |
|
"loss": 4.109, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.725219313393659e-05, |
|
"loss": 3.9909, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7217655591628104e-05, |
|
"loss": 4.0671, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.718311804931961e-05, |
|
"loss": 4.1067, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7148580507011124e-05, |
|
"loss": 4.0092, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7114042964702634e-05, |
|
"loss": 4.0346, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7079505422394145e-05, |
|
"loss": 4.0381, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7044967880085655e-05, |
|
"loss": 4.0308, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.24909529553679133, |
|
"eval_loss": 3.6947672367095947, |
|
"eval_runtime": 8621.8692, |
|
"eval_samples_per_second": 8.846, |
|
"eval_steps_per_second": 0.277, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7010430337777165e-05, |
|
"loss": 4.0648, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6975892795468676e-05, |
|
"loss": 3.9987, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6941355253160186e-05, |
|
"loss": 3.9417, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6906817710851696e-05, |
|
"loss": 3.9734, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6872280168543213e-05, |
|
"loss": 3.9119, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.683774262623472e-05, |
|
"loss": 3.9818, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.680320508392623e-05, |
|
"loss": 3.9722, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.676866754161774e-05, |
|
"loss": 3.9544, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6734129999309255e-05, |
|
"loss": 3.9963, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6699592457000765e-05, |
|
"loss": 3.9356, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.666505491469227e-05, |
|
"loss": 3.9639, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6630517372383786e-05, |
|
"loss": 3.9783, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6595979830075296e-05, |
|
"loss": 3.9439, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6561442287766806e-05, |
|
"loss": 3.9195, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.652690474545831e-05, |
|
"loss": 3.8927, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.649236720314983e-05, |
|
"loss": 3.9244, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.645782966084134e-05, |
|
"loss": 3.9266, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.642329211853285e-05, |
|
"loss": 3.9866, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.638875457622436e-05, |
|
"loss": 3.8888, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.635421703391587e-05, |
|
"loss": 3.8811, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.631967949160738e-05, |
|
"loss": 3.959, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.628514194929889e-05, |
|
"loss": 4.0576, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.62506044069904e-05, |
|
"loss": 3.9046, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6216066864681916e-05, |
|
"loss": 3.8684, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.618152932237342e-05, |
|
"loss": 3.9167, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.614699178006493e-05, |
|
"loss": 3.8899, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.611245423775644e-05, |
|
"loss": 3.9039, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.607791669544796e-05, |
|
"loss": 3.8306, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.604337915313946e-05, |
|
"loss": 3.8872, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.600884161083097e-05, |
|
"loss": 3.8607, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.597430406852249e-05, |
|
"loss": 3.9271, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5939766526214e-05, |
|
"loss": 4.006, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.590522898390551e-05, |
|
"loss": 3.901, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.587069144159701e-05, |
|
"loss": 3.8321, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.583615389928853e-05, |
|
"loss": 3.8744, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.580161635698004e-05, |
|
"loss": 3.9017, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.576707881467155e-05, |
|
"loss": 3.8878, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.573254127236306e-05, |
|
"loss": 3.832, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.569800373005457e-05, |
|
"loss": 3.831, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566346618774608e-05, |
|
"loss": 3.9113, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.562892864543759e-05, |
|
"loss": 3.796, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.55943911031291e-05, |
|
"loss": 3.8043, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.555985356082062e-05, |
|
"loss": 3.9136, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.552531601851212e-05, |
|
"loss": 3.7756, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.549077847620363e-05, |
|
"loss": 3.7959, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.545624093389514e-05, |
|
"loss": 3.7942, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.542170339158666e-05, |
|
"loss": 3.8605, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5387165849278164e-05, |
|
"loss": 3.7954, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5352628306969674e-05, |
|
"loss": 3.8062, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.531809076466119e-05, |
|
"loss": 3.8087, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.52835532223527e-05, |
|
"loss": 3.801, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.524901568004421e-05, |
|
"loss": 3.7685, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5214478137735715e-05, |
|
"loss": 3.8729, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.517994059542723e-05, |
|
"loss": 3.8502, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.514540305311874e-05, |
|
"loss": 3.8123, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.511086551081025e-05, |
|
"loss": 3.7957, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.5076327968501763e-05, |
|
"loss": 3.7741, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.5041790426193274e-05, |
|
"loss": 3.7132, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.5007252883884784e-05, |
|
"loss": 3.7215, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4972715341576294e-05, |
|
"loss": 3.773, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4938177799267805e-05, |
|
"loss": 3.7669, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.490364025695932e-05, |
|
"loss": 3.7389, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4869102714650825e-05, |
|
"loss": 3.748, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4834565172342336e-05, |
|
"loss": 3.7029, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4800027630033846e-05, |
|
"loss": 3.8851, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.476549008772536e-05, |
|
"loss": 3.8138, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4730952545416867e-05, |
|
"loss": 3.7595, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.469641500310838e-05, |
|
"loss": 3.7483, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4661877460799894e-05, |
|
"loss": 3.7428, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4627339918491404e-05, |
|
"loss": 3.7077, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4592802376182915e-05, |
|
"loss": 3.7716, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.455826483387442e-05, |
|
"loss": 3.7343, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4523727291565935e-05, |
|
"loss": 3.7992, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4489189749257446e-05, |
|
"loss": 3.6995, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4454652206948956e-05, |
|
"loss": 3.7807, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4420114664640466e-05, |
|
"loss": 3.7395, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4385577122331976e-05, |
|
"loss": 3.8304, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.435103958002349e-05, |
|
"loss": 3.6993, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4316502037715e-05, |
|
"loss": 3.6915, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.428196449540651e-05, |
|
"loss": 3.6993, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4247426953098025e-05, |
|
"loss": 3.7521, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.421288941078953e-05, |
|
"loss": 3.6159, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.417835186848104e-05, |
|
"loss": 3.8004, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.414381432617255e-05, |
|
"loss": 3.7197, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4109276783864066e-05, |
|
"loss": 3.6887, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.407473924155557e-05, |
|
"loss": 3.6608, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.404020169924708e-05, |
|
"loss": 3.6339, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.40056641569386e-05, |
|
"loss": 3.7329, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.397112661463011e-05, |
|
"loss": 3.7651, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.393658907232162e-05, |
|
"loss": 3.731, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.390205153001312e-05, |
|
"loss": 3.6192, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.386751398770464e-05, |
|
"loss": 3.6153, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.383297644539615e-05, |
|
"loss": 3.6365, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.379843890308766e-05, |
|
"loss": 3.6716, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.376390136077917e-05, |
|
"loss": 3.6605, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.372936381847068e-05, |
|
"loss": 3.7046, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.369482627616219e-05, |
|
"loss": 3.6256, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.36602887338537e-05, |
|
"loss": 3.6081, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.362575119154521e-05, |
|
"loss": 3.6484, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.359121364923673e-05, |
|
"loss": 3.6968, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.355667610692823e-05, |
|
"loss": 3.5797, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.352213856461974e-05, |
|
"loss": 3.6661, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.348760102231125e-05, |
|
"loss": 3.6663, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.345306348000277e-05, |
|
"loss": 3.5707, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.341852593769427e-05, |
|
"loss": 3.5989, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.338398839538578e-05, |
|
"loss": 3.6403, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.33494508530773e-05, |
|
"loss": 3.6176, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.331491331076881e-05, |
|
"loss": 3.5832, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.328037576846032e-05, |
|
"loss": 3.6051, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3245838226151824e-05, |
|
"loss": 3.6246, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.321130068384334e-05, |
|
"loss": 3.6141, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.317676314153485e-05, |
|
"loss": 3.5999, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.314222559922636e-05, |
|
"loss": 3.6628, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.310768805691787e-05, |
|
"loss": 3.651, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.307315051460938e-05, |
|
"loss": 3.5738, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.303861297230089e-05, |
|
"loss": 3.5479, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.30040754299924e-05, |
|
"loss": 3.5372, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.296953788768391e-05, |
|
"loss": 3.6088, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.293500034537542e-05, |
|
"loss": 3.5082, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2900462803066934e-05, |
|
"loss": 3.5443, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2865925260758444e-05, |
|
"loss": 3.4684, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2831387718449954e-05, |
|
"loss": 3.5836, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279685017614147e-05, |
|
"loss": 3.5457, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2762312633832975e-05, |
|
"loss": 3.5694, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2727775091524485e-05, |
|
"loss": 3.4931, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2693237549216e-05, |
|
"loss": 3.6173, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.265870000690751e-05, |
|
"loss": 3.5387, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.262416246459902e-05, |
|
"loss": 3.552, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2589624922290526e-05, |
|
"loss": 3.5856, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2555087379982044e-05, |
|
"loss": 3.5191, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2520549837673554e-05, |
|
"loss": 3.4632, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2486012295365064e-05, |
|
"loss": 3.557, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2451474753056574e-05, |
|
"loss": 3.4683, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2416937210748085e-05, |
|
"loss": 3.6178, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2382399668439595e-05, |
|
"loss": 3.509, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2347862126131105e-05, |
|
"loss": 3.5511, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2313324583822616e-05, |
|
"loss": 3.5375, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2278787041514126e-05, |
|
"loss": 3.5648, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2244249499205636e-05, |
|
"loss": 3.4806, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.220971195689715e-05, |
|
"loss": 3.5598, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.217517441458866e-05, |
|
"loss": 3.5497, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2140636872280174e-05, |
|
"loss": 3.5869, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.210609932997168e-05, |
|
"loss": 3.46, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.207156178766319e-05, |
|
"loss": 3.4238, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2037024245354705e-05, |
|
"loss": 3.5371, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2002486703046215e-05, |
|
"loss": 3.5355, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.1967949160737726e-05, |
|
"loss": 3.481, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.193341161842923e-05, |
|
"loss": 3.3692, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1898874076120746e-05, |
|
"loss": 3.4681, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1864336533812257e-05, |
|
"loss": 3.4593, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.182979899150377e-05, |
|
"loss": 3.4291, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.179526144919528e-05, |
|
"loss": 3.452, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.176072390688679e-05, |
|
"loss": 3.4585, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.17261863645783e-05, |
|
"loss": 3.4393, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.169164882226981e-05, |
|
"loss": 3.472, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.165711127996132e-05, |
|
"loss": 3.4524, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.162257373765283e-05, |
|
"loss": 3.4865, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.158803619534434e-05, |
|
"loss": 3.442, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.155349865303585e-05, |
|
"loss": 3.4376, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.151896111072736e-05, |
|
"loss": 3.4278, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.148442356841888e-05, |
|
"loss": 3.342, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.144988602611038e-05, |
|
"loss": 3.4077, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.141534848380189e-05, |
|
"loss": 3.3748, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.138081094149341e-05, |
|
"loss": 3.3983, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.134627339918492e-05, |
|
"loss": 3.4114, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.131173585687643e-05, |
|
"loss": 3.5379, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.127719831456793e-05, |
|
"loss": 3.446, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.124266077225945e-05, |
|
"loss": 3.3867, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.120812322995096e-05, |
|
"loss": 3.434, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.117358568764247e-05, |
|
"loss": 3.425, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.113904814533398e-05, |
|
"loss": 3.4585, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.110451060302549e-05, |
|
"loss": 3.4087, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1069973060717e-05, |
|
"loss": 3.4151, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.103543551840851e-05, |
|
"loss": 3.3507, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.100089797610002e-05, |
|
"loss": 3.5211, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.096636043379153e-05, |
|
"loss": 3.3704, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.093182289148304e-05, |
|
"loss": 3.4302, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.089728534917455e-05, |
|
"loss": 3.4675, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.086274780686606e-05, |
|
"loss": 3.4799, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.082821026455758e-05, |
|
"loss": 3.4777, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.079367272224908e-05, |
|
"loss": 3.4224, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0759135179940593e-05, |
|
"loss": 3.3939, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.072459763763211e-05, |
|
"loss": 3.4045, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.069006009532362e-05, |
|
"loss": 3.3775, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.065552255301513e-05, |
|
"loss": 3.3832, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0620985010706635e-05, |
|
"loss": 3.3987, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.058644746839815e-05, |
|
"loss": 3.3736, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.055190992608966e-05, |
|
"loss": 3.4009, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0517372383781172e-05, |
|
"loss": 3.4189, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0482834841472686e-05, |
|
"loss": 3.3918, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.044829729916419e-05, |
|
"loss": 3.38, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0413759756855703e-05, |
|
"loss": 3.3672, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0379222214547214e-05, |
|
"loss": 3.3829, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0344684672238727e-05, |
|
"loss": 3.3583, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0310147129930234e-05, |
|
"loss": 3.2762, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0275609587621745e-05, |
|
"loss": 3.2542, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.024107204531326e-05, |
|
"loss": 3.2616, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.020653450300477e-05, |
|
"loss": 3.4247, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.017199696069628e-05, |
|
"loss": 3.2569, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0137459418387786e-05, |
|
"loss": 3.2765, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.01029218760793e-05, |
|
"loss": 3.4377, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.006838433377081e-05, |
|
"loss": 3.289, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.0033846791462324e-05, |
|
"loss": 3.4032, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9999309249153834e-05, |
|
"loss": 3.3145, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.996477170684534e-05, |
|
"loss": 3.3723, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.993023416453685e-05, |
|
"loss": 3.3719, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9895696622228365e-05, |
|
"loss": 3.3268, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9861159079919875e-05, |
|
"loss": 3.4391, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.982662153761139e-05, |
|
"loss": 3.2366, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9792083995302892e-05, |
|
"loss": 3.3724, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9757546452994406e-05, |
|
"loss": 3.3473, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9723008910685916e-05, |
|
"loss": 3.2653, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.968847136837743e-05, |
|
"loss": 3.2872, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9653933826068937e-05, |
|
"loss": 3.2932, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9619396283760447e-05, |
|
"loss": 3.2541, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.958485874145196e-05, |
|
"loss": 3.3233, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.955032119914347e-05, |
|
"loss": 3.3025, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9515783656834982e-05, |
|
"loss": 3.2965, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.948124611452649e-05, |
|
"loss": 3.2564, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9446708572218002e-05, |
|
"loss": 3.3116, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9412171029909513e-05, |
|
"loss": 3.2414, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9377633487601026e-05, |
|
"loss": 3.3608, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9343095945292537e-05, |
|
"loss": 3.3019, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9308558402984044e-05, |
|
"loss": 3.3488, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9274020860675554e-05, |
|
"loss": 3.3367, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9239483318367068e-05, |
|
"loss": 3.2071, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9204945776058578e-05, |
|
"loss": 3.2963, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9170408233750085e-05, |
|
"loss": 3.2445, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9135870691441595e-05, |
|
"loss": 3.3087, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.910133314913311e-05, |
|
"loss": 3.3239, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.906679560682462e-05, |
|
"loss": 3.2231, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9032258064516133e-05, |
|
"loss": 3.3682, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.899772052220764e-05, |
|
"loss": 3.1886, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.896318297989915e-05, |
|
"loss": 3.285, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8928645437590664e-05, |
|
"loss": 3.1984, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8894107895282174e-05, |
|
"loss": 3.2885, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8859570352973685e-05, |
|
"loss": 3.2339, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.882503281066519e-05, |
|
"loss": 3.3014, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8790495268356705e-05, |
|
"loss": 3.3142, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8755957726048215e-05, |
|
"loss": 3.2698, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.872142018373973e-05, |
|
"loss": 3.2825, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.868688264143124e-05, |
|
"loss": 3.2214, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8652345099122746e-05, |
|
"loss": 3.1402, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8617807556814257e-05, |
|
"loss": 3.2267, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858327001450577e-05, |
|
"loss": 3.185, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.854873247219728e-05, |
|
"loss": 3.1691, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8514194929888788e-05, |
|
"loss": 3.1357, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8479657387580298e-05, |
|
"loss": 3.2274, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.844511984527181e-05, |
|
"loss": 3.2324, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8410582302963322e-05, |
|
"loss": 3.2761, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8376044760654836e-05, |
|
"loss": 3.2883, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8341507218346343e-05, |
|
"loss": 3.2104, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8306969676037853e-05, |
|
"loss": 3.2581, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8272432133729367e-05, |
|
"loss": 3.265, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8237894591420877e-05, |
|
"loss": 3.2577, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8203357049112387e-05, |
|
"loss": 3.2683, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8168819506803894e-05, |
|
"loss": 3.1734, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8134281964495408e-05, |
|
"loss": 3.2041, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8099744422186918e-05, |
|
"loss": 3.1578, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8065206879878432e-05, |
|
"loss": 3.2432, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8030669337569942e-05, |
|
"loss": 3.2978, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.799613179526145e-05, |
|
"loss": 3.201, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.796159425295296e-05, |
|
"loss": 3.2955, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.7927056710644473e-05, |
|
"loss": 3.206, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.7892519168335984e-05, |
|
"loss": 3.1621, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.785798162602749e-05, |
|
"loss": 3.2604, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7823444083719004e-05, |
|
"loss": 3.2261, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7788906541410514e-05, |
|
"loss": 3.1247, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7754368999102025e-05, |
|
"loss": 3.1877, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.771983145679354e-05, |
|
"loss": 3.1831, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7685293914485045e-05, |
|
"loss": 3.177, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.7650756372176556e-05, |
|
"loss": 3.1021, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.761621882986807e-05, |
|
"loss": 3.1824, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.758168128755958e-05, |
|
"loss": 3.1609, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.754714374525109e-05, |
|
"loss": 3.2185, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.7512606202942597e-05, |
|
"loss": 3.1663, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.747806866063411e-05, |
|
"loss": 3.1356, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.744353111832562e-05, |
|
"loss": 3.1245, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7408993576017135e-05, |
|
"loss": 3.1758, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7374456033708645e-05, |
|
"loss": 3.0987, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7339918491400152e-05, |
|
"loss": 3.0779, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7305380949091662e-05, |
|
"loss": 3.1655, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7270843406783176e-05, |
|
"loss": 3.1217, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7236305864474686e-05, |
|
"loss": 3.1073, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7201768322166193e-05, |
|
"loss": 3.1061, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7167230779857707e-05, |
|
"loss": 3.1527, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7132693237549217e-05, |
|
"loss": 3.1558, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7098155695240728e-05, |
|
"loss": 3.1518, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.706361815293224e-05, |
|
"loss": 3.1196, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7029080610623748e-05, |
|
"loss": 3.1993, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.699454306831526e-05, |
|
"loss": 3.1277, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.6960005526006772e-05, |
|
"loss": 3.1874, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6925467983698282e-05, |
|
"loss": 3.1914, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6890930441389793e-05, |
|
"loss": 3.0947, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.68563928990813e-05, |
|
"loss": 3.0878, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6821855356772813e-05, |
|
"loss": 3.1066, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6787317814464324e-05, |
|
"loss": 3.1181, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6752780272155837e-05, |
|
"loss": 3.1704, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6718242729847348e-05, |
|
"loss": 3.1091, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6683705187538855e-05, |
|
"loss": 3.0995, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6649167645230365e-05, |
|
"loss": 3.1144, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.661463010292188e-05, |
|
"loss": 3.0199, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.658009256061339e-05, |
|
"loss": 3.118, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6545555018304896e-05, |
|
"loss": 3.1443, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.651101747599641e-05, |
|
"loss": 3.1003, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.647647993368792e-05, |
|
"loss": 3.1032, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.644194239137943e-05, |
|
"loss": 3.0726, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6407404849070944e-05, |
|
"loss": 3.1226, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.637286730676245e-05, |
|
"loss": 3.1293, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.633832976445396e-05, |
|
"loss": 2.9997, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6303792222145475e-05, |
|
"loss": 3.0414, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6269254679836985e-05, |
|
"loss": 3.11, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6234717137528496e-05, |
|
"loss": 3.1564, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6200179595220002e-05, |
|
"loss": 3.0275, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6165642052911516e-05, |
|
"loss": 3.1224, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6131104510603027e-05, |
|
"loss": 3.089, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.609656696829454e-05, |
|
"loss": 2.9951, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6062029425986044e-05, |
|
"loss": 3.0263, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6027491883677557e-05, |
|
"loss": 3.0849, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5992954341369068e-05, |
|
"loss": 3.0331, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.595841679906058e-05, |
|
"loss": 3.0678, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5923879256752092e-05, |
|
"loss": 3.0979, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.58893417144436e-05, |
|
"loss": 3.1191, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5854804172135112e-05, |
|
"loss": 3.0135, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5820266629826623e-05, |
|
"loss": 3.0578, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5785729087518133e-05, |
|
"loss": 3.1627, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5751191545209647e-05, |
|
"loss": 3.0275, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5716654002901154e-05, |
|
"loss": 2.9919, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5682116460592664e-05, |
|
"loss": 3.0629, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5647578918284178e-05, |
|
"loss": 3.0972, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5613041375975688e-05, |
|
"loss": 3.1405, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5578503833667202e-05, |
|
"loss": 3.0783, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5543966291358705e-05, |
|
"loss": 3.0817, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.550942874905022e-05, |
|
"loss": 3.0429, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.547489120674173e-05, |
|
"loss": 3.0268, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5440353664433243e-05, |
|
"loss": 3.0695, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5405816122124747e-05, |
|
"loss": 3.0673, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.537127857981626e-05, |
|
"loss": 2.9537, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533674103750777e-05, |
|
"loss": 2.982, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5302203495199284e-05, |
|
"loss": 3.0885, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5267665952890795e-05, |
|
"loss": 3.0187, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.52331284105823e-05, |
|
"loss": 2.9979, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5198590868273815e-05, |
|
"loss": 2.998, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5164053325965326e-05, |
|
"loss": 2.9708, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5129515783656836e-05, |
|
"loss": 2.9856, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.509497824134835e-05, |
|
"loss": 3.0117, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5060440699039856e-05, |
|
"loss": 3.0323, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5025903156731367e-05, |
|
"loss": 3.0227, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.499136561442288e-05, |
|
"loss": 3.0074, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4956828072114387e-05, |
|
"loss": 3.0091, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49222905298059e-05, |
|
"loss": 3.1571, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.488775298749741e-05, |
|
"loss": 3.0275, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4853215445188922e-05, |
|
"loss": 3.0307, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4818677902880432e-05, |
|
"loss": 2.9366, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4784140360571942e-05, |
|
"loss": 2.9924, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4749602818263453e-05, |
|
"loss": 3.0313, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4715065275954966e-05, |
|
"loss": 2.9134, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4680527733646473e-05, |
|
"loss": 2.9929, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4645990191337987e-05, |
|
"loss": 2.9983, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4611452649029494e-05, |
|
"loss": 2.9756, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4576915106721008e-05, |
|
"loss": 2.9688, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4542377564412518e-05, |
|
"loss": 2.9359, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4507840022104028e-05, |
|
"loss": 3.0948, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.447330247979554e-05, |
|
"loss": 2.9085, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.443876493748705e-05, |
|
"loss": 3.053, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4404227395178563e-05, |
|
"loss": 3.0007, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.436968985287007e-05, |
|
"loss": 2.9837, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4335152310561583e-05, |
|
"loss": 2.9764, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.430061476825309e-05, |
|
"loss": 2.955, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4266077225944604e-05, |
|
"loss": 2.9645, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4231539683636114e-05, |
|
"loss": 3.0081, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4197002141327624e-05, |
|
"loss": 3.0566, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4162464599019135e-05, |
|
"loss": 2.8261, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4127927056710645e-05, |
|
"loss": 2.9972, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4093389514402155e-05, |
|
"loss": 2.9635, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4058851972093666e-05, |
|
"loss": 2.9686, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4024314429785176e-05, |
|
"loss": 2.952, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.398977688747669e-05, |
|
"loss": 2.9526, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3955239345168197e-05, |
|
"loss": 3.0147, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.392070180285971e-05, |
|
"loss": 2.8927, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.388616426055122e-05, |
|
"loss": 2.8888, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.385162671824273e-05, |
|
"loss": 2.9943, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.381708917593424e-05, |
|
"loss": 2.8836, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.378255163362575e-05, |
|
"loss": 3.0489, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3748014091317265e-05, |
|
"loss": 3.009, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3713476549008772e-05, |
|
"loss": 2.8603, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3678939006700286e-05, |
|
"loss": 2.9036, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3644401464391793e-05, |
|
"loss": 2.9626, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3609863922083307e-05, |
|
"loss": 2.9827, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3575326379774817e-05, |
|
"loss": 3.0024, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3540788837466327e-05, |
|
"loss": 2.9592, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3506251295157838e-05, |
|
"loss": 2.9028, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3471713752849348e-05, |
|
"loss": 2.8719, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3437176210540858e-05, |
|
"loss": 2.9314, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.340263866823237e-05, |
|
"loss": 3.0222, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.336810112592388e-05, |
|
"loss": 2.9664, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3333563583615393e-05, |
|
"loss": 2.8377, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.32990260413069e-05, |
|
"loss": 2.9357, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3264488498998413e-05, |
|
"loss": 2.8697, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3229950956689923e-05, |
|
"loss": 2.909, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3195413414381434e-05, |
|
"loss": 2.9791, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3160875872072944e-05, |
|
"loss": 2.9093, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3126338329764454e-05, |
|
"loss": 2.843, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3091800787455968e-05, |
|
"loss": 2.889, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3057263245147475e-05, |
|
"loss": 2.8633, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.302272570283899e-05, |
|
"loss": 2.9043, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2988188160530496e-05, |
|
"loss": 2.8618, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.295365061822201e-05, |
|
"loss": 2.8755, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.291911307591352e-05, |
|
"loss": 2.8721, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.288457553360503e-05, |
|
"loss": 2.926, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.285003799129654e-05, |
|
"loss": 2.8687, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.281550044898805e-05, |
|
"loss": 2.9855, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.278096290667956e-05, |
|
"loss": 2.7932, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.274642536437107e-05, |
|
"loss": 2.8963, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.271188782206258e-05, |
|
"loss": 2.886, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2677350279754095e-05, |
|
"loss": 2.8421, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2642812737445606e-05, |
|
"loss": 2.9048, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2608275195137116e-05, |
|
"loss": 2.8581, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2573737652828626e-05, |
|
"loss": 2.8165, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2539200110520137e-05, |
|
"loss": 2.8677, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2504662568211647e-05, |
|
"loss": 2.7541, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2470125025903157e-05, |
|
"loss": 2.907, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.243558748359467e-05, |
|
"loss": 2.9712, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2401049941286178e-05, |
|
"loss": 2.945, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.236651239897769e-05, |
|
"loss": 2.9733, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.23319748566692e-05, |
|
"loss": 2.7933, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2297437314360712e-05, |
|
"loss": 2.9037, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2262899772052222e-05, |
|
"loss": 2.8159, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2228362229743733e-05, |
|
"loss": 2.9867, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2193824687435243e-05, |
|
"loss": 2.7537, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2159287145126753e-05, |
|
"loss": 2.859, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2124749602818264e-05, |
|
"loss": 2.8736, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2090212060509774e-05, |
|
"loss": 2.8086, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2055674518201284e-05, |
|
"loss": 2.913, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2021136975892798e-05, |
|
"loss": 2.7818, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.198659943358431e-05, |
|
"loss": 2.8732, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.195206189127582e-05, |
|
"loss": 2.8173, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.191752434896733e-05, |
|
"loss": 2.8845, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.188298680665884e-05, |
|
"loss": 2.8686, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.184844926435035e-05, |
|
"loss": 2.9331, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.181391172204186e-05, |
|
"loss": 2.8525, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1779374179733374e-05, |
|
"loss": 2.8304, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.174483663742488e-05, |
|
"loss": 2.7739, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1710299095116394e-05, |
|
"loss": 2.9298, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.16757615528079e-05, |
|
"loss": 2.824, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1641224010499415e-05, |
|
"loss": 2.7872, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1606686468190925e-05, |
|
"loss": 2.7732, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1572148925882436e-05, |
|
"loss": 2.7426, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1537611383573946e-05, |
|
"loss": 2.82, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1503073841265456e-05, |
|
"loss": 2.7538, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1468536298956967e-05, |
|
"loss": 2.7856, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1433998756648477e-05, |
|
"loss": 2.7411, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1399461214339987e-05, |
|
"loss": 2.7934, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.13649236720315e-05, |
|
"loss": 2.8427, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.133038612972301e-05, |
|
"loss": 2.7685, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.129584858741452e-05, |
|
"loss": 2.7284, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1261311045106032e-05, |
|
"loss": 2.8034, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1226773502797542e-05, |
|
"loss": 2.8175, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1192235960489052e-05, |
|
"loss": 2.7895, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1157698418180563e-05, |
|
"loss": 2.8621, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1123160875872076e-05, |
|
"loss": 2.7962, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1088623333563583e-05, |
|
"loss": 2.7863, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1054085791255097e-05, |
|
"loss": 2.7884, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1019548248946604e-05, |
|
"loss": 2.7566, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0985010706638118e-05, |
|
"loss": 2.8012, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0950473164329625e-05, |
|
"loss": 2.8358, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.091593562202114e-05, |
|
"loss": 2.8367, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.088139807971265e-05, |
|
"loss": 2.7646, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.084686053740416e-05, |
|
"loss": 2.8934, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.081232299509567e-05, |
|
"loss": 2.8152, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.077778545278718e-05, |
|
"loss": 2.7449, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.074324791047869e-05, |
|
"loss": 2.7978, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0708710368170204e-05, |
|
"loss": 2.7717, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0674172825861714e-05, |
|
"loss": 2.8873, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0639635283553224e-05, |
|
"loss": 2.8055, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0605097741244735e-05, |
|
"loss": 2.6969, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0570560198936245e-05, |
|
"loss": 2.7373, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0536022656627755e-05, |
|
"loss": 2.7297, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0501485114319265e-05, |
|
"loss": 2.824, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.046694757201078e-05, |
|
"loss": 2.6395, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0432410029702286e-05, |
|
"loss": 2.7764, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.03978724873938e-05, |
|
"loss": 2.7993, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0363334945085307e-05, |
|
"loss": 2.7092, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.032879740277682e-05, |
|
"loss": 2.8668, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0294259860468327e-05, |
|
"loss": 2.7189, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.025972231815984e-05, |
|
"loss": 2.6979, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.022518477585135e-05, |
|
"loss": 2.7583, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0190647233542862e-05, |
|
"loss": 2.8106, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0156109691234372e-05, |
|
"loss": 2.7257, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0121572148925882e-05, |
|
"loss": 2.6996, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0087034606617393e-05, |
|
"loss": 2.6771, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0052497064308906e-05, |
|
"loss": 2.8394, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0017959522000417e-05, |
|
"loss": 2.7741, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9983421979691927e-05, |
|
"loss": 2.7705, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9948884437383437e-05, |
|
"loss": 2.7212, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9914346895074948e-05, |
|
"loss": 2.773, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9879809352766458e-05, |
|
"loss": 2.7534, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9845271810457968e-05, |
|
"loss": 2.7153, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9810734268149482e-05, |
|
"loss": 2.7342, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.977619672584099e-05, |
|
"loss": 2.7553, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9741659183532503e-05, |
|
"loss": 2.7939, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.970712164122401e-05, |
|
"loss": 2.7801, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9672584098915523e-05, |
|
"loss": 2.786, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.963804655660703e-05, |
|
"loss": 2.7158, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9603509014298544e-05, |
|
"loss": 2.7806, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9568971471990054e-05, |
|
"loss": 2.7648, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9534433929681564e-05, |
|
"loss": 2.8309, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9499896387373075e-05, |
|
"loss": 2.6813, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9465358845064585e-05, |
|
"loss": 2.6845, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9430821302756095e-05, |
|
"loss": 2.7858, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.939628376044761e-05, |
|
"loss": 2.8503, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.936174621813912e-05, |
|
"loss": 2.7545, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.932720867583063e-05, |
|
"loss": 2.6919, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.929267113352214e-05, |
|
"loss": 2.8175, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.925813359121365e-05, |
|
"loss": 2.8181, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.922359604890516e-05, |
|
"loss": 2.7865, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.918905850659667e-05, |
|
"loss": 2.8738, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9154520964288185e-05, |
|
"loss": 2.7771, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.911998342197969e-05, |
|
"loss": 2.6842, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9085445879671205e-05, |
|
"loss": 2.7889, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9050908337362712e-05, |
|
"loss": 2.7784, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9016370795054226e-05, |
|
"loss": 2.746, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.8981833252745733e-05, |
|
"loss": 2.733, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8947295710437247e-05, |
|
"loss": 2.8084, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8912758168128757e-05, |
|
"loss": 2.6748, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8878220625820267e-05, |
|
"loss": 2.6682, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8843683083511778e-05, |
|
"loss": 2.7535, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8809145541203288e-05, |
|
"loss": 2.8174, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8774607998894798e-05, |
|
"loss": 2.7326, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.874007045658631e-05, |
|
"loss": 2.6306, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8705532914277822e-05, |
|
"loss": 2.6619, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8670995371969333e-05, |
|
"loss": 2.6543, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8636457829660843e-05, |
|
"loss": 2.7638, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8601920287352353e-05, |
|
"loss": 2.7623, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8567382745043863e-05, |
|
"loss": 2.7837, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8532845202735374e-05, |
|
"loss": 2.694, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5199166098494782, |
|
"eval_loss": 2.258553981781006, |
|
"eval_runtime": 8350.0446, |
|
"eval_samples_per_second": 9.134, |
|
"eval_steps_per_second": 0.286, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8498307660426887e-05, |
|
"loss": 2.7086, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8463770118118394e-05, |
|
"loss": 2.5868, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8429232575809908e-05, |
|
"loss": 2.6158, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8394695033501415e-05, |
|
"loss": 2.5816, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.836015749119293e-05, |
|
"loss": 2.6675, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8325619948884436e-05, |
|
"loss": 2.6007, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.829108240657595e-05, |
|
"loss": 2.7189, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.825654486426746e-05, |
|
"loss": 2.6269, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.822200732195897e-05, |
|
"loss": 2.6914, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.818746977965048e-05, |
|
"loss": 2.6807, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.815293223734199e-05, |
|
"loss": 2.6789, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.81183946950335e-05, |
|
"loss": 2.6979, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.808385715272501e-05, |
|
"loss": 2.7043, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8049319610416525e-05, |
|
"loss": 2.5784, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8014782068108035e-05, |
|
"loss": 2.765, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7980244525799546e-05, |
|
"loss": 2.7079, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7945706983491056e-05, |
|
"loss": 2.5952, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7911169441182566e-05, |
|
"loss": 2.5811, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7876631898874077e-05, |
|
"loss": 2.5979, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.784209435656559e-05, |
|
"loss": 2.5244, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7807556814257097e-05, |
|
"loss": 2.5945, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.777301927194861e-05, |
|
"loss": 2.6323, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7738481729640118e-05, |
|
"loss": 2.7229, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.770394418733163e-05, |
|
"loss": 2.6336, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.766940664502314e-05, |
|
"loss": 2.7559, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7634869102714652e-05, |
|
"loss": 2.5932, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7600331560406162e-05, |
|
"loss": 2.6705, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7565794018097673e-05, |
|
"loss": 2.6559, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7531256475789183e-05, |
|
"loss": 2.6464, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7496718933480693e-05, |
|
"loss": 2.612, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7462181391172207e-05, |
|
"loss": 2.6383, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7427643848863714e-05, |
|
"loss": 2.6744, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7393106306555228e-05, |
|
"loss": 2.6917, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7358568764246738e-05, |
|
"loss": 2.6124, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.732403122193825e-05, |
|
"loss": 2.7056, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.728949367962976e-05, |
|
"loss": 2.666, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.725495613732127e-05, |
|
"loss": 2.5992, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.722041859501278e-05, |
|
"loss": 2.6046, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.718588105270429e-05, |
|
"loss": 2.5876, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.71513435103958e-05, |
|
"loss": 2.5939, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7116805968087314e-05, |
|
"loss": 2.6956, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.708226842577882e-05, |
|
"loss": 2.6503, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7047730883470334e-05, |
|
"loss": 2.606, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.701319334116184e-05, |
|
"loss": 2.6121, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6978655798853355e-05, |
|
"loss": 2.5824, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6944118256544865e-05, |
|
"loss": 2.5693, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6909580714236376e-05, |
|
"loss": 2.6992, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6875043171927886e-05, |
|
"loss": 2.6964, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6840505629619396e-05, |
|
"loss": 2.5641, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.680596808731091e-05, |
|
"loss": 2.687, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6771430545002417e-05, |
|
"loss": 2.7424, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.673689300269393e-05, |
|
"loss": 2.5703, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.670235546038544e-05, |
|
"loss": 2.6801, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.666781791807695e-05, |
|
"loss": 2.6199, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.663328037576846e-05, |
|
"loss": 2.5185, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6598742833459972e-05, |
|
"loss": 2.6805, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6564205291151482e-05, |
|
"loss": 2.6054, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6529667748842992e-05, |
|
"loss": 2.5373, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6495130206534503e-05, |
|
"loss": 2.6074, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6460592664226016e-05, |
|
"loss": 2.5797, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6426055121917523e-05, |
|
"loss": 2.6493, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6391517579609037e-05, |
|
"loss": 2.5966, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6356980037300544e-05, |
|
"loss": 2.6321, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6322442494992058e-05, |
|
"loss": 2.6245, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6287904952683568e-05, |
|
"loss": 2.6626, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6253367410375078e-05, |
|
"loss": 2.6061, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.621882986806659e-05, |
|
"loss": 2.5854, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.61842923257581e-05, |
|
"loss": 2.6, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6149754783449613e-05, |
|
"loss": 2.5424, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.611521724114112e-05, |
|
"loss": 2.6849, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6080679698832633e-05, |
|
"loss": 2.5638, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6046142156524144e-05, |
|
"loss": 2.6181, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.6011604614215654e-05, |
|
"loss": 2.5725, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5977067071907164e-05, |
|
"loss": 2.5491, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5942529529598675e-05, |
|
"loss": 2.6698, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5907991987290185e-05, |
|
"loss": 2.6591, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5873454444981695e-05, |
|
"loss": 2.669, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5838916902673205e-05, |
|
"loss": 2.6449, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.580437936036472e-05, |
|
"loss": 2.6878, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5769841818056226e-05, |
|
"loss": 2.5862, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.573530427574774e-05, |
|
"loss": 2.6372, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5700766733439247e-05, |
|
"loss": 2.5786, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.566622919113076e-05, |
|
"loss": 2.525, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.563169164882227e-05, |
|
"loss": 2.5454, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.559715410651378e-05, |
|
"loss": 2.5995, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.556261656420529e-05, |
|
"loss": 2.5934, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5528079021896802e-05, |
|
"loss": 2.5854, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5493541479588315e-05, |
|
"loss": 2.4892, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5459003937279822e-05, |
|
"loss": 2.5199, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5424466394971336e-05, |
|
"loss": 2.6195, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5389928852662846e-05, |
|
"loss": 2.5539, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5355391310354357e-05, |
|
"loss": 2.4715, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5320853768045867e-05, |
|
"loss": 2.636, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5286316225737377e-05, |
|
"loss": 2.6113, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5251778683428888e-05, |
|
"loss": 2.6125, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5217241141120398e-05, |
|
"loss": 2.6328, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.518270359881191e-05, |
|
"loss": 2.6832, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5148166056503422e-05, |
|
"loss": 2.6303, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.511362851419493e-05, |
|
"loss": 2.5381, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5079090971886443e-05, |
|
"loss": 2.5419, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5044553429577951e-05, |
|
"loss": 2.5489, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5010015887269463e-05, |
|
"loss": 2.5821, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4975478344960972e-05, |
|
"loss": 2.5817, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4940940802652484e-05, |
|
"loss": 2.5954, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4906403260343996e-05, |
|
"loss": 2.5759, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4871865718035504e-05, |
|
"loss": 2.4825, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4837328175727016e-05, |
|
"loss": 2.6348, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4802790633418525e-05, |
|
"loss": 2.5029, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4768253091110037e-05, |
|
"loss": 2.5852, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4733715548801549e-05, |
|
"loss": 2.5978, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4699178006493058e-05, |
|
"loss": 2.5382, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.466464046418457e-05, |
|
"loss": 2.5815, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.463010292187608e-05, |
|
"loss": 2.5992, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.459556537956759e-05, |
|
"loss": 2.5691, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.45610278372591e-05, |
|
"loss": 2.6351, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4526490294950613e-05, |
|
"loss": 2.572, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4491952752642125e-05, |
|
"loss": 2.5904, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4457415210333633e-05, |
|
"loss": 2.5471, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4422877668025145e-05, |
|
"loss": 2.4937, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4388340125716654e-05, |
|
"loss": 2.4822, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4353802583408166e-05, |
|
"loss": 2.5416, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4319265041099675e-05, |
|
"loss": 2.4873, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4284727498791187e-05, |
|
"loss": 2.4911, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4250189956482699e-05, |
|
"loss": 2.4459, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4215652414174207e-05, |
|
"loss": 2.499, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.418111487186572e-05, |
|
"loss": 2.4586, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4146577329557228e-05, |
|
"loss": 2.5339, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.411203978724874e-05, |
|
"loss": 2.5919, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.407750224494025e-05, |
|
"loss": 2.5695, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.404296470263176e-05, |
|
"loss": 2.5624, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4008427160323273e-05, |
|
"loss": 2.5149, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3973889618014783e-05, |
|
"loss": 2.6278, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3939352075706293e-05, |
|
"loss": 2.539, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3904814533397803e-05, |
|
"loss": 2.4993, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3870276991089315e-05, |
|
"loss": 2.576, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3835739448780827e-05, |
|
"loss": 2.5318, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3801201906472336e-05, |
|
"loss": 2.5886, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3766664364163848e-05, |
|
"loss": 2.4935, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3732126821855357e-05, |
|
"loss": 2.4726, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3697589279546869e-05, |
|
"loss": 2.5471, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3663051737238377e-05, |
|
"loss": 2.5019, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.362851419492989e-05, |
|
"loss": 2.5326, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3593976652621401e-05, |
|
"loss": 2.5554, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.355943911031291e-05, |
|
"loss": 2.5263, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3524901568004422e-05, |
|
"loss": 2.4685, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.349036402569593e-05, |
|
"loss": 2.5834, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3455826483387443e-05, |
|
"loss": 2.4834, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3421288941078953e-05, |
|
"loss": 2.5581, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3386751398770463e-05, |
|
"loss": 2.4237, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3352213856461975e-05, |
|
"loss": 2.5372, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3317676314153486e-05, |
|
"loss": 2.5476, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3283138771844998e-05, |
|
"loss": 2.5046, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3248601229536506e-05, |
|
"loss": 2.5236, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3214063687228018e-05, |
|
"loss": 2.4282, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.317952614491953e-05, |
|
"loss": 2.5041, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3144988602611039e-05, |
|
"loss": 2.5305, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3110451060302551e-05, |
|
"loss": 2.5322, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.307591351799406e-05, |
|
"loss": 2.443, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.3041375975685571e-05, |
|
"loss": 2.5025, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.300683843337708e-05, |
|
"loss": 2.6194, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2972300891068592e-05, |
|
"loss": 2.5422, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2937763348760104e-05, |
|
"loss": 2.487, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2903225806451613e-05, |
|
"loss": 2.5176, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2868688264143125e-05, |
|
"loss": 2.4831, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2834150721834633e-05, |
|
"loss": 2.4596, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2799613179526145e-05, |
|
"loss": 2.5229, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2765075637217656e-05, |
|
"loss": 2.4673, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2730538094909166e-05, |
|
"loss": 2.4803, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2696000552600678e-05, |
|
"loss": 2.4534, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2661463010292188e-05, |
|
"loss": 2.4959, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.26269254679837e-05, |
|
"loss": 2.523, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2592387925675209e-05, |
|
"loss": 2.5406, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2557850383366721e-05, |
|
"loss": 2.4748, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.252331284105823e-05, |
|
"loss": 2.5003, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2488775298749742e-05, |
|
"loss": 2.5308, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2454237756441252e-05, |
|
"loss": 2.5162, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2419700214132762e-05, |
|
"loss": 2.4429, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2385162671824274e-05, |
|
"loss": 2.5054, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2350625129515785e-05, |
|
"loss": 2.427, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2316087587207295e-05, |
|
"loss": 2.4848, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2281550044898805e-05, |
|
"loss": 2.4365, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2247012502590316e-05, |
|
"loss": 2.5547, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2212474960281826e-05, |
|
"loss": 2.5358, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2177937417973338e-05, |
|
"loss": 2.4882, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2143399875664848e-05, |
|
"loss": 2.5398, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2108862333356358e-05, |
|
"loss": 2.426, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2074324791047869e-05, |
|
"loss": 2.5536, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.203978724873938e-05, |
|
"loss": 2.4513, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2005249706430891e-05, |
|
"loss": 2.5256, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1970712164122401e-05, |
|
"loss": 2.5244, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1936174621813913e-05, |
|
"loss": 2.5293, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1901637079505424e-05, |
|
"loss": 2.4588, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1867099537196934e-05, |
|
"loss": 2.4325, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1832561994888444e-05, |
|
"loss": 2.4101, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1798024452579955e-05, |
|
"loss": 2.4884, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1763486910271465e-05, |
|
"loss": 2.5393, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1728949367962977e-05, |
|
"loss": 2.527, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1694411825654487e-05, |
|
"loss": 2.5476, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1659874283345998e-05, |
|
"loss": 2.5215, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1625336741037508e-05, |
|
"loss": 2.4547, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1590799198729018e-05, |
|
"loss": 2.3536, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1556261656420529e-05, |
|
"loss": 2.4102, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1521724114112039e-05, |
|
"loss": 2.5389, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1487186571803551e-05, |
|
"loss": 2.4363, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1452649029495061e-05, |
|
"loss": 2.5441, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1418111487186573e-05, |
|
"loss": 2.4774, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1383573944878084e-05, |
|
"loss": 2.4745, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1349036402569594e-05, |
|
"loss": 2.4728, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1314498860261104e-05, |
|
"loss": 2.5576, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1279961317952616e-05, |
|
"loss": 2.5435, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1245423775644127e-05, |
|
"loss": 2.4142, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1210886233335637e-05, |
|
"loss": 2.4979, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1176348691027147e-05, |
|
"loss": 2.582, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1141811148718657e-05, |
|
"loss": 2.5088, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1107273606410168e-05, |
|
"loss": 2.4736, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.107273606410168e-05, |
|
"loss": 2.5025, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.103819852179319e-05, |
|
"loss": 2.3659, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.10036609794847e-05, |
|
"loss": 2.4489, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.096912343717621e-05, |
|
"loss": 2.4468, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0934585894867721e-05, |
|
"loss": 2.4301, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0900048352559231e-05, |
|
"loss": 2.5407, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0865510810250742e-05, |
|
"loss": 2.5414, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0830973267942254e-05, |
|
"loss": 2.5377, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0796435725633764e-05, |
|
"loss": 2.3887, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0761898183325276e-05, |
|
"loss": 2.4716, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0727360641016786e-05, |
|
"loss": 2.3875, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0692823098708297e-05, |
|
"loss": 2.4645, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0658285556399807e-05, |
|
"loss": 2.4588, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0623748014091319e-05, |
|
"loss": 2.4394, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.058921047178283e-05, |
|
"loss": 2.4167, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.055467292947434e-05, |
|
"loss": 2.4317, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.052013538716585e-05, |
|
"loss": 2.46, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.048559784485736e-05, |
|
"loss": 2.5175, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.045106030254887e-05, |
|
"loss": 2.3986, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0416522760240381e-05, |
|
"loss": 2.4969, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0381985217931893e-05, |
|
"loss": 2.3781, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0347447675623403e-05, |
|
"loss": 2.4569, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0312910133314914e-05, |
|
"loss": 2.3831, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0278372591006424e-05, |
|
"loss": 2.4431, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0243835048697934e-05, |
|
"loss": 2.5117, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0209297506389444e-05, |
|
"loss": 2.5629, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0174759964080956e-05, |
|
"loss": 2.437, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0140222421772467e-05, |
|
"loss": 2.5089, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0105684879463979e-05, |
|
"loss": 2.5891, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0071147337155489e-05, |
|
"loss": 2.4592, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0036609794847e-05, |
|
"loss": 2.4947, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.000207225253851e-05, |
|
"loss": 2.3656, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.96753471023002e-06, |
|
"loss": 2.3021, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.932997167921532e-06, |
|
"loss": 2.5248, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.898459625613042e-06, |
|
"loss": 2.3746, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.863922083304553e-06, |
|
"loss": 2.4837, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.829384540996063e-06, |
|
"loss": 2.3482, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.794846998687573e-06, |
|
"loss": 2.3631, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.760309456379084e-06, |
|
"loss": 2.3923, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.725771914070596e-06, |
|
"loss": 2.465, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.691234371762106e-06, |
|
"loss": 2.5215, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.656696829453616e-06, |
|
"loss": 2.4189, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.622159287145127e-06, |
|
"loss": 2.4592, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.587621744836637e-06, |
|
"loss": 2.5317, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.553084202528149e-06, |
|
"loss": 2.5719, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.51854666021966e-06, |
|
"loss": 2.4286, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.48400911791117e-06, |
|
"loss": 2.3491, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.449471575602682e-06, |
|
"loss": 2.5172, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.414934033294192e-06, |
|
"loss": 2.4466, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.380396490985702e-06, |
|
"loss": 2.4197, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.345858948677212e-06, |
|
"loss": 2.5182, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.311321406368723e-06, |
|
"loss": 2.5291, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.276783864060235e-06, |
|
"loss": 2.5132, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.242246321751745e-06, |
|
"loss": 2.4365, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.207708779443255e-06, |
|
"loss": 2.5625, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.173171237134766e-06, |
|
"loss": 2.4663, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.138633694826276e-06, |
|
"loss": 2.5276, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.104096152517786e-06, |
|
"loss": 2.4061, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.069558610209298e-06, |
|
"loss": 2.4691, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.035021067900809e-06, |
|
"loss": 2.4038, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.000483525592319e-06, |
|
"loss": 2.4281, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.96594598328383e-06, |
|
"loss": 2.4752, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.93140844097534e-06, |
|
"loss": 2.3682, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.896870898666852e-06, |
|
"loss": 2.4398, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.862333356358362e-06, |
|
"loss": 2.4714, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.827795814049874e-06, |
|
"loss": 2.4158, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.793258271741384e-06, |
|
"loss": 2.4668, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.758720729432895e-06, |
|
"loss": 2.3691, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.724183187124405e-06, |
|
"loss": 2.3835, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.689645644815915e-06, |
|
"loss": 2.4404, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.655108102507426e-06, |
|
"loss": 2.4065, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.620570560198938e-06, |
|
"loss": 2.4134, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.586033017890448e-06, |
|
"loss": 2.3334, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.551495475581958e-06, |
|
"loss": 2.4672, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.516957933273469e-06, |
|
"loss": 2.3859, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.482420390964979e-06, |
|
"loss": 2.4169, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.44788284865649e-06, |
|
"loss": 2.4103, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.413345306348e-06, |
|
"loss": 2.492, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.378807764039511e-06, |
|
"loss": 2.4176, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.344270221731022e-06, |
|
"loss": 2.4864, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.309732679422532e-06, |
|
"loss": 2.412, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.275195137114042e-06, |
|
"loss": 2.2933, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.240657594805554e-06, |
|
"loss": 2.3668, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.206120052497065e-06, |
|
"loss": 2.4555, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.171582510188577e-06, |
|
"loss": 2.3584, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.137044967880087e-06, |
|
"loss": 2.3607, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.102507425571597e-06, |
|
"loss": 2.4085, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.067969883263108e-06, |
|
"loss": 2.3381, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.033432340954618e-06, |
|
"loss": 2.4618, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.998894798646128e-06, |
|
"loss": 2.3399, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.96435725633764e-06, |
|
"loss": 2.4523, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.92981971402915e-06, |
|
"loss": 2.387, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.895282171720661e-06, |
|
"loss": 2.3244, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.860744629412171e-06, |
|
"loss": 2.4542, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.826207087103682e-06, |
|
"loss": 2.4163, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.791669544795192e-06, |
|
"loss": 2.4133, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.757132002486702e-06, |
|
"loss": 2.5119, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.722594460178214e-06, |
|
"loss": 2.398, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.688056917869725e-06, |
|
"loss": 2.3494, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.653519375561235e-06, |
|
"loss": 2.3333, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.618981833252746e-06, |
|
"loss": 2.3259, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.584444290944256e-06, |
|
"loss": 2.3831, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.549906748635767e-06, |
|
"loss": 2.3466, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.515369206327279e-06, |
|
"loss": 2.3265, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.480831664018789e-06, |
|
"loss": 2.5178, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.446294121710299e-06, |
|
"loss": 2.434, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.4117565794018105e-06, |
|
"loss": 2.373, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.377219037093321e-06, |
|
"loss": 2.4323, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.342681494784831e-06, |
|
"loss": 2.4116, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.308143952476341e-06, |
|
"loss": 2.4483, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.273606410167853e-06, |
|
"loss": 2.316, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.239068867859364e-06, |
|
"loss": 2.4437, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.204531325550874e-06, |
|
"loss": 2.2788, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.169993783242384e-06, |
|
"loss": 2.3947, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.1354562409338955e-06, |
|
"loss": 2.3904, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.100918698625406e-06, |
|
"loss": 2.3955, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.066381156316918e-06, |
|
"loss": 2.3703, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.031843614008428e-06, |
|
"loss": 2.3459, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.9973060716999385e-06, |
|
"loss": 2.3317, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.962768529391449e-06, |
|
"loss": 2.4926, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.928230987082959e-06, |
|
"loss": 2.3021, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.8936934447744694e-06, |
|
"loss": 2.4277, |
|
"step": 14090 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.85915590246598e-06, |
|
"loss": 2.4472, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.824618360157492e-06, |
|
"loss": 2.3815, |
|
"step": 14110 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.790080817849002e-06, |
|
"loss": 2.4714, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.755543275540513e-06, |
|
"loss": 2.3671, |
|
"step": 14130 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.7210057332320235e-06, |
|
"loss": 2.3541, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.686468190923534e-06, |
|
"loss": 2.3399, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.651930648615044e-06, |
|
"loss": 2.4081, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.617393106306556e-06, |
|
"loss": 2.4018, |
|
"step": 14170 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.5828555639980665e-06, |
|
"loss": 2.4288, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.548318021689577e-06, |
|
"loss": 2.3876, |
|
"step": 14190 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.513780479381087e-06, |
|
"loss": 2.3145, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.479242937072598e-06, |
|
"loss": 2.4065, |
|
"step": 14210 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.444705394764109e-06, |
|
"loss": 2.4055, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.410167852455621e-06, |
|
"loss": 2.3118, |
|
"step": 14230 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.375630310147131e-06, |
|
"loss": 2.36, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.341092767838641e-06, |
|
"loss": 2.3578, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.3065552255301516e-06, |
|
"loss": 2.3756, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.272017683221662e-06, |
|
"loss": 2.4196, |
|
"step": 14270 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.237480140913173e-06, |
|
"loss": 2.4409, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.202942598604683e-06, |
|
"loss": 2.3316, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.1684050562961945e-06, |
|
"loss": 2.306, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.133867513987706e-06, |
|
"loss": 2.3856, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.099329971679216e-06, |
|
"loss": 2.3944, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.064792429370726e-06, |
|
"loss": 2.4568, |
|
"step": 14330 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.030254887062237e-06, |
|
"loss": 2.3327, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.995717344753748e-06, |
|
"loss": 2.3498, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.961179802445258e-06, |
|
"loss": 2.4181, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.926642260136768e-06, |
|
"loss": 2.398, |
|
"step": 14370 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.89210471782828e-06, |
|
"loss": 2.4317, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.85756717551979e-06, |
|
"loss": 2.4137, |
|
"step": 14390 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.823029633211301e-06, |
|
"loss": 2.4382, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.788492090902812e-06, |
|
"loss": 2.3696, |
|
"step": 14410 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.7539545485943225e-06, |
|
"loss": 2.4141, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.719417006285833e-06, |
|
"loss": 2.3535, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.684879463977344e-06, |
|
"loss": 2.408, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.650341921668854e-06, |
|
"loss": 2.354, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.615804379360365e-06, |
|
"loss": 2.4954, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.581266837051876e-06, |
|
"loss": 2.4316, |
|
"step": 14470 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.546729294743386e-06, |
|
"loss": 2.3022, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.512191752434897e-06, |
|
"loss": 2.2843, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.477654210126408e-06, |
|
"loss": 2.4304, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.443116667817919e-06, |
|
"loss": 2.2816, |
|
"step": 14510 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.408579125509429e-06, |
|
"loss": 2.3768, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.374041583200939e-06, |
|
"loss": 2.4476, |
|
"step": 14530 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.3395040408924506e-06, |
|
"loss": 2.439, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.304966498583961e-06, |
|
"loss": 2.3349, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.270428956275471e-06, |
|
"loss": 2.542, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.235891413966982e-06, |
|
"loss": 2.3421, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.201353871658493e-06, |
|
"loss": 2.3747, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.166816329350004e-06, |
|
"loss": 2.3729, |
|
"step": 14590 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.132278787041515e-06, |
|
"loss": 2.3492, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.097741244733025e-06, |
|
"loss": 2.2954, |
|
"step": 14610 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.063203702424536e-06, |
|
"loss": 2.3548, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.028666160116046e-06, |
|
"loss": 2.2816, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.994128617807557e-06, |
|
"loss": 2.4028, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.959591075499067e-06, |
|
"loss": 2.3788, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.925053533190578e-06, |
|
"loss": 2.4394, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.890515990882089e-06, |
|
"loss": 2.2951, |
|
"step": 14670 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.8559784485736e-06, |
|
"loss": 2.2777, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.82144090626511e-06, |
|
"loss": 2.3185, |
|
"step": 14690 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.7869033639566215e-06, |
|
"loss": 2.4007, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.752365821648132e-06, |
|
"loss": 2.3751, |
|
"step": 14710 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.717828279339642e-06, |
|
"loss": 2.3223, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.683290737031153e-06, |
|
"loss": 2.3298, |
|
"step": 14730 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.648753194722664e-06, |
|
"loss": 2.3654, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.614215652414174e-06, |
|
"loss": 2.4975, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.579678110105685e-06, |
|
"loss": 2.4081, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.545140567797196e-06, |
|
"loss": 2.3517, |
|
"step": 14770 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.510603025488707e-06, |
|
"loss": 2.4114, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.476065483180217e-06, |
|
"loss": 2.2955, |
|
"step": 14790 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.441527940871728e-06, |
|
"loss": 2.2924, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.406990398563238e-06, |
|
"loss": 2.3289, |
|
"step": 14810 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.372452856254749e-06, |
|
"loss": 2.4274, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.33791531394626e-06, |
|
"loss": 2.2682, |
|
"step": 14830 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.30337777163777e-06, |
|
"loss": 2.3307, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2688402293292805e-06, |
|
"loss": 2.3643, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.234302687020792e-06, |
|
"loss": 2.4233, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.199765144712303e-06, |
|
"loss": 2.4175, |
|
"step": 14870 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.165227602403813e-06, |
|
"loss": 2.3321, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.130690060095324e-06, |
|
"loss": 2.4373, |
|
"step": 14890 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.096152517786835e-06, |
|
"loss": 2.3562, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.061614975478345e-06, |
|
"loss": 2.3348, |
|
"step": 14910 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.027077433169856e-06, |
|
"loss": 2.3721, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.992539890861366e-06, |
|
"loss": 2.3379, |
|
"step": 14930 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.958002348552877e-06, |
|
"loss": 2.3698, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.923464806244388e-06, |
|
"loss": 2.4562, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.888927263935899e-06, |
|
"loss": 2.4839, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.854389721627409e-06, |
|
"loss": 2.3623, |
|
"step": 14970 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.81985217931892e-06, |
|
"loss": 2.3421, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.785314637010431e-06, |
|
"loss": 2.3138, |
|
"step": 14990 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.750777094701941e-06, |
|
"loss": 2.3843, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7162395523934515e-06, |
|
"loss": 2.3591, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6817020100849626e-06, |
|
"loss": 2.3636, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6471644677764734e-06, |
|
"loss": 2.4267, |
|
"step": 15030 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6126269254679837e-06, |
|
"loss": 2.373, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.578089383159495e-06, |
|
"loss": 2.5028, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.543551840851005e-06, |
|
"loss": 2.3799, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.509014298542516e-06, |
|
"loss": 2.4805, |
|
"step": 15070 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.474476756234026e-06, |
|
"loss": 2.2899, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.4399392139255374e-06, |
|
"loss": 2.3221, |
|
"step": 15090 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.4054016716170477e-06, |
|
"loss": 2.3465, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3708641293085584e-06, |
|
"loss": 2.3942, |
|
"step": 15110 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3363265870000696e-06, |
|
"loss": 2.4674, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.30178904469158e-06, |
|
"loss": 2.4301, |
|
"step": 15130 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.2672515023830902e-06, |
|
"loss": 2.4088, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.2327139600746014e-06, |
|
"loss": 2.3602, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.198176417766112e-06, |
|
"loss": 2.3013, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.1636388754576224e-06, |
|
"loss": 2.3958, |
|
"step": 15170 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.1291013331491336e-06, |
|
"loss": 2.3123, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.094563790840644e-06, |
|
"loss": 2.3162, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.0600262485321547e-06, |
|
"loss": 2.3614, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0254887062236654e-06, |
|
"loss": 2.3247, |
|
"step": 15210 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.990951163915176e-06, |
|
"loss": 2.3145, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9564136216066864e-06, |
|
"loss": 2.3681, |
|
"step": 15230 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.921876079298197e-06, |
|
"loss": 2.2981, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.887338536989708e-06, |
|
"loss": 2.346, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8528009946812187e-06, |
|
"loss": 2.4164, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8182634523727294e-06, |
|
"loss": 2.4174, |
|
"step": 15270 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7837259100642397e-06, |
|
"loss": 2.3599, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7491883677557505e-06, |
|
"loss": 2.278, |
|
"step": 15290 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7146508254472616e-06, |
|
"loss": 2.3583, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.680113283138772e-06, |
|
"loss": 2.4094, |
|
"step": 15310 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6455757408302827e-06, |
|
"loss": 2.4016, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6110381985217934e-06, |
|
"loss": 2.3733, |
|
"step": 15330 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.5765006562133037e-06, |
|
"loss": 2.3657, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.541963113904815e-06, |
|
"loss": 2.3585, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.507425571596325e-06, |
|
"loss": 2.3034, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.472888029287836e-06, |
|
"loss": 2.399, |
|
"step": 15370 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4383504869793467e-06, |
|
"loss": 2.3786, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4038129446708574e-06, |
|
"loss": 2.3629, |
|
"step": 15390 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.369275402362368e-06, |
|
"loss": 2.3671, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.334737860053879e-06, |
|
"loss": 2.2268, |
|
"step": 15410 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.300200317745389e-06, |
|
"loss": 2.3121, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2656627754369e-06, |
|
"loss": 2.3288, |
|
"step": 15430 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2311252331284107e-06, |
|
"loss": 2.3856, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1965876908199214e-06, |
|
"loss": 2.3113, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.162050148511432e-06, |
|
"loss": 2.2406, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1275126062029425e-06, |
|
"loss": 2.353, |
|
"step": 15470 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0929750638944532e-06, |
|
"loss": 2.3918, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.058437521585964e-06, |
|
"loss": 2.4184, |
|
"step": 15490 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0238999792774747e-06, |
|
"loss": 2.3828, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9893624369689854e-06, |
|
"loss": 2.4124, |
|
"step": 15510 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9548248946604957e-06, |
|
"loss": 2.3288, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.920287352352007e-06, |
|
"loss": 2.2583, |
|
"step": 15530 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8857498100435174e-06, |
|
"loss": 2.4088, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.851212267735028e-06, |
|
"loss": 2.2754, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8166747254265387e-06, |
|
"loss": 2.3621, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7821371831180492e-06, |
|
"loss": 2.3717, |
|
"step": 15570 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.74759964080956e-06, |
|
"loss": 2.4536, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.713062098501071e-06, |
|
"loss": 2.3785, |
|
"step": 15590 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6785245561925812e-06, |
|
"loss": 2.2907, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6439870138840922e-06, |
|
"loss": 2.3086, |
|
"step": 15610 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.609449471575603e-06, |
|
"loss": 2.358, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.5749119292671134e-06, |
|
"loss": 2.3363, |
|
"step": 15630 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5403743869586242e-06, |
|
"loss": 2.3688, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5058368446501347e-06, |
|
"loss": 2.3412, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4712993023416455e-06, |
|
"loss": 2.3828, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.436761760033156e-06, |
|
"loss": 2.2863, |
|
"step": 15670 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.402224217724667e-06, |
|
"loss": 2.5111, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.3676866754161775e-06, |
|
"loss": 2.4224, |
|
"step": 15690 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.3331491331076882e-06, |
|
"loss": 2.4222, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2986115907991987e-06, |
|
"loss": 2.3345, |
|
"step": 15710 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2640740484907095e-06, |
|
"loss": 2.2492, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2295365061822202e-06, |
|
"loss": 2.4186, |
|
"step": 15730 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1949989638737307e-06, |
|
"loss": 2.328, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1604614215652415e-06, |
|
"loss": 2.3137, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1259238792567522e-06, |
|
"loss": 2.3326, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.091386336948263e-06, |
|
"loss": 2.3862, |
|
"step": 15770 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0568487946397735e-06, |
|
"loss": 2.3108, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.022311252331284e-06, |
|
"loss": 2.26, |
|
"step": 15790 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.87773710022795e-07, |
|
"loss": 2.4293, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.532361677143055e-07, |
|
"loss": 2.3243, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.186986254058161e-07, |
|
"loss": 2.3935, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.841610830973267e-07, |
|
"loss": 2.3129, |
|
"step": 15830 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.496235407888376e-07, |
|
"loss": 2.3353, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.150859984803482e-07, |
|
"loss": 2.2725, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.805484561718589e-07, |
|
"loss": 2.378, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.460109138633696e-07, |
|
"loss": 2.3231, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.114733715548802e-07, |
|
"loss": 2.3203, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.769358292463909e-07, |
|
"loss": 2.3593, |
|
"step": 15890 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.423982869379015e-07, |
|
"loss": 2.3439, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 6.078607446294122e-07, |
|
"loss": 2.3775, |
|
"step": 15910 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.733232023209229e-07, |
|
"loss": 2.3511, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.387856600124336e-07, |
|
"loss": 2.3449, |
|
"step": 15930 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.042481177039442e-07, |
|
"loss": 2.3427, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.697105753954549e-07, |
|
"loss": 2.2693, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.3517303308696555e-07, |
|
"loss": 2.3398, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.006354907784763e-07, |
|
"loss": 2.2743, |
|
"step": 15970 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.6609794846998687e-07, |
|
"loss": 2.2086, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.3156040616149755e-07, |
|
"loss": 2.3393, |
|
"step": 15990 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.9702286385300824e-07, |
|
"loss": 2.3861, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.624853215445189e-07, |
|
"loss": 2.2871, |
|
"step": 16010 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.2794777923602959e-07, |
|
"loss": 2.3119, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.9341023692754024e-07, |
|
"loss": 2.385, |
|
"step": 16030 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5887269461905093e-07, |
|
"loss": 2.2872, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.243351523105616e-07, |
|
"loss": 2.3681, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.979761000207225e-08, |
|
"loss": 2.3491, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.526006769358292e-08, |
|
"loss": 2.3968, |
|
"step": 16070 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.0722525385093597e-08, |
|
"loss": 2.4475, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5851995594482614, |
|
"eval_loss": 1.9156352281570435, |
|
"eval_runtime": 8294.1564, |
|
"eval_samples_per_second": 9.195, |
|
"eval_steps_per_second": 0.287, |
|
"step": 16086 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 16086, |
|
"total_flos": 1.6352487334240263e+20, |
|
"train_loss": 3.7719367721047283, |
|
"train_runtime": 628821.6026, |
|
"train_samples_per_second": 3.275, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 16086, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.6352487334240263e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|