{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.01960784313726,
"eval_steps": 500,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0003996, |
|
"loss": 9.2736, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0003992, |
|
"loss": 7.7626, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00039880000000000004, |
|
"loss": 7.5202, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00039840000000000003, |
|
"loss": 7.371, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000398, |
|
"loss": 7.2352, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003976, |
|
"loss": 7.1352, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0003972, |
|
"loss": 7.0625, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0003968, |
|
"loss": 6.911, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00039640000000000004, |
|
"loss": 6.7717, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00039600000000000003, |
|
"loss": 6.6484, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0003956, |
|
"loss": 6.5492, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0003952, |
|
"loss": 6.4417, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0003948, |
|
"loss": 6.3311, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0003944, |
|
"loss": 6.2206, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00039400000000000004, |
|
"loss": 6.1179, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0003936, |
|
"loss": 6.0151, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0003932, |
|
"loss": 5.9035, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0003928, |
|
"loss": 5.8222, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0003924, |
|
"loss": 5.7042, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.000392, |
|
"loss": 5.6265, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00039160000000000003, |
|
"loss": 5.5338, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0003912, |
|
"loss": 5.4521, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0003908, |
|
"loss": 5.3552, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0003904, |
|
"loss": 5.2771, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00039, |
|
"loss": 5.1587, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0003896, |
|
"loss": 5.0899, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.00038920000000000003, |
|
"loss": 5.0191, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0003888, |
|
"loss": 4.9602, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0003884, |
|
"loss": 4.8366, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.000388, |
|
"loss": 4.7848, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0003876, |
|
"loss": 4.7199, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00038720000000000003, |
|
"loss": 4.6306, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0003868, |
|
"loss": 4.5337, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0003864, |
|
"loss": 4.4796, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.000386, |
|
"loss": 4.3881, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0003856, |
|
"loss": 4.2989, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0003852, |
|
"loss": 4.2533, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00038480000000000003, |
|
"loss": 4.2379, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0003844, |
|
"loss": 4.142, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.000384, |
|
"loss": 4.0793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.0003836, |
|
"loss": 4.0005, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0003832, |
|
"loss": 3.9619, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0003828, |
|
"loss": 3.8687, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.0003824, |
|
"loss": 3.8486, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.000382, |
|
"loss": 3.7684, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0003816, |
|
"loss": 3.7013, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.0003812, |
|
"loss": 3.6851, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.0003808, |
|
"loss": 3.6585, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0003804, |
|
"loss": 3.6172, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00038, |
|
"loss": 3.5557, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0003796, |
|
"loss": 3.4746, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0003792, |
|
"loss": 3.4473, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0003788, |
|
"loss": 3.3828, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0003784, |
|
"loss": 3.3868, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00037799999999999997, |
|
"loss": 3.31, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.0003776, |
|
"loss": 3.2628, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0003772, |
|
"loss": 3.2541, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.0003768, |
|
"loss": 3.223, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.0003764, |
|
"loss": 3.2028, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.000376, |
|
"loss": 3.1659, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.0003756, |
|
"loss": 3.0847, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.0003752, |
|
"loss": 3.0215, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00037480000000000006, |
|
"loss": 3.0149, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00037440000000000005, |
|
"loss": 3.0177, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00037400000000000004, |
|
"loss": 2.9474, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00037360000000000003, |
|
"loss": 2.9245, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.0003732, |
|
"loss": 2.9218, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00037280000000000006, |
|
"loss": 2.8666, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.00037240000000000005, |
|
"loss": 2.8821, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00037200000000000004, |
|
"loss": 2.8243, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00037160000000000003, |
|
"loss": 2.7753, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.0003712, |
|
"loss": 2.7086, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.0003708, |
|
"loss": 2.7104, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00037040000000000006, |
|
"loss": 2.7103, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00037000000000000005, |
|
"loss": 2.6707, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00036960000000000004, |
|
"loss": 2.6413, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00036920000000000003, |
|
"loss": 2.6359, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0003688, |
|
"loss": 2.5838, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.0003684, |
|
"loss": 2.6212, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.00036800000000000005, |
|
"loss": 2.5718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00036760000000000004, |
|
"loss": 2.5348, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.00036720000000000004, |
|
"loss": 2.4195, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.0003668, |
|
"loss": 2.4938, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.0003664, |
|
"loss": 2.4372, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.000366, |
|
"loss": 2.4567, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.00036560000000000005, |
|
"loss": 2.4108, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.00036520000000000004, |
|
"loss": 2.3993, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.00036480000000000003, |
|
"loss": 2.3739, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 0.0003644, |
|
"loss": 2.391, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.000364, |
|
"loss": 2.38, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.00036360000000000006, |
|
"loss": 2.3257, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 0.00036320000000000005, |
|
"loss": 2.1956, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.00036280000000000004, |
|
"loss": 2.2944, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.0003624, |
|
"loss": 2.2163, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.000362, |
|
"loss": 2.2641, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.0003616, |
|
"loss": 2.2035, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.00036120000000000005, |
|
"loss": 2.188, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.00036080000000000004, |
|
"loss": 2.197, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.00036040000000000003, |
|
"loss": 2.18, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.00036, |
|
"loss": 2.1994, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0003596, |
|
"loss": 2.1538, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0003592, |
|
"loss": 2.0495, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.00035880000000000005, |
|
"loss": 2.0501, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.00035840000000000004, |
|
"loss": 2.0302, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.00035800000000000003, |
|
"loss": 2.0996, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 0.0003576, |
|
"loss": 2.0369, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 0.0003572, |
|
"loss": 1.9996, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 0.0003568, |
|
"loss": 2.0254, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.00035640000000000004, |
|
"loss": 2.0004, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 0.00035600000000000003, |
|
"loss": 2.0154, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.0003556, |
|
"loss": 2.0014, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.00035524, |
|
"loss": 1.9377, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.00035484000000000004, |
|
"loss": 1.8296, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 0.00035444000000000003, |
|
"loss": 1.9002, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 0.00035404, |
|
"loss": 1.9336, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 0.00035364, |
|
"loss": 1.9041, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 0.00035324, |
|
"loss": 1.8324, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 0.00035284, |
|
"loss": 1.8663, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 0.00035244000000000003, |
|
"loss": 1.8358, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 0.00035204, |
|
"loss": 1.8586, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00035164, |
|
"loss": 1.8452, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 0.00035124, |
|
"loss": 1.7957, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 0.00035084, |
|
"loss": 1.6603, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.00035044000000000004, |
|
"loss": 1.7903, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 0.00035004000000000003, |
|
"loss": 1.7574, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 0.00034964, |
|
"loss": 1.7823, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.00034924, |
|
"loss": 1.7038, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 0.00034884, |
|
"loss": 1.7186, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 0.00034844, |
|
"loss": 1.6948, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 0.00034804000000000004, |
|
"loss": 1.7347, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 0.00034764, |
|
"loss": 1.7304, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 0.00034724, |
|
"loss": 1.6862, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.00034684, |
|
"loss": 1.4857, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 0.00034644, |
|
"loss": 1.6981, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.00034604, |
|
"loss": 1.5926, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.00034564000000000003, |
|
"loss": 1.6545, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 0.00034524, |
|
"loss": 1.5918, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 0.00034484, |
|
"loss": 1.5743, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 0.00034444, |
|
"loss": 1.5867, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.00034404, |
|
"loss": 1.6042, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 0.00034364, |
|
"loss": 1.6308, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 0.00034324000000000003, |
|
"loss": 1.5813, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 0.00034284, |
|
"loss": 1.364, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 0.00034244, |
|
"loss": 1.5849, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 0.00034204, |
|
"loss": 1.4957, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 0.00034164, |
|
"loss": 1.5312, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.00034124, |
|
"loss": 1.4995, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 0.00034084, |
|
"loss": 1.4501, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 0.00034044, |
|
"loss": 1.4721, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 0.00034004, |
|
"loss": 1.4792, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.00033964, |
|
"loss": 1.513, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 0.00033924, |
|
"loss": 1.5028, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.00033884000000000003, |
|
"loss": 1.3511, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 0.00033844, |
|
"loss": 1.4152, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.00033804, |
|
"loss": 1.4216, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 0.00033764, |
|
"loss": 1.4177, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 0.00033724, |
|
"loss": 1.4074, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 0.00033684, |
|
"loss": 1.3532, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 0.00033644, |
|
"loss": 1.3525, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.00033604, |
|
"loss": 1.3755, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 0.00033564, |
|
"loss": 1.3989, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 0.00033524, |
|
"loss": 1.4012, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 0.00033484, |
|
"loss": 1.3386, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 0.00033444, |
|
"loss": 1.2476, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 0.00033404, |
|
"loss": 1.3435, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 0.00033364, |
|
"loss": 1.3131, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"learning_rate": 0.00033324, |
|
"loss": 1.3099, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 0.00033284, |
|
"loss": 1.2693, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 0.00033244, |
|
"loss": 1.2649, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.00033203999999999997, |
|
"loss": 1.2777, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"learning_rate": 0.00033164, |
|
"loss": 1.2889, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.00033124, |
|
"loss": 1.2729, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.00033084, |
|
"loss": 1.2529, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 0.00033044, |
|
"loss": 1.1566, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 0.00033004, |
|
"loss": 1.2668, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 0.00032964, |
|
"loss": 1.2052, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 0.00032924, |
|
"loss": 1.2159, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 0.00032884000000000006, |
|
"loss": 1.2003, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 0.00032844000000000005, |
|
"loss": 1.1836, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 0.00032804000000000004, |
|
"loss": 1.1857, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 0.00032764000000000003, |
|
"loss": 1.2012, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 0.00032724, |
|
"loss": 1.1907, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 0.00032684, |
|
"loss": 1.1723, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 0.00032644000000000005, |
|
"loss": 1.0299, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 0.00032604000000000004, |
|
"loss": 1.1996, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 0.00032564000000000003, |
|
"loss": 1.0919, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 0.00032524, |
|
"loss": 1.1291, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"learning_rate": 0.00032484, |
|
"loss": 1.1343, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 0.00032444000000000006, |
|
"loss": 1.0965, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 0.00032404000000000005, |
|
"loss": 1.1149, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 0.00032364000000000004, |
|
"loss": 1.132, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"learning_rate": 0.00032324000000000003, |
|
"loss": 1.1218, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 0.00032284, |
|
"loss": 1.0928, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 0.00032244, |
|
"loss": 0.9377, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 0.00032204000000000005, |
|
"loss": 1.1132, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 0.00032164000000000004, |
|
"loss": 1.0275, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 0.00032124000000000003, |
|
"loss": 1.0312, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 0.00032084, |
|
"loss": 1.0248, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 0.00032044, |
|
"loss": 1.0063, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 0.00032004, |
|
"loss": 1.0438, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 0.00031964000000000005, |
|
"loss": 1.0608, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 0.00031924000000000004, |
|
"loss": 1.0503, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 19.9, |
|
"learning_rate": 0.00031884000000000003, |
|
"loss": 1.0422, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00031844, |
|
"loss": 0.9331, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 20.1, |
|
"learning_rate": 0.00031804, |
|
"loss": 0.9744, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 0.00031768000000000003, |
|
"loss": 0.9977, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"learning_rate": 0.00031728, |
|
"loss": 0.9579, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 20.39, |
|
"learning_rate": 0.00031688, |
|
"loss": 0.908, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"learning_rate": 0.00031648, |
|
"loss": 0.9323, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 20.59, |
|
"learning_rate": 0.00031608, |
|
"loss": 0.9597, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 0.00031568000000000004, |
|
"loss": 0.975, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 20.78, |
|
"learning_rate": 0.00031528000000000003, |
|
"loss": 0.9676, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"learning_rate": 0.00031488, |
|
"loss": 0.979, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 0.00031448, |
|
"loss": 0.9263, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 0.00031408, |
|
"loss": 0.8463, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 21.18, |
|
"learning_rate": 0.00031368, |
|
"loss": 0.9642, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 21.27, |
|
"learning_rate": 0.00031328000000000004, |
|
"loss": 0.8091, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 0.00031288, |
|
"loss": 0.7911, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"learning_rate": 0.00031248, |
|
"loss": 0.8679, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"learning_rate": 0.00031208, |
|
"loss": 0.893, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 0.00031168, |
|
"loss": 0.9048, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 0.00031128000000000004, |
|
"loss": 0.9283, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 0.00031088000000000003, |
|
"loss": 0.9238, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 0.00031048, |
|
"loss": 0.8698, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"learning_rate": 0.00031008, |
|
"loss": 0.794, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"learning_rate": 0.00030968, |
|
"loss": 0.8646, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 0.00030928, |
|
"loss": 0.7995, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"learning_rate": 0.00030888000000000004, |
|
"loss": 0.8045, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"learning_rate": 0.00030848000000000003, |
|
"loss": 0.8186, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 0.00030808, |
|
"loss": 0.8085, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 22.65, |
|
"learning_rate": 0.00030768, |
|
"loss": 0.8098, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 0.00030728, |
|
"loss": 0.8741, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 22.84, |
|
"learning_rate": 0.00030688, |
|
"loss": 0.8602, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 22.94, |
|
"learning_rate": 0.00030648000000000003, |
|
"loss": 0.7975, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"learning_rate": 0.00030608, |
|
"loss": 0.6979, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 23.14, |
|
"learning_rate": 0.00030568, |
|
"loss": 0.8089, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 0.00030528, |
|
"loss": 0.7418, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 0.00030488, |
|
"loss": 0.752, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"learning_rate": 0.00030448, |
|
"loss": 0.7302, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 0.00030408000000000003, |
|
"loss": 0.7431, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 23.63, |
|
"learning_rate": 0.00030368, |
|
"loss": 0.7443, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 0.00030328, |
|
"loss": 0.7954, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 23.82, |
|
"learning_rate": 0.00030288, |
|
"loss": 0.8135, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 0.00030248, |
|
"loss": 0.7517, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 0.00030208, |
|
"loss": 0.6313, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"learning_rate": 0.00030168, |
|
"loss": 0.7276, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 24.22, |
|
"learning_rate": 0.00030128, |
|
"loss": 0.6859, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 0.00030088, |
|
"loss": 0.6701, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"learning_rate": 0.00030048, |
|
"loss": 0.6492, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 24.51, |
|
"learning_rate": 0.00030008, |
|
"loss": 0.7006, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 24.61, |
|
"learning_rate": 0.00029968000000000003, |
|
"loss": 0.7105, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 24.71, |
|
"learning_rate": 0.00029928, |
|
"loss": 0.7175, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.00029888, |
|
"loss": 0.7297, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"learning_rate": 0.00029848, |
|
"loss": 0.7295, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.00029808, |
|
"loss": 0.6337, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"learning_rate": 0.00029768, |
|
"loss": 0.6452, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 0.00029728, |
|
"loss": 0.6282, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 25.29, |
|
"learning_rate": 0.00029688, |
|
"loss": 0.6218, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"learning_rate": 0.00029648, |
|
"loss": 0.5982, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"learning_rate": 0.00029608, |
|
"loss": 0.6601, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 25.59, |
|
"learning_rate": 0.00029568, |
|
"loss": 0.6761, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 25.69, |
|
"learning_rate": 0.00029528, |
|
"loss": 0.6685, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 25.78, |
|
"learning_rate": 0.00029488, |
|
"loss": 0.6506, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"learning_rate": 0.00029448, |
|
"loss": 0.6804, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 25.98, |
|
"learning_rate": 0.00029408, |
|
"loss": 0.6387, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 26.08, |
|
"learning_rate": 0.00029368, |
|
"loss": 0.5516, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"learning_rate": 0.00029328, |
|
"loss": 0.5891, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 0.00029288, |
|
"loss": 0.5907, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"learning_rate": 0.00029248, |
|
"loss": 0.5559, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"learning_rate": 0.00029208, |
|
"loss": 0.5942, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"learning_rate": 0.00029168, |
|
"loss": 0.625, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 0.00029128, |
|
"loss": 0.621, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 26.76, |
|
"learning_rate": 0.00029088, |
|
"loss": 0.5987, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 26.86, |
|
"learning_rate": 0.00029047999999999997, |
|
"loss": 0.6137, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"learning_rate": 0.00029008, |
|
"loss": 0.5928, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 27.06, |
|
"learning_rate": 0.00028968, |
|
"loss": 0.5423, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 27.16, |
|
"learning_rate": 0.00028928, |
|
"loss": 0.5484, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 27.25, |
|
"learning_rate": 0.00028888, |
|
"loss": 0.563, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"learning_rate": 0.00028848, |
|
"loss": 0.5161, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 0.00028808, |
|
"loss": 0.5432, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 27.55, |
|
"learning_rate": 0.00028768, |
|
"loss": 0.5607, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 27.65, |
|
"learning_rate": 0.00028728, |
|
"loss": 0.557, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"learning_rate": 0.00028688, |
|
"loss": 0.5698, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"learning_rate": 0.00028648, |
|
"loss": 0.5571, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 27.94, |
|
"learning_rate": 0.00028607999999999997, |
|
"loss": 0.5525, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"learning_rate": 0.00028568, |
|
"loss": 0.5017, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 28.14, |
|
"learning_rate": 0.00028528, |
|
"loss": 0.5348, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 0.00028488000000000005, |
|
"loss": 0.4948, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 0.00028448000000000004, |
|
"loss": 0.4791, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 28.43, |
|
"learning_rate": 0.00028408000000000003, |
|
"loss": 0.4908, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"learning_rate": 0.00028368, |
|
"loss": 0.4943, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"learning_rate": 0.00028328, |
|
"loss": 0.4995, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 28.73, |
|
"learning_rate": 0.00028288000000000006, |
|
"loss": 0.5252, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 28.82, |
|
"learning_rate": 0.00028248000000000005, |
|
"loss": 0.5126, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 0.00028208000000000004, |
|
"loss": 0.5077, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 0.00028168000000000003, |
|
"loss": 0.4462, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 0.00028128, |
|
"loss": 0.4924, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 29.22, |
|
"learning_rate": 0.00028088, |
|
"loss": 0.4502, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 29.31, |
|
"learning_rate": 0.00028048000000000005, |
|
"loss": 0.4494, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"learning_rate": 0.00028008000000000004, |
|
"loss": 0.4389, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 0.00027968000000000003, |
|
"loss": 0.4506, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 29.61, |
|
"learning_rate": 0.00027928, |
|
"loss": 0.4621, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 29.71, |
|
"learning_rate": 0.00027888, |
|
"loss": 0.4678, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 0.00027848, |
|
"loss": 0.4587, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 29.9, |
|
"learning_rate": 0.00027808000000000005, |
|
"loss": 0.4719, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.00027768000000000004, |
|
"loss": 0.4271, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 30.1, |
|
"learning_rate": 0.00027728000000000003, |
|
"loss": 0.4325, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 30.2, |
|
"learning_rate": 0.00027688, |
|
"loss": 0.4136, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 30.29, |
|
"learning_rate": 0.00027648, |
|
"loss": 0.4362, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 30.39, |
|
"learning_rate": 0.00027608000000000005, |
|
"loss": 0.4099, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"learning_rate": 0.00027568000000000004, |
|
"loss": 0.4238, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 30.59, |
|
"learning_rate": 0.00027528000000000003, |
|
"loss": 0.4295, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 30.69, |
|
"learning_rate": 0.00027488, |
|
"loss": 0.4264, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 30.78, |
|
"learning_rate": 0.00027448, |
|
"loss": 0.417, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 30.88, |
|
"learning_rate": 0.00027408, |
|
"loss": 0.4304, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"learning_rate": 0.00027368000000000005, |
|
"loss": 0.4272, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 31.08, |
|
"learning_rate": 0.00027328000000000004, |
|
"loss": 0.3643, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 31.18, |
|
"learning_rate": 0.00027288000000000003, |
|
"loss": 0.3797, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 31.27, |
|
"learning_rate": 0.00027248, |
|
"loss": 0.4051, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"learning_rate": 0.00027208, |
|
"loss": 0.3869, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 0.00027168, |
|
"loss": 0.3997, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 31.57, |
|
"learning_rate": 0.00027128000000000005, |
|
"loss": 0.4037, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"learning_rate": 0.00027088000000000004, |
|
"loss": 0.385, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 31.76, |
|
"learning_rate": 0.00027048, |
|
"loss": 0.3919, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 31.86, |
|
"learning_rate": 0.00027008, |
|
"loss": 0.3823, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 31.96, |
|
"learning_rate": 0.00026968, |
|
"loss": 0.382, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 32.06, |
|
"learning_rate": 0.00026928, |
|
"loss": 0.349, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 32.16, |
|
"learning_rate": 0.00026888000000000004, |
|
"loss": 0.3416, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"learning_rate": 0.00026848000000000003, |
|
"loss": 0.3733, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 32.35, |
|
"learning_rate": 0.00026808, |
|
"loss": 0.3589, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"learning_rate": 0.00026768, |
|
"loss": 0.3697, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"learning_rate": 0.00026728, |
|
"loss": 0.3787, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 32.65, |
|
"learning_rate": 0.00026688, |
|
"loss": 0.3564, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"learning_rate": 0.00026648000000000004, |
|
"loss": 0.3636, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 32.84, |
|
"learning_rate": 0.00026608000000000003, |
|
"loss": 0.3519, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"learning_rate": 0.00026568, |
|
"loss": 0.3526, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 33.04, |
|
"learning_rate": 0.00026528, |
|
"loss": 0.3287, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 33.14, |
|
"learning_rate": 0.00026488, |
|
"loss": 0.3138, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 33.24, |
|
"learning_rate": 0.00026448000000000004, |
|
"loss": 0.3274, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.00026408000000000003, |
|
"loss": 0.3302, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 33.43, |
|
"learning_rate": 0.00026368, |
|
"loss": 0.3327, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 33.53, |
|
"learning_rate": 0.00026328, |
|
"loss": 0.3383, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 33.63, |
|
"learning_rate": 0.00026288, |
|
"loss": 0.3343, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 33.73, |
|
"learning_rate": 0.00026248, |
|
"loss": 0.3375, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 33.82, |
|
"learning_rate": 0.00026208000000000004, |
|
"loss": 0.3378, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 33.92, |
|
"learning_rate": 0.00026168000000000003, |
|
"loss": 0.3298, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 0.00026128, |
|
"loss": 0.2956, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 34.12, |
|
"learning_rate": 0.00026088, |
|
"loss": 0.295, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 34.22, |
|
"learning_rate": 0.00026048, |
|
"loss": 0.2847, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 34.31, |
|
"learning_rate": 0.00026008, |
|
"loss": 0.2993, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 34.41, |
|
"learning_rate": 0.00025968000000000003, |
|
"loss": 0.2974, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 0.00025928, |
|
"loss": 0.3076, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 34.61, |
|
"learning_rate": 0.00025888, |
|
"loss": 0.3146, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 34.71, |
|
"learning_rate": 0.00025848, |
|
"loss": 0.3124, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"learning_rate": 0.00025808, |
|
"loss": 0.3072, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 34.9, |
|
"learning_rate": 0.00025768, |
|
"loss": 0.3078, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 0.00025728000000000003, |
|
"loss": 0.2738, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 35.1, |
|
"learning_rate": 0.00025688, |
|
"loss": 0.2736, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 0.00025648, |
|
"loss": 0.257, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 0.00025608, |
|
"loss": 0.2671, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 35.39, |
|
"learning_rate": 0.00025568, |
|
"loss": 0.2638, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 35.49, |
|
"learning_rate": 0.00025528, |
|
"loss": 0.2707, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 35.59, |
|
"learning_rate": 0.00025488, |
|
"loss": 0.2795, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"learning_rate": 0.00025448, |
|
"loss": 0.2765, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 35.78, |
|
"learning_rate": 0.00025408, |
|
"loss": 0.2757, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 35.88, |
|
"learning_rate": 0.00025368, |
|
"loss": 0.2779, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 35.98, |
|
"learning_rate": 0.00025328, |
|
"loss": 0.2628, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 36.08, |
|
"learning_rate": 0.00025288000000000003, |
|
"loss": 0.2403, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 36.18, |
|
"learning_rate": 0.00025248, |
|
"loss": 0.2361, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 0.00025208, |
|
"loss": 0.2394, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 36.37, |
|
"learning_rate": 0.00025168, |
|
"loss": 0.2309, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 36.47, |
|
"learning_rate": 0.00025128, |
|
"loss": 0.233, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 36.57, |
|
"learning_rate": 0.00025088, |
|
"loss": 0.2446, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 36.67, |
|
"learning_rate": 0.00025048000000000003, |
|
"loss": 0.2385, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 36.76, |
|
"learning_rate": 0.00025008, |
|
"loss": 0.239, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.00024968, |
|
"loss": 0.2392, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 0.00024928, |
|
"loss": 0.2318, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 37.06, |
|
"learning_rate": 0.00024888, |
|
"loss": 0.218, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 0.00024848, |
|
"loss": 0.2059, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"learning_rate": 0.00024808, |
|
"loss": 0.2082, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 37.35, |
|
"learning_rate": 0.00024768, |
|
"loss": 0.2058, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 37.45, |
|
"learning_rate": 0.00024728, |
|
"loss": 0.2037, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 37.55, |
|
"learning_rate": 0.00024688, |
|
"loss": 0.2083, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 37.65, |
|
"learning_rate": 0.00024648, |
|
"loss": 0.2007, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 37.75, |
|
"learning_rate": 0.00024608, |
|
"loss": 0.2012, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 37.84, |
|
"learning_rate": 0.00024568, |
|
"loss": 0.1972, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"learning_rate": 0.00024528, |
|
"loss": 0.1996, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 38.04, |
|
"learning_rate": 0.00024488, |
|
"loss": 0.1859, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 38.14, |
|
"learning_rate": 0.00024448, |
|
"loss": 0.1801, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 38.24, |
|
"learning_rate": 0.00024408, |
|
"loss": 0.1708, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 38.33, |
|
"learning_rate": 0.00024368, |
|
"loss": 0.1709, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"learning_rate": 0.00024328, |
|
"loss": 0.1736, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 38.53, |
|
"learning_rate": 0.00024288, |
|
"loss": 0.1726, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 38.63, |
|
"learning_rate": 0.00024248, |
|
"loss": 0.1681, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 38.73, |
|
"learning_rate": 0.00024207999999999999, |
|
"loss": 0.1674, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 38.82, |
|
"learning_rate": 0.00024168, |
|
"loss": 0.1601, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 38.92, |
|
"learning_rate": 0.00024128, |
|
"loss": 0.1552, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 0.00024087999999999998, |
|
"loss": 0.1414, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 39.12, |
|
"learning_rate": 0.00024048, |
|
"loss": 0.151, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"learning_rate": 0.00024008, |
|
"loss": 0.1418, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 39.31, |
|
"learning_rate": 0.00023967999999999998, |
|
"loss": 0.1372, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"learning_rate": 0.00023928, |
|
"loss": 0.1334, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 39.51, |
|
"learning_rate": 0.00023888, |
|
"loss": 0.1339, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 39.61, |
|
"learning_rate": 0.00023847999999999998, |
|
"loss": 0.137, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 39.71, |
|
"learning_rate": 0.00023808, |
|
"loss": 0.1325, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 39.8, |
|
"learning_rate": 0.00023768, |
|
"loss": 0.1322, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 39.9, |
|
"learning_rate": 0.00023727999999999998, |
|
"loss": 0.1262, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.00023688, |
|
"loss": 0.1111, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 40.1, |
|
"learning_rate": 0.00023647999999999999, |
|
"loss": 0.1125, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"learning_rate": 0.00023608, |
|
"loss": 0.1128, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 40.29, |
|
"learning_rate": 0.00023568, |
|
"loss": 0.1098, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 40.39, |
|
"learning_rate": 0.00023527999999999998, |
|
"loss": 0.1057, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 40.49, |
|
"learning_rate": 0.00023488000000000003, |
|
"loss": 0.1019, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 40.59, |
|
"learning_rate": 0.00023448000000000005, |
|
"loss": 0.1024, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 40.69, |
|
"learning_rate": 0.00023408000000000004, |
|
"loss": 0.1014, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 40.78, |
|
"learning_rate": 0.00023368000000000003, |
|
"loss": 0.1038, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 40.88, |
|
"learning_rate": 0.00023328000000000004, |
|
"loss": 0.096, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"learning_rate": 0.00023288000000000003, |
|
"loss": 0.0943, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 41.08, |
|
"learning_rate": 0.00023248000000000002, |
|
"loss": 0.0821, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"learning_rate": 0.00023208000000000004, |
|
"loss": 0.0861, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 41.27, |
|
"learning_rate": 0.00023168000000000003, |
|
"loss": 0.0854, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 41.37, |
|
"learning_rate": 0.00023128000000000002, |
|
"loss": 0.081, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 41.47, |
|
"learning_rate": 0.00023088000000000004, |
|
"loss": 0.0787, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 41.57, |
|
"learning_rate": 0.00023048000000000003, |
|
"loss": 0.0775, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.00023008000000000002, |
|
"loss": 0.0742, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 41.76, |
|
"learning_rate": 0.00022968000000000004, |
|
"loss": 0.0727, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 41.86, |
|
"learning_rate": 0.00022928000000000003, |
|
"loss": 0.0716, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"learning_rate": 0.00022888000000000002, |
|
"loss": 0.0701, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 42.06, |
|
"learning_rate": 0.00022848000000000004, |
|
"loss": 0.0639, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 42.16, |
|
"learning_rate": 0.00022808000000000003, |
|
"loss": 0.0636, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"learning_rate": 0.00022768000000000002, |
|
"loss": 0.0625, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 42.35, |
|
"learning_rate": 0.00022728000000000003, |
|
"loss": 0.0607, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"learning_rate": 0.00022688000000000002, |
|
"loss": 0.058, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 42.55, |
|
"learning_rate": 0.00022648000000000001, |
|
"loss": 0.0567, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 42.65, |
|
"learning_rate": 0.00022608000000000003, |
|
"loss": 0.0538, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 42.75, |
|
"learning_rate": 0.00022568000000000002, |
|
"loss": 0.0528, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 42.84, |
|
"learning_rate": 0.00022528, |
|
"loss": 0.0507, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 0.00022488000000000003, |
|
"loss": 0.0481, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 43.04, |
|
"learning_rate": 0.00022448000000000002, |
|
"loss": 0.0434, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"learning_rate": 0.00022408000000000004, |
|
"loss": 0.0451, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"learning_rate": 0.00022368000000000003, |
|
"loss": 0.0433, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 43.33, |
|
"learning_rate": 0.00022328000000000002, |
|
"loss": 0.0419, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"learning_rate": 0.00022288000000000003, |
|
"loss": 0.0408, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 43.53, |
|
"learning_rate": 0.00022248000000000002, |
|
"loss": 0.0386, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 43.63, |
|
"learning_rate": 0.00022208000000000002, |
|
"loss": 0.038, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"learning_rate": 0.00022168000000000003, |
|
"loss": 0.0367, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 43.82, |
|
"learning_rate": 0.00022128000000000002, |
|
"loss": 0.0362, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 0.00022088, |
|
"loss": 0.0338, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 0.00022048000000000003, |
|
"loss": 0.0303, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"learning_rate": 0.00022008000000000002, |
|
"loss": 0.0318, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 44.22, |
|
"learning_rate": 0.00021968, |
|
"loss": 0.031, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 44.31, |
|
"learning_rate": 0.00021928000000000003, |
|
"loss": 0.0295, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 44.41, |
|
"learning_rate": 0.00021888000000000002, |
|
"loss": 0.029, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 44.51, |
|
"learning_rate": 0.00021848, |
|
"loss": 0.0275, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 44.61, |
|
"learning_rate": 0.00021808000000000003, |
|
"loss": 0.0267, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 44.71, |
|
"learning_rate": 0.00021768000000000002, |
|
"loss": 0.0269, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 0.00021728, |
|
"loss": 0.0262, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 44.9, |
|
"learning_rate": 0.00021688000000000002, |
|
"loss": 0.0259, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 0.00021648000000000001, |
|
"loss": 0.0235, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"learning_rate": 0.00021608, |
|
"loss": 0.0225, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"learning_rate": 0.00021568000000000002, |
|
"loss": 0.0238, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 45.29, |
|
"learning_rate": 0.00021528, |
|
"loss": 0.0233, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 45.39, |
|
"learning_rate": 0.00021488, |
|
"loss": 0.0222, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 45.49, |
|
"learning_rate": 0.00021448000000000002, |
|
"loss": 0.0218, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 45.59, |
|
"learning_rate": 0.00021408, |
|
"loss": 0.0207, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 45.69, |
|
"learning_rate": 0.00021368, |
|
"loss": 0.0207, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 45.78, |
|
"learning_rate": 0.00021328000000000002, |
|
"loss": 0.0214, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 45.88, |
|
"learning_rate": 0.00021288, |
|
"loss": 0.021, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 45.98, |
|
"learning_rate": 0.00021248000000000003, |
|
"loss": 0.0205, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 46.08, |
|
"learning_rate": 0.00021208000000000002, |
|
"loss": 0.0179, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 46.18, |
|
"learning_rate": 0.00021168, |
|
"loss": 0.019, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 46.27, |
|
"learning_rate": 0.00021128000000000002, |
|
"loss": 0.0192, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 46.37, |
|
"learning_rate": 0.00021088000000000001, |
|
"loss": 0.0187, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.00021048, |
|
"loss": 0.0183, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 46.57, |
|
"learning_rate": 0.00021008000000000002, |
|
"loss": 0.0176, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 46.67, |
|
"learning_rate": 0.00020968, |
|
"loss": 0.0169, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 46.76, |
|
"learning_rate": 0.00020928, |
|
"loss": 0.0171, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"learning_rate": 0.00020888000000000002, |
|
"loss": 0.0172, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"learning_rate": 0.00020848, |
|
"loss": 0.0175, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"learning_rate": 0.00020808, |
|
"loss": 0.0153, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 47.16, |
|
"learning_rate": 0.00020768000000000002, |
|
"loss": 0.0162, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 47.25, |
|
"learning_rate": 0.00020728, |
|
"loss": 0.0161, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 47.35, |
|
"learning_rate": 0.00020688, |
|
"loss": 0.0161, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 0.00020648000000000002, |
|
"loss": 0.0157, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 47.55, |
|
"learning_rate": 0.00020608, |
|
"loss": 0.0149, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 47.65, |
|
"learning_rate": 0.00020568, |
|
"loss": 0.0143, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 47.75, |
|
"learning_rate": 0.00020528, |
|
"loss": 0.0138, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 47.84, |
|
"learning_rate": 0.00020488, |
|
"loss": 0.0136, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"learning_rate": 0.00020448, |
|
"loss": 0.014, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 48.04, |
|
"learning_rate": 0.00020408, |
|
"loss": 0.0131, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 48.14, |
|
"learning_rate": 0.00020368, |
|
"loss": 0.0141, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 48.24, |
|
"learning_rate": 0.00020328, |
|
"loss": 0.0134, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 48.33, |
|
"learning_rate": 0.00020288, |
|
"loss": 0.013, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 48.43, |
|
"learning_rate": 0.00020248, |
|
"loss": 0.0128, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 48.53, |
|
"learning_rate": 0.00020208, |
|
"loss": 0.0127, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 48.63, |
|
"learning_rate": 0.00020168, |
|
"loss": 0.012, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 48.73, |
|
"learning_rate": 0.00020128, |
|
"loss": 0.0114, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 48.82, |
|
"learning_rate": 0.00020088000000000001, |
|
"loss": 0.0113, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 48.92, |
|
"learning_rate": 0.00020048, |
|
"loss": 0.0118, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 0.00020008, |
|
"loss": 0.011, |
|
"step": 5000 |
|
} |
|
],
"logging_steps": 10,
"max_steps": 10000,
"num_train_epochs": 99,
"save_steps": 1000,
"total_flos": 3.3405215440896e+17,
"trial_name": null,
"trial_params": null
}
|