{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.803921568627452,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.1, "learning_rate": 0.0003996, "loss": 9.2736, "step": 10 },
    { "epoch": 0.2, "learning_rate": 0.0003992, "loss": 7.7626, "step": 20 },
    { "epoch": 0.29, "learning_rate": 0.00039880000000000004, "loss": 7.5202, "step": 30 },
    { "epoch": 0.39, "learning_rate": 0.00039840000000000003, "loss": 7.371, "step": 40 },
    { "epoch": 0.49, "learning_rate": 0.000398, "loss": 7.2352, "step": 50 },
    { "epoch": 0.59, "learning_rate": 0.0003976, "loss": 7.1352, "step": 60 },
    { "epoch": 0.69, "learning_rate": 0.0003972, "loss": 7.0625, "step": 70 },
    { "epoch": 0.78, "learning_rate": 0.0003968, "loss": 6.911, "step": 80 },
    { "epoch": 0.88, "learning_rate": 0.00039640000000000004, "loss": 6.7717, "step": 90 },
    { "epoch": 0.98, "learning_rate": 0.00039600000000000003, "loss": 6.6484, "step": 100 },
    { "epoch": 1.08, "learning_rate": 0.0003956, "loss": 6.5492, "step": 110 },
    { "epoch": 1.18, "learning_rate": 0.0003952, "loss": 6.4417, "step": 120 },
    { "epoch": 1.27, "learning_rate": 0.0003948, "loss": 6.3311, "step": 130 },
    { "epoch": 1.37, "learning_rate": 0.0003944, "loss": 6.2206, "step": 140 },
    { "epoch": 1.47, "learning_rate": 0.00039400000000000004, "loss": 6.1179, "step": 150 },
    { "epoch": 1.57, "learning_rate": 0.0003936, "loss": 6.0151, "step": 160 },
    { "epoch": 1.67, "learning_rate": 0.0003932, "loss": 5.9035, "step": 170 },
    { "epoch": 1.76, "learning_rate": 0.0003928, "loss": 5.8222, "step": 180 },
    { "epoch": 1.86, "learning_rate": 0.0003924, "loss": 5.7042, "step": 190 },
    { "epoch": 1.96, "learning_rate": 0.000392, "loss": 5.6265, "step": 200 },
    { "epoch": 2.06, "learning_rate": 0.00039160000000000003, "loss": 5.5338, "step": 210 },
    { "epoch": 2.16, "learning_rate": 0.0003912, "loss": 5.4521, "step": 220 },
    { "epoch": 2.25, "learning_rate": 0.0003908, "loss": 5.3552, "step": 230 },
    { "epoch": 2.35, "learning_rate": 0.0003904, "loss": 5.2771, "step": 240 },
    { "epoch": 2.45, "learning_rate": 0.00039, "loss": 5.1587, "step": 250 },
    { "epoch": 2.55, "learning_rate": 0.0003896, "loss": 5.0899, "step": 260 },
    { "epoch": 2.65, "learning_rate": 0.00038920000000000003, "loss": 5.0191, "step": 270 },
    { "epoch": 2.75, "learning_rate": 0.0003888, "loss": 4.9602, "step": 280 },
    { "epoch": 2.84, "learning_rate": 0.0003884, "loss": 4.8366, "step": 290 },
    { "epoch": 2.94, "learning_rate": 0.000388, "loss": 4.7848, "step": 300 },
    { "epoch": 3.04, "learning_rate": 0.0003876, "loss": 4.7199, "step": 310 },
    { "epoch": 3.14, "learning_rate": 0.00038720000000000003, "loss": 4.6306, "step": 320 },
    { "epoch": 3.24, "learning_rate": 0.0003868, "loss": 4.5337, "step": 330 },
    { "epoch": 3.33, "learning_rate": 0.0003864, "loss": 4.4796, "step": 340 },
    { "epoch": 3.43, "learning_rate": 0.000386, "loss": 4.3881, "step": 350 },
    { "epoch": 3.53, "learning_rate": 0.0003856, "loss": 4.2989, "step": 360 },
    { "epoch": 3.63, "learning_rate": 0.0003852, "loss": 4.2533, "step": 370 },
    { "epoch": 3.73, "learning_rate": 0.00038480000000000003, "loss": 4.2379, "step": 380 },
    { "epoch": 3.82, "learning_rate": 0.0003844, "loss": 4.142, "step": 390 },
    { "epoch": 3.92, "learning_rate": 0.000384, "loss": 4.0793, "step": 400 },
    { "epoch": 4.02, "learning_rate": 0.0003836, "loss": 4.0005, "step": 410 },
    { "epoch": 4.12, "learning_rate": 0.0003832, "loss": 3.9619, "step": 420 },
    { "epoch": 4.22, "learning_rate": 0.0003828, "loss": 3.8687, "step": 430 },
    { "epoch": 4.31, "learning_rate": 0.0003824, "loss": 3.8486, "step": 440 },
    { "epoch": 4.41, "learning_rate": 0.000382, "loss": 3.7684, "step": 450 },
    { "epoch": 4.51, "learning_rate": 0.0003816, "loss": 3.7013, "step": 460 },
    { "epoch": 4.61, "learning_rate": 0.0003812, "loss": 3.6851, "step": 470 },
    { "epoch": 4.71, "learning_rate": 0.0003808, "loss": 3.6585, "step": 480 },
    { "epoch": 4.8, "learning_rate": 0.0003804, "loss": 3.6172, "step": 490 },
    { "epoch": 4.9, "learning_rate": 0.00038, "loss": 3.5557, "step": 500 },
    { "epoch": 5.0, "learning_rate": 0.0003796, "loss": 3.4746, "step": 510 },
    { "epoch": 5.1, "learning_rate": 0.0003792, "loss": 3.4473, "step": 520 },
    { "epoch": 5.2, "learning_rate": 0.0003788, "loss": 3.3828, "step": 530 },
    { "epoch": 5.29, "learning_rate": 0.0003784, "loss": 3.3868, "step": 540 },
    { "epoch": 5.39, "learning_rate": 0.00037799999999999997, "loss": 3.31, "step": 550 },
    { "epoch": 5.49, "learning_rate": 0.0003776, "loss": 3.2628, "step": 560 },
    { "epoch": 5.59, "learning_rate": 0.0003772, "loss": 3.2541, "step": 570 },
    { "epoch": 5.69, "learning_rate": 0.0003768, "loss": 3.223, "step": 580 },
    { "epoch": 5.78, "learning_rate": 0.0003764, "loss": 3.2028, "step": 590 },
    { "epoch": 5.88, "learning_rate": 0.000376, "loss": 3.1659, "step": 600 },
    { "epoch": 5.98, "learning_rate": 0.0003756, "loss": 3.0847, "step": 610 },
    { "epoch": 6.08, "learning_rate": 0.0003752, "loss": 3.0215, "step": 620 },
    { "epoch": 6.18, "learning_rate": 0.00037480000000000006, "loss": 3.0149, "step": 630 },
    { "epoch": 6.27, "learning_rate": 0.00037440000000000005, "loss": 3.0177, "step": 640 },
    { "epoch": 6.37, "learning_rate": 0.00037400000000000004, "loss": 2.9474, "step": 650 },
    { "epoch": 6.47, "learning_rate": 0.00037360000000000003, "loss": 2.9245, "step": 660 },
    { "epoch": 6.57, "learning_rate": 0.0003732, "loss": 2.9218, "step": 670 },
    { "epoch": 6.67, "learning_rate": 0.00037280000000000006, "loss": 2.8666, "step": 680 },
    { "epoch": 6.76, "learning_rate": 0.00037240000000000005, "loss": 2.8821, "step": 690 },
    { "epoch": 6.86, "learning_rate": 0.00037200000000000004, "loss": 2.8243, "step": 700 },
    { "epoch": 6.96, "learning_rate": 0.00037160000000000003, "loss": 2.7753, "step": 710 },
    { "epoch": 7.06, "learning_rate": 0.0003712, "loss": 2.7086, "step": 720 },
    { "epoch": 7.16, "learning_rate": 0.0003708, "loss": 2.7104, "step": 730 },
    { "epoch": 7.25, "learning_rate": 0.00037040000000000006, "loss": 2.7103, "step": 740 },
    { "epoch": 7.35, "learning_rate": 0.00037000000000000005, "loss": 2.6707, "step": 750 },
    { "epoch": 7.45, "learning_rate": 0.00036960000000000004, "loss": 2.6413, "step": 760 },
    { "epoch": 7.55, "learning_rate": 0.00036920000000000003, "loss": 2.6359, "step": 770 },
    { "epoch": 7.65, "learning_rate": 0.0003688, "loss": 2.5838, "step": 780 },
    { "epoch": 7.75, "learning_rate": 0.0003684, "loss": 2.6212, "step": 790 },
    { "epoch": 7.84, "learning_rate": 0.00036800000000000005, "loss": 2.5718, "step": 800 },
    { "epoch": 7.94, "learning_rate": 0.00036760000000000004, "loss": 2.5348, "step": 810 },
    { "epoch": 8.04, "learning_rate": 0.00036720000000000004, "loss": 2.4195, "step": 820 },
    { "epoch": 8.14, "learning_rate": 0.0003668, "loss": 2.4938, "step": 830 },
    { "epoch": 8.24, "learning_rate": 0.0003664, "loss": 2.4372, "step": 840 },
    { "epoch": 8.33, "learning_rate": 0.000366, "loss": 2.4567, "step": 850 },
    { "epoch": 8.43, "learning_rate": 0.00036560000000000005, "loss": 2.4108, "step": 860 },
    { "epoch": 8.53, "learning_rate": 0.00036520000000000004, "loss": 2.3993, "step": 870 },
    { "epoch": 8.63, "learning_rate": 0.00036480000000000003, "loss": 2.3739, "step": 880 },
    { "epoch": 8.73, "learning_rate": 0.0003644, "loss": 2.391, "step": 890 },
    { "epoch": 8.82, "learning_rate": 0.000364, "loss": 2.38, "step": 900 },
    { "epoch": 8.92, "learning_rate": 0.00036360000000000006, "loss": 2.3257, "step": 910 },
    { "epoch": 9.02, "learning_rate": 0.00036320000000000005, "loss": 2.1956, "step": 920 },
    { "epoch": 9.12, "learning_rate": 0.00036280000000000004, "loss": 2.2944, "step": 930 },
    { "epoch": 9.22, "learning_rate": 0.0003624, "loss": 2.2163, "step": 940 },
    { "epoch": 9.31, "learning_rate": 0.000362, "loss": 2.2641, "step": 950 },
    { "epoch": 9.41, "learning_rate": 0.0003616, "loss": 2.2035, "step": 960 },
    { "epoch": 9.51, "learning_rate": 0.00036120000000000005, "loss": 2.188, "step": 970 },
    { "epoch": 9.61, "learning_rate": 0.00036080000000000004, "loss": 2.197, "step": 980 },
    { "epoch": 9.71, "learning_rate": 0.00036040000000000003, "loss": 2.18, "step": 990 },
    { "epoch": 9.8, "learning_rate": 0.00036, "loss": 2.1994, "step": 1000 }
  ],
  "logging_steps": 10,
  "max_steps": 10000,
  "num_train_epochs": 99,
  "save_steps": 1000,
  "total_flos": 6.6810430881792e+16,
  "trial_name": null,
  "trial_params": null
}