{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 3000,
  "global_step": 1080,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.00019814814814814814,
      "loss": 2.1983,
      "step": 10
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001962962962962963,
      "loss": 2.1208,
      "step": 20
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019444444444444446,
      "loss": 1.8985,
      "step": 30
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0001925925925925926,
      "loss": 1.726,
      "step": 40
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019074074074074075,
      "loss": 1.6799,
      "step": 50
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018888888888888888,
      "loss": 1.5807,
      "step": 60
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018703703703703704,
      "loss": 1.5165,
      "step": 70
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001851851851851852,
      "loss": 1.4893,
      "step": 80
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018333333333333334,
      "loss": 1.24,
      "step": 90
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001814814814814815,
      "loss": 1.2977,
      "step": 100
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017962962962962963,
      "loss": 1.2492,
      "step": 110
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017777777777777779,
      "loss": 1.1566,
      "step": 120
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017592592592592595,
      "loss": 1.3443,
      "step": 130
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017407407407407408,
      "loss": 1.2112,
      "step": 140
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017222222222222224,
      "loss": 1.0302,
      "step": 150
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017037037037037037,
      "loss": 1.1856,
      "step": 160
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016851851851851853,
      "loss": 1.1745,
      "step": 170
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.0001666666666666667,
      "loss": 1.0659,
      "step": 180
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00016481481481481482,
      "loss": 1.0134,
      "step": 190
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016296296296296295,
      "loss": 0.9692,
      "step": 200
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0001611111111111111,
      "loss": 0.8877,
      "step": 210
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015925925925925927,
      "loss": 0.8419,
      "step": 220
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015740740740740743,
      "loss": 0.9147,
      "step": 230
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00015555555555555556,
      "loss": 1.0111,
      "step": 240
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.0001537037037037037,
      "loss": 0.9457,
      "step": 250
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.00015185185185185185,
      "loss": 0.9867,
      "step": 260
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.1366,
      "step": 270
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.00014814814814814815,
      "loss": 0.7648,
      "step": 280
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0001462962962962963,
      "loss": 0.8693,
      "step": 290
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.00014444444444444444,
      "loss": 0.8179,
      "step": 300
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.0001425925925925926,
      "loss": 0.8077,
      "step": 310
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.00014074074074074076,
      "loss": 0.941,
      "step": 320
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.0001388888888888889,
      "loss": 0.8906,
      "step": 330
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.00013703703703703705,
      "loss": 0.8433,
      "step": 340
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.00013518518518518518,
      "loss": 0.7654,
      "step": 350
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.7217,
      "step": 360
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.0001314814814814815,
      "loss": 0.8744,
      "step": 370
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.00012962962962962963,
      "loss": 0.6337,
      "step": 380
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.00012777777777777776,
      "loss": 0.6369,
      "step": 390
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.00012592592592592592,
      "loss": 0.5268,
      "step": 400
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.00012407407407407408,
      "loss": 0.8015,
      "step": 410
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.00012222222222222224,
      "loss": 0.5013,
      "step": 420
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.00012037037037037037,
      "loss": 0.6063,
      "step": 430
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.00011851851851851852,
      "loss": 0.7767,
      "step": 440
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00011666666666666668,
      "loss": 0.5174,
      "step": 450
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.00011481481481481482,
      "loss": 0.6391,
      "step": 460
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.00011296296296296296,
      "loss": 0.4966,
      "step": 470
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.00011111111111111112,
      "loss": 0.5991,
      "step": 480
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.00010925925925925926,
      "loss": 0.5499,
      "step": 490
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.00010740740740740742,
      "loss": 0.5488,
      "step": 500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.00010555555555555557,
      "loss": 0.5834,
      "step": 510
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0001037037037037037,
      "loss": 0.6238,
      "step": 520
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.00010185185185185186,
      "loss": 0.6365,
      "step": 530
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.0001,
      "loss": 0.5337,
      "step": 540
    },
    {
      "epoch": 2.04,
      "learning_rate": 9.814814814814815e-05,
      "loss": 0.4326,
      "step": 550
    },
    {
      "epoch": 2.07,
      "learning_rate": 9.62962962962963e-05,
      "loss": 0.4197,
      "step": 560
    },
    {
      "epoch": 2.11,
      "learning_rate": 9.444444444444444e-05,
      "loss": 0.3268,
      "step": 570
    },
    {
      "epoch": 2.15,
      "learning_rate": 9.25925925925926e-05,
      "loss": 0.3066,
      "step": 580
    },
    {
      "epoch": 2.19,
      "learning_rate": 9.074074074074075e-05,
      "loss": 0.4737,
      "step": 590
    },
    {
      "epoch": 2.22,
      "learning_rate": 8.888888888888889e-05,
      "loss": 0.3185,
      "step": 600
    },
    {
      "epoch": 2.26,
      "learning_rate": 8.703703703703704e-05,
      "loss": 0.4233,
      "step": 610
    },
    {
      "epoch": 2.3,
      "learning_rate": 8.518518518518518e-05,
      "loss": 0.3377,
      "step": 620
    },
    {
      "epoch": 2.33,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.3957,
      "step": 630
    },
    {
      "epoch": 2.37,
      "learning_rate": 8.148148148148148e-05,
      "loss": 0.3915,
      "step": 640
    },
    {
      "epoch": 2.41,
      "learning_rate": 7.962962962962964e-05,
      "loss": 0.3025,
      "step": 650
    },
    {
      "epoch": 2.44,
      "learning_rate": 7.777777777777778e-05,
      "loss": 0.2896,
      "step": 660
    },
    {
      "epoch": 2.48,
      "learning_rate": 7.592592592592593e-05,
      "loss": 0.2558,
      "step": 670
    },
    {
      "epoch": 2.52,
      "learning_rate": 7.407407407407407e-05,
      "loss": 0.3477,
      "step": 680
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.222222222222222e-05,
      "loss": 0.2111,
      "step": 690
    },
    {
      "epoch": 2.59,
      "learning_rate": 7.037037037037038e-05,
      "loss": 0.2885,
      "step": 700
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.851851851851852e-05,
      "loss": 0.2953,
      "step": 710
    },
    {
      "epoch": 2.67,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.2415,
      "step": 720
    },
    {
      "epoch": 2.7,
      "learning_rate": 6.481481481481482e-05,
      "loss": 0.3242,
      "step": 730
    },
    {
      "epoch": 2.74,
      "learning_rate": 6.296296296296296e-05,
      "loss": 0.2616,
      "step": 740
    },
    {
      "epoch": 2.78,
      "learning_rate": 6.111111111111112e-05,
      "loss": 0.2853,
      "step": 750
    },
    {
      "epoch": 2.81,
      "learning_rate": 5.925925925925926e-05,
      "loss": 0.2828,
      "step": 760
    },
    {
      "epoch": 2.85,
      "learning_rate": 5.740740740740741e-05,
      "loss": 0.2382,
      "step": 770
    },
    {
      "epoch": 2.89,
      "learning_rate": 5.555555555555556e-05,
      "loss": 0.3508,
      "step": 780
    },
    {
      "epoch": 2.93,
      "learning_rate": 5.370370370370371e-05,
      "loss": 0.2794,
      "step": 790
    },
    {
      "epoch": 2.96,
      "learning_rate": 5.185185185185185e-05,
      "loss": 0.3247,
      "step": 800
    },
    {
      "epoch": 3.0,
      "learning_rate": 5e-05,
      "loss": 0.2753,
      "step": 810
    },
    {
      "epoch": 3.04,
      "learning_rate": 4.814814814814815e-05,
      "loss": 0.1453,
      "step": 820
    },
    {
      "epoch": 3.07,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.1666,
      "step": 830
    },
    {
      "epoch": 3.11,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.1369,
      "step": 840
    },
    {
      "epoch": 3.15,
      "learning_rate": 4.259259259259259e-05,
      "loss": 0.1086,
      "step": 850
    },
    {
      "epoch": 3.19,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.0967,
      "step": 860
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.1327,
      "step": 870
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.0848,
      "step": 880
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.518518518518519e-05,
      "loss": 0.1173,
      "step": 890
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.135,
      "step": 900
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.148148148148148e-05,
      "loss": 0.1979,
      "step": 910
    },
    {
      "epoch": 3.41,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.1181,
      "step": 920
    },
    {
      "epoch": 3.44,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.0957,
      "step": 930
    },
    {
      "epoch": 3.48,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.0927,
      "step": 940
    },
    {
      "epoch": 3.52,
      "learning_rate": 2.4074074074074074e-05,
      "loss": 0.094,
      "step": 950
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.1197,
      "step": 960
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.037037037037037e-05,
      "loss": 0.0927,
      "step": 970
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.1523,
      "step": 980
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.2582,
      "step": 990
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.1101,
      "step": 1000
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.2962962962962962e-05,
      "loss": 0.1582,
      "step": 1010
    },
    {
      "epoch": 3.78,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.0504,
      "step": 1020
    },
    {
      "epoch": 3.81,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.0788,
      "step": 1030
    },
    {
      "epoch": 3.85,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.0607,
      "step": 1040
    },
    {
      "epoch": 3.89,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.1061,
      "step": 1050
    },
    {
      "epoch": 3.93,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.0645,
      "step": 1060
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.0971,
      "step": 1070
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0,
      "loss": 0.128,
      "step": 1080
    },
    {
      "epoch": 4.0,
      "step": 1080,
      "total_flos": 1.339145591637934e+18,
      "train_loss": 0.6042599819324634,
      "train_runtime": 412.4773,
      "train_samples_per_second": 41.893,
      "train_steps_per_second": 2.618
    }
  ],
  "logging_steps": 10,
  "max_steps": 1080,
  "num_train_epochs": 4,
  "save_steps": 3000,
  "total_flos": 1.339145591637934e+18,
  "trial_name": null,
  "trial_params": null
}