{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 68.0,
  "eval_steps": 500,
  "global_step": 3094,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.79,
      "learning_rate": 1.5555555555555555e-05,
      "loss": 2.3054,
      "step": 36
    },
    {
      "epoch": 1.58,
      "learning_rate": 3.155555555555556e-05,
      "loss": 2.1949,
      "step": 72
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.755555555555556e-05,
      "loss": 1.978,
      "step": 108
    },
    {
      "epoch": 3.16,
      "learning_rate": 6.355555555555556e-05,
      "loss": 1.9161,
      "step": 144
    },
    {
      "epoch": 3.96,
      "learning_rate": 7.955555555555556e-05,
      "loss": 1.8945,
      "step": 180
    },
    {
      "epoch": 4.75,
      "learning_rate": 9.555555555555557e-05,
      "loss": 1.8682,
      "step": 216
    },
    {
      "epoch": 5.54,
      "learning_rate": 0.00011155555555555556,
      "loss": 1.8492,
      "step": 252
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.00012755555555555556,
      "loss": 1.8337,
      "step": 288
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.0001431111111111111,
      "loss": 1.8144,
      "step": 324
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.00015911111111111112,
      "loss": 1.7907,
      "step": 360
    },
    {
      "epoch": 8.7,
      "learning_rate": 0.00017511111111111113,
      "loss": 1.7382,
      "step": 396
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.00019111111111111114,
      "loss": 1.6991,
      "step": 432
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.00019925925925925927,
      "loss": 1.6405,
      "step": 468
    },
    {
      "epoch": 11.08,
      "learning_rate": 0.00019748148148148148,
      "loss": 1.5929,
      "step": 504
    },
    {
      "epoch": 11.87,
      "learning_rate": 0.0001957037037037037,
      "loss": 1.5414,
      "step": 540
    },
    {
      "epoch": 12.66,
      "learning_rate": 0.00019392592592592592,
      "loss": 1.447,
      "step": 576
    },
    {
      "epoch": 13.45,
      "learning_rate": 0.00019214814814814816,
      "loss": 1.3947,
      "step": 612
    },
    {
      "epoch": 14.24,
      "learning_rate": 0.00019037037037037037,
      "loss": 1.3655,
      "step": 648
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.0001885925925925926,
      "loss": 1.2873,
      "step": 684
    },
    {
      "epoch": 15.82,
      "learning_rate": 0.0001868148148148148,
      "loss": 1.2198,
      "step": 720
    },
    {
      "epoch": 16.62,
      "learning_rate": 0.00018503703703703705,
      "loss": 1.1512,
      "step": 756
    },
    {
      "epoch": 17.41,
      "learning_rate": 0.00018325925925925926,
      "loss": 1.1286,
      "step": 792
    },
    {
      "epoch": 18.2,
      "learning_rate": 0.0001814814814814815,
      "loss": 1.0709,
      "step": 828
    },
    {
      "epoch": 18.99,
      "learning_rate": 0.0001797037037037037,
      "loss": 1.0442,
      "step": 864
    },
    {
      "epoch": 19.78,
      "learning_rate": 0.00017792592592592594,
      "loss": 0.971,
      "step": 900
    },
    {
      "epoch": 20.57,
      "learning_rate": 0.00017614814814814815,
      "loss": 0.9412,
      "step": 936
    },
    {
      "epoch": 21.36,
      "learning_rate": 0.00017437037037037039,
      "loss": 0.9084,
      "step": 972
    },
    {
      "epoch": 22.15,
      "learning_rate": 0.0001725925925925926,
      "loss": 0.885,
      "step": 1008
    },
    {
      "epoch": 22.95,
      "learning_rate": 0.00017081481481481483,
      "loss": 0.844,
      "step": 1044
    },
    {
      "epoch": 23.74,
      "learning_rate": 0.00016903703703703704,
      "loss": 0.7975,
      "step": 1080
    },
    {
      "epoch": 24.53,
      "learning_rate": 0.00016725925925925928,
      "loss": 0.7786,
      "step": 1116
    },
    {
      "epoch": 25.32,
      "learning_rate": 0.00016548148148148149,
      "loss": 0.7465,
      "step": 1152
    },
    {
      "epoch": 26.11,
      "learning_rate": 0.00016370370370370372,
      "loss": 0.7311,
      "step": 1188
    },
    {
      "epoch": 26.9,
      "learning_rate": 0.00016192592592592593,
      "loss": 0.6955,
      "step": 1224
    },
    {
      "epoch": 27.69,
      "learning_rate": 0.00016014814814814817,
      "loss": 0.6656,
      "step": 1260
    },
    {
      "epoch": 28.48,
      "learning_rate": 0.00015837037037037038,
      "loss": 0.6507,
      "step": 1296
    },
    {
      "epoch": 29.27,
      "learning_rate": 0.0001565925925925926,
      "loss": 0.6336,
      "step": 1332
    },
    {
      "epoch": 30.07,
      "learning_rate": 0.00015481481481481482,
      "loss": 0.6142,
      "step": 1368
    },
    {
      "epoch": 30.86,
      "learning_rate": 0.00015303703703703706,
      "loss": 0.5865,
      "step": 1404
    },
    {
      "epoch": 31.65,
      "learning_rate": 0.00015125925925925927,
      "loss": 0.5649,
      "step": 1440
    },
    {
      "epoch": 32.44,
      "learning_rate": 0.00014948148148148148,
      "loss": 0.5511,
      "step": 1476
    },
    {
      "epoch": 33.23,
      "learning_rate": 0.0001477037037037037,
      "loss": 0.5329,
      "step": 1512
    },
    {
      "epoch": 34.02,
      "learning_rate": 0.00014592592592592592,
      "loss": 0.5265,
      "step": 1548
    },
    {
      "epoch": 34.81,
      "learning_rate": 0.00014414814814814816,
      "loss": 0.4931,
      "step": 1584
    },
    {
      "epoch": 35.6,
      "learning_rate": 0.00014237037037037037,
      "loss": 0.4904,
      "step": 1620
    },
    {
      "epoch": 36.4,
      "learning_rate": 0.0001405925925925926,
      "loss": 0.466,
      "step": 1656
    },
    {
      "epoch": 37.19,
      "learning_rate": 0.0001388148148148148,
      "loss": 0.4585,
      "step": 1692
    },
    {
      "epoch": 37.98,
      "learning_rate": 0.00013703703703703705,
      "loss": 0.4545,
      "step": 1728
    },
    {
      "epoch": 38.77,
      "learning_rate": 0.00013525925925925926,
      "loss": 0.4244,
      "step": 1764
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.0001334814814814815,
      "loss": 0.4143,
      "step": 1800
    },
    {
      "epoch": 40.35,
      "learning_rate": 0.0001317037037037037,
      "loss": 0.4118,
      "step": 1836
    },
    {
      "epoch": 41.14,
      "learning_rate": 0.00012992592592592594,
      "loss": 0.3963,
      "step": 1872
    },
    {
      "epoch": 41.93,
      "learning_rate": 0.00012814814814814815,
      "loss": 0.3901,
      "step": 1908
    },
    {
      "epoch": 42.73,
      "learning_rate": 0.00012637037037037038,
      "loss": 0.3697,
      "step": 1944
    },
    {
      "epoch": 43.52,
      "learning_rate": 0.0001245925925925926,
      "loss": 0.3595,
      "step": 1980
    },
    {
      "epoch": 44.31,
      "learning_rate": 0.00012281481481481483,
      "loss": 0.3609,
      "step": 2016
    },
    {
      "epoch": 45.1,
      "learning_rate": 0.00012103703703703704,
      "loss": 0.3457,
      "step": 2052
    },
    {
      "epoch": 45.89,
      "learning_rate": 0.00011925925925925927,
      "loss": 0.3342,
      "step": 2088
    },
    {
      "epoch": 46.68,
      "learning_rate": 0.00011748148148148148,
      "loss": 0.3213,
      "step": 2124
    },
    {
      "epoch": 47.47,
      "learning_rate": 0.00011570370370370372,
      "loss": 0.3183,
      "step": 2160
    },
    {
      "epoch": 48.26,
      "learning_rate": 0.00011392592592592593,
      "loss": 0.3067,
      "step": 2196
    },
    {
      "epoch": 49.05,
      "learning_rate": 0.00011214814814814815,
      "loss": 0.3062,
      "step": 2232
    },
    {
      "epoch": 49.85,
      "learning_rate": 0.00011037037037037037,
      "loss": 0.291,
      "step": 2268
    },
    {
      "epoch": 50.64,
      "learning_rate": 0.0001085925925925926,
      "loss": 0.2837,
      "step": 2304
    },
    {
      "epoch": 51.43,
      "learning_rate": 0.0001068148148148148,
      "loss": 0.2768,
      "step": 2340
    },
    {
      "epoch": 52.22,
      "learning_rate": 0.00010503703703703704,
      "loss": 0.2733,
      "step": 2376
    },
    {
      "epoch": 53.01,
      "learning_rate": 0.00010325925925925925,
      "loss": 0.2622,
      "step": 2412
    },
    {
      "epoch": 53.8,
      "learning_rate": 0.00010148148148148149,
      "loss": 0.254,
      "step": 2448
    },
    {
      "epoch": 54.59,
      "learning_rate": 9.970370370370371e-05,
      "loss": 0.247,
      "step": 2484
    },
    {
      "epoch": 55.38,
      "learning_rate": 9.792592592592593e-05,
      "loss": 0.2415,
      "step": 2520
    },
    {
      "epoch": 56.18,
      "learning_rate": 9.614814814814816e-05,
      "loss": 0.2391,
      "step": 2556
    },
    {
      "epoch": 56.97,
      "learning_rate": 9.437037037037038e-05,
      "loss": 0.2318,
      "step": 2592
    },
    {
      "epoch": 57.76,
      "learning_rate": 9.25925925925926e-05,
      "loss": 0.221,
      "step": 2628
    },
    {
      "epoch": 58.55,
      "learning_rate": 9.081481481481482e-05,
      "loss": 0.2179,
      "step": 2664
    },
    {
      "epoch": 59.34,
      "learning_rate": 8.903703703703705e-05,
      "loss": 0.2145,
      "step": 2700
    },
    {
      "epoch": 60.13,
      "learning_rate": 8.725925925925927e-05,
      "loss": 0.209,
      "step": 2736
    },
    {
      "epoch": 60.92,
      "learning_rate": 8.548148148148148e-05,
      "loss": 0.205,
      "step": 2772
    },
    {
      "epoch": 61.71,
      "learning_rate": 8.37037037037037e-05,
      "loss": 0.197,
      "step": 2808
    },
    {
      "epoch": 62.51,
      "learning_rate": 8.192592592592592e-05,
      "loss": 0.1948,
      "step": 2844
    },
    {
      "epoch": 63.3,
      "learning_rate": 8.014814814814815e-05,
      "loss": 0.188,
      "step": 2880
    },
    {
      "epoch": 64.09,
      "learning_rate": 7.837037037037037e-05,
      "loss": 0.188,
      "step": 2916
    },
    {
      "epoch": 64.88,
      "learning_rate": 7.659259259259259e-05,
      "loss": 0.1785,
      "step": 2952
    },
    {
      "epoch": 65.67,
      "learning_rate": 7.481481481481481e-05,
      "loss": 0.1758,
      "step": 2988
    },
    {
      "epoch": 66.46,
      "learning_rate": 7.303703703703704e-05,
      "loss": 0.1703,
      "step": 3024
    },
    {
      "epoch": 67.25,
      "learning_rate": 7.125925925925926e-05,
      "loss": 0.1702,
      "step": 3060
    }
  ],
  "logging_steps": 36,
  "max_steps": 4500,
  "num_train_epochs": 100,
  "save_steps": 500,
  "total_flos": 2.6941037663276237e+18,
  "trial_name": null,
  "trial_params": null
}