{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.2067952933391237,
  "global_step": 700000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 6.893176444637453e-06,
      "loss": 11.9733,
      "step": 10000
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.3786352889274907e-05,
      "loss": 1.1347,
      "step": 20000
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.067952933391236e-05,
      "loss": 0.9519,
      "step": 30000
    },
    {
      "epoch": 0.1,
      "learning_rate": 2.7572705778549813e-05,
      "loss": 0.8738,
      "step": 40000
    },
    {
      "epoch": 0.16,
      "learning_rate": 3.446588222318727e-05,
      "loss": 0.8298,
      "step": 50000
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.135905866782472e-05,
      "loss": 0.7905,
      "step": 60000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.825223511246217e-05,
      "loss": 0.6585,
      "step": 70000
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.5145411557099626e-05,
      "loss": 0.5349,
      "step": 80000
    },
    {
      "epoch": 0.1,
      "learning_rate": 6.203858800173708e-05,
      "loss": 0.4895,
      "step": 90000
    },
    {
      "epoch": 0.16,
      "learning_rate": 6.893176444637454e-05,
      "loss": 0.4541,
      "step": 100000
    },
    {
      "epoch": 0.21,
      "learning_rate": 7.582494089101199e-05,
      "loss": 0.4319,
      "step": 110000
    },
    {
      "epoch": 0.26,
      "learning_rate": 8.271811733564945e-05,
      "loss": 0.4165,
      "step": 120000
    },
    {
      "epoch": 0.05,
      "learning_rate": 8.96112937802869e-05,
      "loss": 0.4059,
      "step": 130000
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.650447022492434e-05,
      "loss": 0.3957,
      "step": 140000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.982117649107569e-05,
      "loss": 0.388,
      "step": 150000
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.945837773083162e-05,
      "loss": 0.3787,
      "step": 160000
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.909557897058755e-05,
      "loss": 0.3705,
      "step": 170000
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.873278021034346e-05,
      "loss": 0.3678,
      "step": 180000
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.836998145009939e-05,
      "loss": 0.3595,
      "step": 190000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.800718268985532e-05,
      "loss": 0.3543,
      "step": 200000
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.764438392961124e-05,
      "loss": 0.3495,
      "step": 210000
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.728158516936716e-05,
      "loss": 0.3452,
      "step": 220000
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.691878640912308e-05,
      "loss": 0.3419,
      "step": 230000
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.655598764887901e-05,
      "loss": 0.3365,
      "step": 240000
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.619318888863494e-05,
      "loss": 0.3336,
      "step": 250000
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.583039012839085e-05,
      "loss": 0.331,
      "step": 260000
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.546759136814678e-05,
      "loss": 0.3282,
      "step": 270000
    },
    {
      "epoch": 1.16,
      "learning_rate": 9.510479260790271e-05,
      "loss": 0.3263,
      "step": 280000
    },
    {
      "epoch": 1.21,
      "learning_rate": 9.474199384765862e-05,
      "loss": 0.3234,
      "step": 290000
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.437919508741455e-05,
      "loss": 0.3213,
      "step": 300000
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.401639632717048e-05,
      "loss": 0.3173,
      "step": 310000
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.36535975669264e-05,
      "loss": 0.3166,
      "step": 320000
    },
    {
      "epoch": 1.16,
      "learning_rate": 9.329079880668232e-05,
      "loss": 0.3137,
      "step": 330000
    },
    {
      "epoch": 1.21,
      "learning_rate": 9.292800004643825e-05,
      "loss": 0.3129,
      "step": 340000
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.256520128619417e-05,
      "loss": 0.3125,
      "step": 350000
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.22024025259501e-05,
      "loss": 0.3079,
      "step": 360000
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.183960376570602e-05,
      "loss": 0.3074,
      "step": 370000
    },
    {
      "epoch": 1.16,
      "learning_rate": 9.147680500546194e-05,
      "loss": 0.3062,
      "step": 380000
    },
    {
      "epoch": 1.21,
      "learning_rate": 9.111400624521787e-05,
      "loss": 0.3052,
      "step": 390000
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.07512074849738e-05,
      "loss": 0.3037,
      "step": 400000
    },
    {
      "epoch": 2.05,
      "learning_rate": 9.038840872472971e-05,
      "loss": 0.3003,
      "step": 410000
    },
    {
      "epoch": 2.1,
      "learning_rate": 9.002560996448564e-05,
      "loss": 0.3007,
      "step": 420000
    },
    {
      "epoch": 2.16,
      "learning_rate": 8.966281120424155e-05,
      "loss": 0.2991,
      "step": 430000
    },
    {
      "epoch": 2.21,
      "learning_rate": 8.930001244399748e-05,
      "loss": 0.2972,
      "step": 440000
    },
    {
      "epoch": 2.26,
      "learning_rate": 8.893721368375341e-05,
      "loss": 0.2974,
      "step": 450000
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.857441492350932e-05,
      "loss": 0.2938,
      "step": 460000
    },
    {
      "epoch": 2.1,
      "learning_rate": 8.821161616326525e-05,
      "loss": 0.2921,
      "step": 470000
    },
    {
      "epoch": 2.16,
      "learning_rate": 8.784881740302118e-05,
      "loss": 0.2932,
      "step": 480000
    },
    {
      "epoch": 2.21,
      "learning_rate": 8.74860186427771e-05,
      "loss": 0.2914,
      "step": 490000
    },
    {
      "epoch": 2.26,
      "learning_rate": 8.712321988253302e-05,
      "loss": 0.292,
      "step": 500000
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.676042112228895e-05,
      "loss": 0.2871,
      "step": 510000
    },
    {
      "epoch": 2.1,
      "learning_rate": 8.639762236204487e-05,
      "loss": 0.2884,
      "step": 520000
    },
    {
      "epoch": 2.16,
      "learning_rate": 8.60348236018008e-05,
      "loss": 0.2874,
      "step": 530000
    },
    {
      "epoch": 2.21,
      "learning_rate": 8.567202484155672e-05,
      "loss": 0.2859,
      "step": 540000
    },
    {
      "epoch": 2.26,
      "learning_rate": 8.530922608131264e-05,
      "loss": 0.2867,
      "step": 550000
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.494642732106857e-05,
      "loss": 0.2828,
      "step": 560000
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.45836285608245e-05,
      "loss": 0.2829,
      "step": 570000
    },
    {
      "epoch": 2.1,
      "learning_rate": 8.422082980058041e-05,
      "loss": 0.2817,
      "step": 580000
    },
    {
      "epoch": 2.16,
      "learning_rate": 8.385803104033634e-05,
      "loss": 0.281,
      "step": 590000
    },
    {
      "epoch": 2.21,
      "learning_rate": 8.349523228009227e-05,
      "loss": 0.2819,
      "step": 600000
    },
    {
      "epoch": 2.26,
      "learning_rate": 8.313243351984818e-05,
      "loss": 0.2803,
      "step": 610000
    },
    {
      "epoch": 3.05,
      "learning_rate": 8.276963475960411e-05,
      "loss": 0.2782,
      "step": 620000
    },
    {
      "epoch": 3.1,
      "learning_rate": 8.240683599936003e-05,
      "loss": 0.2779,
      "step": 630000
    },
    {
      "epoch": 3.16,
      "learning_rate": 8.204403723911594e-05,
      "loss": 0.2793,
      "step": 640000
    },
    {
      "epoch": 3.21,
      "learning_rate": 8.168123847887187e-05,
      "loss": 0.2774,
      "step": 650000
    },
    {
      "epoch": 3.26,
      "learning_rate": 8.13184397186278e-05,
      "loss": 0.2765,
      "step": 660000
    },
    {
      "epoch": 3.05,
      "learning_rate": 8.095564095838371e-05,
      "loss": 0.2738,
      "step": 670000
    },
    {
      "epoch": 3.1,
      "learning_rate": 8.059284219813964e-05,
      "loss": 0.2742,
      "step": 680000
    },
    {
      "epoch": 3.16,
      "learning_rate": 8.023004343789557e-05,
      "loss": 0.2746,
      "step": 690000
    },
    {
      "epoch": 3.21,
      "learning_rate": 7.986724467765148e-05,
      "loss": 0.274,
      "step": 700000
    }
  ],
  "max_steps": 2901420,
  "num_train_epochs": 15,
  "total_flos": 1.6750927872e+18,
  "trial_name": null,
  "trial_params": null
}