|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 14358, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.826577517760134e-05, |
|
"loss": 2.376, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.652458559687979e-05, |
|
"loss": 1.9479, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.478339601615824e-05, |
|
"loss": 1.8326, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.304568881459813e-05, |
|
"loss": 1.7996, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1304499233876586e-05, |
|
"loss": 1.7476, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.956330965315504e-05, |
|
"loss": 1.7537, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.782212007243349e-05, |
|
"loss": 1.7133, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.608093049171194e-05, |
|
"loss": 1.6888, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.434322329015183e-05, |
|
"loss": 1.6906, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.6714558601379395, |
|
"eval_runtime": 47.5103, |
|
"eval_samples_per_second": 9.577, |
|
"eval_steps_per_second": 1.2, |
|
"step": 4786 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2602033709430284e-05, |
|
"loss": 1.6771, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.086084412870874e-05, |
|
"loss": 1.6319, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9119654547987185e-05, |
|
"loss": 1.6327, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.737846496726564e-05, |
|
"loss": 1.6368, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.563727538654409e-05, |
|
"loss": 1.6329, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3899568184983983e-05, |
|
"loss": 1.6143, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2158378604262433e-05, |
|
"loss": 1.6044, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0417189023540884e-05, |
|
"loss": 1.6295, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8675999442819337e-05, |
|
"loss": 1.6023, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6934809862097788e-05, |
|
"loss": 1.6021, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.6306262016296387, |
|
"eval_runtime": 43.7183, |
|
"eval_samples_per_second": 10.408, |
|
"eval_steps_per_second": 1.304, |
|
"step": 9572 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5193620281376237e-05, |
|
"loss": 1.5842, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3452430700654689e-05, |
|
"loss": 1.587, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1714723499094582e-05, |
|
"loss": 1.5566, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.973533918373034e-06, |
|
"loss": 1.6022, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.232344337651483e-06, |
|
"loss": 1.5785, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.491154756929935e-06, |
|
"loss": 1.5775, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.753447555369829e-06, |
|
"loss": 1.5684, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.01225797464828e-06, |
|
"loss": 1.5719, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2710683939267307e-06, |
|
"loss": 1.5819, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.6228220462799072, |
|
"eval_runtime": 42.6413, |
|
"eval_samples_per_second": 10.67, |
|
"eval_steps_per_second": 1.337, |
|
"step": 14358 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 14358, |
|
"total_flos": 1.5006523981824e+16, |
|
"train_loss": 1.6758505107466766, |
|
"train_runtime": 8429.8505, |
|
"train_samples_per_second": 3.406, |
|
"train_steps_per_second": 1.703 |
|
} |
|
], |
|
"max_steps": 14358, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.5006523981824e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|