{
  "best_metric": 3.4699020385742188,
  "best_model_checkpoint": "output/logic/checkpoint-166",
  "epoch": 1.0,
  "global_step": 166,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.00013689310231466715,
      "loss": 4.1499,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001359751552117083,
      "loss": 3.8329,
      "step": 10
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0001344543719809531,
      "loss": 3.9919,
      "step": 15
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00013234435976098498,
      "loss": 3.6771,
      "step": 20
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00012966399778988942,
      "loss": 3.7582,
      "step": 25
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00012643726848413238,
      "loss": 3.8118,
      "step": 30
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001226930428569831,
      "loss": 3.6305,
      "step": 35
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00011846482219643966,
      "loss": 3.8223,
      "step": 40
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00011379043831398306,
      "loss": 3.7137,
      "step": 45
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00010871171504616964,
      "loss": 3.8248,
      "step": 50
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00010327409403776145,
      "loss": 3.5061,
      "step": 55
    },
    {
      "epoch": 0.36,
      "learning_rate": 9.752622815468262e-05,
      "loss": 3.5028,
      "step": 60
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.151954616472095e-05,
      "loss": 3.599,
      "step": 65
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.530779258097404e-05,
      "loss": 3.5615,
      "step": 70
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.894654678527026e-05,
      "loss": 3.7261,
      "step": 75
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.249272573418489e-05,
      "loss": 3.6186,
      "step": 80
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.600407469716782e-05,
      "loss": 3.6312,
      "step": 85
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.9538650583379234e-05,
      "loss": 3.4797,
      "step": 90
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.315430248014319e-05,
      "loss": 3.515,
      "step": 95
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.6908154050877874e-05,
      "loss": 3.7826,
      "step": 100
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.0856092423722846e-05,
      "loss": 3.3625,
      "step": 105
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.505226814400944e-05,
      "loss": 3.5743,
      "step": 110
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.954861066472669e-05,
      "loss": 3.6889,
      "step": 115
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.4394363710109435e-05,
      "loss": 3.5212,
      "step": 120
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.963564466966226e-05,
      "loss": 3.5788,
      "step": 125
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.5315031964920546e-05,
      "loss": 3.3559,
      "step": 130
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.1471184080966003e-05,
      "loss": 3.4134,
      "step": 135
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.138493671395088e-06,
      "loss": 3.6262,
      "step": 140
    },
    {
      "epoch": 0.87,
      "learning_rate": 5.34677983161971e-06,
      "loss": 3.577,
      "step": 145
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.121021293871321e-06,
      "loss": 3.3871,
      "step": 150
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.4811329311337175e-06,
      "loss": 3.5072,
      "step": 155
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.4178756972551454e-07,
      "loss": 3.5187,
      "step": 160
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.2284704855989247e-08,
      "loss": 3.5082,
      "step": 165
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.4699020385742188,
      "eval_runtime": 10.9862,
      "eval_samples_per_second": 22.847,
      "eval_steps_per_second": 2.913,
      "step": 166
    }
  ],
  "max_steps": 166,
  "num_train_epochs": 1,
  "total_flos": 172844679168000.0,
  "trial_name": null,
  "trial_params": null
}