{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 18.0,
  "global_step": 15678,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.57,
      "learning_rate": 9.68108177063401e-05,
      "loss": 2.3589,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.2804882526397705,
      "eval_runtime": 46.5333,
      "eval_samples_per_second": 63.03,
      "eval_steps_per_second": 7.887,
      "step": 871
    },
    {
      "epoch": 1.15,
      "learning_rate": 9.36216354126802e-05,
      "loss": 2.3359,
      "step": 1000
    },
    {
      "epoch": 1.72,
      "learning_rate": 9.043245311902028e-05,
      "loss": 2.2563,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.2500908374786377,
      "eval_runtime": 46.4819,
      "eval_samples_per_second": 63.1,
      "eval_steps_per_second": 7.896,
      "step": 1742
    },
    {
      "epoch": 2.3,
      "learning_rate": 8.724327082536038e-05,
      "loss": 2.2043,
      "step": 2000
    },
    {
      "epoch": 2.87,
      "learning_rate": 8.405408853170048e-05,
      "loss": 2.1936,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.2418746948242188,
      "eval_runtime": 46.4009,
      "eval_samples_per_second": 63.21,
      "eval_steps_per_second": 7.909,
      "step": 2613
    },
    {
      "epoch": 3.44,
      "learning_rate": 8.086490623804057e-05,
      "loss": 2.11,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.230112314224243,
      "eval_runtime": 46.3837,
      "eval_samples_per_second": 63.233,
      "eval_steps_per_second": 7.912,
      "step": 3484
    },
    {
      "epoch": 4.02,
      "learning_rate": 7.767572394438066e-05,
      "loss": 2.1018,
      "step": 3500
    },
    {
      "epoch": 4.59,
      "learning_rate": 7.448654165072075e-05,
      "loss": 2.0311,
      "step": 4000
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.231987476348877,
      "eval_runtime": 46.4154,
      "eval_samples_per_second": 63.19,
      "eval_steps_per_second": 7.907,
      "step": 4355
    },
    {
      "epoch": 5.17,
      "learning_rate": 7.129735935706085e-05,
      "loss": 2.0174,
      "step": 4500
    },
    {
      "epoch": 5.74,
      "learning_rate": 6.810817706340095e-05,
      "loss": 1.969,
      "step": 5000
    },
    {
      "epoch": 6.0,
      "eval_loss": 2.227581262588501,
      "eval_runtime": 46.0273,
      "eval_samples_per_second": 63.723,
      "eval_steps_per_second": 7.974,
      "step": 5226
    },
    {
      "epoch": 6.31,
      "learning_rate": 6.491899476974103e-05,
      "loss": 1.9427,
      "step": 5500
    },
    {
      "epoch": 6.89,
      "learning_rate": 6.172981247608114e-05,
      "loss": 1.9148,
      "step": 6000
    },
    {
      "epoch": 7.0,
      "eval_loss": 2.1621322631835938,
      "eval_runtime": 45.9847,
      "eval_samples_per_second": 63.782,
      "eval_steps_per_second": 7.981,
      "step": 6097
    },
    {
      "epoch": 7.46,
      "learning_rate": 5.854063018242123e-05,
      "loss": 1.8569,
      "step": 6500
    },
    {
      "epoch": 8.0,
      "eval_loss": 2.1876232624053955,
      "eval_runtime": 45.9436,
      "eval_samples_per_second": 63.839,
      "eval_steps_per_second": 7.988,
      "step": 6968
    },
    {
      "epoch": 8.04,
      "learning_rate": 5.535144788876132e-05,
      "loss": 1.8523,
      "step": 7000
    },
    {
      "epoch": 8.61,
      "learning_rate": 5.216226559510142e-05,
      "loss": 1.7978,
      "step": 7500
    },
    {
      "epoch": 9.0,
      "eval_loss": 2.201099395751953,
      "eval_runtime": 45.9305,
      "eval_samples_per_second": 63.857,
      "eval_steps_per_second": 7.99,
      "step": 7839
    },
    {
      "epoch": 9.18,
      "learning_rate": 4.897308330144152e-05,
      "loss": 1.7922,
      "step": 8000
    },
    {
      "epoch": 9.76,
      "learning_rate": 4.578390100778161e-05,
      "loss": 1.7602,
      "step": 8500
    },
    {
      "epoch": 10.0,
      "eval_loss": 2.1279566287994385,
      "eval_runtime": 45.9065,
      "eval_samples_per_second": 63.891,
      "eval_steps_per_second": 7.995,
      "step": 8710
    },
    {
      "epoch": 10.33,
      "learning_rate": 4.2594718714121704e-05,
      "loss": 1.7371,
      "step": 9000
    },
    {
      "epoch": 10.91,
      "learning_rate": 3.9405536420461794e-05,
      "loss": 1.7166,
      "step": 9500
    },
    {
      "epoch": 11.0,
      "eval_loss": 2.1643788814544678,
      "eval_runtime": 45.9375,
      "eval_samples_per_second": 63.848,
      "eval_steps_per_second": 7.989,
      "step": 9581
    },
    {
      "epoch": 11.48,
      "learning_rate": 3.621635412680189e-05,
      "loss": 1.6651,
      "step": 10000
    },
    {
      "epoch": 12.0,
      "eval_loss": 2.1245827674865723,
      "eval_runtime": 45.9431,
      "eval_samples_per_second": 63.84,
      "eval_steps_per_second": 7.988,
      "step": 10452
    },
    {
      "epoch": 12.06,
      "learning_rate": 3.302717183314198e-05,
      "loss": 1.6508,
      "step": 10500
    },
    {
      "epoch": 12.63,
      "learning_rate": 2.9837989539482082e-05,
      "loss": 1.6141,
      "step": 11000
    },
    {
      "epoch": 13.0,
      "eval_loss": 2.126392126083374,
      "eval_runtime": 45.9202,
      "eval_samples_per_second": 63.872,
      "eval_steps_per_second": 7.992,
      "step": 11323
    },
    {
      "epoch": 13.2,
      "learning_rate": 2.6648807245822172e-05,
      "loss": 1.6312,
      "step": 11500
    },
    {
      "epoch": 13.78,
      "learning_rate": 2.345962495216227e-05,
      "loss": 1.5759,
      "step": 12000
    },
    {
      "epoch": 14.0,
      "eval_loss": 2.114293098449707,
      "eval_runtime": 45.9021,
      "eval_samples_per_second": 63.897,
      "eval_steps_per_second": 7.995,
      "step": 12194
    },
    {
      "epoch": 14.35,
      "learning_rate": 2.0270442658502363e-05,
      "loss": 1.572,
      "step": 12500
    },
    {
      "epoch": 14.93,
      "learning_rate": 1.7081260364842456e-05,
      "loss": 1.5478,
      "step": 13000
    },
    {
      "epoch": 15.0,
      "eval_loss": 2.098154306411743,
      "eval_runtime": 45.9315,
      "eval_samples_per_second": 63.856,
      "eval_steps_per_second": 7.99,
      "step": 13065
    },
    {
      "epoch": 15.5,
      "learning_rate": 1.3892078071182548e-05,
      "loss": 1.5311,
      "step": 13500
    },
    {
      "epoch": 16.0,
      "eval_loss": 2.099287509918213,
      "eval_runtime": 45.9245,
      "eval_samples_per_second": 63.866,
      "eval_steps_per_second": 7.991,
      "step": 13936
    },
    {
      "epoch": 16.07,
      "learning_rate": 1.0702895777522644e-05,
      "loss": 1.5187,
      "step": 14000
    },
    {
      "epoch": 16.65,
      "learning_rate": 7.513713483862737e-06,
      "loss": 1.5187,
      "step": 14500
    },
    {
      "epoch": 17.0,
      "eval_loss": 2.0979349613189697,
      "eval_runtime": 45.9022,
      "eval_samples_per_second": 63.897,
      "eval_steps_per_second": 7.995,
      "step": 14807
    },
    {
      "epoch": 17.22,
      "learning_rate": 4.324531190202833e-06,
      "loss": 1.4819,
      "step": 15000
    },
    {
      "epoch": 17.8,
      "learning_rate": 1.1353488965429266e-06,
      "loss": 1.4809,
      "step": 15500
    },
    {
      "epoch": 18.0,
      "eval_loss": 2.0337767601013184,
      "eval_runtime": 45.9061,
      "eval_samples_per_second": 63.891,
      "eval_steps_per_second": 7.995,
      "step": 15678
    },
    {
      "epoch": 18.0,
      "step": 15678,
      "total_flos": 2.6405831822391706e+17,
      "train_loss": 1.8265393278187887,
      "train_runtime": 59534.5244,
      "train_samples_per_second": 16.852,
      "train_steps_per_second": 0.263
    }
  ],
  "max_steps": 15678,
  "num_train_epochs": 18,
  "total_flos": 2.6405831822391706e+17,
  "trial_name": null,
  "trial_params": null
}