|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 23.998851894374283, |
|
"global_step": 10440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.075101852416992, |
|
"eval_runtime": 45.9619, |
|
"eval_samples_per_second": 63.814, |
|
"eval_steps_per_second": 7.985, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.760536398467433e-05, |
|
"loss": 2.1982, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.046468496322632, |
|
"eval_runtime": 45.9297, |
|
"eval_samples_per_second": 63.859, |
|
"eval_steps_per_second": 7.99, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.5210727969348656e-05, |
|
"loss": 2.0841, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.042027235031128, |
|
"eval_runtime": 45.9311, |
|
"eval_samples_per_second": 63.857, |
|
"eval_steps_per_second": 7.99, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.2816091954022994e-05, |
|
"loss": 2.0374, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.032456398010254, |
|
"eval_runtime": 45.8552, |
|
"eval_samples_per_second": 63.962, |
|
"eval_steps_per_second": 8.003, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 4.0421455938697324e-05, |
|
"loss": 1.9731, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.007476806640625, |
|
"eval_runtime": 45.8552, |
|
"eval_samples_per_second": 63.962, |
|
"eval_steps_per_second": 8.003, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 3.802681992337165e-05, |
|
"loss": 1.9248, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.021885871887207, |
|
"eval_runtime": 45.9257, |
|
"eval_samples_per_second": 63.864, |
|
"eval_steps_per_second": 7.991, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.563218390804598e-05, |
|
"loss": 1.8848, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.9770045280456543, |
|
"eval_runtime": 45.8935, |
|
"eval_samples_per_second": 63.909, |
|
"eval_steps_per_second": 7.997, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.0093371868133545, |
|
"eval_runtime": 45.9038, |
|
"eval_samples_per_second": 63.895, |
|
"eval_steps_per_second": 7.995, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 3.323754789272031e-05, |
|
"loss": 1.8419, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.0297892093658447, |
|
"eval_runtime": 45.9137, |
|
"eval_samples_per_second": 63.881, |
|
"eval_steps_per_second": 7.993, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 3.084291187739464e-05, |
|
"loss": 1.804, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.9681042432785034, |
|
"eval_runtime": 45.9244, |
|
"eval_samples_per_second": 63.866, |
|
"eval_steps_per_second": 7.991, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 2.844827586206897e-05, |
|
"loss": 1.7817, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.9937865734100342, |
|
"eval_runtime": 45.8978, |
|
"eval_samples_per_second": 63.903, |
|
"eval_steps_per_second": 7.996, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 2.6053639846743293e-05, |
|
"loss": 1.7472, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.9653546810150146, |
|
"eval_runtime": 45.9667, |
|
"eval_samples_per_second": 63.807, |
|
"eval_steps_per_second": 7.984, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 2.3659003831417627e-05, |
|
"loss": 1.7075, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.9797016382217407, |
|
"eval_runtime": 45.9403, |
|
"eval_samples_per_second": 63.844, |
|
"eval_steps_per_second": 7.989, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 2.1264367816091954e-05, |
|
"loss": 1.6976, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.9690951108932495, |
|
"eval_runtime": 45.9506, |
|
"eval_samples_per_second": 63.829, |
|
"eval_steps_per_second": 7.987, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 1.8869731800766285e-05, |
|
"loss": 1.6748, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.9567583799362183, |
|
"eval_runtime": 46.1194, |
|
"eval_samples_per_second": 63.596, |
|
"eval_steps_per_second": 7.958, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.9617565870285034, |
|
"eval_runtime": 46.1884, |
|
"eval_samples_per_second": 63.501, |
|
"eval_steps_per_second": 7.946, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 1.6475095785440615e-05, |
|
"loss": 1.6528, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.9842615127563477, |
|
"eval_runtime": 45.9636, |
|
"eval_samples_per_second": 63.811, |
|
"eval_steps_per_second": 7.985, |
|
"step": 7395 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 1.4080459770114942e-05, |
|
"loss": 1.6335, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.926523208618164, |
|
"eval_runtime": 45.9116, |
|
"eval_samples_per_second": 63.884, |
|
"eval_steps_per_second": 7.994, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"learning_rate": 1.1685823754789272e-05, |
|
"loss": 1.6179, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.959792137145996, |
|
"eval_runtime": 45.9236, |
|
"eval_samples_per_second": 63.867, |
|
"eval_steps_per_second": 7.992, |
|
"step": 8265 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 9.291187739463603e-06, |
|
"loss": 1.5992, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.933074951171875, |
|
"eval_runtime": 45.9682, |
|
"eval_samples_per_second": 63.805, |
|
"eval_steps_per_second": 7.984, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 1.583, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.9795454740524292, |
|
"eval_runtime": 45.9526, |
|
"eval_samples_per_second": 63.827, |
|
"eval_steps_per_second": 7.986, |
|
"step": 9135 |
|
}, |
|
{ |
|
"epoch": 21.84, |
|
"learning_rate": 4.50191570881226e-06, |
|
"loss": 1.5699, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 2.007305860519409, |
|
"eval_runtime": 45.9532, |
|
"eval_samples_per_second": 63.826, |
|
"eval_steps_per_second": 7.986, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 2.1072796934865904e-06, |
|
"loss": 1.5703, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.9308319091796875, |
|
"eval_runtime": 45.9891, |
|
"eval_samples_per_second": 63.776, |
|
"eval_steps_per_second": 7.98, |
|
"step": 10005 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.9284570217132568, |
|
"eval_runtime": 45.932, |
|
"eval_samples_per_second": 63.855, |
|
"eval_steps_per_second": 7.99, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"step": 10440, |
|
"total_flos": 3.52063018239615e+17, |
|
"train_loss": 1.7702036539713542, |
|
"train_runtime": 78929.442, |
|
"train_samples_per_second": 16.948, |
|
"train_steps_per_second": 0.132 |
|
} |
|
], |
|
"max_steps": 10440, |
|
"num_train_epochs": 24, |
|
"total_flos": 3.52063018239615e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|