|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.999384615384617, |
|
"global_step": 8120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.3488372266292572, |
|
"eval_mse": 0.3488372266292572, |
|
"eval_runtime": 3.205, |
|
"eval_samples_per_second": 156.005, |
|
"eval_steps_per_second": 19.657, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1989617996694624e-05, |
|
"loss": 0.8208, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.3489202857017517, |
|
"eval_mse": 0.3489202857017517, |
|
"eval_runtime": 3.0732, |
|
"eval_samples_per_second": 162.695, |
|
"eval_steps_per_second": 20.5, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.1202897655704163e-05, |
|
"loss": 0.1606, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.3954257369041443, |
|
"eval_mse": 0.3954257369041443, |
|
"eval_runtime": 3.1411, |
|
"eval_samples_per_second": 159.182, |
|
"eval_steps_per_second": 20.057, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.0416177314713701e-05, |
|
"loss": 0.1174, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.22338131070137024, |
|
"eval_mse": 0.22338134050369263, |
|
"eval_runtime": 3.2318, |
|
"eval_samples_per_second": 154.71, |
|
"eval_steps_per_second": 19.493, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 9.62945697372324e-06, |
|
"loss": 0.0932, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.22499865293502808, |
|
"eval_mse": 0.22499865293502808, |
|
"eval_runtime": 3.0935, |
|
"eval_samples_per_second": 161.631, |
|
"eval_steps_per_second": 20.366, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.24710388481616974, |
|
"eval_mse": 0.24710386991500854, |
|
"eval_runtime": 3.1479, |
|
"eval_samples_per_second": 158.836, |
|
"eval_steps_per_second": 20.013, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 8.84273663273278e-06, |
|
"loss": 0.0729, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.16448178887367249, |
|
"eval_mse": 0.16448178887367249, |
|
"eval_runtime": 3.1325, |
|
"eval_samples_per_second": 159.618, |
|
"eval_steps_per_second": 20.112, |
|
"step": 2842 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 8.056016291742319e-06, |
|
"loss": 0.0628, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.16940604150295258, |
|
"eval_mse": 0.16940604150295258, |
|
"eval_runtime": 3.1022, |
|
"eval_samples_per_second": 161.174, |
|
"eval_steps_per_second": 20.308, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.2692959507518586e-06, |
|
"loss": 0.0542, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.13145290315151215, |
|
"eval_mse": 0.13145291805267334, |
|
"eval_runtime": 3.1898, |
|
"eval_samples_per_second": 156.749, |
|
"eval_steps_per_second": 19.75, |
|
"step": 3654 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 6.482575609761398e-06, |
|
"loss": 0.0477, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.16126562654972076, |
|
"eval_mse": 0.16126562654972076, |
|
"eval_runtime": 3.1805, |
|
"eval_samples_per_second": 157.209, |
|
"eval_steps_per_second": 19.808, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.13891386985778809, |
|
"eval_mse": 0.13891386985778809, |
|
"eval_runtime": 3.0894, |
|
"eval_samples_per_second": 161.843, |
|
"eval_steps_per_second": 20.392, |
|
"step": 4466 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 5.695855268770936e-06, |
|
"loss": 0.0416, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.11093080043792725, |
|
"eval_mse": 0.11093080043792725, |
|
"eval_runtime": 3.178, |
|
"eval_samples_per_second": 157.331, |
|
"eval_steps_per_second": 19.824, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 4.909134927780475e-06, |
|
"loss": 0.0369, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.15438133478164673, |
|
"eval_mse": 0.15438130497932434, |
|
"eval_runtime": 3.1682, |
|
"eval_samples_per_second": 157.816, |
|
"eval_steps_per_second": 19.885, |
|
"step": 5278 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 4.122414586790015e-06, |
|
"loss": 0.0337, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.11555473506450653, |
|
"eval_mse": 0.11555473506450653, |
|
"eval_runtime": 3.1267, |
|
"eval_samples_per_second": 159.911, |
|
"eval_steps_per_second": 20.149, |
|
"step": 5684 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 3.335694245799554e-06, |
|
"loss": 0.0299, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.14125587046146393, |
|
"eval_mse": 0.14125585556030273, |
|
"eval_runtime": 3.1173, |
|
"eval_samples_per_second": 160.395, |
|
"eval_steps_per_second": 20.21, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.12824614346027374, |
|
"eval_mse": 0.12824612855911255, |
|
"eval_runtime": 3.141, |
|
"eval_samples_per_second": 159.185, |
|
"eval_steps_per_second": 20.057, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 2.5489739048090933e-06, |
|
"loss": 0.0288, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.11662615835666656, |
|
"eval_mse": 0.11662616580724716, |
|
"eval_runtime": 3.2355, |
|
"eval_samples_per_second": 154.536, |
|
"eval_steps_per_second": 19.471, |
|
"step": 6902 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 1.7622535638186322e-06, |
|
"loss": 0.0263, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.12519659101963043, |
|
"eval_mse": 0.12519659101963043, |
|
"eval_runtime": 3.1453, |
|
"eval_samples_per_second": 158.968, |
|
"eval_steps_per_second": 20.03, |
|
"step": 7308 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"learning_rate": 9.755332228281715e-07, |
|
"loss": 0.0252, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.13828542828559875, |
|
"eval_mse": 0.13828542828559875, |
|
"eval_runtime": 3.121, |
|
"eval_samples_per_second": 160.203, |
|
"eval_steps_per_second": 20.186, |
|
"step": 7714 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 1.8881288183771059e-07, |
|
"loss": 0.0236, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.11976799368858337, |
|
"eval_mse": 0.11976799368858337, |
|
"eval_runtime": 3.0971, |
|
"eval_samples_per_second": 161.439, |
|
"eval_steps_per_second": 20.341, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 8120, |
|
"total_flos": 8550922763068416.0, |
|
"train_loss": 0.10351273032832028, |
|
"train_runtime": 5208.5032, |
|
"train_samples_per_second": 24.959, |
|
"train_steps_per_second": 1.559 |
|
} |
|
], |
|
"max_steps": 8120, |
|
"num_train_epochs": 20, |
|
"total_flos": 8550922763068416.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|