|
{ |
|
"best_metric": 0.031336501240730286, |
|
"best_model_checkpoint": "logs/google-bert/bert-large-cased_overall-coherence/checkpoint-772", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 1351, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.359694242477417, |
|
"learning_rate": 1.901554404145078e-05, |
|
"loss": 0.2918, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_MAE": 0.1806364357471466, |
|
"eval_R2": -0.5334044478520719, |
|
"eval_RMSE": 0.24513159692287445, |
|
"eval_loss": 0.06008950248360634, |
|
"eval_runtime": 14.0168, |
|
"eval_samples_per_second": 205.539, |
|
"eval_steps_per_second": 3.496, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.046112060546875, |
|
"learning_rate": 1.8015544041450778e-05, |
|
"loss": 0.0417, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_MAE": 0.1616782695055008, |
|
"eval_R2": -0.20231869056611274, |
|
"eval_RMSE": 0.21706047654151917, |
|
"eval_loss": 0.047115251421928406, |
|
"eval_runtime": 14.1157, |
|
"eval_samples_per_second": 204.099, |
|
"eval_steps_per_second": 3.471, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.841380596160889, |
|
"learning_rate": 1.7015544041450777e-05, |
|
"loss": 0.0358, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_MAE": 0.14208842813968658, |
|
"eval_R2": 0.06907855860957834, |
|
"eval_RMSE": 0.1909974068403244, |
|
"eval_loss": 0.03648001328110695, |
|
"eval_runtime": 14.1019, |
|
"eval_samples_per_second": 204.299, |
|
"eval_steps_per_second": 3.475, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.935973882675171, |
|
"learning_rate": 1.601554404145078e-05, |
|
"loss": 0.0298, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_MAE": 0.13905900716781616, |
|
"eval_R2": 0.20033410036457755, |
|
"eval_RMSE": 0.17702119052410126, |
|
"eval_loss": 0.031336501240730286, |
|
"eval_runtime": 14.1051, |
|
"eval_samples_per_second": 204.253, |
|
"eval_steps_per_second": 3.474, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.4018096923828125, |
|
"learning_rate": 1.5015544041450778e-05, |
|
"loss": 0.0259, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_MAE": 0.13839736580848694, |
|
"eval_R2": 0.09617620841026953, |
|
"eval_RMSE": 0.188197061419487, |
|
"eval_loss": 0.035418134182691574, |
|
"eval_runtime": 14.0324, |
|
"eval_samples_per_second": 205.311, |
|
"eval_steps_per_second": 3.492, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.8566466569900513, |
|
"learning_rate": 1.4015544041450779e-05, |
|
"loss": 0.0232, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_MAE": 0.13626991212368011, |
|
"eval_R2": 0.18108005143449513, |
|
"eval_RMSE": 0.1791396290063858, |
|
"eval_loss": 0.03209100663661957, |
|
"eval_runtime": 14.0546, |
|
"eval_samples_per_second": 204.987, |
|
"eval_steps_per_second": 3.486, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 6.615726947784424, |
|
"learning_rate": 1.3015544041450778e-05, |
|
"loss": 0.0203, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_MAE": 0.1330207884311676, |
|
"eval_R2": 0.17193841716478142, |
|
"eval_RMSE": 0.18013672530651093, |
|
"eval_loss": 0.03244924172759056, |
|
"eval_runtime": 14.0517, |
|
"eval_samples_per_second": 205.029, |
|
"eval_steps_per_second": 3.487, |
|
"step": 1351 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7.515647390691597e+16, |
|
"train_batch_size": 60, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|