SushantGautam's picture
Upload folder using huggingface_hub
a47b665 verified
{
"best_metric": 0.031336501240730286,
"best_model_checkpoint": "logs/google-bert/bert-large-cased_overall-coherence/checkpoint-772",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1351,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.359694242477417,
"learning_rate": 1.901554404145078e-05,
"loss": 0.2918,
"step": 193
},
{
"epoch": 1.0,
"eval_MAE": 0.1806364357471466,
"eval_R2": -0.5334044478520719,
"eval_RMSE": 0.24513159692287445,
"eval_loss": 0.06008950248360634,
"eval_runtime": 14.0168,
"eval_samples_per_second": 205.539,
"eval_steps_per_second": 3.496,
"step": 193
},
{
"epoch": 2.0,
"grad_norm": 4.046112060546875,
"learning_rate": 1.8015544041450778e-05,
"loss": 0.0417,
"step": 386
},
{
"epoch": 2.0,
"eval_MAE": 0.1616782695055008,
"eval_R2": -0.20231869056611274,
"eval_RMSE": 0.21706047654151917,
"eval_loss": 0.047115251421928406,
"eval_runtime": 14.1157,
"eval_samples_per_second": 204.099,
"eval_steps_per_second": 3.471,
"step": 386
},
{
"epoch": 3.0,
"grad_norm": 4.841380596160889,
"learning_rate": 1.7015544041450777e-05,
"loss": 0.0358,
"step": 579
},
{
"epoch": 3.0,
"eval_MAE": 0.14208842813968658,
"eval_R2": 0.06907855860957834,
"eval_RMSE": 0.1909974068403244,
"eval_loss": 0.03648001328110695,
"eval_runtime": 14.1019,
"eval_samples_per_second": 204.299,
"eval_steps_per_second": 3.475,
"step": 579
},
{
"epoch": 4.0,
"grad_norm": 1.935973882675171,
"learning_rate": 1.601554404145078e-05,
"loss": 0.0298,
"step": 772
},
{
"epoch": 4.0,
"eval_MAE": 0.13905900716781616,
"eval_R2": 0.20033410036457755,
"eval_RMSE": 0.17702119052410126,
"eval_loss": 0.031336501240730286,
"eval_runtime": 14.1051,
"eval_samples_per_second": 204.253,
"eval_steps_per_second": 3.474,
"step": 772
},
{
"epoch": 5.0,
"grad_norm": 1.4018096923828125,
"learning_rate": 1.5015544041450778e-05,
"loss": 0.0259,
"step": 965
},
{
"epoch": 5.0,
"eval_MAE": 0.13839736580848694,
"eval_R2": 0.09617620841026953,
"eval_RMSE": 0.188197061419487,
"eval_loss": 0.035418134182691574,
"eval_runtime": 14.0324,
"eval_samples_per_second": 205.311,
"eval_steps_per_second": 3.492,
"step": 965
},
{
"epoch": 6.0,
"grad_norm": 1.8566466569900513,
"learning_rate": 1.4015544041450779e-05,
"loss": 0.0232,
"step": 1158
},
{
"epoch": 6.0,
"eval_MAE": 0.13626991212368011,
"eval_R2": 0.18108005143449513,
"eval_RMSE": 0.1791396290063858,
"eval_loss": 0.03209100663661957,
"eval_runtime": 14.0546,
"eval_samples_per_second": 204.987,
"eval_steps_per_second": 3.486,
"step": 1158
},
{
"epoch": 7.0,
"grad_norm": 6.615726947784424,
"learning_rate": 1.3015544041450778e-05,
"loss": 0.0203,
"step": 1351
},
{
"epoch": 7.0,
"eval_MAE": 0.1330207884311676,
"eval_R2": 0.17193841716478142,
"eval_RMSE": 0.18013672530651093,
"eval_loss": 0.03244924172759056,
"eval_runtime": 14.0517,
"eval_samples_per_second": 205.029,
"eval_steps_per_second": 3.487,
"step": 1351
}
],
"logging_steps": 500,
"max_steps": 3860,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7.515647390691597e+16,
"train_batch_size": 60,
"trial_name": null,
"trial_params": null
}