{
  "best_metric": 0.03419998660683632,
  "best_model_checkpoint": "logs/FacebookAI/roberta-large_accuracy-coverage/checkpoint-772",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 1351,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 7.81976318359375,
      "learning_rate": 1.901554404145078e-05,
      "loss": 0.0819,
      "step": 193
    },
    {
      "epoch": 1.0,
      "eval_MAE": 0.1725175678730011,
      "eval_R2": -0.1497003860798276,
      "eval_RMSE": 0.2353871762752533,
      "eval_loss": 0.05540712550282478,
      "eval_runtime": 14.0688,
      "eval_samples_per_second": 204.779,
      "eval_steps_per_second": 3.483,
      "step": 193
    },
    {
      "epoch": 2.0,
      "grad_norm": 13.916872024536133,
      "learning_rate": 1.8015544041450778e-05,
      "loss": 0.0552,
      "step": 386
    },
    {
      "epoch": 2.0,
      "eval_MAE": 0.14744172990322113,
      "eval_R2": 0.2686269017946187,
      "eval_RMSE": 0.18774136900901794,
      "eval_loss": 0.035246819257736206,
      "eval_runtime": 14.01,
      "eval_samples_per_second": 205.64,
      "eval_steps_per_second": 3.498,
      "step": 386
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.4010157585144043,
      "learning_rate": 1.7015544041450777e-05,
      "loss": 0.044,
      "step": 579
    },
    {
      "epoch": 3.0,
      "eval_MAE": 0.1637636125087738,
      "eval_R2": -0.005046362468473786,
      "eval_RMSE": 0.22008149325847626,
      "eval_loss": 0.048435866832733154,
      "eval_runtime": 13.9579,
      "eval_samples_per_second": 206.406,
      "eval_steps_per_second": 3.511,
      "step": 579
    },
    {
      "epoch": 4.0,
      "grad_norm": 5.166985034942627,
      "learning_rate": 1.601554404145078e-05,
      "loss": 0.0377,
      "step": 772
    },
    {
      "epoch": 4.0,
      "eval_MAE": 0.13679075241088867,
      "eval_R2": 0.2903487127030966,
      "eval_RMSE": 0.1849323809146881,
      "eval_loss": 0.03419998660683632,
      "eval_runtime": 13.9498,
      "eval_samples_per_second": 206.527,
      "eval_steps_per_second": 3.513,
      "step": 772
    },
    {
      "epoch": 5.0,
      "grad_norm": 5.7769083976745605,
      "learning_rate": 1.5015544041450778e-05,
      "loss": 0.0309,
      "step": 965
    },
    {
      "epoch": 5.0,
      "eval_MAE": 0.1612362116575241,
      "eval_R2": 0.01970298411235538,
      "eval_RMSE": 0.21735484898090363,
      "eval_loss": 0.04724312946200371,
      "eval_runtime": 13.9679,
      "eval_samples_per_second": 206.259,
      "eval_steps_per_second": 3.508,
      "step": 965
    },
    {
      "epoch": 6.0,
      "grad_norm": 4.735123157501221,
      "learning_rate": 1.4015544041450779e-05,
      "loss": 0.0269,
      "step": 1158
    },
    {
      "epoch": 6.0,
      "eval_MAE": 0.1395827978849411,
      "eval_R2": 0.2431203329693593,
      "eval_RMSE": 0.19098703563213348,
      "eval_loss": 0.036476049572229385,
      "eval_runtime": 14.0051,
      "eval_samples_per_second": 205.711,
      "eval_steps_per_second": 3.499,
      "step": 1158
    },
    {
      "epoch": 7.0,
      "grad_norm": 11.766636848449707,
      "learning_rate": 1.3015544041450778e-05,
      "loss": 0.0232,
      "step": 1351
    },
    {
      "epoch": 7.0,
      "eval_MAE": 0.1361502856016159,
      "eval_R2": 0.28310775750843686,
      "eval_RMSE": 0.18587347865104675,
      "eval_loss": 0.03454894945025444,
      "eval_runtime": 13.9169,
      "eval_samples_per_second": 207.014,
      "eval_steps_per_second": 3.521,
      "step": 1351
    }
  ],
  "logging_steps": 500,
  "max_steps": 3860,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 7.515640838071338e+16,
  "train_batch_size": 60,
  "trial_name": null,
  "trial_params": null
}