|
{ |
|
"best_metric": 0.025636129081249237, |
|
"best_model_checkpoint": "logs/google-bert/bert-large-cased_synt_flan_/checkpoint-4204", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 7357, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.776539146900177, |
|
"learning_rate": 1.900190294957184e-05, |
|
"loss": 0.0605, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_MAE": 0.1328457146883011, |
|
"eval_R2": 0.5911350949440748, |
|
"eval_RMSE": 0.18202443420886993, |
|
"eval_loss": 0.033132895827293396, |
|
"eval_runtime": 57.2605, |
|
"eval_samples_per_second": 137.634, |
|
"eval_steps_per_second": 2.305, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.3320234417915344, |
|
"learning_rate": 1.800190294957184e-05, |
|
"loss": 0.0315, |
|
"step": 2102 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_MAE": 0.12664468586444855, |
|
"eval_R2": 0.6459152872970175, |
|
"eval_RMSE": 0.1693921983242035, |
|
"eval_loss": 0.02869371511042118, |
|
"eval_runtime": 63.3854, |
|
"eval_samples_per_second": 124.335, |
|
"eval_steps_per_second": 2.082, |
|
"step": 2102 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.6608996391296387, |
|
"learning_rate": 1.7001902949571838e-05, |
|
"loss": 0.0235, |
|
"step": 3153 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_MAE": 0.11449417471885681, |
|
"eval_R2": 0.6718858103880652, |
|
"eval_RMSE": 0.16306181252002716, |
|
"eval_loss": 0.026589158922433853, |
|
"eval_runtime": 63.2127, |
|
"eval_samples_per_second": 124.674, |
|
"eval_steps_per_second": 2.088, |
|
"step": 3153 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.4667591452598572, |
|
"learning_rate": 1.6002854424357757e-05, |
|
"loss": 0.0183, |
|
"step": 4204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_MAE": 0.11234939843416214, |
|
"eval_R2": 0.6836463521967036, |
|
"eval_RMSE": 0.16011285781860352, |
|
"eval_loss": 0.025636129081249237, |
|
"eval_runtime": 63.8057, |
|
"eval_samples_per_second": 123.516, |
|
"eval_steps_per_second": 2.069, |
|
"step": 4204 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.5772440433502197, |
|
"learning_rate": 1.5002854424357756e-05, |
|
"loss": 0.0146, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_MAE": 0.11249976605176926, |
|
"eval_R2": 0.6823521370731253, |
|
"eval_RMSE": 0.16044004261493683, |
|
"eval_loss": 0.025741007179021835, |
|
"eval_runtime": 63.9385, |
|
"eval_samples_per_second": 123.259, |
|
"eval_steps_per_second": 2.064, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.5639244914054871, |
|
"learning_rate": 1.4002854424357755e-05, |
|
"loss": 0.012, |
|
"step": 6306 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_MAE": 0.11984019726514816, |
|
"eval_R2": 0.6479695405721535, |
|
"eval_RMSE": 0.16890010237693787, |
|
"eval_loss": 0.02852724678814411, |
|
"eval_runtime": 63.5115, |
|
"eval_samples_per_second": 124.088, |
|
"eval_steps_per_second": 2.078, |
|
"step": 6306 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.34999290108680725, |
|
"learning_rate": 1.3002854424357757e-05, |
|
"loss": 0.0099, |
|
"step": 7357 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_MAE": 0.11679080128669739, |
|
"eval_R2": 0.6535357331636864, |
|
"eval_RMSE": 0.16755947470664978, |
|
"eval_loss": 0.028076183050870895, |
|
"eval_runtime": 63.3285, |
|
"eval_samples_per_second": 124.446, |
|
"eval_steps_per_second": 2.084, |
|
"step": 7357 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 2.052312281323627e+17, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|