bert-large-cased_synt_flan / trainer_state.json
SushantGautam's picture
Upload folder using huggingface_hub
9f88ec9 verified
{
"best_metric": 0.025636129081249237,
"best_model_checkpoint": "logs/google-bert/bert-large-cased_synt_flan_/checkpoint-4204",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 7357,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.776539146900177,
"learning_rate": 1.900190294957184e-05,
"loss": 0.0605,
"step": 1051
},
{
"epoch": 1.0,
"eval_MAE": 0.1328457146883011,
"eval_R2": 0.5911350949440748,
"eval_RMSE": 0.18202443420886993,
"eval_loss": 0.033132895827293396,
"eval_runtime": 57.2605,
"eval_samples_per_second": 137.634,
"eval_steps_per_second": 2.305,
"step": 1051
},
{
"epoch": 2.0,
"grad_norm": 0.3320234417915344,
"learning_rate": 1.800190294957184e-05,
"loss": 0.0315,
"step": 2102
},
{
"epoch": 2.0,
"eval_MAE": 0.12664468586444855,
"eval_R2": 0.6459152872970175,
"eval_RMSE": 0.1693921983242035,
"eval_loss": 0.02869371511042118,
"eval_runtime": 63.3854,
"eval_samples_per_second": 124.335,
"eval_steps_per_second": 2.082,
"step": 2102
},
{
"epoch": 3.0,
"grad_norm": 0.6608996391296387,
"learning_rate": 1.7001902949571838e-05,
"loss": 0.0235,
"step": 3153
},
{
"epoch": 3.0,
"eval_MAE": 0.11449417471885681,
"eval_R2": 0.6718858103880652,
"eval_RMSE": 0.16306181252002716,
"eval_loss": 0.026589158922433853,
"eval_runtime": 63.2127,
"eval_samples_per_second": 124.674,
"eval_steps_per_second": 2.088,
"step": 3153
},
{
"epoch": 4.0,
"grad_norm": 0.4667591452598572,
"learning_rate": 1.6002854424357757e-05,
"loss": 0.0183,
"step": 4204
},
{
"epoch": 4.0,
"eval_MAE": 0.11234939843416214,
"eval_R2": 0.6836463521967036,
"eval_RMSE": 0.16011285781860352,
"eval_loss": 0.025636129081249237,
"eval_runtime": 63.8057,
"eval_samples_per_second": 123.516,
"eval_steps_per_second": 2.069,
"step": 4204
},
{
"epoch": 5.0,
"grad_norm": 0.5772440433502197,
"learning_rate": 1.5002854424357756e-05,
"loss": 0.0146,
"step": 5255
},
{
"epoch": 5.0,
"eval_MAE": 0.11249976605176926,
"eval_R2": 0.6823521370731253,
"eval_RMSE": 0.16044004261493683,
"eval_loss": 0.025741007179021835,
"eval_runtime": 63.9385,
"eval_samples_per_second": 123.259,
"eval_steps_per_second": 2.064,
"step": 5255
},
{
"epoch": 6.0,
"grad_norm": 0.5639244914054871,
"learning_rate": 1.4002854424357755e-05,
"loss": 0.012,
"step": 6306
},
{
"epoch": 6.0,
"eval_MAE": 0.11984019726514816,
"eval_R2": 0.6479695405721535,
"eval_RMSE": 0.16890010237693787,
"eval_loss": 0.02852724678814411,
"eval_runtime": 63.5115,
"eval_samples_per_second": 124.088,
"eval_steps_per_second": 2.078,
"step": 6306
},
{
"epoch": 7.0,
"grad_norm": 0.34999290108680725,
"learning_rate": 1.3002854424357757e-05,
"loss": 0.0099,
"step": 7357
},
{
"epoch": 7.0,
"eval_MAE": 0.11679080128669739,
"eval_R2": 0.6535357331636864,
"eval_RMSE": 0.16755947470664978,
"eval_loss": 0.028076183050870895,
"eval_runtime": 63.3285,
"eval_samples_per_second": 124.446,
"eval_steps_per_second": 2.084,
"step": 7357
}
],
"logging_steps": 500,
"max_steps": 21020,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2.052312281323627e+17,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}