|
{ |
|
"best_metric": 0.86832522726694, |
|
"best_model_checkpoint": "./save_models/mnli/roberta-base_lr1e-05/checkpoint-8286", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 13810, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.031363088057901e-06, |
|
"loss": 0.8769, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.86826900855096e-06, |
|
"loss": 0.4908, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8439306358381503, |
|
"eval_loss": 0.40705999732017517, |
|
"eval_runtime": 30.2499, |
|
"eval_samples_per_second": 1298.219, |
|
"eval_steps_per_second": 5.091, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.483090670980664e-06, |
|
"loss": 0.4345, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.09791233341037e-06, |
|
"loss": 0.3978, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 8.712733995840074e-06, |
|
"loss": 0.3855, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8618318861246212, |
|
"eval_loss": 0.3706605136394501, |
|
"eval_runtime": 30.5079, |
|
"eval_samples_per_second": 1287.241, |
|
"eval_steps_per_second": 5.048, |
|
"step": 2762 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.32755565826978e-06, |
|
"loss": 0.3572, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.942377320699485e-06, |
|
"loss": 0.3368, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 7.557198983129189e-06, |
|
"loss": 0.3354, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8657533548929235, |
|
"eval_loss": 0.36279234290122986, |
|
"eval_runtime": 30.3356, |
|
"eval_samples_per_second": 1294.551, |
|
"eval_steps_per_second": 5.077, |
|
"step": 4143 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 7.172020645558895e-06, |
|
"loss": 0.3097, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 6.7868423079885995e-06, |
|
"loss": 0.2998, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 6.401663970418303e-06, |
|
"loss": 0.3005, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8673066639504978, |
|
"eval_loss": 0.36057987809181213, |
|
"eval_runtime": 30.2539, |
|
"eval_samples_per_second": 1298.049, |
|
"eval_steps_per_second": 5.09, |
|
"step": 5524 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 6.016485632848009e-06, |
|
"loss": 0.2734, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.631307295277714e-06, |
|
"loss": 0.2724, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8679178019403632, |
|
"eval_loss": 0.3738739490509033, |
|
"eval_runtime": 30.6701, |
|
"eval_samples_per_second": 1280.434, |
|
"eval_steps_per_second": 5.021, |
|
"step": 6905 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 5.2461289577074194e-06, |
|
"loss": 0.2667, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.860950620137123e-06, |
|
"loss": 0.2466, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 4.475772282566829e-06, |
|
"loss": 0.2481, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.86832522726694, |
|
"eval_loss": 0.3865111470222473, |
|
"eval_runtime": 30.6524, |
|
"eval_samples_per_second": 1281.174, |
|
"eval_steps_per_second": 5.024, |
|
"step": 8286 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 4.090593944996534e-06, |
|
"loss": 0.2394, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.705415607426239e-06, |
|
"loss": 0.2269, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 3.3202372698559437e-06, |
|
"loss": 0.2264, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8672557357846757, |
|
"eval_loss": 0.3937914967536926, |
|
"eval_runtime": 30.6252, |
|
"eval_samples_per_second": 1282.311, |
|
"eval_steps_per_second": 5.029, |
|
"step": 9667 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 2.935058932285649e-06, |
|
"loss": 0.2181, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 2.5498805947153533e-06, |
|
"loss": 0.2134, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 2.164702257145058e-06, |
|
"loss": 0.2145, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8681469786865627, |
|
"eval_loss": 0.4055633544921875, |
|
"eval_runtime": 30.7036, |
|
"eval_samples_per_second": 1279.035, |
|
"eval_steps_per_second": 5.016, |
|
"step": 11048 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1.7795239195747632e-06, |
|
"loss": 0.2017, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 1.3943455820044682e-06, |
|
"loss": 0.2008, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8672811998675868, |
|
"eval_loss": 0.41524767875671387, |
|
"eval_runtime": 30.8769, |
|
"eval_samples_per_second": 1271.856, |
|
"eval_steps_per_second": 4.988, |
|
"step": 12429 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 1.0091672444341732e-06, |
|
"loss": 0.2013, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 6.239889068638781e-07, |
|
"loss": 0.1909, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 2.3881056929358295e-07, |
|
"loss": 0.1936, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8680960505207405, |
|
"eval_loss": 0.42063575983047485, |
|
"eval_runtime": 30.2907, |
|
"eval_samples_per_second": 1296.472, |
|
"eval_steps_per_second": 5.084, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 13810, |
|
"total_flos": 2.1610790039821677e+17, |
|
"train_loss": 0.29975197396357794, |
|
"train_runtime": 5291.5699, |
|
"train_samples_per_second": 667.913, |
|
"train_steps_per_second": 2.61 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 13810, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.1610790039821677e+17, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|