lu-vae's picture
Upload folder using huggingface_hub
e52c1e9 verified
{
"best_metric": 0.86832522726694,
"best_model_checkpoint": "./save_models/mnli/roberta-base_lr1e-05/checkpoint-8286",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 13810,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.36,
"learning_rate": 6.031363088057901e-06,
"loss": 0.8769,
"step": 500
},
{
"epoch": 0.72,
"learning_rate": 9.86826900855096e-06,
"loss": 0.4908,
"step": 1000
},
{
"epoch": 1.0,
"eval_accuracy": 0.8439306358381503,
"eval_loss": 0.40705999732017517,
"eval_runtime": 30.2499,
"eval_samples_per_second": 1298.219,
"eval_steps_per_second": 5.091,
"step": 1381
},
{
"epoch": 1.09,
"learning_rate": 9.483090670980664e-06,
"loss": 0.4345,
"step": 1500
},
{
"epoch": 1.45,
"learning_rate": 9.09791233341037e-06,
"loss": 0.3978,
"step": 2000
},
{
"epoch": 1.81,
"learning_rate": 8.712733995840074e-06,
"loss": 0.3855,
"step": 2500
},
{
"epoch": 2.0,
"eval_accuracy": 0.8618318861246212,
"eval_loss": 0.3706605136394501,
"eval_runtime": 30.5079,
"eval_samples_per_second": 1287.241,
"eval_steps_per_second": 5.048,
"step": 2762
},
{
"epoch": 2.17,
"learning_rate": 8.32755565826978e-06,
"loss": 0.3572,
"step": 3000
},
{
"epoch": 2.53,
"learning_rate": 7.942377320699485e-06,
"loss": 0.3368,
"step": 3500
},
{
"epoch": 2.9,
"learning_rate": 7.557198983129189e-06,
"loss": 0.3354,
"step": 4000
},
{
"epoch": 3.0,
"eval_accuracy": 0.8657533548929235,
"eval_loss": 0.36279234290122986,
"eval_runtime": 30.3356,
"eval_samples_per_second": 1294.551,
"eval_steps_per_second": 5.077,
"step": 4143
},
{
"epoch": 3.26,
"learning_rate": 7.172020645558895e-06,
"loss": 0.3097,
"step": 4500
},
{
"epoch": 3.62,
"learning_rate": 6.7868423079885995e-06,
"loss": 0.2998,
"step": 5000
},
{
"epoch": 3.98,
"learning_rate": 6.401663970418303e-06,
"loss": 0.3005,
"step": 5500
},
{
"epoch": 4.0,
"eval_accuracy": 0.8673066639504978,
"eval_loss": 0.36057987809181213,
"eval_runtime": 30.2539,
"eval_samples_per_second": 1298.049,
"eval_steps_per_second": 5.09,
"step": 5524
},
{
"epoch": 4.34,
"learning_rate": 6.016485632848009e-06,
"loss": 0.2734,
"step": 6000
},
{
"epoch": 4.71,
"learning_rate": 5.631307295277714e-06,
"loss": 0.2724,
"step": 6500
},
{
"epoch": 5.0,
"eval_accuracy": 0.8679178019403632,
"eval_loss": 0.3738739490509033,
"eval_runtime": 30.6701,
"eval_samples_per_second": 1280.434,
"eval_steps_per_second": 5.021,
"step": 6905
},
{
"epoch": 5.07,
"learning_rate": 5.2461289577074194e-06,
"loss": 0.2667,
"step": 7000
},
{
"epoch": 5.43,
"learning_rate": 4.860950620137123e-06,
"loss": 0.2466,
"step": 7500
},
{
"epoch": 5.79,
"learning_rate": 4.475772282566829e-06,
"loss": 0.2481,
"step": 8000
},
{
"epoch": 6.0,
"eval_accuracy": 0.86832522726694,
"eval_loss": 0.3865111470222473,
"eval_runtime": 30.6524,
"eval_samples_per_second": 1281.174,
"eval_steps_per_second": 5.024,
"step": 8286
},
{
"epoch": 6.15,
"learning_rate": 4.090593944996534e-06,
"loss": 0.2394,
"step": 8500
},
{
"epoch": 6.52,
"learning_rate": 3.705415607426239e-06,
"loss": 0.2269,
"step": 9000
},
{
"epoch": 6.88,
"learning_rate": 3.3202372698559437e-06,
"loss": 0.2264,
"step": 9500
},
{
"epoch": 7.0,
"eval_accuracy": 0.8672557357846757,
"eval_loss": 0.3937914967536926,
"eval_runtime": 30.6252,
"eval_samples_per_second": 1282.311,
"eval_steps_per_second": 5.029,
"step": 9667
},
{
"epoch": 7.24,
"learning_rate": 2.935058932285649e-06,
"loss": 0.2181,
"step": 10000
},
{
"epoch": 7.6,
"learning_rate": 2.5498805947153533e-06,
"loss": 0.2134,
"step": 10500
},
{
"epoch": 7.97,
"learning_rate": 2.164702257145058e-06,
"loss": 0.2145,
"step": 11000
},
{
"epoch": 8.0,
"eval_accuracy": 0.8681469786865627,
"eval_loss": 0.4055633544921875,
"eval_runtime": 30.7036,
"eval_samples_per_second": 1279.035,
"eval_steps_per_second": 5.016,
"step": 11048
},
{
"epoch": 8.33,
"learning_rate": 1.7795239195747632e-06,
"loss": 0.2017,
"step": 11500
},
{
"epoch": 8.69,
"learning_rate": 1.3943455820044682e-06,
"loss": 0.2008,
"step": 12000
},
{
"epoch": 9.0,
"eval_accuracy": 0.8672811998675868,
"eval_loss": 0.41524767875671387,
"eval_runtime": 30.8769,
"eval_samples_per_second": 1271.856,
"eval_steps_per_second": 4.988,
"step": 12429
},
{
"epoch": 9.05,
"learning_rate": 1.0091672444341732e-06,
"loss": 0.2013,
"step": 12500
},
{
"epoch": 9.41,
"learning_rate": 6.239889068638781e-07,
"loss": 0.1909,
"step": 13000
},
{
"epoch": 9.78,
"learning_rate": 2.3881056929358295e-07,
"loss": 0.1936,
"step": 13500
},
{
"epoch": 10.0,
"eval_accuracy": 0.8680960505207405,
"eval_loss": 0.42063575983047485,
"eval_runtime": 30.2907,
"eval_samples_per_second": 1296.472,
"eval_steps_per_second": 5.084,
"step": 13810
},
{
"epoch": 10.0,
"step": 13810,
"total_flos": 2.1610790039821677e+17,
"train_loss": 0.29975197396357794,
"train_runtime": 5291.5699,
"train_samples_per_second": 667.913,
"train_steps_per_second": 2.61
}
],
"logging_steps": 500,
"max_steps": 13810,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.1610790039821677e+17,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}