Jeska's picture
End of training
6e44fdb
raw history blame
No virus
5.36 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 4950,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.22303473949432373,
"eval_loss": 3.9924213886260986,
"eval_runtime": 3.5667,
"eval_samples_per_second": 306.729,
"eval_steps_per_second": 38.411,
"step": 330
},
{
"epoch": 1.52,
"learning_rate": 8.98989898989899e-06,
"loss": 4.3795,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.3957952558994293,
"eval_loss": 3.1812195777893066,
"eval_runtime": 3.5362,
"eval_samples_per_second": 309.369,
"eval_steps_per_second": 38.742,
"step": 660
},
{
"epoch": 3.0,
"eval_accuracy": 0.5511882901191711,
"eval_loss": 2.5903849601745605,
"eval_runtime": 3.5231,
"eval_samples_per_second": 310.519,
"eval_steps_per_second": 38.886,
"step": 990
},
{
"epoch": 3.03,
"learning_rate": 7.97979797979798e-06,
"loss": 3.2046,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.6407678127288818,
"eval_loss": 2.1536314487457275,
"eval_runtime": 3.5444,
"eval_samples_per_second": 308.659,
"eval_steps_per_second": 38.653,
"step": 1320
},
{
"epoch": 4.55,
"learning_rate": 6.969696969696971e-06,
"loss": 2.3683,
"step": 1500
},
{
"epoch": 5.0,
"eval_accuracy": 0.7129799127578735,
"eval_loss": 1.8079293966293335,
"eval_runtime": 3.555,
"eval_samples_per_second": 307.739,
"eval_steps_per_second": 38.538,
"step": 1650
},
{
"epoch": 6.0,
"eval_accuracy": 0.7687385678291321,
"eval_loss": 1.5419940948486328,
"eval_runtime": 3.5056,
"eval_samples_per_second": 312.075,
"eval_steps_per_second": 39.081,
"step": 1980
},
{
"epoch": 6.06,
"learning_rate": 5.95959595959596e-06,
"loss": 1.8065,
"step": 2000
},
{
"epoch": 7.0,
"eval_accuracy": 0.8053016662597656,
"eval_loss": 1.3433690071105957,
"eval_runtime": 3.4889,
"eval_samples_per_second": 313.564,
"eval_steps_per_second": 39.267,
"step": 2310
},
{
"epoch": 7.58,
"learning_rate": 4.94949494949495e-06,
"loss": 1.373,
"step": 2500
},
{
"epoch": 8.0,
"eval_accuracy": 0.825411319732666,
"eval_loss": 1.1881896257400513,
"eval_runtime": 3.5056,
"eval_samples_per_second": 312.068,
"eval_steps_per_second": 39.08,
"step": 2640
},
{
"epoch": 9.0,
"eval_accuracy": 0.8400365710258484,
"eval_loss": 1.0700345039367676,
"eval_runtime": 3.4951,
"eval_samples_per_second": 313.014,
"eval_steps_per_second": 39.198,
"step": 2970
},
{
"epoch": 9.09,
"learning_rate": 3.93939393939394e-06,
"loss": 1.0931,
"step": 3000
},
{
"epoch": 10.0,
"eval_accuracy": 0.851005494594574,
"eval_loss": 0.9790602326393127,
"eval_runtime": 3.4892,
"eval_samples_per_second": 313.542,
"eval_steps_per_second": 39.264,
"step": 3300
},
{
"epoch": 10.61,
"learning_rate": 2.9292929292929295e-06,
"loss": 0.8714,
"step": 3500
},
{
"epoch": 11.0,
"eval_accuracy": 0.8619744181632996,
"eval_loss": 0.9201710224151611,
"eval_runtime": 3.4883,
"eval_samples_per_second": 313.616,
"eval_steps_per_second": 39.274,
"step": 3630
},
{
"epoch": 12.0,
"eval_accuracy": 0.868372917175293,
"eval_loss": 0.8640827536582947,
"eval_runtime": 3.5075,
"eval_samples_per_second": 311.902,
"eval_steps_per_second": 39.059,
"step": 3960
},
{
"epoch": 12.12,
"learning_rate": 1.9191919191919192e-06,
"loss": 0.7428,
"step": 4000
},
{
"epoch": 13.0,
"eval_accuracy": 0.8747714757919312,
"eval_loss": 0.8372448086738586,
"eval_runtime": 3.5209,
"eval_samples_per_second": 310.718,
"eval_steps_per_second": 38.911,
"step": 4290
},
{
"epoch": 13.64,
"learning_rate": 9.090909090909091e-07,
"loss": 0.6531,
"step": 4500
},
{
"epoch": 14.0,
"eval_accuracy": 0.8765996098518372,
"eval_loss": 0.8168175220489502,
"eval_runtime": 3.5294,
"eval_samples_per_second": 309.965,
"eval_steps_per_second": 38.816,
"step": 4620
},
{
"epoch": 15.0,
"eval_accuracy": 0.8756855726242065,
"eval_loss": 0.810522198677063,
"eval_runtime": 3.5261,
"eval_samples_per_second": 310.256,
"eval_steps_per_second": 38.853,
"step": 4950
},
{
"epoch": 15.0,
"step": 4950,
"total_flos": 1.043199019279872e+16,
"train_loss": 1.72243186873619,
"train_runtime": 2003.3488,
"train_samples_per_second": 79.038,
"train_steps_per_second": 2.471
}
],
"max_steps": 4950,
"num_train_epochs": 15,
"total_flos": 1.043199019279872e+16,
"trial_name": null,
"trial_params": null
}