|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 4950, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.22303473949432373, |
|
"eval_loss": 3.9924213886260986, |
|
"eval_runtime": 3.5667, |
|
"eval_samples_per_second": 306.729, |
|
"eval_steps_per_second": 38.411, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.98989898989899e-06, |
|
"loss": 4.3795, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3957952558994293, |
|
"eval_loss": 3.1812195777893066, |
|
"eval_runtime": 3.5362, |
|
"eval_samples_per_second": 309.369, |
|
"eval_steps_per_second": 38.742, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5511882901191711, |
|
"eval_loss": 2.5903849601745605, |
|
"eval_runtime": 3.5231, |
|
"eval_samples_per_second": 310.519, |
|
"eval_steps_per_second": 38.886, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 7.97979797979798e-06, |
|
"loss": 3.2046, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6407678127288818, |
|
"eval_loss": 2.1536314487457275, |
|
"eval_runtime": 3.5444, |
|
"eval_samples_per_second": 308.659, |
|
"eval_steps_per_second": 38.653, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 6.969696969696971e-06, |
|
"loss": 2.3683, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7129799127578735, |
|
"eval_loss": 1.8079293966293335, |
|
"eval_runtime": 3.555, |
|
"eval_samples_per_second": 307.739, |
|
"eval_steps_per_second": 38.538, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7687385678291321, |
|
"eval_loss": 1.5419940948486328, |
|
"eval_runtime": 3.5056, |
|
"eval_samples_per_second": 312.075, |
|
"eval_steps_per_second": 39.081, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 5.95959595959596e-06, |
|
"loss": 1.8065, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8053016662597656, |
|
"eval_loss": 1.3433690071105957, |
|
"eval_runtime": 3.4889, |
|
"eval_samples_per_second": 313.564, |
|
"eval_steps_per_second": 39.267, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 4.94949494949495e-06, |
|
"loss": 1.373, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.825411319732666, |
|
"eval_loss": 1.1881896257400513, |
|
"eval_runtime": 3.5056, |
|
"eval_samples_per_second": 312.068, |
|
"eval_steps_per_second": 39.08, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8400365710258484, |
|
"eval_loss": 1.0700345039367676, |
|
"eval_runtime": 3.4951, |
|
"eval_samples_per_second": 313.014, |
|
"eval_steps_per_second": 39.198, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.93939393939394e-06, |
|
"loss": 1.0931, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.851005494594574, |
|
"eval_loss": 0.9790602326393127, |
|
"eval_runtime": 3.4892, |
|
"eval_samples_per_second": 313.542, |
|
"eval_steps_per_second": 39.264, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 2.9292929292929295e-06, |
|
"loss": 0.8714, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8619744181632996, |
|
"eval_loss": 0.9201710224151611, |
|
"eval_runtime": 3.4883, |
|
"eval_samples_per_second": 313.616, |
|
"eval_steps_per_second": 39.274, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.868372917175293, |
|
"eval_loss": 0.8640827536582947, |
|
"eval_runtime": 3.5075, |
|
"eval_samples_per_second": 311.902, |
|
"eval_steps_per_second": 39.059, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 1.9191919191919192e-06, |
|
"loss": 0.7428, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8747714757919312, |
|
"eval_loss": 0.8372448086738586, |
|
"eval_runtime": 3.5209, |
|
"eval_samples_per_second": 310.718, |
|
"eval_steps_per_second": 38.911, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 0.6531, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8765996098518372, |
|
"eval_loss": 0.8168175220489502, |
|
"eval_runtime": 3.5294, |
|
"eval_samples_per_second": 309.965, |
|
"eval_steps_per_second": 38.816, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8756855726242065, |
|
"eval_loss": 0.810522198677063, |
|
"eval_runtime": 3.5261, |
|
"eval_samples_per_second": 310.256, |
|
"eval_steps_per_second": 38.853, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 4950, |
|
"total_flos": 1.043199019279872e+16, |
|
"train_loss": 1.72243186873619, |
|
"train_runtime": 2003.3488, |
|
"train_samples_per_second": 79.038, |
|
"train_steps_per_second": 2.471 |
|
} |
|
], |
|
"max_steps": 4950, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.043199019279872e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|