5e-06_AmpGPT2 / trainer_state.json
wabu's picture
Rename trainer_state(1).json to trainer_state.json
5b79e58 verified
raw
history blame contribute delete
No virus
14.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"eval_steps": 500,
"global_step": 7400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.218454865161452,
"eval_loss": 6.010812759399414,
"eval_runtime": 12.8832,
"eval_samples_per_second": 90.738,
"eval_steps_per_second": 2.872,
"step": 148
},
{
"epoch": 2.0,
"eval_accuracy": 0.2297482956165591,
"eval_loss": 5.830421447753906,
"eval_runtime": 12.8689,
"eval_samples_per_second": 90.839,
"eval_steps_per_second": 2.875,
"step": 296
},
{
"epoch": 3.0,
"eval_accuracy": 0.23940862863018553,
"eval_loss": 5.686750888824463,
"eval_runtime": 12.8635,
"eval_samples_per_second": 90.877,
"eval_steps_per_second": 2.876,
"step": 444
},
{
"epoch": 3.38,
"learning_rate": 4.6621621621621625e-06,
"loss": 5.9726,
"step": 500
},
{
"epoch": 4.0,
"eval_accuracy": 0.24915536891584797,
"eval_loss": 5.57318639755249,
"eval_runtime": 12.8537,
"eval_samples_per_second": 90.947,
"eval_steps_per_second": 2.879,
"step": 592
},
{
"epoch": 5.0,
"eval_accuracy": 0.2586860910214204,
"eval_loss": 5.474750518798828,
"eval_runtime": 12.849,
"eval_samples_per_second": 90.98,
"eval_steps_per_second": 2.88,
"step": 740
},
{
"epoch": 6.0,
"eval_accuracy": 0.26733545895222544,
"eval_loss": 5.387089729309082,
"eval_runtime": 12.8597,
"eval_samples_per_second": 90.904,
"eval_steps_per_second": 2.877,
"step": 888
},
{
"epoch": 6.76,
"learning_rate": 4.324324324324325e-06,
"loss": 5.5397,
"step": 1000
},
{
"epoch": 7.0,
"eval_accuracy": 0.27559599415886843,
"eval_loss": 5.308462142944336,
"eval_runtime": 12.8451,
"eval_samples_per_second": 91.008,
"eval_steps_per_second": 2.88,
"step": 1036
},
{
"epoch": 8.0,
"eval_accuracy": 0.2826554682842108,
"eval_loss": 5.240093231201172,
"eval_runtime": 12.8308,
"eval_samples_per_second": 91.109,
"eval_steps_per_second": 2.884,
"step": 1184
},
{
"epoch": 9.0,
"eval_accuracy": 0.2887731031443606,
"eval_loss": 5.181127071380615,
"eval_runtime": 12.8466,
"eval_samples_per_second": 90.997,
"eval_steps_per_second": 2.88,
"step": 1332
},
{
"epoch": 10.0,
"eval_accuracy": 0.2933094849262514,
"eval_loss": 5.127747535705566,
"eval_runtime": 12.8511,
"eval_samples_per_second": 90.965,
"eval_steps_per_second": 2.879,
"step": 1480
},
{
"epoch": 10.14,
"learning_rate": 3.986486486486487e-06,
"loss": 5.2883,
"step": 1500
},
{
"epoch": 11.0,
"eval_accuracy": 0.29827790306832225,
"eval_loss": 5.07957124710083,
"eval_runtime": 12.8494,
"eval_samples_per_second": 90.977,
"eval_steps_per_second": 2.88,
"step": 1628
},
{
"epoch": 12.0,
"eval_accuracy": 0.3029957401214886,
"eval_loss": 5.035754680633545,
"eval_runtime": 12.8272,
"eval_samples_per_second": 91.135,
"eval_steps_per_second": 2.885,
"step": 1776
},
{
"epoch": 13.0,
"eval_accuracy": 0.30672853427344443,
"eval_loss": 4.995058059692383,
"eval_runtime": 12.8564,
"eval_samples_per_second": 90.927,
"eval_steps_per_second": 2.878,
"step": 1924
},
{
"epoch": 13.51,
"learning_rate": 3.648648648648649e-06,
"loss": 5.1076,
"step": 2000
},
{
"epoch": 14.0,
"eval_accuracy": 0.31034035824454986,
"eval_loss": 4.957174777984619,
"eval_runtime": 12.8689,
"eval_samples_per_second": 90.839,
"eval_steps_per_second": 2.875,
"step": 2072
},
{
"epoch": 15.0,
"eval_accuracy": 0.3138916971252301,
"eval_loss": 4.919981479644775,
"eval_runtime": 12.8448,
"eval_samples_per_second": 91.009,
"eval_steps_per_second": 2.881,
"step": 2220
},
{
"epoch": 16.0,
"eval_accuracy": 0.3172183770986166,
"eval_loss": 4.887693405151367,
"eval_runtime": 12.8684,
"eval_samples_per_second": 90.842,
"eval_steps_per_second": 2.875,
"step": 2368
},
{
"epoch": 16.89,
"learning_rate": 3.310810810810811e-06,
"loss": 4.9674,
"step": 2500
},
{
"epoch": 17.0,
"eval_accuracy": 0.32033767961911674,
"eval_loss": 4.855071544647217,
"eval_runtime": 12.87,
"eval_samples_per_second": 90.832,
"eval_steps_per_second": 2.875,
"step": 2516
},
{
"epoch": 18.0,
"eval_accuracy": 0.32318911959630525,
"eval_loss": 4.825829982757568,
"eval_runtime": 12.8411,
"eval_samples_per_second": 91.036,
"eval_steps_per_second": 2.881,
"step": 2664
},
{
"epoch": 19.0,
"eval_accuracy": 0.3264725959336738,
"eval_loss": 4.800779342651367,
"eval_runtime": 12.8589,
"eval_samples_per_second": 90.91,
"eval_steps_per_second": 2.877,
"step": 2812
},
{
"epoch": 20.0,
"eval_accuracy": 0.3288919995506822,
"eval_loss": 4.774311542510986,
"eval_runtime": 12.8368,
"eval_samples_per_second": 91.066,
"eval_steps_per_second": 2.882,
"step": 2960
},
{
"epoch": 20.27,
"learning_rate": 2.9729729729729736e-06,
"loss": 4.858,
"step": 3000
},
{
"epoch": 21.0,
"eval_accuracy": 0.3316915951646491,
"eval_loss": 4.749689102172852,
"eval_runtime": 12.8577,
"eval_samples_per_second": 90.919,
"eval_steps_per_second": 2.878,
"step": 3108
},
{
"epoch": 22.0,
"eval_accuracy": 0.33381721405673503,
"eval_loss": 4.727055549621582,
"eval_runtime": 12.849,
"eval_samples_per_second": 90.98,
"eval_steps_per_second": 2.88,
"step": 3256
},
{
"epoch": 23.0,
"eval_accuracy": 0.33634030640018664,
"eval_loss": 4.705799102783203,
"eval_runtime": 12.8414,
"eval_samples_per_second": 91.034,
"eval_steps_per_second": 2.881,
"step": 3404
},
{
"epoch": 23.65,
"learning_rate": 2.6351351351351353e-06,
"loss": 4.76,
"step": 3500
},
{
"epoch": 24.0,
"eval_accuracy": 0.3384486438378654,
"eval_loss": 4.686633586883545,
"eval_runtime": 12.852,
"eval_samples_per_second": 90.959,
"eval_steps_per_second": 2.879,
"step": 3552
},
{
"epoch": 25.0,
"eval_accuracy": 0.3403841667314721,
"eval_loss": 4.668373107910156,
"eval_runtime": 12.8631,
"eval_samples_per_second": 90.88,
"eval_steps_per_second": 2.876,
"step": 3700
},
{
"epoch": 26.0,
"eval_accuracy": 0.34248386344194726,
"eval_loss": 4.648622512817383,
"eval_runtime": 12.8633,
"eval_samples_per_second": 90.879,
"eval_steps_per_second": 2.876,
"step": 3848
},
{
"epoch": 27.0,
"eval_accuracy": 0.3443416197907216,
"eval_loss": 4.632272243499756,
"eval_runtime": 12.8435,
"eval_samples_per_second": 91.019,
"eval_steps_per_second": 2.881,
"step": 3996
},
{
"epoch": 27.03,
"learning_rate": 2.297297297297298e-06,
"loss": 4.6863,
"step": 4000
},
{
"epoch": 28.0,
"eval_accuracy": 0.34594015432338787,
"eval_loss": 4.615506172180176,
"eval_runtime": 12.8661,
"eval_samples_per_second": 90.859,
"eval_steps_per_second": 2.876,
"step": 4144
},
{
"epoch": 29.0,
"eval_accuracy": 0.34756461103766495,
"eval_loss": 4.601576805114746,
"eval_runtime": 12.8739,
"eval_samples_per_second": 90.804,
"eval_steps_per_second": 2.874,
"step": 4292
},
{
"epoch": 30.0,
"eval_accuracy": 0.34898169029905557,
"eval_loss": 4.5874128341674805,
"eval_runtime": 12.8754,
"eval_samples_per_second": 90.793,
"eval_steps_per_second": 2.874,
"step": 4440
},
{
"epoch": 30.41,
"learning_rate": 1.9594594594594595e-06,
"loss": 4.6168,
"step": 4500
},
{
"epoch": 31.0,
"eval_accuracy": 0.3504592546508714,
"eval_loss": 4.574199676513672,
"eval_runtime": 12.8467,
"eval_samples_per_second": 90.996,
"eval_steps_per_second": 2.88,
"step": 4588
},
{
"epoch": 32.0,
"eval_accuracy": 0.35180720809463323,
"eval_loss": 4.562849998474121,
"eval_runtime": 12.8628,
"eval_samples_per_second": 90.882,
"eval_steps_per_second": 2.877,
"step": 4736
},
{
"epoch": 33.0,
"eval_accuracy": 0.3534230240817067,
"eval_loss": 4.550704479217529,
"eval_runtime": 12.8788,
"eval_samples_per_second": 90.769,
"eval_steps_per_second": 2.873,
"step": 4884
},
{
"epoch": 33.78,
"learning_rate": 1.6216216216216219e-06,
"loss": 4.5684,
"step": 5000
},
{
"epoch": 34.0,
"eval_accuracy": 0.35426117462045603,
"eval_loss": 4.541166305541992,
"eval_runtime": 12.9077,
"eval_samples_per_second": 90.566,
"eval_steps_per_second": 2.866,
"step": 5032
},
{
"epoch": 35.0,
"eval_accuracy": 0.3557905833354935,
"eval_loss": 4.531555652618408,
"eval_runtime": 12.8613,
"eval_samples_per_second": 90.893,
"eval_steps_per_second": 2.877,
"step": 5180
},
{
"epoch": 36.0,
"eval_accuracy": 0.3569570815079797,
"eval_loss": 4.520727157592773,
"eval_runtime": 12.9563,
"eval_samples_per_second": 90.226,
"eval_steps_per_second": 2.856,
"step": 5328
},
{
"epoch": 37.0,
"eval_accuracy": 0.3579594058635975,
"eval_loss": 4.513218879699707,
"eval_runtime": 12.9069,
"eval_samples_per_second": 90.572,
"eval_steps_per_second": 2.867,
"step": 5476
},
{
"epoch": 37.16,
"learning_rate": 1.2837837837837838e-06,
"loss": 4.5277,
"step": 5500
},
{
"epoch": 38.0,
"eval_accuracy": 0.3587629934935324,
"eval_loss": 4.505405426025391,
"eval_runtime": 12.8784,
"eval_samples_per_second": 90.772,
"eval_steps_per_second": 2.873,
"step": 5624
},
{
"epoch": 39.0,
"eval_accuracy": 0.35967891057711415,
"eval_loss": 4.499256610870361,
"eval_runtime": 12.8813,
"eval_samples_per_second": 90.752,
"eval_steps_per_second": 2.872,
"step": 5772
},
{
"epoch": 40.0,
"eval_accuracy": 0.3604306538438275,
"eval_loss": 4.493128776550293,
"eval_runtime": 12.8448,
"eval_samples_per_second": 91.009,
"eval_steps_per_second": 2.881,
"step": 5920
},
{
"epoch": 40.54,
"learning_rate": 9.459459459459461e-07,
"loss": 4.4886,
"step": 6000
},
{
"epoch": 41.0,
"eval_accuracy": 0.3610959898385048,
"eval_loss": 4.487875461578369,
"eval_runtime": 12.8279,
"eval_samples_per_second": 91.13,
"eval_steps_per_second": 2.884,
"step": 6068
},
{
"epoch": 42.0,
"eval_accuracy": 0.3616749185611461,
"eval_loss": 4.482149124145508,
"eval_runtime": 12.9187,
"eval_samples_per_second": 90.489,
"eval_steps_per_second": 2.864,
"step": 6216
},
{
"epoch": 43.0,
"eval_accuracy": 0.3622365658293802,
"eval_loss": 4.477830410003662,
"eval_runtime": 12.8586,
"eval_samples_per_second": 90.912,
"eval_steps_per_second": 2.877,
"step": 6364
},
{
"epoch": 43.92,
"learning_rate": 6.081081081081082e-07,
"loss": 4.4727,
"step": 6500
},
{
"epoch": 44.0,
"eval_accuracy": 0.36262539855354226,
"eval_loss": 4.474149703979492,
"eval_runtime": 12.8383,
"eval_samples_per_second": 91.056,
"eval_steps_per_second": 2.882,
"step": 6512
},
{
"epoch": 45.0,
"eval_accuracy": 0.3629969498232971,
"eval_loss": 4.471028804779053,
"eval_runtime": 12.8363,
"eval_samples_per_second": 91.07,
"eval_steps_per_second": 2.882,
"step": 6660
},
{
"epoch": 46.0,
"eval_accuracy": 0.36328209382101595,
"eval_loss": 4.469077110290527,
"eval_runtime": 12.8487,
"eval_samples_per_second": 90.982,
"eval_steps_per_second": 2.88,
"step": 6808
},
{
"epoch": 47.0,
"eval_accuracy": 0.3634462676378844,
"eval_loss": 4.4663591384887695,
"eval_runtime": 12.8695,
"eval_samples_per_second": 90.835,
"eval_steps_per_second": 2.875,
"step": 6956
},
{
"epoch": 47.3,
"learning_rate": 2.702702702702703e-07,
"loss": 4.4542,
"step": 7000
},
{
"epoch": 48.0,
"eval_accuracy": 0.36362772290916,
"eval_loss": 4.465246200561523,
"eval_runtime": 12.8565,
"eval_samples_per_second": 90.926,
"eval_steps_per_second": 2.878,
"step": 7104
},
{
"epoch": 49.0,
"eval_accuracy": 0.36365364509077086,
"eval_loss": 4.464395046234131,
"eval_runtime": 12.8452,
"eval_samples_per_second": 91.007,
"eval_steps_per_second": 2.88,
"step": 7252
},
{
"epoch": 50.0,
"eval_accuracy": 0.36367956727238165,
"eval_loss": 4.464205741882324,
"eval_runtime": 12.8483,
"eval_samples_per_second": 90.984,
"eval_steps_per_second": 2.88,
"step": 7400
},
{
"epoch": 50.0,
"step": 7400,
"total_flos": 1.00265577216e+17,
"train_loss": 4.855868909681165,
"train_runtime": 6356.4736,
"train_samples_per_second": 37.112,
"train_steps_per_second": 1.164
}
],
"logging_steps": 500,
"max_steps": 7400,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 1.00265577216e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}