compatible-codeparrot-small / trainer_state.json
danielrcardenas's picture
Compatibile model codeparrot-small for code rationales (#1)
5014a75 verified
raw
history blame
18.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.002146082027547257,
"global_step": 29000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.6876,
"step": 500
},
{
"epoch": 0.0,
"eval_loss": 1.4209991693496704,
"eval_runtime": 29774.1937,
"eval_samples_per_second": 24.175,
"eval_steps_per_second": 6.044,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.4255,
"step": 1000
},
{
"epoch": 0.0,
"eval_loss": 1.4117300510406494,
"eval_runtime": 29117.2533,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 6.18,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2975,
"step": 1500
},
{
"epoch": 0.0,
"eval_loss": 1.3992412090301514,
"eval_runtime": 29718.2883,
"eval_samples_per_second": 24.22,
"eval_steps_per_second": 6.055,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.3514,
"step": 2000
},
{
"epoch": 0.0,
"eval_loss": 1.4060639142990112,
"eval_runtime": 28900.2542,
"eval_samples_per_second": 24.906,
"eval_steps_per_second": 6.226,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.3757,
"step": 2500
},
{
"epoch": 0.0,
"eval_loss": 1.399295687675476,
"eval_runtime": 29509.4537,
"eval_samples_per_second": 24.392,
"eval_steps_per_second": 6.098,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.295,
"step": 3000
},
{
"epoch": 0.0,
"eval_loss": 1.393278956413269,
"eval_runtime": 28449.3173,
"eval_samples_per_second": 25.301,
"eval_steps_per_second": 6.325,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2565,
"step": 3500
},
{
"epoch": 0.0,
"eval_loss": 1.3863052129745483,
"eval_runtime": 28789.7552,
"eval_samples_per_second": 25.001,
"eval_steps_per_second": 6.25,
"step": 3500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.3095,
"step": 4000
},
{
"epoch": 0.0,
"eval_loss": 1.3882980346679688,
"eval_runtime": 28567.3896,
"eval_samples_per_second": 25.196,
"eval_steps_per_second": 6.299,
"step": 4000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.3021,
"step": 4500
},
{
"epoch": 0.0,
"eval_loss": 1.3863459825515747,
"eval_runtime": 28424.5337,
"eval_samples_per_second": 25.323,
"eval_steps_per_second": 6.331,
"step": 4500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2299,
"step": 5000
},
{
"epoch": 0.0,
"eval_loss": 1.3776334524154663,
"eval_runtime": 28753.4447,
"eval_samples_per_second": 25.033,
"eval_steps_per_second": 6.258,
"step": 5000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2306,
"step": 5500
},
{
"epoch": 0.0,
"eval_loss": 1.3795045614242554,
"eval_runtime": 28113.3911,
"eval_samples_per_second": 25.603,
"eval_steps_per_second": 6.401,
"step": 5500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2425,
"step": 6000
},
{
"epoch": 0.0,
"eval_loss": 1.3737467527389526,
"eval_runtime": 28752.4369,
"eval_samples_per_second": 25.034,
"eval_steps_per_second": 6.258,
"step": 6000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1932,
"step": 6500
},
{
"epoch": 0.0,
"eval_loss": 1.3799411058425903,
"eval_runtime": 27577.5799,
"eval_samples_per_second": 26.1,
"eval_steps_per_second": 6.525,
"step": 6500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2312,
"step": 7000
},
{
"epoch": 0.0,
"eval_loss": 1.3715393543243408,
"eval_runtime": 28034.4873,
"eval_samples_per_second": 25.675,
"eval_steps_per_second": 6.419,
"step": 7000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2841,
"step": 7500
},
{
"epoch": 0.0,
"eval_loss": 1.3804839849472046,
"eval_runtime": 28127.7804,
"eval_samples_per_second": 25.59,
"eval_steps_per_second": 6.397,
"step": 7500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1463,
"step": 8000
},
{
"epoch": 0.0,
"eval_loss": 1.3776183128356934,
"eval_runtime": 27577.1163,
"eval_samples_per_second": 26.101,
"eval_steps_per_second": 6.525,
"step": 8000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1313,
"step": 8500
},
{
"epoch": 0.0,
"eval_loss": 1.3699731826782227,
"eval_runtime": 28190.8614,
"eval_samples_per_second": 25.533,
"eval_steps_per_second": 6.383,
"step": 8500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2267,
"step": 9000
},
{
"epoch": 0.0,
"eval_loss": 1.366495132446289,
"eval_runtime": 28103.2881,
"eval_samples_per_second": 25.612,
"eval_steps_per_second": 6.403,
"step": 9000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1866,
"step": 9500
},
{
"epoch": 0.0,
"eval_loss": 1.3763595819473267,
"eval_runtime": 27696.0614,
"eval_samples_per_second": 25.989,
"eval_steps_per_second": 6.497,
"step": 9500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2347,
"step": 10000
},
{
"epoch": 0.0,
"eval_loss": 1.3777934312820435,
"eval_runtime": 28118.8859,
"eval_samples_per_second": 25.598,
"eval_steps_per_second": 6.399,
"step": 10000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1514,
"step": 10500
},
{
"epoch": 0.0,
"eval_loss": 1.370295763015747,
"eval_runtime": 27665.6655,
"eval_samples_per_second": 26.017,
"eval_steps_per_second": 6.504,
"step": 10500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2867,
"step": 11000
},
{
"epoch": 0.0,
"eval_loss": 1.372216820716858,
"eval_runtime": 28029.069,
"eval_samples_per_second": 25.68,
"eval_steps_per_second": 6.42,
"step": 11000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.3031,
"step": 11500
},
{
"epoch": 0.0,
"eval_loss": 1.3675533533096313,
"eval_runtime": 28060.4111,
"eval_samples_per_second": 25.651,
"eval_steps_per_second": 6.413,
"step": 11500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2353,
"step": 12000
},
{
"epoch": 0.0,
"eval_loss": 1.3742448091506958,
"eval_runtime": 27439.4619,
"eval_samples_per_second": 26.232,
"eval_steps_per_second": 6.558,
"step": 12000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2022,
"step": 12500
},
{
"epoch": 0.0,
"eval_loss": 1.3690038919448853,
"eval_runtime": 28177.5616,
"eval_samples_per_second": 25.545,
"eval_steps_per_second": 6.386,
"step": 12500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1925,
"step": 13000
},
{
"epoch": 0.0,
"eval_loss": 1.3660128116607666,
"eval_runtime": 28183.7194,
"eval_samples_per_second": 25.539,
"eval_steps_per_second": 6.385,
"step": 13000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2097,
"step": 13500
},
{
"epoch": 0.0,
"eval_loss": 1.3706327676773071,
"eval_runtime": 27638.252,
"eval_samples_per_second": 26.043,
"eval_steps_per_second": 6.511,
"step": 13500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1606,
"step": 14000
},
{
"epoch": 0.0,
"eval_loss": 1.369661569595337,
"eval_runtime": 28259.5204,
"eval_samples_per_second": 25.47,
"eval_steps_per_second": 6.368,
"step": 14000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2216,
"step": 14500
},
{
"epoch": 0.0,
"eval_loss": 1.368302822113037,
"eval_runtime": 28149.9769,
"eval_samples_per_second": 25.57,
"eval_steps_per_second": 6.392,
"step": 14500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1916,
"step": 15000
},
{
"epoch": 0.0,
"eval_loss": 1.3689770698547363,
"eval_runtime": 27702.5821,
"eval_samples_per_second": 25.983,
"eval_steps_per_second": 6.496,
"step": 15000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2369,
"step": 15500
},
{
"epoch": 0.0,
"eval_loss": 1.3695650100708008,
"eval_runtime": 28082.9192,
"eval_samples_per_second": 25.631,
"eval_steps_per_second": 6.408,
"step": 15500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1862,
"step": 16000
},
{
"epoch": 0.0,
"eval_loss": 1.3672432899475098,
"eval_runtime": 27790.3978,
"eval_samples_per_second": 25.9,
"eval_steps_per_second": 6.475,
"step": 16000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1875,
"step": 16500
},
{
"epoch": 0.0,
"eval_loss": 1.372326135635376,
"eval_runtime": 27957.8295,
"eval_samples_per_second": 25.745,
"eval_steps_per_second": 6.436,
"step": 16500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2237,
"step": 17000
},
{
"epoch": 0.0,
"eval_loss": 1.3708332777023315,
"eval_runtime": 28123.0687,
"eval_samples_per_second": 25.594,
"eval_steps_per_second": 6.399,
"step": 17000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1123,
"step": 17500
},
{
"epoch": 0.0,
"eval_loss": 1.3687807321548462,
"eval_runtime": 27597.4681,
"eval_samples_per_second": 26.082,
"eval_steps_per_second": 6.52,
"step": 17500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1219,
"step": 18000
},
{
"epoch": 0.0,
"eval_loss": 1.376206874847412,
"eval_runtime": 28127.0268,
"eval_samples_per_second": 25.59,
"eval_steps_per_second": 6.398,
"step": 18000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1776,
"step": 18500
},
{
"epoch": 0.0,
"eval_loss": 1.368283748626709,
"eval_runtime": 28116.6088,
"eval_samples_per_second": 25.6,
"eval_steps_per_second": 6.4,
"step": 18500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1627,
"step": 19000
},
{
"epoch": 0.0,
"eval_loss": 1.3710017204284668,
"eval_runtime": 27585.1366,
"eval_samples_per_second": 26.093,
"eval_steps_per_second": 6.523,
"step": 19000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0627,
"step": 19500
},
{
"epoch": 0.0,
"eval_loss": 1.3697084188461304,
"eval_runtime": 28272.5185,
"eval_samples_per_second": 25.459,
"eval_steps_per_second": 6.365,
"step": 19500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0632,
"step": 20000
},
{
"epoch": 0.0,
"eval_loss": 1.3792474269866943,
"eval_runtime": 28381.2307,
"eval_samples_per_second": 25.361,
"eval_steps_per_second": 6.34,
"step": 20000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2426,
"step": 20500
},
{
"epoch": 0.0,
"eval_loss": 1.373166799545288,
"eval_runtime": 28634.6137,
"eval_samples_per_second": 25.137,
"eval_steps_per_second": 6.284,
"step": 20500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1263,
"step": 21000
},
{
"epoch": 0.0,
"eval_loss": 1.3743404150009155,
"eval_runtime": 28674.6776,
"eval_samples_per_second": 25.102,
"eval_steps_per_second": 6.275,
"step": 21000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1131,
"step": 21500
},
{
"epoch": 0.0,
"eval_loss": 1.3682280778884888,
"eval_runtime": 28099.2766,
"eval_samples_per_second": 25.616,
"eval_steps_per_second": 6.404,
"step": 21500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0595,
"step": 22000
},
{
"epoch": 0.0,
"eval_loss": 1.3719111680984497,
"eval_runtime": 28695.4529,
"eval_samples_per_second": 25.084,
"eval_steps_per_second": 6.271,
"step": 22000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1468,
"step": 22500
},
{
"epoch": 0.0,
"eval_loss": 1.3663983345031738,
"eval_runtime": 28026.4183,
"eval_samples_per_second": 25.682,
"eval_steps_per_second": 6.421,
"step": 22500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.184,
"step": 23000
},
{
"epoch": 0.0,
"eval_loss": 1.3723489046096802,
"eval_runtime": 28690.9854,
"eval_samples_per_second": 25.087,
"eval_steps_per_second": 6.272,
"step": 23000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1262,
"step": 23500
},
{
"epoch": 0.0,
"eval_loss": 1.3714051246643066,
"eval_runtime": 28156.2291,
"eval_samples_per_second": 25.564,
"eval_steps_per_second": 6.391,
"step": 23500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1758,
"step": 24000
},
{
"epoch": 0.0,
"eval_loss": 1.368726134300232,
"eval_runtime": 28657.2462,
"eval_samples_per_second": 25.117,
"eval_steps_per_second": 6.279,
"step": 24000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0438,
"step": 24500
},
{
"epoch": 0.0,
"eval_loss": 1.3730684518814087,
"eval_runtime": 28686.5378,
"eval_samples_per_second": 25.091,
"eval_steps_per_second": 6.273,
"step": 24500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1404,
"step": 25000
},
{
"epoch": 0.0,
"eval_loss": 1.3706409931182861,
"eval_runtime": 28123.1244,
"eval_samples_per_second": 25.594,
"eval_steps_per_second": 6.399,
"step": 25000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1135,
"step": 25500
},
{
"epoch": 0.0,
"eval_loss": 1.3763220310211182,
"eval_runtime": 28682.6176,
"eval_samples_per_second": 25.095,
"eval_steps_per_second": 6.274,
"step": 25500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0536,
"step": 26000
},
{
"epoch": 0.0,
"eval_loss": 1.3709115982055664,
"eval_runtime": 28032.6358,
"eval_samples_per_second": 25.677,
"eval_steps_per_second": 6.419,
"step": 26000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1203,
"step": 26500
},
{
"epoch": 0.0,
"eval_loss": 1.3710169792175293,
"eval_runtime": 28678.3157,
"eval_samples_per_second": 25.099,
"eval_steps_per_second": 6.275,
"step": 26500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0784,
"step": 27000
},
{
"epoch": 0.0,
"eval_loss": 1.370123028755188,
"eval_runtime": 28008.2176,
"eval_samples_per_second": 25.699,
"eval_steps_per_second": 6.425,
"step": 27000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0521,
"step": 27500
},
{
"epoch": 0.0,
"eval_loss": 1.3733536005020142,
"eval_runtime": 17990.0293,
"eval_samples_per_second": 40.01,
"eval_steps_per_second": 10.003,
"step": 27500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1773,
"step": 28000
},
{
"epoch": 0.0,
"eval_loss": 1.3757646083831787,
"eval_runtime": 18000.6521,
"eval_samples_per_second": 39.987,
"eval_steps_per_second": 9.997,
"step": 28000
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.1688,
"step": 28500
},
{
"epoch": 0.0,
"eval_loss": 1.3776638507843018,
"eval_runtime": 17989.8584,
"eval_samples_per_second": 40.011,
"eval_steps_per_second": 10.003,
"step": 28500
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.0855,
"step": 29000
},
{
"epoch": 0.0,
"eval_loss": 1.3749734163284302,
"eval_runtime": 17990.6544,
"eval_samples_per_second": 40.009,
"eval_steps_per_second": 10.002,
"step": 29000
}
],
"max_steps": 30000,
"num_train_epochs": 1,
"total_flos": 1.5154937856e+16,
"trial_name": null,
"trial_params": null
}