|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 116.47855530474041, |
|
"global_step": 774000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.988713318284425e-05, |
|
"loss": 6.9537, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 6.797055244445801, |
|
"eval_runtime": 118.7849, |
|
"eval_samples_per_second": 103.456, |
|
"eval_steps_per_second": 6.474, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.9774266365688486e-05, |
|
"loss": 6.6451, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 6.689827919006348, |
|
"eval_runtime": 118.3538, |
|
"eval_samples_per_second": 103.833, |
|
"eval_steps_per_second": 6.497, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.966139954853273e-05, |
|
"loss": 6.5518, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 6.630918979644775, |
|
"eval_runtime": 118.3302, |
|
"eval_samples_per_second": 103.853, |
|
"eval_steps_per_second": 6.499, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.954853273137698e-05, |
|
"loss": 6.4713, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 6.497533798217773, |
|
"eval_runtime": 118.3323, |
|
"eval_samples_per_second": 103.852, |
|
"eval_steps_per_second": 6.499, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 4.9435665914221216e-05, |
|
"loss": 6.0827, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 5.684892654418945, |
|
"eval_runtime": 118.3572, |
|
"eval_samples_per_second": 103.83, |
|
"eval_steps_per_second": 6.497, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.932279909706546e-05, |
|
"loss": 5.0663, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 4.617100715637207, |
|
"eval_runtime": 118.3432, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 6.498, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 4.920993227990971e-05, |
|
"loss": 4.3025, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 4.159748077392578, |
|
"eval_runtime": 118.3398, |
|
"eval_samples_per_second": 103.845, |
|
"eval_steps_per_second": 6.498, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 4.909706546275395e-05, |
|
"loss": 3.9214, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 3.8544375896453857, |
|
"eval_runtime": 118.3517, |
|
"eval_samples_per_second": 103.835, |
|
"eval_steps_per_second": 6.498, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 4.89841986455982e-05, |
|
"loss": 3.6779, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_loss": 3.664363384246826, |
|
"eval_runtime": 118.2895, |
|
"eval_samples_per_second": 103.889, |
|
"eval_steps_per_second": 6.501, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 4.887133182844244e-05, |
|
"loss": 3.502, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 3.5141005516052246, |
|
"eval_runtime": 118.2983, |
|
"eval_samples_per_second": 103.881, |
|
"eval_steps_per_second": 6.501, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 4.875846501128669e-05, |
|
"loss": 3.366, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"eval_loss": 3.4036142826080322, |
|
"eval_runtime": 118.3098, |
|
"eval_samples_per_second": 103.871, |
|
"eval_steps_per_second": 6.5, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 4.864559819413093e-05, |
|
"loss": 3.2695, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"eval_loss": 3.321831226348877, |
|
"eval_runtime": 118.3723, |
|
"eval_samples_per_second": 103.817, |
|
"eval_steps_per_second": 6.496, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.853273137697517e-05, |
|
"loss": 3.1758, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 3.2432045936584473, |
|
"eval_runtime": 118.355, |
|
"eval_samples_per_second": 103.832, |
|
"eval_steps_per_second": 6.497, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 4.841986455981942e-05, |
|
"loss": 3.1008, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"eval_loss": 3.186511754989624, |
|
"eval_runtime": 118.3602, |
|
"eval_samples_per_second": 103.827, |
|
"eval_steps_per_second": 6.497, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 4.830699774266366e-05, |
|
"loss": 3.0354, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"eval_loss": 3.115652561187744, |
|
"eval_runtime": 118.3431, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 6.498, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 4.81941309255079e-05, |
|
"loss": 2.9798, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"eval_loss": 3.0752041339874268, |
|
"eval_runtime": 118.2033, |
|
"eval_samples_per_second": 103.965, |
|
"eval_steps_per_second": 6.506, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 4.808126410835215e-05, |
|
"loss": 2.9252, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"eval_loss": 3.028315305709839, |
|
"eval_runtime": 118.2129, |
|
"eval_samples_per_second": 103.956, |
|
"eval_steps_per_second": 6.505, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 4.796839729119639e-05, |
|
"loss": 2.881, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"eval_loss": 2.9871439933776855, |
|
"eval_runtime": 118.1982, |
|
"eval_samples_per_second": 103.969, |
|
"eval_steps_per_second": 6.506, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 4.785553047404063e-05, |
|
"loss": 2.8366, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"eval_loss": 2.9422881603240967, |
|
"eval_runtime": 118.1796, |
|
"eval_samples_per_second": 103.986, |
|
"eval_steps_per_second": 6.507, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 4.774266365688488e-05, |
|
"loss": 2.7917, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_loss": 2.9027907848358154, |
|
"eval_runtime": 118.1933, |
|
"eval_samples_per_second": 103.974, |
|
"eval_steps_per_second": 6.506, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 4.762979683972912e-05, |
|
"loss": 2.7592, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"eval_loss": 2.8720462322235107, |
|
"eval_runtime": 118.2133, |
|
"eval_samples_per_second": 103.956, |
|
"eval_steps_per_second": 6.505, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 4.751693002257336e-05, |
|
"loss": 2.7278, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"eval_loss": 2.8500328063964844, |
|
"eval_runtime": 118.2044, |
|
"eval_samples_per_second": 103.964, |
|
"eval_steps_per_second": 6.506, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 4.740406320541761e-05, |
|
"loss": 2.693, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"eval_loss": 2.817178249359131, |
|
"eval_runtime": 118.1867, |
|
"eval_samples_per_second": 103.98, |
|
"eval_steps_per_second": 6.507, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 4.729119638826185e-05, |
|
"loss": 2.6645, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"eval_loss": 2.786304235458374, |
|
"eval_runtime": 118.2219, |
|
"eval_samples_per_second": 103.949, |
|
"eval_steps_per_second": 6.505, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 4.71783295711061e-05, |
|
"loss": 2.6361, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"eval_loss": 2.770569324493408, |
|
"eval_runtime": 118.2234, |
|
"eval_samples_per_second": 103.947, |
|
"eval_steps_per_second": 6.505, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 4.706546275395034e-05, |
|
"loss": 2.6083, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"eval_loss": 2.7391059398651123, |
|
"eval_runtime": 118.2576, |
|
"eval_samples_per_second": 103.917, |
|
"eval_steps_per_second": 6.503, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 4.695259593679459e-05, |
|
"loss": 2.5847, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"eval_loss": 2.718665838241577, |
|
"eval_runtime": 118.2124, |
|
"eval_samples_per_second": 103.957, |
|
"eval_steps_per_second": 6.505, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"learning_rate": 4.6839729119638834e-05, |
|
"loss": 2.5619, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"eval_loss": 2.7032158374786377, |
|
"eval_runtime": 118.2283, |
|
"eval_samples_per_second": 103.943, |
|
"eval_steps_per_second": 6.504, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 4.672686230248307e-05, |
|
"loss": 2.5368, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"eval_loss": 2.6911468505859375, |
|
"eval_runtime": 118.3184, |
|
"eval_samples_per_second": 103.864, |
|
"eval_steps_per_second": 6.499, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 4.661399548532732e-05, |
|
"loss": 2.5203, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"eval_loss": 2.666966676712036, |
|
"eval_runtime": 118.3095, |
|
"eval_samples_per_second": 103.872, |
|
"eval_steps_per_second": 6.5, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"learning_rate": 4.6501128668171564e-05, |
|
"loss": 2.4997, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_loss": 2.6472320556640625, |
|
"eval_runtime": 118.1415, |
|
"eval_samples_per_second": 104.019, |
|
"eval_steps_per_second": 6.509, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 4.63882618510158e-05, |
|
"loss": 2.4755, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"eval_loss": 2.6281678676605225, |
|
"eval_runtime": 118.147, |
|
"eval_samples_per_second": 104.014, |
|
"eval_steps_per_second": 6.509, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"learning_rate": 4.627539503386005e-05, |
|
"loss": 2.4593, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"eval_loss": 2.6077518463134766, |
|
"eval_runtime": 118.4066, |
|
"eval_samples_per_second": 103.786, |
|
"eval_steps_per_second": 6.495, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 4.616252821670429e-05, |
|
"loss": 2.4468, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"eval_loss": 2.60119366645813, |
|
"eval_runtime": 118.2017, |
|
"eval_samples_per_second": 103.966, |
|
"eval_steps_per_second": 6.506, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 23.7, |
|
"learning_rate": 4.604966139954853e-05, |
|
"loss": 2.4243, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 23.7, |
|
"eval_loss": 2.583709239959717, |
|
"eval_runtime": 118.0992, |
|
"eval_samples_per_second": 104.057, |
|
"eval_steps_per_second": 6.511, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 24.38, |
|
"learning_rate": 4.593679458239278e-05, |
|
"loss": 2.4093, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 24.38, |
|
"eval_loss": 2.5716421604156494, |
|
"eval_runtime": 118.1155, |
|
"eval_samples_per_second": 104.042, |
|
"eval_steps_per_second": 6.511, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"learning_rate": 4.582392776523702e-05, |
|
"loss": 2.396, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"eval_loss": 2.561039686203003, |
|
"eval_runtime": 118.1545, |
|
"eval_samples_per_second": 104.008, |
|
"eval_steps_per_second": 6.508, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 25.73, |
|
"learning_rate": 4.571106094808127e-05, |
|
"loss": 2.3764, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 25.73, |
|
"eval_loss": 2.543470859527588, |
|
"eval_runtime": 118.1796, |
|
"eval_samples_per_second": 103.986, |
|
"eval_steps_per_second": 6.507, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 4.559819413092551e-05, |
|
"loss": 2.3623, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"eval_loss": 2.5341155529022217, |
|
"eval_runtime": 118.1214, |
|
"eval_samples_per_second": 104.037, |
|
"eval_steps_per_second": 6.51, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 27.09, |
|
"learning_rate": 4.548532731376975e-05, |
|
"loss": 2.3529, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 27.09, |
|
"eval_loss": 2.5200819969177246, |
|
"eval_runtime": 118.324, |
|
"eval_samples_per_second": 103.859, |
|
"eval_steps_per_second": 6.499, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 27.77, |
|
"learning_rate": 4.5372460496614e-05, |
|
"loss": 2.3393, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 27.77, |
|
"eval_loss": 2.5085155963897705, |
|
"eval_runtime": 118.3431, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 6.498, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"learning_rate": 4.525959367945824e-05, |
|
"loss": 2.3247, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"eval_loss": 2.5002756118774414, |
|
"eval_runtime": 118.3427, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 6.498, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 4.514672686230249e-05, |
|
"loss": 2.3127, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"eval_loss": 2.4838666915893555, |
|
"eval_runtime": 118.3361, |
|
"eval_samples_per_second": 103.848, |
|
"eval_steps_per_second": 6.498, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 4.5033860045146734e-05, |
|
"loss": 2.3006, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"eval_loss": 2.480976104736328, |
|
"eval_runtime": 118.3346, |
|
"eval_samples_per_second": 103.85, |
|
"eval_steps_per_second": 6.499, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 30.47, |
|
"learning_rate": 4.492099322799097e-05, |
|
"loss": 2.2896, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 30.47, |
|
"eval_loss": 2.4641942977905273, |
|
"eval_runtime": 118.3398, |
|
"eval_samples_per_second": 103.845, |
|
"eval_steps_per_second": 6.498, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"learning_rate": 4.480812641083522e-05, |
|
"loss": 2.2789, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"eval_loss": 2.4657058715820312, |
|
"eval_runtime": 118.3281, |
|
"eval_samples_per_second": 103.855, |
|
"eval_steps_per_second": 6.499, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 31.83, |
|
"learning_rate": 4.4695259593679463e-05, |
|
"loss": 2.2665, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 31.83, |
|
"eval_loss": 2.4447216987609863, |
|
"eval_runtime": 118.3432, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 6.498, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 32.51, |
|
"learning_rate": 4.45823927765237e-05, |
|
"loss": 2.2545, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 32.51, |
|
"eval_loss": 2.4484477043151855, |
|
"eval_runtime": 118.3442, |
|
"eval_samples_per_second": 103.841, |
|
"eval_steps_per_second": 6.498, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 33.18, |
|
"learning_rate": 4.446952595936795e-05, |
|
"loss": 2.2446, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 33.18, |
|
"eval_loss": 2.4324302673339844, |
|
"eval_runtime": 118.3492, |
|
"eval_samples_per_second": 103.837, |
|
"eval_steps_per_second": 6.498, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 33.86, |
|
"learning_rate": 4.435665914221219e-05, |
|
"loss": 2.2352, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 33.86, |
|
"eval_loss": 2.426417827606201, |
|
"eval_runtime": 118.3545, |
|
"eval_samples_per_second": 103.832, |
|
"eval_steps_per_second": 6.497, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 34.54, |
|
"learning_rate": 4.424379232505644e-05, |
|
"loss": 2.2218, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 34.54, |
|
"eval_loss": 2.4175431728363037, |
|
"eval_runtime": 121.4712, |
|
"eval_samples_per_second": 101.168, |
|
"eval_steps_per_second": 6.331, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"learning_rate": 4.413092550790068e-05, |
|
"loss": 2.2153, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"eval_loss": 2.4122180938720703, |
|
"eval_runtime": 121.3685, |
|
"eval_samples_per_second": 101.254, |
|
"eval_steps_per_second": 6.336, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 35.89, |
|
"learning_rate": 4.401805869074492e-05, |
|
"loss": 2.206, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 35.89, |
|
"eval_loss": 2.392340660095215, |
|
"eval_runtime": 121.4694, |
|
"eval_samples_per_second": 101.169, |
|
"eval_steps_per_second": 6.331, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 36.57, |
|
"learning_rate": 4.390519187358917e-05, |
|
"loss": 2.1931, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 36.57, |
|
"eval_loss": 2.386526584625244, |
|
"eval_runtime": 121.4241, |
|
"eval_samples_per_second": 101.207, |
|
"eval_steps_per_second": 6.333, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"learning_rate": 4.379232505643341e-05, |
|
"loss": 2.1876, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"eval_loss": 2.383101224899292, |
|
"eval_runtime": 121.3529, |
|
"eval_samples_per_second": 101.267, |
|
"eval_steps_per_second": 6.337, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 37.92, |
|
"learning_rate": 4.367945823927765e-05, |
|
"loss": 2.1817, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 37.92, |
|
"eval_loss": 2.3782711029052734, |
|
"eval_runtime": 121.4029, |
|
"eval_samples_per_second": 101.225, |
|
"eval_steps_per_second": 6.334, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 38.6, |
|
"learning_rate": 4.35665914221219e-05, |
|
"loss": 2.1661, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 38.6, |
|
"eval_loss": 2.3761754035949707, |
|
"eval_runtime": 121.5223, |
|
"eval_samples_per_second": 101.126, |
|
"eval_steps_per_second": 6.328, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 39.28, |
|
"learning_rate": 4.3453724604966136e-05, |
|
"loss": 2.1635, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 39.28, |
|
"eval_loss": 2.365755319595337, |
|
"eval_runtime": 121.4762, |
|
"eval_samples_per_second": 101.164, |
|
"eval_steps_per_second": 6.33, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"learning_rate": 4.334085778781038e-05, |
|
"loss": 2.1533, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"eval_loss": 2.359434127807617, |
|
"eval_runtime": 121.2612, |
|
"eval_samples_per_second": 101.343, |
|
"eval_steps_per_second": 6.342, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 40.63, |
|
"learning_rate": 4.322799097065463e-05, |
|
"loss": 2.1444, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 40.63, |
|
"eval_loss": 2.3534085750579834, |
|
"eval_runtime": 121.4588, |
|
"eval_samples_per_second": 101.178, |
|
"eval_steps_per_second": 6.331, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 41.31, |
|
"learning_rate": 4.311512415349887e-05, |
|
"loss": 2.1389, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 41.31, |
|
"eval_loss": 2.3499608039855957, |
|
"eval_runtime": 121.4347, |
|
"eval_samples_per_second": 101.198, |
|
"eval_steps_per_second": 6.333, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"learning_rate": 4.300225733634312e-05, |
|
"loss": 2.1343, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"eval_loss": 2.33479642868042, |
|
"eval_runtime": 121.4769, |
|
"eval_samples_per_second": 101.163, |
|
"eval_steps_per_second": 6.33, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 42.66, |
|
"learning_rate": 4.2889390519187363e-05, |
|
"loss": 2.1204, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 42.66, |
|
"eval_loss": 2.338609457015991, |
|
"eval_runtime": 119.9199, |
|
"eval_samples_per_second": 102.477, |
|
"eval_steps_per_second": 6.413, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 43.34, |
|
"learning_rate": 4.277652370203161e-05, |
|
"loss": 2.1149, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 43.34, |
|
"eval_loss": 2.3366451263427734, |
|
"eval_runtime": 121.3615, |
|
"eval_samples_per_second": 101.259, |
|
"eval_steps_per_second": 6.336, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 4.266365688487585e-05, |
|
"loss": 2.1124, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"eval_loss": 2.3272287845611572, |
|
"eval_runtime": 121.4223, |
|
"eval_samples_per_second": 101.209, |
|
"eval_steps_per_second": 6.333, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 44.7, |
|
"learning_rate": 4.255079006772009e-05, |
|
"loss": 2.0996, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 44.7, |
|
"eval_loss": 2.3160288333892822, |
|
"eval_runtime": 121.4331, |
|
"eval_samples_per_second": 101.2, |
|
"eval_steps_per_second": 6.333, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"learning_rate": 4.243792325056434e-05, |
|
"loss": 2.0954, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"eval_loss": 2.3165717124938965, |
|
"eval_runtime": 121.3269, |
|
"eval_samples_per_second": 101.288, |
|
"eval_steps_per_second": 6.338, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 46.05, |
|
"learning_rate": 4.232505643340858e-05, |
|
"loss": 2.0901, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 46.05, |
|
"eval_loss": 2.303679943084717, |
|
"eval_runtime": 121.3845, |
|
"eval_samples_per_second": 101.24, |
|
"eval_steps_per_second": 6.335, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 46.73, |
|
"learning_rate": 4.221218961625282e-05, |
|
"loss": 2.0819, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 46.73, |
|
"eval_loss": 2.2977073192596436, |
|
"eval_runtime": 121.4379, |
|
"eval_samples_per_second": 101.196, |
|
"eval_steps_per_second": 6.332, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"learning_rate": 4.209932279909707e-05, |
|
"loss": 2.0725, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"eval_loss": 2.3036298751831055, |
|
"eval_runtime": 121.4278, |
|
"eval_samples_per_second": 101.204, |
|
"eval_steps_per_second": 6.333, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 4.198645598194131e-05, |
|
"loss": 2.0729, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"eval_loss": 2.2955193519592285, |
|
"eval_runtime": 121.3513, |
|
"eval_samples_per_second": 101.268, |
|
"eval_steps_per_second": 6.337, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 48.76, |
|
"learning_rate": 4.187358916478555e-05, |
|
"loss": 2.0621, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 48.76, |
|
"eval_loss": 2.284827947616577, |
|
"eval_runtime": 121.3713, |
|
"eval_samples_per_second": 101.251, |
|
"eval_steps_per_second": 6.336, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"learning_rate": 4.17607223476298e-05, |
|
"loss": 2.055, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"eval_loss": 2.2865021228790283, |
|
"eval_runtime": 121.4042, |
|
"eval_samples_per_second": 101.224, |
|
"eval_steps_per_second": 6.334, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 50.11, |
|
"learning_rate": 4.164785553047404e-05, |
|
"loss": 2.0514, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 50.11, |
|
"eval_loss": 2.2722549438476562, |
|
"eval_runtime": 121.3871, |
|
"eval_samples_per_second": 101.238, |
|
"eval_steps_per_second": 6.335, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 50.79, |
|
"learning_rate": 4.153498871331828e-05, |
|
"loss": 2.0427, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 50.79, |
|
"eval_loss": 2.2747364044189453, |
|
"eval_runtime": 121.4348, |
|
"eval_samples_per_second": 101.198, |
|
"eval_steps_per_second": 6.333, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 51.47, |
|
"learning_rate": 4.142212189616253e-05, |
|
"loss": 2.0398, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 51.47, |
|
"eval_loss": 2.275329113006592, |
|
"eval_runtime": 121.4165, |
|
"eval_samples_per_second": 101.214, |
|
"eval_steps_per_second": 6.334, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 52.14, |
|
"learning_rate": 4.130925507900677e-05, |
|
"loss": 2.0373, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 52.14, |
|
"eval_loss": 2.263934850692749, |
|
"eval_runtime": 121.4832, |
|
"eval_samples_per_second": 101.158, |
|
"eval_steps_per_second": 6.33, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 52.82, |
|
"learning_rate": 4.119638826185102e-05, |
|
"loss": 2.0293, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 52.82, |
|
"eval_loss": 2.2591116428375244, |
|
"eval_runtime": 121.453, |
|
"eval_samples_per_second": 101.183, |
|
"eval_steps_per_second": 6.332, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 53.5, |
|
"learning_rate": 4.108352144469526e-05, |
|
"loss": 2.0222, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 53.5, |
|
"eval_loss": 2.251147508621216, |
|
"eval_runtime": 121.2819, |
|
"eval_samples_per_second": 101.326, |
|
"eval_steps_per_second": 6.341, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 54.18, |
|
"learning_rate": 4.097065462753951e-05, |
|
"loss": 2.018, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 54.18, |
|
"eval_loss": 2.2465593814849854, |
|
"eval_runtime": 121.4341, |
|
"eval_samples_per_second": 101.199, |
|
"eval_steps_per_second": 6.333, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 54.85, |
|
"learning_rate": 4.085778781038375e-05, |
|
"loss": 2.0129, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 54.85, |
|
"eval_loss": 2.244495153427124, |
|
"eval_runtime": 121.4224, |
|
"eval_samples_per_second": 101.209, |
|
"eval_steps_per_second": 6.333, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 55.53, |
|
"learning_rate": 4.074492099322799e-05, |
|
"loss": 2.0071, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 55.53, |
|
"eval_loss": 2.244058609008789, |
|
"eval_runtime": 121.4271, |
|
"eval_samples_per_second": 101.205, |
|
"eval_steps_per_second": 6.333, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 56.21, |
|
"learning_rate": 4.063205417607224e-05, |
|
"loss": 2.0026, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 56.21, |
|
"eval_loss": 2.2374625205993652, |
|
"eval_runtime": 121.4591, |
|
"eval_samples_per_second": 101.178, |
|
"eval_steps_per_second": 6.331, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 56.88, |
|
"learning_rate": 4.0519187358916484e-05, |
|
"loss": 1.9989, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 56.88, |
|
"eval_loss": 2.2266647815704346, |
|
"eval_runtime": 121.4333, |
|
"eval_samples_per_second": 101.2, |
|
"eval_steps_per_second": 6.333, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 57.56, |
|
"learning_rate": 4.040632054176072e-05, |
|
"loss": 1.9917, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 57.56, |
|
"eval_loss": 2.2338218688964844, |
|
"eval_runtime": 121.4506, |
|
"eval_samples_per_second": 101.185, |
|
"eval_steps_per_second": 6.332, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"learning_rate": 4.029345372460497e-05, |
|
"loss": 1.9869, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"eval_loss": 2.226421594619751, |
|
"eval_runtime": 121.395, |
|
"eval_samples_per_second": 101.232, |
|
"eval_steps_per_second": 6.335, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 58.92, |
|
"learning_rate": 4.018058690744921e-05, |
|
"loss": 1.9855, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 58.92, |
|
"eval_loss": 2.22316837310791, |
|
"eval_runtime": 121.4361, |
|
"eval_samples_per_second": 101.197, |
|
"eval_steps_per_second": 6.333, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 59.59, |
|
"learning_rate": 4.006772009029345e-05, |
|
"loss": 1.975, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 59.59, |
|
"eval_loss": 2.221580743789673, |
|
"eval_runtime": 121.4217, |
|
"eval_samples_per_second": 101.209, |
|
"eval_steps_per_second": 6.333, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 60.27, |
|
"learning_rate": 3.99548532731377e-05, |
|
"loss": 1.9738, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 60.27, |
|
"eval_loss": 2.2099127769470215, |
|
"eval_runtime": 121.4414, |
|
"eval_samples_per_second": 101.193, |
|
"eval_steps_per_second": 6.332, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"learning_rate": 3.984198645598194e-05, |
|
"loss": 1.9724, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"eval_loss": 2.217116355895996, |
|
"eval_runtime": 121.4225, |
|
"eval_samples_per_second": 101.209, |
|
"eval_steps_per_second": 6.333, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 61.63, |
|
"learning_rate": 3.972911963882618e-05, |
|
"loss": 1.9643, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 61.63, |
|
"eval_loss": 2.2091891765594482, |
|
"eval_runtime": 121.3749, |
|
"eval_samples_per_second": 101.248, |
|
"eval_steps_per_second": 6.336, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 62.3, |
|
"learning_rate": 3.961625282167043e-05, |
|
"loss": 1.9582, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 62.3, |
|
"eval_loss": 2.2050740718841553, |
|
"eval_runtime": 121.3877, |
|
"eval_samples_per_second": 101.238, |
|
"eval_steps_per_second": 6.335, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 62.98, |
|
"learning_rate": 3.950338600451467e-05, |
|
"loss": 1.9596, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 62.98, |
|
"eval_loss": 2.2095320224761963, |
|
"eval_runtime": 121.3734, |
|
"eval_samples_per_second": 101.25, |
|
"eval_steps_per_second": 6.336, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 63.66, |
|
"learning_rate": 3.939051918735892e-05, |
|
"loss": 1.9491, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 63.66, |
|
"eval_loss": 2.201195478439331, |
|
"eval_runtime": 121.4462, |
|
"eval_samples_per_second": 101.189, |
|
"eval_steps_per_second": 6.332, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 64.33, |
|
"learning_rate": 3.927765237020316e-05, |
|
"loss": 1.9493, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 64.33, |
|
"eval_loss": 2.1953182220458984, |
|
"eval_runtime": 121.4109, |
|
"eval_samples_per_second": 101.218, |
|
"eval_steps_per_second": 6.334, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"learning_rate": 3.916478555304741e-05, |
|
"loss": 1.946, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"eval_loss": 2.1867878437042236, |
|
"eval_runtime": 119.4236, |
|
"eval_samples_per_second": 102.903, |
|
"eval_steps_per_second": 6.439, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 65.69, |
|
"learning_rate": 3.9051918735891654e-05, |
|
"loss": 1.9359, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 65.69, |
|
"eval_loss": 2.1980998516082764, |
|
"eval_runtime": 121.4109, |
|
"eval_samples_per_second": 101.218, |
|
"eval_steps_per_second": 6.334, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 66.37, |
|
"learning_rate": 3.893905191873589e-05, |
|
"loss": 1.9333, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 66.37, |
|
"eval_loss": 2.1945624351501465, |
|
"eval_runtime": 121.3328, |
|
"eval_samples_per_second": 101.283, |
|
"eval_steps_per_second": 6.338, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 67.04, |
|
"learning_rate": 3.882618510158014e-05, |
|
"loss": 1.9327, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 67.04, |
|
"eval_loss": 2.1841721534729004, |
|
"eval_runtime": 121.4364, |
|
"eval_samples_per_second": 101.197, |
|
"eval_steps_per_second": 6.333, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 67.72, |
|
"learning_rate": 3.8713318284424384e-05, |
|
"loss": 1.9243, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 67.72, |
|
"eval_loss": 2.189509391784668, |
|
"eval_runtime": 121.4244, |
|
"eval_samples_per_second": 101.207, |
|
"eval_steps_per_second": 6.333, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"learning_rate": 3.860045146726862e-05, |
|
"loss": 1.9241, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"eval_loss": 2.170930862426758, |
|
"eval_runtime": 121.4233, |
|
"eval_samples_per_second": 101.208, |
|
"eval_steps_per_second": 6.333, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 69.07, |
|
"learning_rate": 3.848758465011287e-05, |
|
"loss": 1.9183, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 69.07, |
|
"eval_loss": 2.18388032913208, |
|
"eval_runtime": 121.4294, |
|
"eval_samples_per_second": 101.203, |
|
"eval_steps_per_second": 6.333, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 69.75, |
|
"learning_rate": 3.837471783295711e-05, |
|
"loss": 1.916, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 69.75, |
|
"eval_loss": 2.179508686065674, |
|
"eval_runtime": 121.3882, |
|
"eval_samples_per_second": 101.237, |
|
"eval_steps_per_second": 6.335, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 70.43, |
|
"learning_rate": 3.826185101580136e-05, |
|
"loss": 1.9105, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 70.43, |
|
"eval_loss": 2.1771745681762695, |
|
"eval_runtime": 121.4394, |
|
"eval_samples_per_second": 101.194, |
|
"eval_steps_per_second": 6.332, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"learning_rate": 3.81489841986456e-05, |
|
"loss": 1.9117, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"eval_loss": 2.181852340698242, |
|
"eval_runtime": 121.3465, |
|
"eval_samples_per_second": 101.272, |
|
"eval_steps_per_second": 6.337, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 71.78, |
|
"learning_rate": 3.803611738148984e-05, |
|
"loss": 1.9041, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 71.78, |
|
"eval_loss": 2.1662580966949463, |
|
"eval_runtime": 121.4449, |
|
"eval_samples_per_second": 101.19, |
|
"eval_steps_per_second": 6.332, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 72.46, |
|
"learning_rate": 3.792325056433409e-05, |
|
"loss": 1.9021, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 72.46, |
|
"eval_loss": 2.165590524673462, |
|
"eval_runtime": 121.4569, |
|
"eval_samples_per_second": 101.18, |
|
"eval_steps_per_second": 6.331, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 73.14, |
|
"learning_rate": 3.781038374717833e-05, |
|
"loss": 1.8995, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 73.14, |
|
"eval_loss": 2.1610703468322754, |
|
"eval_runtime": 121.4207, |
|
"eval_samples_per_second": 101.21, |
|
"eval_steps_per_second": 6.333, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 73.81, |
|
"learning_rate": 3.769751693002257e-05, |
|
"loss": 1.896, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 73.81, |
|
"eval_loss": 2.1603007316589355, |
|
"eval_runtime": 121.3989, |
|
"eval_samples_per_second": 101.228, |
|
"eval_steps_per_second": 6.334, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 74.49, |
|
"learning_rate": 3.758465011286682e-05, |
|
"loss": 1.8901, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 74.49, |
|
"eval_loss": 2.158705711364746, |
|
"eval_runtime": 121.4675, |
|
"eval_samples_per_second": 101.171, |
|
"eval_steps_per_second": 6.331, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 75.17, |
|
"learning_rate": 3.747178329571106e-05, |
|
"loss": 1.8887, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 75.17, |
|
"eval_loss": 2.152022123336792, |
|
"eval_runtime": 121.4184, |
|
"eval_samples_per_second": 101.212, |
|
"eval_steps_per_second": 6.333, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 75.85, |
|
"learning_rate": 3.735891647855531e-05, |
|
"loss": 1.8855, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 75.85, |
|
"eval_loss": 2.1545896530151367, |
|
"eval_runtime": 121.4036, |
|
"eval_samples_per_second": 101.224, |
|
"eval_steps_per_second": 6.334, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 76.52, |
|
"learning_rate": 3.7246049661399554e-05, |
|
"loss": 1.8802, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 76.52, |
|
"eval_loss": 2.1495370864868164, |
|
"eval_runtime": 121.4898, |
|
"eval_samples_per_second": 101.153, |
|
"eval_steps_per_second": 6.33, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 77.2, |
|
"learning_rate": 3.71331828442438e-05, |
|
"loss": 1.8783, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 77.2, |
|
"eval_loss": 2.1447861194610596, |
|
"eval_runtime": 121.3997, |
|
"eval_samples_per_second": 101.228, |
|
"eval_steps_per_second": 6.334, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 77.88, |
|
"learning_rate": 3.702031602708804e-05, |
|
"loss": 1.8743, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 77.88, |
|
"eval_loss": 2.1373064517974854, |
|
"eval_runtime": 121.3755, |
|
"eval_samples_per_second": 101.248, |
|
"eval_steps_per_second": 6.336, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 78.56, |
|
"learning_rate": 3.6907449209932284e-05, |
|
"loss": 1.8679, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 78.56, |
|
"eval_loss": 2.1462478637695312, |
|
"eval_runtime": 121.3988, |
|
"eval_samples_per_second": 101.228, |
|
"eval_steps_per_second": 6.334, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 79.23, |
|
"learning_rate": 3.679458239277653e-05, |
|
"loss": 1.8681, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 79.23, |
|
"eval_loss": 2.14402174949646, |
|
"eval_runtime": 121.4163, |
|
"eval_samples_per_second": 101.214, |
|
"eval_steps_per_second": 6.334, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 79.91, |
|
"learning_rate": 3.668171557562077e-05, |
|
"loss": 1.8649, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 79.91, |
|
"eval_loss": 2.1338822841644287, |
|
"eval_runtime": 121.3916, |
|
"eval_samples_per_second": 101.234, |
|
"eval_steps_per_second": 6.335, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 80.59, |
|
"learning_rate": 3.656884875846501e-05, |
|
"loss": 1.8593, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 80.59, |
|
"eval_loss": 2.139404296875, |
|
"eval_runtime": 121.4277, |
|
"eval_samples_per_second": 101.204, |
|
"eval_steps_per_second": 6.333, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 81.26, |
|
"learning_rate": 3.645598194130926e-05, |
|
"loss": 1.8592, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 81.26, |
|
"eval_loss": 2.1354503631591797, |
|
"eval_runtime": 121.402, |
|
"eval_samples_per_second": 101.226, |
|
"eval_steps_per_second": 6.334, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 81.94, |
|
"learning_rate": 3.63431151241535e-05, |
|
"loss": 1.8569, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 81.94, |
|
"eval_loss": 2.135469436645508, |
|
"eval_runtime": 121.3789, |
|
"eval_samples_per_second": 101.245, |
|
"eval_steps_per_second": 6.336, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 82.62, |
|
"learning_rate": 3.623024830699774e-05, |
|
"loss": 1.849, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 82.62, |
|
"eval_loss": 2.1346044540405273, |
|
"eval_runtime": 121.4745, |
|
"eval_samples_per_second": 101.165, |
|
"eval_steps_per_second": 6.331, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 83.3, |
|
"learning_rate": 3.611738148984199e-05, |
|
"loss": 1.8481, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 83.3, |
|
"eval_loss": 2.1314146518707275, |
|
"eval_runtime": 121.3262, |
|
"eval_samples_per_second": 101.289, |
|
"eval_steps_per_second": 6.338, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"learning_rate": 3.600451467268623e-05, |
|
"loss": 1.8499, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"eval_loss": 2.126936197280884, |
|
"eval_runtime": 121.4278, |
|
"eval_samples_per_second": 101.204, |
|
"eval_steps_per_second": 6.333, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 84.65, |
|
"learning_rate": 3.589164785553047e-05, |
|
"loss": 1.8394, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 84.65, |
|
"eval_loss": 2.118168592453003, |
|
"eval_runtime": 121.4516, |
|
"eval_samples_per_second": 101.184, |
|
"eval_steps_per_second": 6.332, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 85.33, |
|
"learning_rate": 3.577878103837472e-05, |
|
"loss": 1.8394, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 85.33, |
|
"eval_loss": 2.1206483840942383, |
|
"eval_runtime": 121.4259, |
|
"eval_samples_per_second": 101.206, |
|
"eval_steps_per_second": 6.333, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 3.566591422121896e-05, |
|
"loss": 1.8408, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 2.1219327449798584, |
|
"eval_runtime": 118.4077, |
|
"eval_samples_per_second": 103.785, |
|
"eval_steps_per_second": 6.495, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 86.68, |
|
"learning_rate": 3.555304740406321e-05, |
|
"loss": 1.832, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 86.68, |
|
"eval_loss": 2.1104817390441895, |
|
"eval_runtime": 118.1444, |
|
"eval_samples_per_second": 104.017, |
|
"eval_steps_per_second": 6.509, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 87.36, |
|
"learning_rate": 3.5440180586907454e-05, |
|
"loss": 1.8305, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 87.36, |
|
"eval_loss": 2.1246631145477295, |
|
"eval_runtime": 118.1028, |
|
"eval_samples_per_second": 104.053, |
|
"eval_steps_per_second": 6.511, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 88.04, |
|
"learning_rate": 3.53273137697517e-05, |
|
"loss": 1.8333, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 88.04, |
|
"eval_loss": 2.1201488971710205, |
|
"eval_runtime": 118.0958, |
|
"eval_samples_per_second": 104.06, |
|
"eval_steps_per_second": 6.512, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 88.71, |
|
"learning_rate": 3.521444695259594e-05, |
|
"loss": 1.8227, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 88.71, |
|
"eval_loss": 2.104985475540161, |
|
"eval_runtime": 118.0975, |
|
"eval_samples_per_second": 104.058, |
|
"eval_steps_per_second": 6.512, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 89.39, |
|
"learning_rate": 3.5101580135440183e-05, |
|
"loss": 1.8216, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 89.39, |
|
"eval_loss": 2.1118545532226562, |
|
"eval_runtime": 118.0937, |
|
"eval_samples_per_second": 104.061, |
|
"eval_steps_per_second": 6.512, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 90.07, |
|
"learning_rate": 3.498871331828443e-05, |
|
"loss": 1.8234, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 90.07, |
|
"eval_loss": 2.109297037124634, |
|
"eval_runtime": 118.1025, |
|
"eval_samples_per_second": 104.054, |
|
"eval_steps_per_second": 6.511, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 90.74, |
|
"learning_rate": 3.487584650112867e-05, |
|
"loss": 1.8162, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 90.74, |
|
"eval_loss": 2.0999834537506104, |
|
"eval_runtime": 118.0968, |
|
"eval_samples_per_second": 104.059, |
|
"eval_steps_per_second": 6.512, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 91.42, |
|
"learning_rate": 3.476297968397291e-05, |
|
"loss": 1.8153, |
|
"step": 607500 |
|
}, |
|
{ |
|
"epoch": 91.42, |
|
"eval_loss": 2.110783576965332, |
|
"eval_runtime": 118.1749, |
|
"eval_samples_per_second": 103.99, |
|
"eval_steps_per_second": 6.507, |
|
"step": 607500 |
|
}, |
|
{ |
|
"epoch": 92.1, |
|
"learning_rate": 3.465011286681716e-05, |
|
"loss": 1.8153, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 92.1, |
|
"eval_loss": 2.1009647846221924, |
|
"eval_runtime": 118.1986, |
|
"eval_samples_per_second": 103.969, |
|
"eval_steps_per_second": 6.506, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 92.78, |
|
"learning_rate": 3.4537246049661404e-05, |
|
"loss": 1.8095, |
|
"step": 616500 |
|
}, |
|
{ |
|
"epoch": 92.78, |
|
"eval_loss": 2.0992209911346436, |
|
"eval_runtime": 118.1834, |
|
"eval_samples_per_second": 103.982, |
|
"eval_steps_per_second": 6.507, |
|
"step": 616500 |
|
}, |
|
{ |
|
"epoch": 93.45, |
|
"learning_rate": 3.442437923250564e-05, |
|
"loss": 1.807, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 93.45, |
|
"eval_loss": 2.098292827606201, |
|
"eval_runtime": 118.1816, |
|
"eval_samples_per_second": 103.984, |
|
"eval_steps_per_second": 6.507, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 94.13, |
|
"learning_rate": 3.431151241534989e-05, |
|
"loss": 1.805, |
|
"step": 625500 |
|
}, |
|
{ |
|
"epoch": 94.13, |
|
"eval_loss": 2.0988106727600098, |
|
"eval_runtime": 118.1659, |
|
"eval_samples_per_second": 103.998, |
|
"eval_steps_per_second": 6.508, |
|
"step": 625500 |
|
}, |
|
{ |
|
"epoch": 94.81, |
|
"learning_rate": 3.4198645598194133e-05, |
|
"loss": 1.8015, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 94.81, |
|
"eval_loss": 2.0965840816497803, |
|
"eval_runtime": 118.1678, |
|
"eval_samples_per_second": 103.996, |
|
"eval_steps_per_second": 6.508, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 95.49, |
|
"learning_rate": 3.408577878103837e-05, |
|
"loss": 1.7964, |
|
"step": 634500 |
|
}, |
|
{ |
|
"epoch": 95.49, |
|
"eval_loss": 2.0856127738952637, |
|
"eval_runtime": 118.3087, |
|
"eval_samples_per_second": 103.872, |
|
"eval_steps_per_second": 6.5, |
|
"step": 634500 |
|
}, |
|
{ |
|
"epoch": 96.16, |
|
"learning_rate": 3.397291196388262e-05, |
|
"loss": 1.7988, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 96.16, |
|
"eval_loss": 2.0870988368988037, |
|
"eval_runtime": 118.2923, |
|
"eval_samples_per_second": 103.887, |
|
"eval_steps_per_second": 6.501, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 96.84, |
|
"learning_rate": 3.386004514672686e-05, |
|
"loss": 1.794, |
|
"step": 643500 |
|
}, |
|
{ |
|
"epoch": 96.84, |
|
"eval_loss": 2.092172861099243, |
|
"eval_runtime": 118.3047, |
|
"eval_samples_per_second": 103.876, |
|
"eval_steps_per_second": 6.5, |
|
"step": 643500 |
|
}, |
|
{ |
|
"epoch": 97.52, |
|
"learning_rate": 3.374717832957111e-05, |
|
"loss": 1.7917, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 97.52, |
|
"eval_loss": 2.0861566066741943, |
|
"eval_runtime": 118.2327, |
|
"eval_samples_per_second": 103.939, |
|
"eval_steps_per_second": 6.504, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 98.19, |
|
"learning_rate": 3.3634311512415354e-05, |
|
"loss": 1.79, |
|
"step": 652500 |
|
}, |
|
{ |
|
"epoch": 98.19, |
|
"eval_loss": 2.0844566822052, |
|
"eval_runtime": 118.1297, |
|
"eval_samples_per_second": 104.03, |
|
"eval_steps_per_second": 6.51, |
|
"step": 652500 |
|
}, |
|
{ |
|
"epoch": 98.87, |
|
"learning_rate": 3.35214446952596e-05, |
|
"loss": 1.788, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 98.87, |
|
"eval_loss": 2.0832607746124268, |
|
"eval_runtime": 118.1145, |
|
"eval_samples_per_second": 104.043, |
|
"eval_steps_per_second": 6.511, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 99.55, |
|
"learning_rate": 3.3408577878103845e-05, |
|
"loss": 1.7833, |
|
"step": 661500 |
|
}, |
|
{ |
|
"epoch": 99.55, |
|
"eval_loss": 2.082475185394287, |
|
"eval_runtime": 118.0929, |
|
"eval_samples_per_second": 104.062, |
|
"eval_steps_per_second": 6.512, |
|
"step": 661500 |
|
}, |
|
{ |
|
"epoch": 100.23, |
|
"learning_rate": 3.3295711060948083e-05, |
|
"loss": 1.7821, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 100.23, |
|
"eval_loss": 2.0830888748168945, |
|
"eval_runtime": 118.1331, |
|
"eval_samples_per_second": 104.027, |
|
"eval_steps_per_second": 6.51, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 100.9, |
|
"learning_rate": 3.318284424379233e-05, |
|
"loss": 1.7809, |
|
"step": 670500 |
|
}, |
|
{ |
|
"epoch": 100.9, |
|
"eval_loss": 2.080984115600586, |
|
"eval_runtime": 118.0711, |
|
"eval_samples_per_second": 104.081, |
|
"eval_steps_per_second": 6.513, |
|
"step": 670500 |
|
}, |
|
{ |
|
"epoch": 101.58, |
|
"learning_rate": 3.3069977426636574e-05, |
|
"loss": 1.7757, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 101.58, |
|
"eval_loss": 2.078061819076538, |
|
"eval_runtime": 118.0799, |
|
"eval_samples_per_second": 104.074, |
|
"eval_steps_per_second": 6.513, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 102.26, |
|
"learning_rate": 3.295711060948081e-05, |
|
"loss": 1.7737, |
|
"step": 679500 |
|
}, |
|
{ |
|
"epoch": 102.26, |
|
"eval_loss": 2.079832077026367, |
|
"eval_runtime": 118.3274, |
|
"eval_samples_per_second": 103.856, |
|
"eval_steps_per_second": 6.499, |
|
"step": 679500 |
|
}, |
|
{ |
|
"epoch": 102.93, |
|
"learning_rate": 3.284424379232506e-05, |
|
"loss": 1.7738, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 102.93, |
|
"eval_loss": 2.0702972412109375, |
|
"eval_runtime": 118.0753, |
|
"eval_samples_per_second": 104.078, |
|
"eval_steps_per_second": 6.513, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 103.61, |
|
"learning_rate": 3.2731376975169304e-05, |
|
"loss": 1.7701, |
|
"step": 688500 |
|
}, |
|
{ |
|
"epoch": 103.61, |
|
"eval_loss": 2.0662286281585693, |
|
"eval_runtime": 118.9742, |
|
"eval_samples_per_second": 103.291, |
|
"eval_steps_per_second": 6.464, |
|
"step": 688500 |
|
}, |
|
{ |
|
"epoch": 104.29, |
|
"learning_rate": 3.261851015801354e-05, |
|
"loss": 1.7682, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 104.29, |
|
"eval_loss": 2.070204973220825, |
|
"eval_runtime": 118.3434, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 6.498, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 104.97, |
|
"learning_rate": 3.250564334085779e-05, |
|
"loss": 1.7669, |
|
"step": 697500 |
|
}, |
|
{ |
|
"epoch": 104.97, |
|
"eval_loss": 2.063176155090332, |
|
"eval_runtime": 118.357, |
|
"eval_samples_per_second": 103.83, |
|
"eval_steps_per_second": 6.497, |
|
"step": 697500 |
|
}, |
|
{ |
|
"epoch": 105.64, |
|
"learning_rate": 3.239277652370203e-05, |
|
"loss": 1.7616, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 105.64, |
|
"eval_loss": 2.067533254623413, |
|
"eval_runtime": 118.3349, |
|
"eval_samples_per_second": 103.849, |
|
"eval_steps_per_second": 6.499, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 106.32, |
|
"learning_rate": 3.227990970654628e-05, |
|
"loss": 1.7623, |
|
"step": 706500 |
|
}, |
|
{ |
|
"epoch": 106.32, |
|
"eval_loss": 2.0670344829559326, |
|
"eval_runtime": 118.3389, |
|
"eval_samples_per_second": 103.846, |
|
"eval_steps_per_second": 6.498, |
|
"step": 706500 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 3.216704288939052e-05, |
|
"loss": 1.7623, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_loss": 2.0591020584106445, |
|
"eval_runtime": 118.3407, |
|
"eval_samples_per_second": 103.844, |
|
"eval_steps_per_second": 6.498, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 107.67, |
|
"learning_rate": 3.205417607223476e-05, |
|
"loss": 1.7553, |
|
"step": 715500 |
|
}, |
|
{ |
|
"epoch": 107.67, |
|
"eval_loss": 2.0580272674560547, |
|
"eval_runtime": 118.3269, |
|
"eval_samples_per_second": 103.856, |
|
"eval_steps_per_second": 6.499, |
|
"step": 715500 |
|
}, |
|
{ |
|
"epoch": 108.35, |
|
"learning_rate": 3.194130925507901e-05, |
|
"loss": 1.753, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 108.35, |
|
"eval_loss": 2.0603325366973877, |
|
"eval_runtime": 118.3271, |
|
"eval_samples_per_second": 103.856, |
|
"eval_steps_per_second": 6.499, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 109.03, |
|
"learning_rate": 3.1828442437923254e-05, |
|
"loss": 1.7549, |
|
"step": 724500 |
|
}, |
|
{ |
|
"epoch": 109.03, |
|
"eval_loss": 2.0661327838897705, |
|
"eval_runtime": 118.3395, |
|
"eval_samples_per_second": 103.845, |
|
"eval_steps_per_second": 6.498, |
|
"step": 724500 |
|
}, |
|
{ |
|
"epoch": 109.71, |
|
"learning_rate": 3.17155756207675e-05, |
|
"loss": 1.7474, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 109.71, |
|
"eval_loss": 2.0576255321502686, |
|
"eval_runtime": 118.3567, |
|
"eval_samples_per_second": 103.83, |
|
"eval_steps_per_second": 6.497, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 110.38, |
|
"learning_rate": 3.1602708803611745e-05, |
|
"loss": 1.7504, |
|
"step": 733500 |
|
}, |
|
{ |
|
"epoch": 110.38, |
|
"eval_loss": 2.0617053508758545, |
|
"eval_runtime": 118.3259, |
|
"eval_samples_per_second": 103.857, |
|
"eval_steps_per_second": 6.499, |
|
"step": 733500 |
|
}, |
|
{ |
|
"epoch": 111.06, |
|
"learning_rate": 3.148984198645598e-05, |
|
"loss": 1.7464, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 111.06, |
|
"eval_loss": 2.049917459487915, |
|
"eval_runtime": 118.3445, |
|
"eval_samples_per_second": 103.841, |
|
"eval_steps_per_second": 6.498, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 111.74, |
|
"learning_rate": 3.137697516930023e-05, |
|
"loss": 1.7432, |
|
"step": 742500 |
|
}, |
|
{ |
|
"epoch": 111.74, |
|
"eval_loss": 2.056652069091797, |
|
"eval_runtime": 118.3587, |
|
"eval_samples_per_second": 103.828, |
|
"eval_steps_per_second": 6.497, |
|
"step": 742500 |
|
}, |
|
{ |
|
"epoch": 112.42, |
|
"learning_rate": 3.1264108352144474e-05, |
|
"loss": 1.7404, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 112.42, |
|
"eval_loss": 2.0593619346618652, |
|
"eval_runtime": 118.3421, |
|
"eval_samples_per_second": 103.843, |
|
"eval_steps_per_second": 6.498, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 113.09, |
|
"learning_rate": 3.115124153498871e-05, |
|
"loss": 1.7438, |
|
"step": 751500 |
|
}, |
|
{ |
|
"epoch": 113.09, |
|
"eval_loss": 2.045955181121826, |
|
"eval_runtime": 118.325, |
|
"eval_samples_per_second": 103.858, |
|
"eval_steps_per_second": 6.499, |
|
"step": 751500 |
|
}, |
|
{ |
|
"epoch": 113.77, |
|
"learning_rate": 3.103837471783296e-05, |
|
"loss": 1.7375, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 113.77, |
|
"eval_loss": 2.055434465408325, |
|
"eval_runtime": 118.3498, |
|
"eval_samples_per_second": 103.836, |
|
"eval_steps_per_second": 6.498, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 114.45, |
|
"learning_rate": 3.0925507900677204e-05, |
|
"loss": 1.7336, |
|
"step": 760500 |
|
}, |
|
{ |
|
"epoch": 114.45, |
|
"eval_loss": 2.040349006652832, |
|
"eval_runtime": 118.3546, |
|
"eval_samples_per_second": 103.832, |
|
"eval_steps_per_second": 6.497, |
|
"step": 760500 |
|
}, |
|
{ |
|
"epoch": 115.12, |
|
"learning_rate": 3.081264108352145e-05, |
|
"loss": 1.7344, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 115.12, |
|
"eval_loss": 2.054170846939087, |
|
"eval_runtime": 118.3514, |
|
"eval_samples_per_second": 103.835, |
|
"eval_steps_per_second": 6.498, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 115.8, |
|
"learning_rate": 3.069977426636569e-05, |
|
"loss": 1.7324, |
|
"step": 769500 |
|
}, |
|
{ |
|
"epoch": 115.8, |
|
"eval_loss": 2.042388439178467, |
|
"eval_runtime": 118.3282, |
|
"eval_samples_per_second": 103.855, |
|
"eval_steps_per_second": 6.499, |
|
"step": 769500 |
|
}, |
|
{ |
|
"epoch": 116.48, |
|
"learning_rate": 3.058690744920993e-05, |
|
"loss": 1.7255, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 116.48, |
|
"eval_loss": 2.0501296520233154, |
|
"eval_runtime": 118.1548, |
|
"eval_samples_per_second": 104.008, |
|
"eval_steps_per_second": 6.508, |
|
"step": 774000 |
|
} |
|
], |
|
"max_steps": 1993500, |
|
"num_train_epochs": 300, |
|
"total_flos": 6.520019673893634e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|