LOGION-50k_wordpiece / trainer_state.json
cabrooks's picture
Upload 7 files
565239c
raw
history blame
57.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 116.47855530474041,
"global_step": 774000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.68,
"learning_rate": 4.988713318284425e-05,
"loss": 6.9537,
"step": 4500
},
{
"epoch": 0.68,
"eval_loss": 6.797055244445801,
"eval_runtime": 118.7849,
"eval_samples_per_second": 103.456,
"eval_steps_per_second": 6.474,
"step": 4500
},
{
"epoch": 1.35,
"learning_rate": 4.9774266365688486e-05,
"loss": 6.6451,
"step": 9000
},
{
"epoch": 1.35,
"eval_loss": 6.689827919006348,
"eval_runtime": 118.3538,
"eval_samples_per_second": 103.833,
"eval_steps_per_second": 6.497,
"step": 9000
},
{
"epoch": 2.03,
"learning_rate": 4.966139954853273e-05,
"loss": 6.5518,
"step": 13500
},
{
"epoch": 2.03,
"eval_loss": 6.630918979644775,
"eval_runtime": 118.3302,
"eval_samples_per_second": 103.853,
"eval_steps_per_second": 6.499,
"step": 13500
},
{
"epoch": 2.71,
"learning_rate": 4.954853273137698e-05,
"loss": 6.4713,
"step": 18000
},
{
"epoch": 2.71,
"eval_loss": 6.497533798217773,
"eval_runtime": 118.3323,
"eval_samples_per_second": 103.852,
"eval_steps_per_second": 6.499,
"step": 18000
},
{
"epoch": 3.39,
"learning_rate": 4.9435665914221216e-05,
"loss": 6.0827,
"step": 22500
},
{
"epoch": 3.39,
"eval_loss": 5.684892654418945,
"eval_runtime": 118.3572,
"eval_samples_per_second": 103.83,
"eval_steps_per_second": 6.497,
"step": 22500
},
{
"epoch": 4.06,
"learning_rate": 4.932279909706546e-05,
"loss": 5.0663,
"step": 27000
},
{
"epoch": 4.06,
"eval_loss": 4.617100715637207,
"eval_runtime": 118.3432,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 6.498,
"step": 27000
},
{
"epoch": 4.74,
"learning_rate": 4.920993227990971e-05,
"loss": 4.3025,
"step": 31500
},
{
"epoch": 4.74,
"eval_loss": 4.159748077392578,
"eval_runtime": 118.3398,
"eval_samples_per_second": 103.845,
"eval_steps_per_second": 6.498,
"step": 31500
},
{
"epoch": 5.42,
"learning_rate": 4.909706546275395e-05,
"loss": 3.9214,
"step": 36000
},
{
"epoch": 5.42,
"eval_loss": 3.8544375896453857,
"eval_runtime": 118.3517,
"eval_samples_per_second": 103.835,
"eval_steps_per_second": 6.498,
"step": 36000
},
{
"epoch": 6.09,
"learning_rate": 4.89841986455982e-05,
"loss": 3.6779,
"step": 40500
},
{
"epoch": 6.09,
"eval_loss": 3.664363384246826,
"eval_runtime": 118.2895,
"eval_samples_per_second": 103.889,
"eval_steps_per_second": 6.501,
"step": 40500
},
{
"epoch": 6.77,
"learning_rate": 4.887133182844244e-05,
"loss": 3.502,
"step": 45000
},
{
"epoch": 6.77,
"eval_loss": 3.5141005516052246,
"eval_runtime": 118.2983,
"eval_samples_per_second": 103.881,
"eval_steps_per_second": 6.501,
"step": 45000
},
{
"epoch": 7.45,
"learning_rate": 4.875846501128669e-05,
"loss": 3.366,
"step": 49500
},
{
"epoch": 7.45,
"eval_loss": 3.4036142826080322,
"eval_runtime": 118.3098,
"eval_samples_per_second": 103.871,
"eval_steps_per_second": 6.5,
"step": 49500
},
{
"epoch": 8.13,
"learning_rate": 4.864559819413093e-05,
"loss": 3.2695,
"step": 54000
},
{
"epoch": 8.13,
"eval_loss": 3.321831226348877,
"eval_runtime": 118.3723,
"eval_samples_per_second": 103.817,
"eval_steps_per_second": 6.496,
"step": 54000
},
{
"epoch": 8.8,
"learning_rate": 4.853273137697517e-05,
"loss": 3.1758,
"step": 58500
},
{
"epoch": 8.8,
"eval_loss": 3.2432045936584473,
"eval_runtime": 118.355,
"eval_samples_per_second": 103.832,
"eval_steps_per_second": 6.497,
"step": 58500
},
{
"epoch": 9.48,
"learning_rate": 4.841986455981942e-05,
"loss": 3.1008,
"step": 63000
},
{
"epoch": 9.48,
"eval_loss": 3.186511754989624,
"eval_runtime": 118.3602,
"eval_samples_per_second": 103.827,
"eval_steps_per_second": 6.497,
"step": 63000
},
{
"epoch": 10.16,
"learning_rate": 4.830699774266366e-05,
"loss": 3.0354,
"step": 67500
},
{
"epoch": 10.16,
"eval_loss": 3.115652561187744,
"eval_runtime": 118.3431,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 6.498,
"step": 67500
},
{
"epoch": 10.84,
"learning_rate": 4.81941309255079e-05,
"loss": 2.9798,
"step": 72000
},
{
"epoch": 10.84,
"eval_loss": 3.0752041339874268,
"eval_runtime": 118.2033,
"eval_samples_per_second": 103.965,
"eval_steps_per_second": 6.506,
"step": 72000
},
{
"epoch": 11.51,
"learning_rate": 4.808126410835215e-05,
"loss": 2.9252,
"step": 76500
},
{
"epoch": 11.51,
"eval_loss": 3.028315305709839,
"eval_runtime": 118.2129,
"eval_samples_per_second": 103.956,
"eval_steps_per_second": 6.505,
"step": 76500
},
{
"epoch": 12.19,
"learning_rate": 4.796839729119639e-05,
"loss": 2.881,
"step": 81000
},
{
"epoch": 12.19,
"eval_loss": 2.9871439933776855,
"eval_runtime": 118.1982,
"eval_samples_per_second": 103.969,
"eval_steps_per_second": 6.506,
"step": 81000
},
{
"epoch": 12.87,
"learning_rate": 4.785553047404063e-05,
"loss": 2.8366,
"step": 85500
},
{
"epoch": 12.87,
"eval_loss": 2.9422881603240967,
"eval_runtime": 118.1796,
"eval_samples_per_second": 103.986,
"eval_steps_per_second": 6.507,
"step": 85500
},
{
"epoch": 13.54,
"learning_rate": 4.774266365688488e-05,
"loss": 2.7917,
"step": 90000
},
{
"epoch": 13.54,
"eval_loss": 2.9027907848358154,
"eval_runtime": 118.1933,
"eval_samples_per_second": 103.974,
"eval_steps_per_second": 6.506,
"step": 90000
},
{
"epoch": 14.22,
"learning_rate": 4.762979683972912e-05,
"loss": 2.7592,
"step": 94500
},
{
"epoch": 14.22,
"eval_loss": 2.8720462322235107,
"eval_runtime": 118.2133,
"eval_samples_per_second": 103.956,
"eval_steps_per_second": 6.505,
"step": 94500
},
{
"epoch": 14.9,
"learning_rate": 4.751693002257336e-05,
"loss": 2.7278,
"step": 99000
},
{
"epoch": 14.9,
"eval_loss": 2.8500328063964844,
"eval_runtime": 118.2044,
"eval_samples_per_second": 103.964,
"eval_steps_per_second": 6.506,
"step": 99000
},
{
"epoch": 15.58,
"learning_rate": 4.740406320541761e-05,
"loss": 2.693,
"step": 103500
},
{
"epoch": 15.58,
"eval_loss": 2.817178249359131,
"eval_runtime": 118.1867,
"eval_samples_per_second": 103.98,
"eval_steps_per_second": 6.507,
"step": 103500
},
{
"epoch": 16.25,
"learning_rate": 4.729119638826185e-05,
"loss": 2.6645,
"step": 108000
},
{
"epoch": 16.25,
"eval_loss": 2.786304235458374,
"eval_runtime": 118.2219,
"eval_samples_per_second": 103.949,
"eval_steps_per_second": 6.505,
"step": 108000
},
{
"epoch": 16.93,
"learning_rate": 4.71783295711061e-05,
"loss": 2.6361,
"step": 112500
},
{
"epoch": 16.93,
"eval_loss": 2.770569324493408,
"eval_runtime": 118.2234,
"eval_samples_per_second": 103.947,
"eval_steps_per_second": 6.505,
"step": 112500
},
{
"epoch": 17.61,
"learning_rate": 4.706546275395034e-05,
"loss": 2.6083,
"step": 117000
},
{
"epoch": 17.61,
"eval_loss": 2.7391059398651123,
"eval_runtime": 118.2576,
"eval_samples_per_second": 103.917,
"eval_steps_per_second": 6.503,
"step": 117000
},
{
"epoch": 18.28,
"learning_rate": 4.695259593679459e-05,
"loss": 2.5847,
"step": 121500
},
{
"epoch": 18.28,
"eval_loss": 2.718665838241577,
"eval_runtime": 118.2124,
"eval_samples_per_second": 103.957,
"eval_steps_per_second": 6.505,
"step": 121500
},
{
"epoch": 18.96,
"learning_rate": 4.6839729119638834e-05,
"loss": 2.5619,
"step": 126000
},
{
"epoch": 18.96,
"eval_loss": 2.7032158374786377,
"eval_runtime": 118.2283,
"eval_samples_per_second": 103.943,
"eval_steps_per_second": 6.504,
"step": 126000
},
{
"epoch": 19.64,
"learning_rate": 4.672686230248307e-05,
"loss": 2.5368,
"step": 130500
},
{
"epoch": 19.64,
"eval_loss": 2.6911468505859375,
"eval_runtime": 118.3184,
"eval_samples_per_second": 103.864,
"eval_steps_per_second": 6.499,
"step": 130500
},
{
"epoch": 20.32,
"learning_rate": 4.661399548532732e-05,
"loss": 2.5203,
"step": 135000
},
{
"epoch": 20.32,
"eval_loss": 2.666966676712036,
"eval_runtime": 118.3095,
"eval_samples_per_second": 103.872,
"eval_steps_per_second": 6.5,
"step": 135000
},
{
"epoch": 20.99,
"learning_rate": 4.6501128668171564e-05,
"loss": 2.4997,
"step": 139500
},
{
"epoch": 20.99,
"eval_loss": 2.6472320556640625,
"eval_runtime": 118.1415,
"eval_samples_per_second": 104.019,
"eval_steps_per_second": 6.509,
"step": 139500
},
{
"epoch": 21.67,
"learning_rate": 4.63882618510158e-05,
"loss": 2.4755,
"step": 144000
},
{
"epoch": 21.67,
"eval_loss": 2.6281678676605225,
"eval_runtime": 118.147,
"eval_samples_per_second": 104.014,
"eval_steps_per_second": 6.509,
"step": 144000
},
{
"epoch": 22.35,
"learning_rate": 4.627539503386005e-05,
"loss": 2.4593,
"step": 148500
},
{
"epoch": 22.35,
"eval_loss": 2.6077518463134766,
"eval_runtime": 118.4066,
"eval_samples_per_second": 103.786,
"eval_steps_per_second": 6.495,
"step": 148500
},
{
"epoch": 23.02,
"learning_rate": 4.616252821670429e-05,
"loss": 2.4468,
"step": 153000
},
{
"epoch": 23.02,
"eval_loss": 2.60119366645813,
"eval_runtime": 118.2017,
"eval_samples_per_second": 103.966,
"eval_steps_per_second": 6.506,
"step": 153000
},
{
"epoch": 23.7,
"learning_rate": 4.604966139954853e-05,
"loss": 2.4243,
"step": 157500
},
{
"epoch": 23.7,
"eval_loss": 2.583709239959717,
"eval_runtime": 118.0992,
"eval_samples_per_second": 104.057,
"eval_steps_per_second": 6.511,
"step": 157500
},
{
"epoch": 24.38,
"learning_rate": 4.593679458239278e-05,
"loss": 2.4093,
"step": 162000
},
{
"epoch": 24.38,
"eval_loss": 2.5716421604156494,
"eval_runtime": 118.1155,
"eval_samples_per_second": 104.042,
"eval_steps_per_second": 6.511,
"step": 162000
},
{
"epoch": 25.06,
"learning_rate": 4.582392776523702e-05,
"loss": 2.396,
"step": 166500
},
{
"epoch": 25.06,
"eval_loss": 2.561039686203003,
"eval_runtime": 118.1545,
"eval_samples_per_second": 104.008,
"eval_steps_per_second": 6.508,
"step": 166500
},
{
"epoch": 25.73,
"learning_rate": 4.571106094808127e-05,
"loss": 2.3764,
"step": 171000
},
{
"epoch": 25.73,
"eval_loss": 2.543470859527588,
"eval_runtime": 118.1796,
"eval_samples_per_second": 103.986,
"eval_steps_per_second": 6.507,
"step": 171000
},
{
"epoch": 26.41,
"learning_rate": 4.559819413092551e-05,
"loss": 2.3623,
"step": 175500
},
{
"epoch": 26.41,
"eval_loss": 2.5341155529022217,
"eval_runtime": 118.1214,
"eval_samples_per_second": 104.037,
"eval_steps_per_second": 6.51,
"step": 175500
},
{
"epoch": 27.09,
"learning_rate": 4.548532731376975e-05,
"loss": 2.3529,
"step": 180000
},
{
"epoch": 27.09,
"eval_loss": 2.5200819969177246,
"eval_runtime": 118.324,
"eval_samples_per_second": 103.859,
"eval_steps_per_second": 6.499,
"step": 180000
},
{
"epoch": 27.77,
"learning_rate": 4.5372460496614e-05,
"loss": 2.3393,
"step": 184500
},
{
"epoch": 27.77,
"eval_loss": 2.5085155963897705,
"eval_runtime": 118.3431,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 6.498,
"step": 184500
},
{
"epoch": 28.44,
"learning_rate": 4.525959367945824e-05,
"loss": 2.3247,
"step": 189000
},
{
"epoch": 28.44,
"eval_loss": 2.5002756118774414,
"eval_runtime": 118.3427,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 6.498,
"step": 189000
},
{
"epoch": 29.12,
"learning_rate": 4.514672686230249e-05,
"loss": 2.3127,
"step": 193500
},
{
"epoch": 29.12,
"eval_loss": 2.4838666915893555,
"eval_runtime": 118.3361,
"eval_samples_per_second": 103.848,
"eval_steps_per_second": 6.498,
"step": 193500
},
{
"epoch": 29.8,
"learning_rate": 4.5033860045146734e-05,
"loss": 2.3006,
"step": 198000
},
{
"epoch": 29.8,
"eval_loss": 2.480976104736328,
"eval_runtime": 118.3346,
"eval_samples_per_second": 103.85,
"eval_steps_per_second": 6.499,
"step": 198000
},
{
"epoch": 30.47,
"learning_rate": 4.492099322799097e-05,
"loss": 2.2896,
"step": 202500
},
{
"epoch": 30.47,
"eval_loss": 2.4641942977905273,
"eval_runtime": 118.3398,
"eval_samples_per_second": 103.845,
"eval_steps_per_second": 6.498,
"step": 202500
},
{
"epoch": 31.15,
"learning_rate": 4.480812641083522e-05,
"loss": 2.2789,
"step": 207000
},
{
"epoch": 31.15,
"eval_loss": 2.4657058715820312,
"eval_runtime": 118.3281,
"eval_samples_per_second": 103.855,
"eval_steps_per_second": 6.499,
"step": 207000
},
{
"epoch": 31.83,
"learning_rate": 4.4695259593679463e-05,
"loss": 2.2665,
"step": 211500
},
{
"epoch": 31.83,
"eval_loss": 2.4447216987609863,
"eval_runtime": 118.3432,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 6.498,
"step": 211500
},
{
"epoch": 32.51,
"learning_rate": 4.45823927765237e-05,
"loss": 2.2545,
"step": 216000
},
{
"epoch": 32.51,
"eval_loss": 2.4484477043151855,
"eval_runtime": 118.3442,
"eval_samples_per_second": 103.841,
"eval_steps_per_second": 6.498,
"step": 216000
},
{
"epoch": 33.18,
"learning_rate": 4.446952595936795e-05,
"loss": 2.2446,
"step": 220500
},
{
"epoch": 33.18,
"eval_loss": 2.4324302673339844,
"eval_runtime": 118.3492,
"eval_samples_per_second": 103.837,
"eval_steps_per_second": 6.498,
"step": 220500
},
{
"epoch": 33.86,
"learning_rate": 4.435665914221219e-05,
"loss": 2.2352,
"step": 225000
},
{
"epoch": 33.86,
"eval_loss": 2.426417827606201,
"eval_runtime": 118.3545,
"eval_samples_per_second": 103.832,
"eval_steps_per_second": 6.497,
"step": 225000
},
{
"epoch": 34.54,
"learning_rate": 4.424379232505644e-05,
"loss": 2.2218,
"step": 229500
},
{
"epoch": 34.54,
"eval_loss": 2.4175431728363037,
"eval_runtime": 121.4712,
"eval_samples_per_second": 101.168,
"eval_steps_per_second": 6.331,
"step": 229500
},
{
"epoch": 35.21,
"learning_rate": 4.413092550790068e-05,
"loss": 2.2153,
"step": 234000
},
{
"epoch": 35.21,
"eval_loss": 2.4122180938720703,
"eval_runtime": 121.3685,
"eval_samples_per_second": 101.254,
"eval_steps_per_second": 6.336,
"step": 234000
},
{
"epoch": 35.89,
"learning_rate": 4.401805869074492e-05,
"loss": 2.206,
"step": 238500
},
{
"epoch": 35.89,
"eval_loss": 2.392340660095215,
"eval_runtime": 121.4694,
"eval_samples_per_second": 101.169,
"eval_steps_per_second": 6.331,
"step": 238500
},
{
"epoch": 36.57,
"learning_rate": 4.390519187358917e-05,
"loss": 2.1931,
"step": 243000
},
{
"epoch": 36.57,
"eval_loss": 2.386526584625244,
"eval_runtime": 121.4241,
"eval_samples_per_second": 101.207,
"eval_steps_per_second": 6.333,
"step": 243000
},
{
"epoch": 37.25,
"learning_rate": 4.379232505643341e-05,
"loss": 2.1876,
"step": 247500
},
{
"epoch": 37.25,
"eval_loss": 2.383101224899292,
"eval_runtime": 121.3529,
"eval_samples_per_second": 101.267,
"eval_steps_per_second": 6.337,
"step": 247500
},
{
"epoch": 37.92,
"learning_rate": 4.367945823927765e-05,
"loss": 2.1817,
"step": 252000
},
{
"epoch": 37.92,
"eval_loss": 2.3782711029052734,
"eval_runtime": 121.4029,
"eval_samples_per_second": 101.225,
"eval_steps_per_second": 6.334,
"step": 252000
},
{
"epoch": 38.6,
"learning_rate": 4.35665914221219e-05,
"loss": 2.1661,
"step": 256500
},
{
"epoch": 38.6,
"eval_loss": 2.3761754035949707,
"eval_runtime": 121.5223,
"eval_samples_per_second": 101.126,
"eval_steps_per_second": 6.328,
"step": 256500
},
{
"epoch": 39.28,
"learning_rate": 4.3453724604966136e-05,
"loss": 2.1635,
"step": 261000
},
{
"epoch": 39.28,
"eval_loss": 2.365755319595337,
"eval_runtime": 121.4762,
"eval_samples_per_second": 101.164,
"eval_steps_per_second": 6.33,
"step": 261000
},
{
"epoch": 39.95,
"learning_rate": 4.334085778781038e-05,
"loss": 2.1533,
"step": 265500
},
{
"epoch": 39.95,
"eval_loss": 2.359434127807617,
"eval_runtime": 121.2612,
"eval_samples_per_second": 101.343,
"eval_steps_per_second": 6.342,
"step": 265500
},
{
"epoch": 40.63,
"learning_rate": 4.322799097065463e-05,
"loss": 2.1444,
"step": 270000
},
{
"epoch": 40.63,
"eval_loss": 2.3534085750579834,
"eval_runtime": 121.4588,
"eval_samples_per_second": 101.178,
"eval_steps_per_second": 6.331,
"step": 270000
},
{
"epoch": 41.31,
"learning_rate": 4.311512415349887e-05,
"loss": 2.1389,
"step": 274500
},
{
"epoch": 41.31,
"eval_loss": 2.3499608039855957,
"eval_runtime": 121.4347,
"eval_samples_per_second": 101.198,
"eval_steps_per_second": 6.333,
"step": 274500
},
{
"epoch": 41.99,
"learning_rate": 4.300225733634312e-05,
"loss": 2.1343,
"step": 279000
},
{
"epoch": 41.99,
"eval_loss": 2.33479642868042,
"eval_runtime": 121.4769,
"eval_samples_per_second": 101.163,
"eval_steps_per_second": 6.33,
"step": 279000
},
{
"epoch": 42.66,
"learning_rate": 4.2889390519187363e-05,
"loss": 2.1204,
"step": 283500
},
{
"epoch": 42.66,
"eval_loss": 2.338609457015991,
"eval_runtime": 119.9199,
"eval_samples_per_second": 102.477,
"eval_steps_per_second": 6.413,
"step": 283500
},
{
"epoch": 43.34,
"learning_rate": 4.277652370203161e-05,
"loss": 2.1149,
"step": 288000
},
{
"epoch": 43.34,
"eval_loss": 2.3366451263427734,
"eval_runtime": 121.3615,
"eval_samples_per_second": 101.259,
"eval_steps_per_second": 6.336,
"step": 288000
},
{
"epoch": 44.02,
"learning_rate": 4.266365688487585e-05,
"loss": 2.1124,
"step": 292500
},
{
"epoch": 44.02,
"eval_loss": 2.3272287845611572,
"eval_runtime": 121.4223,
"eval_samples_per_second": 101.209,
"eval_steps_per_second": 6.333,
"step": 292500
},
{
"epoch": 44.7,
"learning_rate": 4.255079006772009e-05,
"loss": 2.0996,
"step": 297000
},
{
"epoch": 44.7,
"eval_loss": 2.3160288333892822,
"eval_runtime": 121.4331,
"eval_samples_per_second": 101.2,
"eval_steps_per_second": 6.333,
"step": 297000
},
{
"epoch": 45.37,
"learning_rate": 4.243792325056434e-05,
"loss": 2.0954,
"step": 301500
},
{
"epoch": 45.37,
"eval_loss": 2.3165717124938965,
"eval_runtime": 121.3269,
"eval_samples_per_second": 101.288,
"eval_steps_per_second": 6.338,
"step": 301500
},
{
"epoch": 46.05,
"learning_rate": 4.232505643340858e-05,
"loss": 2.0901,
"step": 306000
},
{
"epoch": 46.05,
"eval_loss": 2.303679943084717,
"eval_runtime": 121.3845,
"eval_samples_per_second": 101.24,
"eval_steps_per_second": 6.335,
"step": 306000
},
{
"epoch": 46.73,
"learning_rate": 4.221218961625282e-05,
"loss": 2.0819,
"step": 310500
},
{
"epoch": 46.73,
"eval_loss": 2.2977073192596436,
"eval_runtime": 121.4379,
"eval_samples_per_second": 101.196,
"eval_steps_per_second": 6.332,
"step": 310500
},
{
"epoch": 47.4,
"learning_rate": 4.209932279909707e-05,
"loss": 2.0725,
"step": 315000
},
{
"epoch": 47.4,
"eval_loss": 2.3036298751831055,
"eval_runtime": 121.4278,
"eval_samples_per_second": 101.204,
"eval_steps_per_second": 6.333,
"step": 315000
},
{
"epoch": 48.08,
"learning_rate": 4.198645598194131e-05,
"loss": 2.0729,
"step": 319500
},
{
"epoch": 48.08,
"eval_loss": 2.2955193519592285,
"eval_runtime": 121.3513,
"eval_samples_per_second": 101.268,
"eval_steps_per_second": 6.337,
"step": 319500
},
{
"epoch": 48.76,
"learning_rate": 4.187358916478555e-05,
"loss": 2.0621,
"step": 324000
},
{
"epoch": 48.76,
"eval_loss": 2.284827947616577,
"eval_runtime": 121.3713,
"eval_samples_per_second": 101.251,
"eval_steps_per_second": 6.336,
"step": 324000
},
{
"epoch": 49.44,
"learning_rate": 4.17607223476298e-05,
"loss": 2.055,
"step": 328500
},
{
"epoch": 49.44,
"eval_loss": 2.2865021228790283,
"eval_runtime": 121.4042,
"eval_samples_per_second": 101.224,
"eval_steps_per_second": 6.334,
"step": 328500
},
{
"epoch": 50.11,
"learning_rate": 4.164785553047404e-05,
"loss": 2.0514,
"step": 333000
},
{
"epoch": 50.11,
"eval_loss": 2.2722549438476562,
"eval_runtime": 121.3871,
"eval_samples_per_second": 101.238,
"eval_steps_per_second": 6.335,
"step": 333000
},
{
"epoch": 50.79,
"learning_rate": 4.153498871331828e-05,
"loss": 2.0427,
"step": 337500
},
{
"epoch": 50.79,
"eval_loss": 2.2747364044189453,
"eval_runtime": 121.4348,
"eval_samples_per_second": 101.198,
"eval_steps_per_second": 6.333,
"step": 337500
},
{
"epoch": 51.47,
"learning_rate": 4.142212189616253e-05,
"loss": 2.0398,
"step": 342000
},
{
"epoch": 51.47,
"eval_loss": 2.275329113006592,
"eval_runtime": 121.4165,
"eval_samples_per_second": 101.214,
"eval_steps_per_second": 6.334,
"step": 342000
},
{
"epoch": 52.14,
"learning_rate": 4.130925507900677e-05,
"loss": 2.0373,
"step": 346500
},
{
"epoch": 52.14,
"eval_loss": 2.263934850692749,
"eval_runtime": 121.4832,
"eval_samples_per_second": 101.158,
"eval_steps_per_second": 6.33,
"step": 346500
},
{
"epoch": 52.82,
"learning_rate": 4.119638826185102e-05,
"loss": 2.0293,
"step": 351000
},
{
"epoch": 52.82,
"eval_loss": 2.2591116428375244,
"eval_runtime": 121.453,
"eval_samples_per_second": 101.183,
"eval_steps_per_second": 6.332,
"step": 351000
},
{
"epoch": 53.5,
"learning_rate": 4.108352144469526e-05,
"loss": 2.0222,
"step": 355500
},
{
"epoch": 53.5,
"eval_loss": 2.251147508621216,
"eval_runtime": 121.2819,
"eval_samples_per_second": 101.326,
"eval_steps_per_second": 6.341,
"step": 355500
},
{
"epoch": 54.18,
"learning_rate": 4.097065462753951e-05,
"loss": 2.018,
"step": 360000
},
{
"epoch": 54.18,
"eval_loss": 2.2465593814849854,
"eval_runtime": 121.4341,
"eval_samples_per_second": 101.199,
"eval_steps_per_second": 6.333,
"step": 360000
},
{
"epoch": 54.85,
"learning_rate": 4.085778781038375e-05,
"loss": 2.0129,
"step": 364500
},
{
"epoch": 54.85,
"eval_loss": 2.244495153427124,
"eval_runtime": 121.4224,
"eval_samples_per_second": 101.209,
"eval_steps_per_second": 6.333,
"step": 364500
},
{
"epoch": 55.53,
"learning_rate": 4.074492099322799e-05,
"loss": 2.0071,
"step": 369000
},
{
"epoch": 55.53,
"eval_loss": 2.244058609008789,
"eval_runtime": 121.4271,
"eval_samples_per_second": 101.205,
"eval_steps_per_second": 6.333,
"step": 369000
},
{
"epoch": 56.21,
"learning_rate": 4.063205417607224e-05,
"loss": 2.0026,
"step": 373500
},
{
"epoch": 56.21,
"eval_loss": 2.2374625205993652,
"eval_runtime": 121.4591,
"eval_samples_per_second": 101.178,
"eval_steps_per_second": 6.331,
"step": 373500
},
{
"epoch": 56.88,
"learning_rate": 4.0519187358916484e-05,
"loss": 1.9989,
"step": 378000
},
{
"epoch": 56.88,
"eval_loss": 2.2266647815704346,
"eval_runtime": 121.4333,
"eval_samples_per_second": 101.2,
"eval_steps_per_second": 6.333,
"step": 378000
},
{
"epoch": 57.56,
"learning_rate": 4.040632054176072e-05,
"loss": 1.9917,
"step": 382500
},
{
"epoch": 57.56,
"eval_loss": 2.2338218688964844,
"eval_runtime": 121.4506,
"eval_samples_per_second": 101.185,
"eval_steps_per_second": 6.332,
"step": 382500
},
{
"epoch": 58.24,
"learning_rate": 4.029345372460497e-05,
"loss": 1.9869,
"step": 387000
},
{
"epoch": 58.24,
"eval_loss": 2.226421594619751,
"eval_runtime": 121.395,
"eval_samples_per_second": 101.232,
"eval_steps_per_second": 6.335,
"step": 387000
},
{
"epoch": 58.92,
"learning_rate": 4.018058690744921e-05,
"loss": 1.9855,
"step": 391500
},
{
"epoch": 58.92,
"eval_loss": 2.22316837310791,
"eval_runtime": 121.4361,
"eval_samples_per_second": 101.197,
"eval_steps_per_second": 6.333,
"step": 391500
},
{
"epoch": 59.59,
"learning_rate": 4.006772009029345e-05,
"loss": 1.975,
"step": 396000
},
{
"epoch": 59.59,
"eval_loss": 2.221580743789673,
"eval_runtime": 121.4217,
"eval_samples_per_second": 101.209,
"eval_steps_per_second": 6.333,
"step": 396000
},
{
"epoch": 60.27,
"learning_rate": 3.99548532731377e-05,
"loss": 1.9738,
"step": 400500
},
{
"epoch": 60.27,
"eval_loss": 2.2099127769470215,
"eval_runtime": 121.4414,
"eval_samples_per_second": 101.193,
"eval_steps_per_second": 6.332,
"step": 400500
},
{
"epoch": 60.95,
"learning_rate": 3.984198645598194e-05,
"loss": 1.9724,
"step": 405000
},
{
"epoch": 60.95,
"eval_loss": 2.217116355895996,
"eval_runtime": 121.4225,
"eval_samples_per_second": 101.209,
"eval_steps_per_second": 6.333,
"step": 405000
},
{
"epoch": 61.63,
"learning_rate": 3.972911963882618e-05,
"loss": 1.9643,
"step": 409500
},
{
"epoch": 61.63,
"eval_loss": 2.2091891765594482,
"eval_runtime": 121.3749,
"eval_samples_per_second": 101.248,
"eval_steps_per_second": 6.336,
"step": 409500
},
{
"epoch": 62.3,
"learning_rate": 3.961625282167043e-05,
"loss": 1.9582,
"step": 414000
},
{
"epoch": 62.3,
"eval_loss": 2.2050740718841553,
"eval_runtime": 121.3877,
"eval_samples_per_second": 101.238,
"eval_steps_per_second": 6.335,
"step": 414000
},
{
"epoch": 62.98,
"learning_rate": 3.950338600451467e-05,
"loss": 1.9596,
"step": 418500
},
{
"epoch": 62.98,
"eval_loss": 2.2095320224761963,
"eval_runtime": 121.3734,
"eval_samples_per_second": 101.25,
"eval_steps_per_second": 6.336,
"step": 418500
},
{
"epoch": 63.66,
"learning_rate": 3.939051918735892e-05,
"loss": 1.9491,
"step": 423000
},
{
"epoch": 63.66,
"eval_loss": 2.201195478439331,
"eval_runtime": 121.4462,
"eval_samples_per_second": 101.189,
"eval_steps_per_second": 6.332,
"step": 423000
},
{
"epoch": 64.33,
"learning_rate": 3.927765237020316e-05,
"loss": 1.9493,
"step": 427500
},
{
"epoch": 64.33,
"eval_loss": 2.1953182220458984,
"eval_runtime": 121.4109,
"eval_samples_per_second": 101.218,
"eval_steps_per_second": 6.334,
"step": 427500
},
{
"epoch": 65.01,
"learning_rate": 3.916478555304741e-05,
"loss": 1.946,
"step": 432000
},
{
"epoch": 65.01,
"eval_loss": 2.1867878437042236,
"eval_runtime": 119.4236,
"eval_samples_per_second": 102.903,
"eval_steps_per_second": 6.439,
"step": 432000
},
{
"epoch": 65.69,
"learning_rate": 3.9051918735891654e-05,
"loss": 1.9359,
"step": 436500
},
{
"epoch": 65.69,
"eval_loss": 2.1980998516082764,
"eval_runtime": 121.4109,
"eval_samples_per_second": 101.218,
"eval_steps_per_second": 6.334,
"step": 436500
},
{
"epoch": 66.37,
"learning_rate": 3.893905191873589e-05,
"loss": 1.9333,
"step": 441000
},
{
"epoch": 66.37,
"eval_loss": 2.1945624351501465,
"eval_runtime": 121.3328,
"eval_samples_per_second": 101.283,
"eval_steps_per_second": 6.338,
"step": 441000
},
{
"epoch": 67.04,
"learning_rate": 3.882618510158014e-05,
"loss": 1.9327,
"step": 445500
},
{
"epoch": 67.04,
"eval_loss": 2.1841721534729004,
"eval_runtime": 121.4364,
"eval_samples_per_second": 101.197,
"eval_steps_per_second": 6.333,
"step": 445500
},
{
"epoch": 67.72,
"learning_rate": 3.8713318284424384e-05,
"loss": 1.9243,
"step": 450000
},
{
"epoch": 67.72,
"eval_loss": 2.189509391784668,
"eval_runtime": 121.4244,
"eval_samples_per_second": 101.207,
"eval_steps_per_second": 6.333,
"step": 450000
},
{
"epoch": 68.4,
"learning_rate": 3.860045146726862e-05,
"loss": 1.9241,
"step": 454500
},
{
"epoch": 68.4,
"eval_loss": 2.170930862426758,
"eval_runtime": 121.4233,
"eval_samples_per_second": 101.208,
"eval_steps_per_second": 6.333,
"step": 454500
},
{
"epoch": 69.07,
"learning_rate": 3.848758465011287e-05,
"loss": 1.9183,
"step": 459000
},
{
"epoch": 69.07,
"eval_loss": 2.18388032913208,
"eval_runtime": 121.4294,
"eval_samples_per_second": 101.203,
"eval_steps_per_second": 6.333,
"step": 459000
},
{
"epoch": 69.75,
"learning_rate": 3.837471783295711e-05,
"loss": 1.916,
"step": 463500
},
{
"epoch": 69.75,
"eval_loss": 2.179508686065674,
"eval_runtime": 121.3882,
"eval_samples_per_second": 101.237,
"eval_steps_per_second": 6.335,
"step": 463500
},
{
"epoch": 70.43,
"learning_rate": 3.826185101580136e-05,
"loss": 1.9105,
"step": 468000
},
{
"epoch": 70.43,
"eval_loss": 2.1771745681762695,
"eval_runtime": 121.4394,
"eval_samples_per_second": 101.194,
"eval_steps_per_second": 6.332,
"step": 468000
},
{
"epoch": 71.11,
"learning_rate": 3.81489841986456e-05,
"loss": 1.9117,
"step": 472500
},
{
"epoch": 71.11,
"eval_loss": 2.181852340698242,
"eval_runtime": 121.3465,
"eval_samples_per_second": 101.272,
"eval_steps_per_second": 6.337,
"step": 472500
},
{
"epoch": 71.78,
"learning_rate": 3.803611738148984e-05,
"loss": 1.9041,
"step": 477000
},
{
"epoch": 71.78,
"eval_loss": 2.1662580966949463,
"eval_runtime": 121.4449,
"eval_samples_per_second": 101.19,
"eval_steps_per_second": 6.332,
"step": 477000
},
{
"epoch": 72.46,
"learning_rate": 3.792325056433409e-05,
"loss": 1.9021,
"step": 481500
},
{
"epoch": 72.46,
"eval_loss": 2.165590524673462,
"eval_runtime": 121.4569,
"eval_samples_per_second": 101.18,
"eval_steps_per_second": 6.331,
"step": 481500
},
{
"epoch": 73.14,
"learning_rate": 3.781038374717833e-05,
"loss": 1.8995,
"step": 486000
},
{
"epoch": 73.14,
"eval_loss": 2.1610703468322754,
"eval_runtime": 121.4207,
"eval_samples_per_second": 101.21,
"eval_steps_per_second": 6.333,
"step": 486000
},
{
"epoch": 73.81,
"learning_rate": 3.769751693002257e-05,
"loss": 1.896,
"step": 490500
},
{
"epoch": 73.81,
"eval_loss": 2.1603007316589355,
"eval_runtime": 121.3989,
"eval_samples_per_second": 101.228,
"eval_steps_per_second": 6.334,
"step": 490500
},
{
"epoch": 74.49,
"learning_rate": 3.758465011286682e-05,
"loss": 1.8901,
"step": 495000
},
{
"epoch": 74.49,
"eval_loss": 2.158705711364746,
"eval_runtime": 121.4675,
"eval_samples_per_second": 101.171,
"eval_steps_per_second": 6.331,
"step": 495000
},
{
"epoch": 75.17,
"learning_rate": 3.747178329571106e-05,
"loss": 1.8887,
"step": 499500
},
{
"epoch": 75.17,
"eval_loss": 2.152022123336792,
"eval_runtime": 121.4184,
"eval_samples_per_second": 101.212,
"eval_steps_per_second": 6.333,
"step": 499500
},
{
"epoch": 75.85,
"learning_rate": 3.735891647855531e-05,
"loss": 1.8855,
"step": 504000
},
{
"epoch": 75.85,
"eval_loss": 2.1545896530151367,
"eval_runtime": 121.4036,
"eval_samples_per_second": 101.224,
"eval_steps_per_second": 6.334,
"step": 504000
},
{
"epoch": 76.52,
"learning_rate": 3.7246049661399554e-05,
"loss": 1.8802,
"step": 508500
},
{
"epoch": 76.52,
"eval_loss": 2.1495370864868164,
"eval_runtime": 121.4898,
"eval_samples_per_second": 101.153,
"eval_steps_per_second": 6.33,
"step": 508500
},
{
"epoch": 77.2,
"learning_rate": 3.71331828442438e-05,
"loss": 1.8783,
"step": 513000
},
{
"epoch": 77.2,
"eval_loss": 2.1447861194610596,
"eval_runtime": 121.3997,
"eval_samples_per_second": 101.228,
"eval_steps_per_second": 6.334,
"step": 513000
},
{
"epoch": 77.88,
"learning_rate": 3.702031602708804e-05,
"loss": 1.8743,
"step": 517500
},
{
"epoch": 77.88,
"eval_loss": 2.1373064517974854,
"eval_runtime": 121.3755,
"eval_samples_per_second": 101.248,
"eval_steps_per_second": 6.336,
"step": 517500
},
{
"epoch": 78.56,
"learning_rate": 3.6907449209932284e-05,
"loss": 1.8679,
"step": 522000
},
{
"epoch": 78.56,
"eval_loss": 2.1462478637695312,
"eval_runtime": 121.3988,
"eval_samples_per_second": 101.228,
"eval_steps_per_second": 6.334,
"step": 522000
},
{
"epoch": 79.23,
"learning_rate": 3.679458239277653e-05,
"loss": 1.8681,
"step": 526500
},
{
"epoch": 79.23,
"eval_loss": 2.14402174949646,
"eval_runtime": 121.4163,
"eval_samples_per_second": 101.214,
"eval_steps_per_second": 6.334,
"step": 526500
},
{
"epoch": 79.91,
"learning_rate": 3.668171557562077e-05,
"loss": 1.8649,
"step": 531000
},
{
"epoch": 79.91,
"eval_loss": 2.1338822841644287,
"eval_runtime": 121.3916,
"eval_samples_per_second": 101.234,
"eval_steps_per_second": 6.335,
"step": 531000
},
{
"epoch": 80.59,
"learning_rate": 3.656884875846501e-05,
"loss": 1.8593,
"step": 535500
},
{
"epoch": 80.59,
"eval_loss": 2.139404296875,
"eval_runtime": 121.4277,
"eval_samples_per_second": 101.204,
"eval_steps_per_second": 6.333,
"step": 535500
},
{
"epoch": 81.26,
"learning_rate": 3.645598194130926e-05,
"loss": 1.8592,
"step": 540000
},
{
"epoch": 81.26,
"eval_loss": 2.1354503631591797,
"eval_runtime": 121.402,
"eval_samples_per_second": 101.226,
"eval_steps_per_second": 6.334,
"step": 540000
},
{
"epoch": 81.94,
"learning_rate": 3.63431151241535e-05,
"loss": 1.8569,
"step": 544500
},
{
"epoch": 81.94,
"eval_loss": 2.135469436645508,
"eval_runtime": 121.3789,
"eval_samples_per_second": 101.245,
"eval_steps_per_second": 6.336,
"step": 544500
},
{
"epoch": 82.62,
"learning_rate": 3.623024830699774e-05,
"loss": 1.849,
"step": 549000
},
{
"epoch": 82.62,
"eval_loss": 2.1346044540405273,
"eval_runtime": 121.4745,
"eval_samples_per_second": 101.165,
"eval_steps_per_second": 6.331,
"step": 549000
},
{
"epoch": 83.3,
"learning_rate": 3.611738148984199e-05,
"loss": 1.8481,
"step": 553500
},
{
"epoch": 83.3,
"eval_loss": 2.1314146518707275,
"eval_runtime": 121.3262,
"eval_samples_per_second": 101.289,
"eval_steps_per_second": 6.338,
"step": 553500
},
{
"epoch": 83.97,
"learning_rate": 3.600451467268623e-05,
"loss": 1.8499,
"step": 558000
},
{
"epoch": 83.97,
"eval_loss": 2.126936197280884,
"eval_runtime": 121.4278,
"eval_samples_per_second": 101.204,
"eval_steps_per_second": 6.333,
"step": 558000
},
{
"epoch": 84.65,
"learning_rate": 3.589164785553047e-05,
"loss": 1.8394,
"step": 562500
},
{
"epoch": 84.65,
"eval_loss": 2.118168592453003,
"eval_runtime": 121.4516,
"eval_samples_per_second": 101.184,
"eval_steps_per_second": 6.332,
"step": 562500
},
{
"epoch": 85.33,
"learning_rate": 3.577878103837472e-05,
"loss": 1.8394,
"step": 567000
},
{
"epoch": 85.33,
"eval_loss": 2.1206483840942383,
"eval_runtime": 121.4259,
"eval_samples_per_second": 101.206,
"eval_steps_per_second": 6.333,
"step": 567000
},
{
"epoch": 86.0,
"learning_rate": 3.566591422121896e-05,
"loss": 1.8408,
"step": 571500
},
{
"epoch": 86.0,
"eval_loss": 2.1219327449798584,
"eval_runtime": 118.4077,
"eval_samples_per_second": 103.785,
"eval_steps_per_second": 6.495,
"step": 571500
},
{
"epoch": 86.68,
"learning_rate": 3.555304740406321e-05,
"loss": 1.832,
"step": 576000
},
{
"epoch": 86.68,
"eval_loss": 2.1104817390441895,
"eval_runtime": 118.1444,
"eval_samples_per_second": 104.017,
"eval_steps_per_second": 6.509,
"step": 576000
},
{
"epoch": 87.36,
"learning_rate": 3.5440180586907454e-05,
"loss": 1.8305,
"step": 580500
},
{
"epoch": 87.36,
"eval_loss": 2.1246631145477295,
"eval_runtime": 118.1028,
"eval_samples_per_second": 104.053,
"eval_steps_per_second": 6.511,
"step": 580500
},
{
"epoch": 88.04,
"learning_rate": 3.53273137697517e-05,
"loss": 1.8333,
"step": 585000
},
{
"epoch": 88.04,
"eval_loss": 2.1201488971710205,
"eval_runtime": 118.0958,
"eval_samples_per_second": 104.06,
"eval_steps_per_second": 6.512,
"step": 585000
},
{
"epoch": 88.71,
"learning_rate": 3.521444695259594e-05,
"loss": 1.8227,
"step": 589500
},
{
"epoch": 88.71,
"eval_loss": 2.104985475540161,
"eval_runtime": 118.0975,
"eval_samples_per_second": 104.058,
"eval_steps_per_second": 6.512,
"step": 589500
},
{
"epoch": 89.39,
"learning_rate": 3.5101580135440183e-05,
"loss": 1.8216,
"step": 594000
},
{
"epoch": 89.39,
"eval_loss": 2.1118545532226562,
"eval_runtime": 118.0937,
"eval_samples_per_second": 104.061,
"eval_steps_per_second": 6.512,
"step": 594000
},
{
"epoch": 90.07,
"learning_rate": 3.498871331828443e-05,
"loss": 1.8234,
"step": 598500
},
{
"epoch": 90.07,
"eval_loss": 2.109297037124634,
"eval_runtime": 118.1025,
"eval_samples_per_second": 104.054,
"eval_steps_per_second": 6.511,
"step": 598500
},
{
"epoch": 90.74,
"learning_rate": 3.487584650112867e-05,
"loss": 1.8162,
"step": 603000
},
{
"epoch": 90.74,
"eval_loss": 2.0999834537506104,
"eval_runtime": 118.0968,
"eval_samples_per_second": 104.059,
"eval_steps_per_second": 6.512,
"step": 603000
},
{
"epoch": 91.42,
"learning_rate": 3.476297968397291e-05,
"loss": 1.8153,
"step": 607500
},
{
"epoch": 91.42,
"eval_loss": 2.110783576965332,
"eval_runtime": 118.1749,
"eval_samples_per_second": 103.99,
"eval_steps_per_second": 6.507,
"step": 607500
},
{
"epoch": 92.1,
"learning_rate": 3.465011286681716e-05,
"loss": 1.8153,
"step": 612000
},
{
"epoch": 92.1,
"eval_loss": 2.1009647846221924,
"eval_runtime": 118.1986,
"eval_samples_per_second": 103.969,
"eval_steps_per_second": 6.506,
"step": 612000
},
{
"epoch": 92.78,
"learning_rate": 3.4537246049661404e-05,
"loss": 1.8095,
"step": 616500
},
{
"epoch": 92.78,
"eval_loss": 2.0992209911346436,
"eval_runtime": 118.1834,
"eval_samples_per_second": 103.982,
"eval_steps_per_second": 6.507,
"step": 616500
},
{
"epoch": 93.45,
"learning_rate": 3.442437923250564e-05,
"loss": 1.807,
"step": 621000
},
{
"epoch": 93.45,
"eval_loss": 2.098292827606201,
"eval_runtime": 118.1816,
"eval_samples_per_second": 103.984,
"eval_steps_per_second": 6.507,
"step": 621000
},
{
"epoch": 94.13,
"learning_rate": 3.431151241534989e-05,
"loss": 1.805,
"step": 625500
},
{
"epoch": 94.13,
"eval_loss": 2.0988106727600098,
"eval_runtime": 118.1659,
"eval_samples_per_second": 103.998,
"eval_steps_per_second": 6.508,
"step": 625500
},
{
"epoch": 94.81,
"learning_rate": 3.4198645598194133e-05,
"loss": 1.8015,
"step": 630000
},
{
"epoch": 94.81,
"eval_loss": 2.0965840816497803,
"eval_runtime": 118.1678,
"eval_samples_per_second": 103.996,
"eval_steps_per_second": 6.508,
"step": 630000
},
{
"epoch": 95.49,
"learning_rate": 3.408577878103837e-05,
"loss": 1.7964,
"step": 634500
},
{
"epoch": 95.49,
"eval_loss": 2.0856127738952637,
"eval_runtime": 118.3087,
"eval_samples_per_second": 103.872,
"eval_steps_per_second": 6.5,
"step": 634500
},
{
"epoch": 96.16,
"learning_rate": 3.397291196388262e-05,
"loss": 1.7988,
"step": 639000
},
{
"epoch": 96.16,
"eval_loss": 2.0870988368988037,
"eval_runtime": 118.2923,
"eval_samples_per_second": 103.887,
"eval_steps_per_second": 6.501,
"step": 639000
},
{
"epoch": 96.84,
"learning_rate": 3.386004514672686e-05,
"loss": 1.794,
"step": 643500
},
{
"epoch": 96.84,
"eval_loss": 2.092172861099243,
"eval_runtime": 118.3047,
"eval_samples_per_second": 103.876,
"eval_steps_per_second": 6.5,
"step": 643500
},
{
"epoch": 97.52,
"learning_rate": 3.374717832957111e-05,
"loss": 1.7917,
"step": 648000
},
{
"epoch": 97.52,
"eval_loss": 2.0861566066741943,
"eval_runtime": 118.2327,
"eval_samples_per_second": 103.939,
"eval_steps_per_second": 6.504,
"step": 648000
},
{
"epoch": 98.19,
"learning_rate": 3.3634311512415354e-05,
"loss": 1.79,
"step": 652500
},
{
"epoch": 98.19,
"eval_loss": 2.0844566822052,
"eval_runtime": 118.1297,
"eval_samples_per_second": 104.03,
"eval_steps_per_second": 6.51,
"step": 652500
},
{
"epoch": 98.87,
"learning_rate": 3.35214446952596e-05,
"loss": 1.788,
"step": 657000
},
{
"epoch": 98.87,
"eval_loss": 2.0832607746124268,
"eval_runtime": 118.1145,
"eval_samples_per_second": 104.043,
"eval_steps_per_second": 6.511,
"step": 657000
},
{
"epoch": 99.55,
"learning_rate": 3.3408577878103845e-05,
"loss": 1.7833,
"step": 661500
},
{
"epoch": 99.55,
"eval_loss": 2.082475185394287,
"eval_runtime": 118.0929,
"eval_samples_per_second": 104.062,
"eval_steps_per_second": 6.512,
"step": 661500
},
{
"epoch": 100.23,
"learning_rate": 3.3295711060948083e-05,
"loss": 1.7821,
"step": 666000
},
{
"epoch": 100.23,
"eval_loss": 2.0830888748168945,
"eval_runtime": 118.1331,
"eval_samples_per_second": 104.027,
"eval_steps_per_second": 6.51,
"step": 666000
},
{
"epoch": 100.9,
"learning_rate": 3.318284424379233e-05,
"loss": 1.7809,
"step": 670500
},
{
"epoch": 100.9,
"eval_loss": 2.080984115600586,
"eval_runtime": 118.0711,
"eval_samples_per_second": 104.081,
"eval_steps_per_second": 6.513,
"step": 670500
},
{
"epoch": 101.58,
"learning_rate": 3.3069977426636574e-05,
"loss": 1.7757,
"step": 675000
},
{
"epoch": 101.58,
"eval_loss": 2.078061819076538,
"eval_runtime": 118.0799,
"eval_samples_per_second": 104.074,
"eval_steps_per_second": 6.513,
"step": 675000
},
{
"epoch": 102.26,
"learning_rate": 3.295711060948081e-05,
"loss": 1.7737,
"step": 679500
},
{
"epoch": 102.26,
"eval_loss": 2.079832077026367,
"eval_runtime": 118.3274,
"eval_samples_per_second": 103.856,
"eval_steps_per_second": 6.499,
"step": 679500
},
{
"epoch": 102.93,
"learning_rate": 3.284424379232506e-05,
"loss": 1.7738,
"step": 684000
},
{
"epoch": 102.93,
"eval_loss": 2.0702972412109375,
"eval_runtime": 118.0753,
"eval_samples_per_second": 104.078,
"eval_steps_per_second": 6.513,
"step": 684000
},
{
"epoch": 103.61,
"learning_rate": 3.2731376975169304e-05,
"loss": 1.7701,
"step": 688500
},
{
"epoch": 103.61,
"eval_loss": 2.0662286281585693,
"eval_runtime": 118.9742,
"eval_samples_per_second": 103.291,
"eval_steps_per_second": 6.464,
"step": 688500
},
{
"epoch": 104.29,
"learning_rate": 3.261851015801354e-05,
"loss": 1.7682,
"step": 693000
},
{
"epoch": 104.29,
"eval_loss": 2.070204973220825,
"eval_runtime": 118.3434,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 6.498,
"step": 693000
},
{
"epoch": 104.97,
"learning_rate": 3.250564334085779e-05,
"loss": 1.7669,
"step": 697500
},
{
"epoch": 104.97,
"eval_loss": 2.063176155090332,
"eval_runtime": 118.357,
"eval_samples_per_second": 103.83,
"eval_steps_per_second": 6.497,
"step": 697500
},
{
"epoch": 105.64,
"learning_rate": 3.239277652370203e-05,
"loss": 1.7616,
"step": 702000
},
{
"epoch": 105.64,
"eval_loss": 2.067533254623413,
"eval_runtime": 118.3349,
"eval_samples_per_second": 103.849,
"eval_steps_per_second": 6.499,
"step": 702000
},
{
"epoch": 106.32,
"learning_rate": 3.227990970654628e-05,
"loss": 1.7623,
"step": 706500
},
{
"epoch": 106.32,
"eval_loss": 2.0670344829559326,
"eval_runtime": 118.3389,
"eval_samples_per_second": 103.846,
"eval_steps_per_second": 6.498,
"step": 706500
},
{
"epoch": 107.0,
"learning_rate": 3.216704288939052e-05,
"loss": 1.7623,
"step": 711000
},
{
"epoch": 107.0,
"eval_loss": 2.0591020584106445,
"eval_runtime": 118.3407,
"eval_samples_per_second": 103.844,
"eval_steps_per_second": 6.498,
"step": 711000
},
{
"epoch": 107.67,
"learning_rate": 3.205417607223476e-05,
"loss": 1.7553,
"step": 715500
},
{
"epoch": 107.67,
"eval_loss": 2.0580272674560547,
"eval_runtime": 118.3269,
"eval_samples_per_second": 103.856,
"eval_steps_per_second": 6.499,
"step": 715500
},
{
"epoch": 108.35,
"learning_rate": 3.194130925507901e-05,
"loss": 1.753,
"step": 720000
},
{
"epoch": 108.35,
"eval_loss": 2.0603325366973877,
"eval_runtime": 118.3271,
"eval_samples_per_second": 103.856,
"eval_steps_per_second": 6.499,
"step": 720000
},
{
"epoch": 109.03,
"learning_rate": 3.1828442437923254e-05,
"loss": 1.7549,
"step": 724500
},
{
"epoch": 109.03,
"eval_loss": 2.0661327838897705,
"eval_runtime": 118.3395,
"eval_samples_per_second": 103.845,
"eval_steps_per_second": 6.498,
"step": 724500
},
{
"epoch": 109.71,
"learning_rate": 3.17155756207675e-05,
"loss": 1.7474,
"step": 729000
},
{
"epoch": 109.71,
"eval_loss": 2.0576255321502686,
"eval_runtime": 118.3567,
"eval_samples_per_second": 103.83,
"eval_steps_per_second": 6.497,
"step": 729000
},
{
"epoch": 110.38,
"learning_rate": 3.1602708803611745e-05,
"loss": 1.7504,
"step": 733500
},
{
"epoch": 110.38,
"eval_loss": 2.0617053508758545,
"eval_runtime": 118.3259,
"eval_samples_per_second": 103.857,
"eval_steps_per_second": 6.499,
"step": 733500
},
{
"epoch": 111.06,
"learning_rate": 3.148984198645598e-05,
"loss": 1.7464,
"step": 738000
},
{
"epoch": 111.06,
"eval_loss": 2.049917459487915,
"eval_runtime": 118.3445,
"eval_samples_per_second": 103.841,
"eval_steps_per_second": 6.498,
"step": 738000
},
{
"epoch": 111.74,
"learning_rate": 3.137697516930023e-05,
"loss": 1.7432,
"step": 742500
},
{
"epoch": 111.74,
"eval_loss": 2.056652069091797,
"eval_runtime": 118.3587,
"eval_samples_per_second": 103.828,
"eval_steps_per_second": 6.497,
"step": 742500
},
{
"epoch": 112.42,
"learning_rate": 3.1264108352144474e-05,
"loss": 1.7404,
"step": 747000
},
{
"epoch": 112.42,
"eval_loss": 2.0593619346618652,
"eval_runtime": 118.3421,
"eval_samples_per_second": 103.843,
"eval_steps_per_second": 6.498,
"step": 747000
},
{
"epoch": 113.09,
"learning_rate": 3.115124153498871e-05,
"loss": 1.7438,
"step": 751500
},
{
"epoch": 113.09,
"eval_loss": 2.045955181121826,
"eval_runtime": 118.325,
"eval_samples_per_second": 103.858,
"eval_steps_per_second": 6.499,
"step": 751500
},
{
"epoch": 113.77,
"learning_rate": 3.103837471783296e-05,
"loss": 1.7375,
"step": 756000
},
{
"epoch": 113.77,
"eval_loss": 2.055434465408325,
"eval_runtime": 118.3498,
"eval_samples_per_second": 103.836,
"eval_steps_per_second": 6.498,
"step": 756000
},
{
"epoch": 114.45,
"learning_rate": 3.0925507900677204e-05,
"loss": 1.7336,
"step": 760500
},
{
"epoch": 114.45,
"eval_loss": 2.040349006652832,
"eval_runtime": 118.3546,
"eval_samples_per_second": 103.832,
"eval_steps_per_second": 6.497,
"step": 760500
},
{
"epoch": 115.12,
"learning_rate": 3.081264108352145e-05,
"loss": 1.7344,
"step": 765000
},
{
"epoch": 115.12,
"eval_loss": 2.054170846939087,
"eval_runtime": 118.3514,
"eval_samples_per_second": 103.835,
"eval_steps_per_second": 6.498,
"step": 765000
},
{
"epoch": 115.8,
"learning_rate": 3.069977426636569e-05,
"loss": 1.7324,
"step": 769500
},
{
"epoch": 115.8,
"eval_loss": 2.042388439178467,
"eval_runtime": 118.3282,
"eval_samples_per_second": 103.855,
"eval_steps_per_second": 6.499,
"step": 769500
},
{
"epoch": 116.48,
"learning_rate": 3.058690744920993e-05,
"loss": 1.7255,
"step": 774000
},
{
"epoch": 116.48,
"eval_loss": 2.0501296520233154,
"eval_runtime": 118.1548,
"eval_samples_per_second": 104.008,
"eval_steps_per_second": 6.508,
"step": 774000
}
],
"max_steps": 1993500,
"num_train_epochs": 300,
"total_flos": 6.520019673893634e+18,
"trial_name": null,
"trial_params": null
}