1.24.3.1 / trainer_state.json
nferruz's picture
Upload 13 files
b39a264
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 565,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 7.92920353982301e-05,
"loss": 2.8813,
"step": 5
},
{
"epoch": 0.09,
"learning_rate": 7.858407079646018e-05,
"loss": 1.9089,
"step": 10
},
{
"epoch": 0.09,
"eval_loss": 0.9185810685157776,
"eval_runtime": 1.818,
"eval_samples_per_second": 6.601,
"eval_steps_per_second": 1.65,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 7.787610619469027e-05,
"loss": 1.0124,
"step": 15
},
{
"epoch": 0.18,
"learning_rate": 7.716814159292036e-05,
"loss": 0.6625,
"step": 20
},
{
"epoch": 0.18,
"eval_loss": 0.5026406645774841,
"eval_runtime": 1.8132,
"eval_samples_per_second": 6.618,
"eval_steps_per_second": 1.655,
"step": 20
},
{
"epoch": 0.22,
"learning_rate": 7.646017699115045e-05,
"loss": 0.7557,
"step": 25
},
{
"epoch": 0.27,
"learning_rate": 7.575221238938054e-05,
"loss": 0.6228,
"step": 30
},
{
"epoch": 0.27,
"eval_loss": 0.42135417461395264,
"eval_runtime": 1.8168,
"eval_samples_per_second": 6.605,
"eval_steps_per_second": 1.651,
"step": 30
},
{
"epoch": 0.31,
"learning_rate": 7.504424778761063e-05,
"loss": 0.5134,
"step": 35
},
{
"epoch": 0.35,
"learning_rate": 7.433628318584072e-05,
"loss": 0.6733,
"step": 40
},
{
"epoch": 0.35,
"eval_loss": 0.3993542194366455,
"eval_runtime": 1.8194,
"eval_samples_per_second": 6.595,
"eval_steps_per_second": 1.649,
"step": 40
},
{
"epoch": 0.4,
"learning_rate": 7.362831858407081e-05,
"loss": 0.5836,
"step": 45
},
{
"epoch": 0.44,
"learning_rate": 7.292035398230088e-05,
"loss": 0.5581,
"step": 50
},
{
"epoch": 0.44,
"eval_loss": 0.33813050389289856,
"eval_runtime": 1.8219,
"eval_samples_per_second": 6.586,
"eval_steps_per_second": 1.647,
"step": 50
},
{
"epoch": 0.49,
"learning_rate": 7.221238938053097e-05,
"loss": 0.4934,
"step": 55
},
{
"epoch": 0.53,
"learning_rate": 7.150442477876106e-05,
"loss": 0.3853,
"step": 60
},
{
"epoch": 0.53,
"eval_loss": 0.3289722502231598,
"eval_runtime": 1.8241,
"eval_samples_per_second": 6.579,
"eval_steps_per_second": 1.645,
"step": 60
},
{
"epoch": 0.58,
"learning_rate": 7.079646017699116e-05,
"loss": 0.4525,
"step": 65
},
{
"epoch": 0.62,
"learning_rate": 7.008849557522125e-05,
"loss": 0.4146,
"step": 70
},
{
"epoch": 0.62,
"eval_loss": 0.2982443571090698,
"eval_runtime": 1.8249,
"eval_samples_per_second": 6.576,
"eval_steps_per_second": 1.644,
"step": 70
},
{
"epoch": 0.66,
"learning_rate": 6.938053097345134e-05,
"loss": 0.483,
"step": 75
},
{
"epoch": 0.71,
"learning_rate": 6.867256637168142e-05,
"loss": 0.4702,
"step": 80
},
{
"epoch": 0.71,
"eval_loss": 0.28516504168510437,
"eval_runtime": 1.8265,
"eval_samples_per_second": 6.57,
"eval_steps_per_second": 1.642,
"step": 80
},
{
"epoch": 0.75,
"learning_rate": 6.79646017699115e-05,
"loss": 0.3644,
"step": 85
},
{
"epoch": 0.8,
"learning_rate": 6.72566371681416e-05,
"loss": 0.2309,
"step": 90
},
{
"epoch": 0.8,
"eval_loss": 0.3017539978027344,
"eval_runtime": 1.8277,
"eval_samples_per_second": 6.566,
"eval_steps_per_second": 1.641,
"step": 90
},
{
"epoch": 0.84,
"learning_rate": 6.654867256637168e-05,
"loss": 0.3725,
"step": 95
},
{
"epoch": 0.88,
"learning_rate": 6.584070796460177e-05,
"loss": 0.4707,
"step": 100
},
{
"epoch": 0.88,
"eval_loss": 0.2674517333507538,
"eval_runtime": 1.8288,
"eval_samples_per_second": 6.562,
"eval_steps_per_second": 1.64,
"step": 100
},
{
"epoch": 0.93,
"learning_rate": 6.513274336283187e-05,
"loss": 0.2723,
"step": 105
},
{
"epoch": 0.97,
"learning_rate": 6.442477876106195e-05,
"loss": 0.3001,
"step": 110
},
{
"epoch": 0.97,
"eval_loss": 0.25267747044563293,
"eval_runtime": 1.8294,
"eval_samples_per_second": 6.56,
"eval_steps_per_second": 1.64,
"step": 110
},
{
"epoch": 1.02,
"learning_rate": 6.371681415929204e-05,
"loss": 0.2697,
"step": 115
},
{
"epoch": 1.06,
"learning_rate": 6.300884955752213e-05,
"loss": 0.4044,
"step": 120
},
{
"epoch": 1.06,
"eval_loss": 0.25363221764564514,
"eval_runtime": 1.8299,
"eval_samples_per_second": 6.558,
"eval_steps_per_second": 1.639,
"step": 120
},
{
"epoch": 1.11,
"learning_rate": 6.230088495575222e-05,
"loss": 0.2452,
"step": 125
},
{
"epoch": 1.15,
"learning_rate": 6.15929203539823e-05,
"loss": 0.3605,
"step": 130
},
{
"epoch": 1.15,
"eval_loss": 0.24785174429416656,
"eval_runtime": 1.8306,
"eval_samples_per_second": 6.555,
"eval_steps_per_second": 1.639,
"step": 130
},
{
"epoch": 1.19,
"learning_rate": 6.0884955752212394e-05,
"loss": 0.2825,
"step": 135
},
{
"epoch": 1.24,
"learning_rate": 6.0176991150442476e-05,
"loss": 0.2309,
"step": 140
},
{
"epoch": 1.24,
"eval_loss": 0.23038876056671143,
"eval_runtime": 1.8337,
"eval_samples_per_second": 6.544,
"eval_steps_per_second": 1.636,
"step": 140
},
{
"epoch": 1.28,
"learning_rate": 5.946902654867257e-05,
"loss": 0.4162,
"step": 145
},
{
"epoch": 1.33,
"learning_rate": 5.876106194690266e-05,
"loss": 0.2481,
"step": 150
},
{
"epoch": 1.33,
"eval_loss": 0.2184617966413498,
"eval_runtime": 1.8365,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.634,
"step": 150
},
{
"epoch": 1.37,
"learning_rate": 5.805309734513275e-05,
"loss": 0.1906,
"step": 155
},
{
"epoch": 1.42,
"learning_rate": 5.734513274336284e-05,
"loss": 0.3251,
"step": 160
},
{
"epoch": 1.42,
"eval_loss": 0.2109968066215515,
"eval_runtime": 1.8352,
"eval_samples_per_second": 6.539,
"eval_steps_per_second": 1.635,
"step": 160
},
{
"epoch": 1.46,
"learning_rate": 5.663716814159293e-05,
"loss": 0.1837,
"step": 165
},
{
"epoch": 1.5,
"learning_rate": 5.592920353982301e-05,
"loss": 0.227,
"step": 170
},
{
"epoch": 1.5,
"eval_loss": 0.21275126934051514,
"eval_runtime": 1.8352,
"eval_samples_per_second": 6.539,
"eval_steps_per_second": 1.635,
"step": 170
},
{
"epoch": 1.55,
"learning_rate": 5.52212389380531e-05,
"loss": 0.2545,
"step": 175
},
{
"epoch": 1.59,
"learning_rate": 5.451327433628319e-05,
"loss": 0.238,
"step": 180
},
{
"epoch": 1.59,
"eval_loss": 0.2064710259437561,
"eval_runtime": 1.8347,
"eval_samples_per_second": 6.541,
"eval_steps_per_second": 1.635,
"step": 180
},
{
"epoch": 1.64,
"learning_rate": 5.380530973451328e-05,
"loss": 0.2433,
"step": 185
},
{
"epoch": 1.68,
"learning_rate": 5.309734513274337e-05,
"loss": 0.2171,
"step": 190
},
{
"epoch": 1.68,
"eval_loss": 0.2167491912841797,
"eval_runtime": 1.8363,
"eval_samples_per_second": 6.535,
"eval_steps_per_second": 1.634,
"step": 190
},
{
"epoch": 1.73,
"learning_rate": 5.2389380530973454e-05,
"loss": 0.2056,
"step": 195
},
{
"epoch": 1.77,
"learning_rate": 5.168141592920354e-05,
"loss": 0.2844,
"step": 200
},
{
"epoch": 1.77,
"eval_loss": 0.20672880113124847,
"eval_runtime": 1.836,
"eval_samples_per_second": 6.536,
"eval_steps_per_second": 1.634,
"step": 200
},
{
"epoch": 1.81,
"learning_rate": 5.097345132743363e-05,
"loss": 0.1808,
"step": 205
},
{
"epoch": 1.86,
"learning_rate": 5.026548672566372e-05,
"loss": 0.2822,
"step": 210
},
{
"epoch": 1.86,
"eval_loss": 0.2064841240644455,
"eval_runtime": 1.8357,
"eval_samples_per_second": 6.537,
"eval_steps_per_second": 1.634,
"step": 210
},
{
"epoch": 1.9,
"learning_rate": 4.955752212389381e-05,
"loss": 0.2273,
"step": 215
},
{
"epoch": 1.95,
"learning_rate": 4.884955752212389e-05,
"loss": 0.2111,
"step": 220
},
{
"epoch": 1.95,
"eval_loss": 0.20206451416015625,
"eval_runtime": 1.8356,
"eval_samples_per_second": 6.537,
"eval_steps_per_second": 1.634,
"step": 220
},
{
"epoch": 1.99,
"learning_rate": 4.814159292035399e-05,
"loss": 0.2043,
"step": 225
},
{
"epoch": 2.04,
"learning_rate": 4.7433628318584076e-05,
"loss": 0.1915,
"step": 230
},
{
"epoch": 2.04,
"eval_loss": 0.21362699568271637,
"eval_runtime": 1.8362,
"eval_samples_per_second": 6.535,
"eval_steps_per_second": 1.634,
"step": 230
},
{
"epoch": 2.08,
"learning_rate": 4.6725663716814165e-05,
"loss": 0.1787,
"step": 235
},
{
"epoch": 2.12,
"learning_rate": 4.6017699115044254e-05,
"loss": 0.122,
"step": 240
},
{
"epoch": 2.12,
"eval_loss": 0.22454655170440674,
"eval_runtime": 1.837,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 240
},
{
"epoch": 2.17,
"learning_rate": 4.5309734513274336e-05,
"loss": 0.1402,
"step": 245
},
{
"epoch": 2.21,
"learning_rate": 4.4601769911504425e-05,
"loss": 0.1845,
"step": 250
},
{
"epoch": 2.21,
"eval_loss": 0.20348918437957764,
"eval_runtime": 1.837,
"eval_samples_per_second": 6.532,
"eval_steps_per_second": 1.633,
"step": 250
},
{
"epoch": 2.26,
"learning_rate": 4.3893805309734514e-05,
"loss": 0.1514,
"step": 255
},
{
"epoch": 2.3,
"learning_rate": 4.31858407079646e-05,
"loss": 0.1597,
"step": 260
},
{
"epoch": 2.3,
"eval_loss": 0.1980418711900711,
"eval_runtime": 1.8389,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 260
},
{
"epoch": 2.35,
"learning_rate": 4.24778761061947e-05,
"loss": 0.1923,
"step": 265
},
{
"epoch": 2.39,
"learning_rate": 4.176991150442479e-05,
"loss": 0.1037,
"step": 270
},
{
"epoch": 2.39,
"eval_loss": 0.19392161071300507,
"eval_runtime": 1.8366,
"eval_samples_per_second": 6.534,
"eval_steps_per_second": 1.633,
"step": 270
},
{
"epoch": 2.43,
"learning_rate": 4.106194690265487e-05,
"loss": 0.1511,
"step": 275
},
{
"epoch": 2.48,
"learning_rate": 4.035398230088496e-05,
"loss": 0.109,
"step": 280
},
{
"epoch": 2.48,
"eval_loss": 0.19458027184009552,
"eval_runtime": 1.8368,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 280
},
{
"epoch": 2.52,
"learning_rate": 3.964601769911505e-05,
"loss": 0.1201,
"step": 285
},
{
"epoch": 2.57,
"learning_rate": 3.893805309734514e-05,
"loss": 0.1312,
"step": 290
},
{
"epoch": 2.57,
"eval_loss": 0.19362002611160278,
"eval_runtime": 1.8369,
"eval_samples_per_second": 6.533,
"eval_steps_per_second": 1.633,
"step": 290
},
{
"epoch": 2.61,
"learning_rate": 3.8230088495575226e-05,
"loss": 0.1315,
"step": 295
},
{
"epoch": 2.65,
"learning_rate": 3.7522123893805314e-05,
"loss": 0.2261,
"step": 300
},
{
"epoch": 2.65,
"eval_loss": 0.1917983889579773,
"eval_runtime": 1.8378,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.632,
"step": 300
},
{
"epoch": 2.7,
"learning_rate": 3.6814159292035403e-05,
"loss": 0.1398,
"step": 305
},
{
"epoch": 2.74,
"learning_rate": 3.6106194690265486e-05,
"loss": 0.113,
"step": 310
},
{
"epoch": 2.74,
"eval_loss": 0.1863226443529129,
"eval_runtime": 1.838,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 310
},
{
"epoch": 2.79,
"learning_rate": 3.539823008849558e-05,
"loss": 0.1173,
"step": 315
},
{
"epoch": 2.83,
"learning_rate": 3.469026548672567e-05,
"loss": 0.1762,
"step": 320
},
{
"epoch": 2.83,
"eval_loss": 0.1789919137954712,
"eval_runtime": 1.8392,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 320
},
{
"epoch": 2.88,
"learning_rate": 3.398230088495575e-05,
"loss": 0.1439,
"step": 325
},
{
"epoch": 2.92,
"learning_rate": 3.327433628318584e-05,
"loss": 0.1431,
"step": 330
},
{
"epoch": 2.92,
"eval_loss": 0.17829616367816925,
"eval_runtime": 1.8377,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.632,
"step": 330
},
{
"epoch": 2.96,
"learning_rate": 3.256637168141594e-05,
"loss": 0.2121,
"step": 335
},
{
"epoch": 3.01,
"learning_rate": 3.185840707964602e-05,
"loss": 0.2109,
"step": 340
},
{
"epoch": 3.01,
"eval_loss": 0.1760822981595993,
"eval_runtime": 1.838,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 340
},
{
"epoch": 3.05,
"learning_rate": 3.115044247787611e-05,
"loss": 0.0792,
"step": 345
},
{
"epoch": 3.1,
"learning_rate": 3.0442477876106197e-05,
"loss": 0.0885,
"step": 350
},
{
"epoch": 3.1,
"eval_loss": 0.18443678319454193,
"eval_runtime": 1.8392,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 1.631,
"step": 350
},
{
"epoch": 3.14,
"learning_rate": 2.9734513274336286e-05,
"loss": 0.088,
"step": 355
},
{
"epoch": 3.19,
"learning_rate": 2.9026548672566375e-05,
"loss": 0.0647,
"step": 360
},
{
"epoch": 3.19,
"eval_loss": 0.19218747317790985,
"eval_runtime": 1.8398,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.631,
"step": 360
},
{
"epoch": 3.23,
"learning_rate": 2.8318584070796464e-05,
"loss": 0.0756,
"step": 365
},
{
"epoch": 3.27,
"learning_rate": 2.761061946902655e-05,
"loss": 0.126,
"step": 370
},
{
"epoch": 3.27,
"eval_loss": 0.190928652882576,
"eval_runtime": 1.8379,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 370
},
{
"epoch": 3.32,
"learning_rate": 2.690265486725664e-05,
"loss": 0.0702,
"step": 375
},
{
"epoch": 3.36,
"learning_rate": 2.6194690265486727e-05,
"loss": 0.0965,
"step": 380
},
{
"epoch": 3.36,
"eval_loss": 0.18783879280090332,
"eval_runtime": 1.838,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 380
},
{
"epoch": 3.41,
"learning_rate": 2.5486725663716816e-05,
"loss": 0.085,
"step": 385
},
{
"epoch": 3.45,
"learning_rate": 2.4778761061946905e-05,
"loss": 0.1068,
"step": 390
},
{
"epoch": 3.45,
"eval_loss": 0.19145984947681427,
"eval_runtime": 1.8387,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.632,
"step": 390
},
{
"epoch": 3.5,
"learning_rate": 2.4070796460176994e-05,
"loss": 0.0922,
"step": 395
},
{
"epoch": 3.54,
"learning_rate": 2.3362831858407083e-05,
"loss": 0.0973,
"step": 400
},
{
"epoch": 3.54,
"eval_loss": 0.18135036528110504,
"eval_runtime": 1.8379,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 400
},
{
"epoch": 3.58,
"learning_rate": 2.2654867256637168e-05,
"loss": 0.0887,
"step": 405
},
{
"epoch": 3.63,
"learning_rate": 2.1946902654867257e-05,
"loss": 0.074,
"step": 410
},
{
"epoch": 3.63,
"eval_loss": 0.18348699808120728,
"eval_runtime": 1.8391,
"eval_samples_per_second": 6.525,
"eval_steps_per_second": 1.631,
"step": 410
},
{
"epoch": 3.67,
"learning_rate": 2.123893805309735e-05,
"loss": 0.0987,
"step": 415
},
{
"epoch": 3.72,
"learning_rate": 2.0530973451327435e-05,
"loss": 0.0899,
"step": 420
},
{
"epoch": 3.72,
"eval_loss": 0.18212918937206268,
"eval_runtime": 1.8382,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 420
},
{
"epoch": 3.76,
"learning_rate": 1.9823008849557524e-05,
"loss": 0.0738,
"step": 425
},
{
"epoch": 3.81,
"learning_rate": 1.9115044247787613e-05,
"loss": 0.1126,
"step": 430
},
{
"epoch": 3.81,
"eval_loss": 0.18066109716892242,
"eval_runtime": 1.8399,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.63,
"step": 430
},
{
"epoch": 3.85,
"learning_rate": 1.8407079646017702e-05,
"loss": 0.0881,
"step": 435
},
{
"epoch": 3.89,
"learning_rate": 1.769911504424779e-05,
"loss": 0.0969,
"step": 440
},
{
"epoch": 3.89,
"eval_loss": 0.1776157021522522,
"eval_runtime": 1.838,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 1.632,
"step": 440
},
{
"epoch": 3.94,
"learning_rate": 1.6991150442477876e-05,
"loss": 0.0559,
"step": 445
},
{
"epoch": 3.98,
"learning_rate": 1.628318584070797e-05,
"loss": 0.0644,
"step": 450
},
{
"epoch": 3.98,
"eval_loss": 0.1763620376586914,
"eval_runtime": 1.8403,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 450
},
{
"epoch": 4.03,
"learning_rate": 1.5575221238938054e-05,
"loss": 0.0818,
"step": 455
},
{
"epoch": 4.07,
"learning_rate": 1.4867256637168143e-05,
"loss": 0.049,
"step": 460
},
{
"epoch": 4.07,
"eval_loss": 0.1785365492105484,
"eval_runtime": 1.8386,
"eval_samples_per_second": 6.527,
"eval_steps_per_second": 1.632,
"step": 460
},
{
"epoch": 4.12,
"learning_rate": 1.4159292035398232e-05,
"loss": 0.0452,
"step": 465
},
{
"epoch": 4.16,
"learning_rate": 1.345132743362832e-05,
"loss": 0.0466,
"step": 470
},
{
"epoch": 4.16,
"eval_loss": 0.18220937252044678,
"eval_runtime": 1.8402,
"eval_samples_per_second": 6.521,
"eval_steps_per_second": 1.63,
"step": 470
},
{
"epoch": 4.2,
"learning_rate": 1.2743362831858408e-05,
"loss": 0.0407,
"step": 475
},
{
"epoch": 4.25,
"learning_rate": 1.2035398230088497e-05,
"loss": 0.0545,
"step": 480
},
{
"epoch": 4.25,
"eval_loss": 0.1870112270116806,
"eval_runtime": 1.8384,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 1.632,
"step": 480
},
{
"epoch": 4.29,
"learning_rate": 1.1327433628318584e-05,
"loss": 0.0489,
"step": 485
},
{
"epoch": 4.34,
"learning_rate": 1.0619469026548675e-05,
"loss": 0.0391,
"step": 490
},
{
"epoch": 4.34,
"eval_loss": 0.19078491628170013,
"eval_runtime": 1.8372,
"eval_samples_per_second": 6.532,
"eval_steps_per_second": 1.633,
"step": 490
},
{
"epoch": 4.38,
"learning_rate": 9.911504424778762e-06,
"loss": 0.0447,
"step": 495
},
{
"epoch": 4.42,
"learning_rate": 9.203539823008851e-06,
"loss": 0.0614,
"step": 500
},
{
"epoch": 4.42,
"eval_loss": 0.19175942242145538,
"eval_runtime": 1.8389,
"eval_samples_per_second": 6.526,
"eval_steps_per_second": 1.631,
"step": 500
},
{
"epoch": 4.47,
"learning_rate": 8.495575221238938e-06,
"loss": 0.055,
"step": 505
},
{
"epoch": 4.51,
"learning_rate": 7.787610619469027e-06,
"loss": 0.0597,
"step": 510
},
{
"epoch": 4.51,
"eval_loss": 0.18947459757328033,
"eval_runtime": 1.8278,
"eval_samples_per_second": 6.565,
"eval_steps_per_second": 1.641,
"step": 510
},
{
"epoch": 4.56,
"learning_rate": 7.079646017699116e-06,
"loss": 0.0498,
"step": 515
},
{
"epoch": 4.6,
"learning_rate": 6.371681415929204e-06,
"loss": 0.0461,
"step": 520
},
{
"epoch": 4.6,
"eval_loss": 0.18631692230701447,
"eval_runtime": 1.8309,
"eval_samples_per_second": 6.554,
"eval_steps_per_second": 1.639,
"step": 520
},
{
"epoch": 4.65,
"learning_rate": 5.663716814159292e-06,
"loss": 0.0432,
"step": 525
},
{
"epoch": 4.69,
"learning_rate": 4.955752212389381e-06,
"loss": 0.0456,
"step": 530
},
{
"epoch": 4.69,
"eval_loss": 0.1867295503616333,
"eval_runtime": 1.8305,
"eval_samples_per_second": 6.556,
"eval_steps_per_second": 1.639,
"step": 530
},
{
"epoch": 4.73,
"learning_rate": 4.247787610619469e-06,
"loss": 0.0455,
"step": 535
},
{
"epoch": 4.78,
"learning_rate": 3.539823008849558e-06,
"loss": 0.0438,
"step": 540
},
{
"epoch": 4.78,
"eval_loss": 0.186712846159935,
"eval_runtime": 1.8333,
"eval_samples_per_second": 6.545,
"eval_steps_per_second": 1.636,
"step": 540
},
{
"epoch": 4.82,
"learning_rate": 2.831858407079646e-06,
"loss": 0.0475,
"step": 545
},
{
"epoch": 4.87,
"learning_rate": 2.1238938053097345e-06,
"loss": 0.0394,
"step": 550
},
{
"epoch": 4.87,
"eval_loss": 0.1870775818824768,
"eval_runtime": 1.8346,
"eval_samples_per_second": 6.541,
"eval_steps_per_second": 1.635,
"step": 550
},
{
"epoch": 4.91,
"learning_rate": 1.415929203539823e-06,
"loss": 0.0486,
"step": 555
},
{
"epoch": 4.96,
"learning_rate": 7.079646017699115e-07,
"loss": 0.0454,
"step": 560
},
{
"epoch": 4.96,
"eval_loss": 0.18716545403003693,
"eval_runtime": 1.835,
"eval_samples_per_second": 6.539,
"eval_steps_per_second": 1.635,
"step": 560
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 0.0503,
"step": 565
},
{
"epoch": 5.0,
"step": 565,
"total_flos": 2459078098944000.0,
"train_loss": 0.24255070493812056,
"train_runtime": 469.174,
"train_samples_per_second": 1.204,
"train_steps_per_second": 1.204
}
],
"max_steps": 565,
"num_train_epochs": 5,
"total_flos": 2459078098944000.0,
"trial_name": null,
"trial_params": null
}